1681d9761SEric Taylor /*
2681d9761SEric Taylor  * CDDL HEADER START
3681d9761SEric Taylor  *
4681d9761SEric Taylor  * The contents of this file are subject to the terms of the
5681d9761SEric Taylor  * Common Development and Distribution License (the "License").
6681d9761SEric Taylor  * You may not use this file except in compliance with the License.
7681d9761SEric Taylor  *
8681d9761SEric Taylor  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9681d9761SEric Taylor  * or http://www.opensolaris.org/os/licensing.
10681d9761SEric Taylor  * See the License for the specific language governing permissions
11681d9761SEric Taylor  * and limitations under the License.
12681d9761SEric Taylor  *
13681d9761SEric Taylor  * When distributing Covered Code, include this CDDL HEADER in each
14681d9761SEric Taylor  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15681d9761SEric Taylor  * If applicable, add the following below this CDDL HEADER, with the
16681d9761SEric Taylor  * fields enclosed by brackets "[]" replaced with your own identifying
17681d9761SEric Taylor  * information: Portions Copyright [yyyy] [name of copyright owner]
18681d9761SEric Taylor  *
19681d9761SEric Taylor  * CDDL HEADER END
20681d9761SEric Taylor  */
21681d9761SEric Taylor /*
22681d9761SEric Taylor  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23681d9761SEric Taylor  * Use is subject to license terms.
2445b17475SAlex Wilson  * Copyright 2013, 2016 Joyent, Inc.  All rights reserved.
25470bc2d6SGeorge Wilson  * Copyright (c) 2014 by Delphix. All rights reserved.
26681d9761SEric Taylor  */
27681d9761SEric Taylor 
28681d9761SEric Taylor /* vnode ops for the /dev/zvol directory */
29681d9761SEric Taylor 
30681d9761SEric Taylor #include <sys/types.h>
31681d9761SEric Taylor #include <sys/param.h>
32681d9761SEric Taylor #include <sys/sysmacros.h>
33681d9761SEric Taylor #include <sys/ddi.h>
34681d9761SEric Taylor #include <sys/sunndi.h>
35681d9761SEric Taylor #include <sys/sunldi.h>
36681d9761SEric Taylor #include <fs/fs_subr.h>
37681d9761SEric Taylor #include <sys/fs/dv_node.h>
38681d9761SEric Taylor #include <sys/fs/sdev_impl.h>
39681d9761SEric Taylor #include <sys/zfs_ioctl.h>
40681d9761SEric Taylor #include <sys/policy.h>
41681d9761SEric Taylor #include <sys/stat.h>
42681d9761SEric Taylor #include <sys/vfs_opreg.h>
43681d9761SEric Taylor 
44681d9761SEric Taylor struct vnodeops	*devzvol_vnodeops;
450ad555adSAlex Wilson static major_t devzvol_major;
460ad555adSAlex Wilson static taskq_ent_t devzvol_zclist_task;
470ad555adSAlex Wilson 
480ad555adSAlex Wilson static kmutex_t devzvol_mtx;
490ad555adSAlex Wilson /* Below are protected by devzvol_mtx */
500ad555adSAlex Wilson static boolean_t devzvol_isopen;
510ad555adSAlex Wilson static boolean_t devzvol_zclist_task_running = B_FALSE;
52681d9761SEric Taylor static uint64_t devzvol_gen = 0;
53681d9761SEric Taylor static uint64_t devzvol_zclist;
54681d9761SEric Taylor static size_t devzvol_zclist_size;
55681d9761SEric Taylor static ldi_ident_t devzvol_li;
56681d9761SEric Taylor static ldi_handle_t devzvol_lh;
57681d9761SEric Taylor 
58681d9761SEric Taylor /*
59681d9761SEric Taylor  * we need to use ddi_mod* since fs/dev gets loaded early on in
60681d9761SEric Taylor  * startup(), and linking fs/dev to fs/zfs would drag in a lot of
61681d9761SEric Taylor  * other stuff (like drv/random) before the rest of the system is
62681d9761SEric Taylor  * ready to go
63681d9761SEric Taylor  */
64681d9761SEric Taylor ddi_modhandle_t zfs_mod;
65681d9761SEric Taylor int (*szcm)(char *);
66681d9761SEric Taylor int (*szn2m)(char *, minor_t *);
67681d9761SEric Taylor 
68470bc2d6SGeorge Wilson 
69470bc2d6SGeorge Wilson /*
70470bc2d6SGeorge Wilson  * Enable/disable snapshots from being created in /dev/zvol. By default,
71470bc2d6SGeorge Wilson  * they are enabled, preserving the historic behavior.
72470bc2d6SGeorge Wilson  */
73470bc2d6SGeorge Wilson boolean_t devzvol_snaps_allowed = B_TRUE;
74470bc2d6SGeorge Wilson 
75681d9761SEric Taylor int
sdev_zvol_create_minor(char * dsname)76681d9761SEric Taylor sdev_zvol_create_minor(char *dsname)
77681d9761SEric Taylor {
78dd9c3b29SJerry Jelinek 	if (szcm == NULL)
79dd9c3b29SJerry Jelinek 		return (-1);
80681d9761SEric Taylor 	return ((*szcm)(dsname));
81681d9761SEric Taylor }
82681d9761SEric Taylor 
83681d9761SEric Taylor int
sdev_zvol_name2minor(char * dsname,minor_t * minor)84681d9761SEric Taylor sdev_zvol_name2minor(char *dsname, minor_t *minor)
85681d9761SEric Taylor {
86dd9c3b29SJerry Jelinek 	if (szn2m == NULL)
87dd9c3b29SJerry Jelinek 		return (-1);
88681d9761SEric Taylor 	return ((*szn2m)(dsname, minor));
89681d9761SEric Taylor }
90681d9761SEric Taylor 
91681d9761SEric Taylor int
devzvol_open_zfs()92681d9761SEric Taylor devzvol_open_zfs()
93681d9761SEric Taylor {
94681d9761SEric Taylor 	int rc;
95dd9c3b29SJerry Jelinek 	dev_t dv;
96681d9761SEric Taylor 
97681d9761SEric Taylor 	devzvol_li = ldi_ident_from_anon();
98681d9761SEric Taylor 	if (ldi_open_by_name("/dev/zfs", FREAD | FWRITE, kcred,
99681d9761SEric Taylor 	    &devzvol_lh, devzvol_li))
100681d9761SEric Taylor 		return (-1);
101681d9761SEric Taylor 	if (zfs_mod == NULL && ((zfs_mod = ddi_modopen("fs/zfs",
102681d9761SEric Taylor 	    KRTLD_MODE_FIRST, &rc)) == NULL)) {
103681d9761SEric Taylor 		return (rc);
104681d9761SEric Taylor 	}
105681d9761SEric Taylor 	ASSERT(szcm == NULL && szn2m == NULL);
106681d9761SEric Taylor 	if ((szcm = (int (*)(char *))
107681d9761SEric Taylor 	    ddi_modsym(zfs_mod, "zvol_create_minor", &rc)) == NULL) {
108681d9761SEric Taylor 		cmn_err(CE_WARN, "couldn't resolve zvol_create_minor");
109681d9761SEric Taylor 		return (rc);
110681d9761SEric Taylor 	}
111681d9761SEric Taylor 	if ((szn2m = (int(*)(char *, minor_t *))
112681d9761SEric Taylor 	    ddi_modsym(zfs_mod, "zvol_name2minor", &rc)) == NULL) {
113681d9761SEric Taylor 		cmn_err(CE_WARN, "couldn't resolve zvol_name2minor");
114681d9761SEric Taylor 		return (rc);
115681d9761SEric Taylor 	}
116dd9c3b29SJerry Jelinek 	if (ldi_get_dev(devzvol_lh, &dv))
117dd9c3b29SJerry Jelinek 		return (-1);
118dd9c3b29SJerry Jelinek 	devzvol_major = getmajor(dv);
119681d9761SEric Taylor 	return (0);
120681d9761SEric Taylor }
121681d9761SEric Taylor 
122681d9761SEric Taylor void
devzvol_close_zfs()123681d9761SEric Taylor devzvol_close_zfs()
124681d9761SEric Taylor {
125681d9761SEric Taylor 	szcm = NULL;
126681d9761SEric Taylor 	szn2m = NULL;
127681d9761SEric Taylor 	(void) ldi_close(devzvol_lh, FREAD|FWRITE, kcred);
128681d9761SEric Taylor 	ldi_ident_release(devzvol_li);
129681d9761SEric Taylor 	if (zfs_mod != NULL) {
130681d9761SEric Taylor 		(void) ddi_modclose(zfs_mod);
131681d9761SEric Taylor 		zfs_mod = NULL;
132681d9761SEric Taylor 	}
133681d9761SEric Taylor }
134681d9761SEric Taylor 
135681d9761SEric Taylor int
devzvol_handle_ioctl(int cmd,zfs_cmd_t * zc,size_t * alloc_size)136681d9761SEric Taylor devzvol_handle_ioctl(int cmd, zfs_cmd_t *zc, size_t *alloc_size)
137681d9761SEric Taylor {
138681d9761SEric Taylor 	uint64_t cookie;
139681d9761SEric Taylor 	int size = 8000;
140681d9761SEric Taylor 	int unused;
141681d9761SEric Taylor 	int rc;
142681d9761SEric Taylor 
143681d9761SEric Taylor 	if (cmd != ZFS_IOC_POOL_CONFIGS)
144681d9761SEric Taylor 		mutex_enter(&devzvol_mtx);
145ff060bd8SEric Taylor 	if (!devzvol_isopen) {
146681d9761SEric Taylor 		if ((rc = devzvol_open_zfs()) == 0) {
147ff060bd8SEric Taylor 			devzvol_isopen = B_TRUE;
148681d9761SEric Taylor 		} else {
149681d9761SEric Taylor 			if (cmd != ZFS_IOC_POOL_CONFIGS)
150681d9761SEric Taylor 				mutex_exit(&devzvol_mtx);
151681d9761SEric Taylor 			return (ENXIO);
152681d9761SEric Taylor 		}
153681d9761SEric Taylor 	}
154681d9761SEric Taylor 	cookie = zc->zc_cookie;
155681d9761SEric Taylor again:
156681d9761SEric Taylor 	zc->zc_nvlist_dst = (uint64_t)(intptr_t)kmem_alloc(size,
157681d9761SEric Taylor 	    KM_SLEEP);
158681d9761SEric Taylor 	zc->zc_nvlist_dst_size = size;
159681d9761SEric Taylor 	rc = ldi_ioctl(devzvol_lh, cmd, (intptr_t)zc, FKIOCTL, kcred,
160681d9761SEric Taylor 	    &unused);
161681d9761SEric Taylor 	if (rc == ENOMEM) {
162681d9761SEric Taylor 		int newsize;
163681d9761SEric Taylor 		newsize = zc->zc_nvlist_dst_size;
164681d9761SEric Taylor 		ASSERT(newsize > size);
165681d9761SEric Taylor 		kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
166681d9761SEric Taylor 		size = newsize;
167681d9761SEric Taylor 		zc->zc_cookie = cookie;
168681d9761SEric Taylor 		goto again;
169681d9761SEric Taylor 	}
170681d9761SEric Taylor 	if (alloc_size == NULL)
171681d9761SEric Taylor 		kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst, size);
172681d9761SEric Taylor 	else
173681d9761SEric Taylor 		*alloc_size = size;
174681d9761SEric Taylor 	if (cmd != ZFS_IOC_POOL_CONFIGS)
175681d9761SEric Taylor 		mutex_exit(&devzvol_mtx);
176681d9761SEric Taylor 	return (rc);
177681d9761SEric Taylor }
178681d9761SEric Taylor 
179681d9761SEric Taylor /* figures out if the objset exists and returns its type */
180681d9761SEric Taylor int
devzvol_objset_check(char * dsname,dmu_objset_type_t * type)181681d9761SEric Taylor devzvol_objset_check(char *dsname, dmu_objset_type_t *type)
182681d9761SEric Taylor {
183470bc2d6SGeorge Wilson 	boolean_t	ispool, is_snapshot;
184681d9761SEric Taylor 	zfs_cmd_t	*zc;
185681d9761SEric Taylor 	int rc;
186*814e7298SToomas Soome 	nvlist_t	*nvl;
18745b17475SAlex Wilson 	size_t nvsz;
188681d9761SEric Taylor 
189470bc2d6SGeorge Wilson 	ispool = (strchr(dsname, '/') == NULL);
190470bc2d6SGeorge Wilson 	is_snapshot = (strchr(dsname, '@') != NULL);
191470bc2d6SGeorge Wilson 
192470bc2d6SGeorge Wilson 	if (is_snapshot && !devzvol_snaps_allowed)
193470bc2d6SGeorge Wilson 		return (ENOTSUP);
194470bc2d6SGeorge Wilson 
195681d9761SEric Taylor 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
196681d9761SEric Taylor 	(void) strlcpy(zc->zc_name, dsname, MAXPATHLEN);
197681d9761SEric Taylor 
19845b17475SAlex Wilson 	nvl = fnvlist_alloc();
19945b17475SAlex Wilson 	fnvlist_add_boolean_value(nvl, "cachedpropsonly", B_TRUE);
20045b17475SAlex Wilson 	zc->zc_nvlist_src = (uintptr_t)fnvlist_pack(nvl, &nvsz);
20145b17475SAlex Wilson 	zc->zc_nvlist_src_size = nvsz;
20245b17475SAlex Wilson 	fnvlist_free(nvl);
20345b17475SAlex Wilson 
204681d9761SEric Taylor 	rc = devzvol_handle_ioctl(ispool ? ZFS_IOC_POOL_STATS :
205681d9761SEric Taylor 	    ZFS_IOC_OBJSET_STATS, zc, NULL);
206681d9761SEric Taylor 	if (type && rc == 0)
207681d9761SEric Taylor 		*type = (ispool) ? DMU_OST_ZFS :
208681d9761SEric Taylor 		    zc->zc_objset_stats.dds_type;
20945b17475SAlex Wilson 	fnvlist_pack_free((char *)(uintptr_t)zc->zc_nvlist_src, nvsz);
210681d9761SEric Taylor 	kmem_free(zc, sizeof (zfs_cmd_t));
211681d9761SEric Taylor 	return (rc);
212681d9761SEric Taylor }
213681d9761SEric Taylor 
214681d9761SEric Taylor /*
215e3c6427aSAlex Wilson  * Returns what the zfs dataset name should be, given the /dev/zvol
216e3c6427aSAlex Wilson  * path and an optional name (can be NULL).
217e3c6427aSAlex Wilson  *
218e3c6427aSAlex Wilson  * Note that if the name param is NULL, then path must be an
219e3c6427aSAlex Wilson  * actual dataset's directory and not one of the top-level
220e3c6427aSAlex Wilson  * /dev/zvol/{dsk,rdsk} dirs, as these do not correspond to a
221e3c6427aSAlex Wilson  * specific dataset.
222681d9761SEric Taylor  */
223681d9761SEric Taylor char *
devzvol_make_dsname(const char * path,const char * name)224681d9761SEric Taylor devzvol_make_dsname(const char *path, const char *name)
225681d9761SEric Taylor {
226681d9761SEric Taylor 	char *dsname;
227681d9761SEric Taylor 	const char *ptr;
228681d9761SEric Taylor 	int dslen;
229681d9761SEric Taylor 
230681d9761SEric Taylor 	if (strcmp(path, ZVOL_DIR) == 0)
231681d9761SEric Taylor 		return (NULL);
232681d9761SEric Taylor 	if (name && (strcmp(name, ".") == 0 || strcmp(name, "..") == 0))
233681d9761SEric Taylor 		return (NULL);
234681d9761SEric Taylor 	ptr = path + strlen(ZVOL_DIR);
235681d9761SEric Taylor 	if (strncmp(ptr, "/dsk", 4) == 0)
236681d9761SEric Taylor 		ptr += strlen("/dsk");
237681d9761SEric Taylor 	else if (strncmp(ptr, "/rdsk", 5) == 0)
238681d9761SEric Taylor 		ptr += strlen("/rdsk");
239681d9761SEric Taylor 	else
240681d9761SEric Taylor 		return (NULL);
241e3c6427aSAlex Wilson 
242681d9761SEric Taylor 	if (*ptr == '/')
243681d9761SEric Taylor 		ptr++;
244e3c6427aSAlex Wilson 	else if (name == NULL)
245e3c6427aSAlex Wilson 		return (NULL);
246681d9761SEric Taylor 
247681d9761SEric Taylor 	dslen = strlen(ptr);
248681d9761SEric Taylor 	if (dslen)
249681d9761SEric Taylor 		dslen++;			/* plus null */
250681d9761SEric Taylor 	if (name)
251681d9761SEric Taylor 		dslen += strlen(name) + 1;	/* plus slash */
252681d9761SEric Taylor 	dsname = kmem_zalloc(dslen, KM_SLEEP);
253681d9761SEric Taylor 	if (*ptr) {
254681d9761SEric Taylor 		(void) strlcpy(dsname, ptr, dslen);
255681d9761SEric Taylor 		if (name)
256681d9761SEric Taylor 			(void) strlcat(dsname, "/", dslen);
257681d9761SEric Taylor 	}
258681d9761SEric Taylor 	if (name)
259681d9761SEric Taylor 		(void) strlcat(dsname, name, dslen);
260681d9761SEric Taylor 	return (dsname);
261681d9761SEric Taylor }
262681d9761SEric Taylor 
263681d9761SEric Taylor /*
264681d9761SEric Taylor  * check if the zvol's sdev_node is still valid, which means make
265681d9761SEric Taylor  * sure the zvol is still valid.  zvol minors aren't proactively
266681d9761SEric Taylor  * destroyed when the zvol is destroyed, so we use a validator to clean
267681d9761SEric Taylor  * these up (in other words, when such nodes are encountered during
268681d9761SEric Taylor  * subsequent lookup() and readdir() operations) so that only valid
269681d9761SEric Taylor  * nodes are returned.  The ordering between devname_lookup_func and
270681d9761SEric Taylor  * devzvol_validate is a little inefficient in the case of invalid
271681d9761SEric Taylor  * or stale nodes because devname_lookup_func calls
272681d9761SEric Taylor  * devzvol_create_{dir, link}, then the validator says it's invalid,
273681d9761SEric Taylor  * and then the node gets cleaned up.
274681d9761SEric Taylor  */
275681d9761SEric Taylor int
devzvol_validate(struct sdev_node * dv)276681d9761SEric Taylor devzvol_validate(struct sdev_node *dv)
277681d9761SEric Taylor {
27845b17475SAlex Wilson 	vnode_t *vn = SDEVTOV(dv);
279681d9761SEric Taylor 	dmu_objset_type_t do_type;
280681d9761SEric Taylor 	char *dsname;
281681d9761SEric Taylor 	char *nm = dv->sdev_name;
282681d9761SEric Taylor 	int rc;
283681d9761SEric Taylor 
284681d9761SEric Taylor 	sdcmn_err13(("validating ('%s' '%s')", dv->sdev_path, nm));
285681d9761SEric Taylor 	/*
286681d9761SEric Taylor 	 * validate only READY nodes; if someone is sitting on the
287681d9761SEric Taylor 	 * directory of a dataset that just got destroyed we could
288681d9761SEric Taylor 	 * get a zombie node which we just skip.
289681d9761SEric Taylor 	 */
290681d9761SEric Taylor 	if (dv->sdev_state != SDEV_READY) {
291681d9761SEric Taylor 		sdcmn_err13(("skipping '%s'", nm));
292681d9761SEric Taylor 		return (SDEV_VTOR_SKIP);
293681d9761SEric Taylor 	}
294681d9761SEric Taylor 
295681d9761SEric Taylor 	if ((strcmp(dv->sdev_path, ZVOL_DIR "/dsk") == 0) ||
296681d9761SEric Taylor 	    (strcmp(dv->sdev_path, ZVOL_DIR "/rdsk") == 0))
297681d9761SEric Taylor 		return (SDEV_VTOR_VALID);
298681d9761SEric Taylor 	dsname = devzvol_make_dsname(dv->sdev_path, NULL);
299681d9761SEric Taylor 	if (dsname == NULL)
300681d9761SEric Taylor 		return (SDEV_VTOR_INVALID);
301681d9761SEric Taylor 
30245b17475SAlex Wilson 	/*
30345b17475SAlex Wilson 	 * Leave any nodes alone that have been explicitly created by
30445b17475SAlex Wilson 	 * sdev profiles.
30545b17475SAlex Wilson 	 */
30645b17475SAlex Wilson 	if (!(dv->sdev_flags & SDEV_GLOBAL) && dv->sdev_origin != NULL) {
30745b17475SAlex Wilson 		kmem_free(dsname, strlen(dsname) + 1);
30845b17475SAlex Wilson 		return (SDEV_VTOR_VALID);
30945b17475SAlex Wilson 	}
31045b17475SAlex Wilson 
311681d9761SEric Taylor 	rc = devzvol_objset_check(dsname, &do_type);
312681d9761SEric Taylor 	sdcmn_err13(("  '%s' rc %d", dsname, rc));
313681d9761SEric Taylor 	if (rc != 0) {
31445b17475SAlex Wilson 		sdev_node_t *parent = dv->sdev_dotdot;
31545b17475SAlex Wilson 		/*
31645b17475SAlex Wilson 		 * Explicitly passed-through zvols in our sdev profile can't
31745b17475SAlex Wilson 		 * be created as prof_* shadow nodes, because in the GZ they
31845b17475SAlex Wilson 		 * are symlinks, but in the NGZ they are actual device files.
31945b17475SAlex Wilson 		 *
32045b17475SAlex Wilson 		 * The objset_check will fail on these as they are outside
32145b17475SAlex Wilson 		 * any delegated dataset (zfs will not allow ioctl access to
32245b17475SAlex Wilson 		 * them from this zone). We still want them to work, though.
32345b17475SAlex Wilson 		 */
32445b17475SAlex Wilson 		if (!(parent->sdev_flags & SDEV_GLOBAL) &&
32545b17475SAlex Wilson 		    parent->sdev_origin != NULL &&
32645b17475SAlex Wilson 		    !(dv->sdev_flags & SDEV_GLOBAL) &&
32745b17475SAlex Wilson 		    (vn->v_type == VBLK || vn->v_type == VCHR) &&
32845b17475SAlex Wilson 		    prof_name_matched(nm, parent)) {
32945b17475SAlex Wilson 			do_type = DMU_OST_ZVOL;
33045b17475SAlex Wilson 		} else {
33145b17475SAlex Wilson 			kmem_free(dsname, strlen(dsname) + 1);
33245b17475SAlex Wilson 			return (SDEV_VTOR_INVALID);
33345b17475SAlex Wilson 		}
334681d9761SEric Taylor 	}
33545b17475SAlex Wilson 
336681d9761SEric Taylor 	sdcmn_err13(("  v_type %d do_type %d",
33745b17475SAlex Wilson 	    vn->v_type, do_type));
33845b17475SAlex Wilson 	if ((vn->v_type == VLNK && do_type != DMU_OST_ZVOL) ||
33945b17475SAlex Wilson 	    ((vn->v_type == VBLK || vn->v_type == VCHR) &&
340dd9c3b29SJerry Jelinek 	    do_type != DMU_OST_ZVOL) ||
34145b17475SAlex Wilson 	    (vn->v_type == VDIR && do_type == DMU_OST_ZVOL)) {
342681d9761SEric Taylor 		kmem_free(dsname, strlen(dsname) + 1);
343681d9761SEric Taylor 		return (SDEV_VTOR_STALE);
344681d9761SEric Taylor 	}
34545b17475SAlex Wilson 	if (vn->v_type == VLNK) {
346681d9761SEric Taylor 		char *ptr, *link;
347681d9761SEric Taylor 		long val = 0;
348681d9761SEric Taylor 		minor_t lminor, ominor;
349681d9761SEric Taylor 
35045b17475SAlex Wilson 		rc = sdev_getlink(vn, &link);
351681d9761SEric Taylor 		ASSERT(rc == 0);
352681d9761SEric Taylor 
353681d9761SEric Taylor 		ptr = strrchr(link, ':') + 1;
354681d9761SEric Taylor 		rc = ddi_strtol(ptr, NULL, 10, &val);
355681d9761SEric Taylor 		kmem_free(link, strlen(link) + 1);
356681d9761SEric Taylor 		ASSERT(rc == 0 && val != 0);
357681d9761SEric Taylor 		lminor = (minor_t)val;
358681d9761SEric Taylor 		if (sdev_zvol_name2minor(dsname, &ominor) < 0 ||
359681d9761SEric Taylor 		    ominor != lminor) {
360681d9761SEric Taylor 			kmem_free(dsname, strlen(dsname) + 1);
361681d9761SEric Taylor 			return (SDEV_VTOR_STALE);
362681d9761SEric Taylor 		}
363681d9761SEric Taylor 	}
364681d9761SEric Taylor 	kmem_free(dsname, strlen(dsname) + 1);
365681d9761SEric Taylor 	return (SDEV_VTOR_VALID);
366681d9761SEric Taylor }
367681d9761SEric Taylor 
368681d9761SEric Taylor /*
3690ad555adSAlex Wilson  * Taskq callback to update the devzvol_zclist.
3700ad555adSAlex Wilson  *
3710ad555adSAlex Wilson  * We need to defer this to the taskq to avoid it running with a user
3720ad555adSAlex Wilson  * context that might be associated with some non-global zone, and thus
3730ad555adSAlex Wilson  * not being able to list all of the pools on the entire system.
374681d9761SEric Taylor  */
3750ad555adSAlex Wilson /*ARGSUSED*/
3760ad555adSAlex Wilson static void
devzvol_update_zclist_cb(void * arg)3770ad555adSAlex Wilson devzvol_update_zclist_cb(void *arg)
378681d9761SEric Taylor {
379681d9761SEric Taylor 	zfs_cmd_t	*zc;
3800ad555adSAlex Wilson 	int		rc;
3810ad555adSAlex Wilson 	size_t		size;
382681d9761SEric Taylor 
383681d9761SEric Taylor 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
384681d9761SEric Taylor 	mutex_enter(&devzvol_mtx);
385681d9761SEric Taylor 	zc->zc_cookie = devzvol_gen;
386681d9761SEric Taylor 
387681d9761SEric Taylor 	rc = devzvol_handle_ioctl(ZFS_IOC_POOL_CONFIGS, zc, &size);
388681d9761SEric Taylor 	switch (rc) {
389681d9761SEric Taylor 		case 0:
390681d9761SEric Taylor 			/* new generation */
391681d9761SEric Taylor 			ASSERT(devzvol_gen != zc->zc_cookie);
392681d9761SEric Taylor 			devzvol_gen = zc->zc_cookie;
393681d9761SEric Taylor 			if (devzvol_zclist)
394681d9761SEric Taylor 				kmem_free((void *)(uintptr_t)devzvol_zclist,
395681d9761SEric Taylor 				    devzvol_zclist_size);
396681d9761SEric Taylor 			devzvol_zclist = zc->zc_nvlist_dst;
3970ad555adSAlex Wilson 			/* Keep the alloc'd size, not the nvlist size. */
398681d9761SEric Taylor 			devzvol_zclist_size = size;
399681d9761SEric Taylor 			break;
4000ad555adSAlex Wilson 		default:
401681d9761SEric Taylor 			/*
4020ad555adSAlex Wilson 			 * Either there was no change in pool configuration
4030ad555adSAlex Wilson 			 * since we last asked (rc == EEXIST) or we got a
4040ad555adSAlex Wilson 			 * catastrophic error.
4050ad555adSAlex Wilson 			 *
4060ad555adSAlex Wilson 			 * Give up memory and exit.
407681d9761SEric Taylor 			 */
408681d9761SEric Taylor 			kmem_free((void *)(uintptr_t)zc->zc_nvlist_dst,
409681d9761SEric Taylor 			    size);
410681d9761SEric Taylor 			break;
411681d9761SEric Taylor 	}
4120ad555adSAlex Wilson 
4130ad555adSAlex Wilson 	VERIFY(devzvol_zclist_task_running == B_TRUE);
4140ad555adSAlex Wilson 	devzvol_zclist_task_running = B_FALSE;
4150ad555adSAlex Wilson 	mutex_exit(&devzvol_mtx);
4160ad555adSAlex Wilson 
4170ad555adSAlex Wilson 	kmem_free(zc, sizeof (zfs_cmd_t));
4180ad555adSAlex Wilson }
4190ad555adSAlex Wilson 
4200ad555adSAlex Wilson static void
devzvol_update_zclist(void)4210ad555adSAlex Wilson devzvol_update_zclist(void)
4220ad555adSAlex Wilson {
4230ad555adSAlex Wilson 	mutex_enter(&devzvol_mtx);
4240ad555adSAlex Wilson 	if (devzvol_zclist_task_running == B_TRUE) {
4250ad555adSAlex Wilson 		mutex_exit(&devzvol_mtx);
4260ad555adSAlex Wilson 		goto wait;
4270ad555adSAlex Wilson 	}
4280ad555adSAlex Wilson 
4290ad555adSAlex Wilson 	devzvol_zclist_task_running = B_TRUE;
4300ad555adSAlex Wilson 
4310ad555adSAlex Wilson 	taskq_dispatch_ent(sdev_taskq, devzvol_update_zclist_cb, NULL, 0,
4320ad555adSAlex Wilson 	    &devzvol_zclist_task);
4330ad555adSAlex Wilson 
4340ad555adSAlex Wilson 	mutex_exit(&devzvol_mtx);
4350ad555adSAlex Wilson 
4360ad555adSAlex Wilson wait:
4370ad555adSAlex Wilson 	taskq_wait(sdev_taskq);
4380ad555adSAlex Wilson }
4390ad555adSAlex Wilson 
4400ad555adSAlex Wilson /*
4410ad555adSAlex Wilson  * Creates sub-directories for each zpool as needed in response to a
4420ad555adSAlex Wilson  * readdir on one of the /dev/zvol/{dsk,rdsk} directories.
4430ad555adSAlex Wilson  */
4440ad555adSAlex Wilson void
devzvol_create_pool_dirs(struct vnode * dvp)4450ad555adSAlex Wilson devzvol_create_pool_dirs(struct vnode *dvp)
4460ad555adSAlex Wilson {
4470ad555adSAlex Wilson 	nvlist_t *nv = NULL;
4480ad555adSAlex Wilson 	nvpair_t *elem = NULL;
4490ad555adSAlex Wilson 	int pools = 0;
4500ad555adSAlex Wilson 	int rc;
4510ad555adSAlex Wilson 
4520ad555adSAlex Wilson 	sdcmn_err13(("devzvol_create_pool_dirs"));
4530ad555adSAlex Wilson 
4540ad555adSAlex Wilson 	devzvol_update_zclist();
4550ad555adSAlex Wilson 
4560ad555adSAlex Wilson 	mutex_enter(&devzvol_mtx);
4570ad555adSAlex Wilson 
458681d9761SEric Taylor 	rc = nvlist_unpack((char *)(uintptr_t)devzvol_zclist,
459681d9761SEric Taylor 	    devzvol_zclist_size, &nv, 0);
460681d9761SEric Taylor 	if (rc) {
461681d9761SEric Taylor 		ASSERT(rc == 0);
462681d9761SEric Taylor 		kmem_free((void *)(uintptr_t)devzvol_zclist,
463681d9761SEric Taylor 		    devzvol_zclist_size);
464681d9761SEric Taylor 		devzvol_gen = 0;
465*814e7298SToomas Soome 		devzvol_zclist = 0;
466681d9761SEric Taylor 		devzvol_zclist_size = 0;
467681d9761SEric Taylor 		goto out;
468681d9761SEric Taylor 	}
469681d9761SEric Taylor 	mutex_exit(&devzvol_mtx);
470681d9761SEric Taylor 	while ((elem = nvlist_next_nvpair(nv, elem)) != NULL) {
471681d9761SEric Taylor 		struct vnode *vp;
472681d9761SEric Taylor 		ASSERT(dvp->v_count > 0);
473681d9761SEric Taylor 		rc = VOP_LOOKUP(dvp, nvpair_name(elem), &vp, NULL, 0,
474681d9761SEric Taylor 		    NULL, kcred, NULL, 0, NULL);
475681d9761SEric Taylor 		/* should either work, or not be visible from a zone */
476681d9761SEric Taylor 		ASSERT(rc == 0 || rc == ENOENT);
477681d9761SEric Taylor 		if (rc == 0)
478681d9761SEric Taylor 			VN_RELE(vp);
479681d9761SEric Taylor 		pools++;
480681d9761SEric Taylor 	}
481681d9761SEric Taylor 	nvlist_free(nv);
482681d9761SEric Taylor 	mutex_enter(&devzvol_mtx);
483ff060bd8SEric Taylor 	if (devzvol_isopen && pools == 0) {
484681d9761SEric Taylor 		/* clean up so zfs can be unloaded */
485681d9761SEric Taylor 		devzvol_close_zfs();
486ff060bd8SEric Taylor 		devzvol_isopen = B_FALSE;
487681d9761SEric Taylor 	}
488681d9761SEric Taylor out:
489681d9761SEric Taylor 	mutex_exit(&devzvol_mtx);
490681d9761SEric Taylor }
491681d9761SEric Taylor 
492681d9761SEric Taylor /*ARGSUSED3*/
493681d9761SEric Taylor static int
devzvol_create_dir(struct sdev_node * ddv,char * nm,void ** arg,cred_t * cred,void * whatever,char * whichever)494681d9761SEric Taylor devzvol_create_dir(struct sdev_node *ddv, char *nm, void **arg,
495681d9761SEric Taylor     cred_t *cred, void *whatever, char *whichever)
496681d9761SEric Taylor {
497681d9761SEric Taylor 	timestruc_t now;
498681d9761SEric Taylor 	struct vattr *vap = (struct vattr *)arg;
499681d9761SEric Taylor 
500681d9761SEric Taylor 	sdcmn_err13(("create_dir (%s) (%s) '%s'", ddv->sdev_name,
501681d9761SEric Taylor 	    ddv->sdev_path, nm));
502681d9761SEric Taylor 	ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR,
503681d9761SEric Taylor 	    strlen(ZVOL_DIR)) == 0);
504681d9761SEric Taylor 	*vap = *sdev_getdefault_attr(VDIR);
505681d9761SEric Taylor 	gethrestime(&now);
506681d9761SEric Taylor 	vap->va_atime = now;
507681d9761SEric Taylor 	vap->va_mtime = now;
508681d9761SEric Taylor 	vap->va_ctime = now;
509681d9761SEric Taylor 	return (0);
510681d9761SEric Taylor }
511681d9761SEric Taylor 
512681d9761SEric Taylor /*ARGSUSED3*/
513681d9761SEric Taylor static int
devzvol_create_link(struct sdev_node * ddv,char * nm,void ** arg,cred_t * cred,void * whatever,char * whichever)514681d9761SEric Taylor devzvol_create_link(struct sdev_node *ddv, char *nm,
515681d9761SEric Taylor     void **arg, cred_t *cred, void *whatever, char *whichever)
516681d9761SEric Taylor {
517681d9761SEric Taylor 	minor_t minor;
518681d9761SEric Taylor 	char *pathname = (char *)*arg;
519681d9761SEric Taylor 	int rc;
520681d9761SEric Taylor 	char *dsname;
521681d9761SEric Taylor 	char *x;
522681d9761SEric Taylor 	char str[MAXNAMELEN];
523681d9761SEric Taylor 	sdcmn_err13(("create_link (%s) (%s) '%s'", ddv->sdev_name,
524681d9761SEric Taylor 	    ddv->sdev_path, nm));
525681d9761SEric Taylor 	dsname = devzvol_make_dsname(ddv->sdev_path, nm);
526681d9761SEric Taylor 	rc = sdev_zvol_create_minor(dsname);
527681d9761SEric Taylor 	if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
528681d9761SEric Taylor 	    sdev_zvol_name2minor(dsname, &minor)) {
529681d9761SEric Taylor 		sdcmn_err13(("devzvol_create_link %d", rc));
530681d9761SEric Taylor 		kmem_free(dsname, strlen(dsname) + 1);
531681d9761SEric Taylor 		return (-1);
532681d9761SEric Taylor 	}
533681d9761SEric Taylor 	kmem_free(dsname, strlen(dsname) + 1);
534681d9761SEric Taylor 
535681d9761SEric Taylor 	/*
536681d9761SEric Taylor 	 * This is a valid zvol; create a symlink that points to the
537681d9761SEric Taylor 	 * minor which was created under /devices/pseudo/zfs@0
538681d9761SEric Taylor 	 */
539681d9761SEric Taylor 	*pathname = '\0';
540681d9761SEric Taylor 	for (x = ddv->sdev_path; x = strchr(x, '/'); x++)
541681d9761SEric Taylor 		(void) strcat(pathname, "../");
542681d9761SEric Taylor 	(void) snprintf(str, sizeof (str), ZVOL_PSEUDO_DEV "%u", minor);
543681d9761SEric Taylor 	(void) strncat(pathname, str, MAXPATHLEN);
544681d9761SEric Taylor 	if (strncmp(ddv->sdev_path, ZVOL_FULL_RDEV_DIR,
545681d9761SEric Taylor 	    strlen(ZVOL_FULL_RDEV_DIR)) == 0)
546681d9761SEric Taylor 		(void) strcat(pathname, ",raw");
547681d9761SEric Taylor 	return (0);
548681d9761SEric Taylor }
549681d9761SEric Taylor 
550681d9761SEric Taylor /* Clean zvol sdev_nodes that are no longer valid.  */
551681d9761SEric Taylor static void
devzvol_prunedir(struct sdev_node * ddv)552681d9761SEric Taylor devzvol_prunedir(struct sdev_node *ddv)
553681d9761SEric Taylor {
554681d9761SEric Taylor 	struct sdev_node *dv;
555681d9761SEric Taylor 
556681d9761SEric Taylor 	ASSERT(RW_READ_HELD(&ddv->sdev_contents));
557681d9761SEric Taylor 
558681d9761SEric Taylor 	sdcmn_err13(("prunedir '%s'", ddv->sdev_name));
559681d9761SEric Taylor 	ASSERT(strncmp(ddv->sdev_path, ZVOL_DIR, strlen(ZVOL_DIR)) == 0);
560681d9761SEric Taylor 	if (rw_tryupgrade(&ddv->sdev_contents) == 0) {
561681d9761SEric Taylor 		rw_exit(&ddv->sdev_contents);
562681d9761SEric Taylor 		rw_enter(&ddv->sdev_contents, RW_WRITER);
563681d9761SEric Taylor 	}
564681d9761SEric Taylor 
565681d9761SEric Taylor 	dv = SDEV_FIRST_ENTRY(ddv);
566681d9761SEric Taylor 	while (dv) {
567681d9761SEric Taylor 		sdcmn_err13(("sdev_name '%s'", dv->sdev_name));
568681d9761SEric Taylor 
569681d9761SEric Taylor 		switch (devzvol_validate(dv)) {
570681d9761SEric Taylor 		case SDEV_VTOR_VALID:
571681d9761SEric Taylor 		case SDEV_VTOR_SKIP:
572681d9761SEric Taylor 			dv = SDEV_NEXT_ENTRY(ddv, dv);
573681d9761SEric Taylor 			continue;
574681d9761SEric Taylor 		case SDEV_VTOR_INVALID:
575681d9761SEric Taylor 			sdcmn_err7(("prunedir: destroy invalid "
576681d9761SEric Taylor 			    "node: %s\n", dv->sdev_name));
577681d9761SEric Taylor 			break;
578681d9761SEric Taylor 		}
579681d9761SEric Taylor 
580681d9761SEric Taylor 		if ((SDEVTOV(dv)->v_type == VDIR) &&
581681d9761SEric Taylor 		    (sdev_cleandir(dv, NULL, 0) != 0)) {
582681d9761SEric Taylor 			dv = SDEV_NEXT_ENTRY(ddv, dv);
583681d9761SEric Taylor 			continue;
584681d9761SEric Taylor 		}
585681d9761SEric Taylor 		SDEV_HOLD(dv);
586681d9761SEric Taylor 		/* remove the cache node */
5879e5aa9d8SRobert Mustacchi 		sdev_cache_update(ddv, &dv, dv->sdev_name,
5889e5aa9d8SRobert Mustacchi 		    SDEV_CACHE_DELETE);
5899e5aa9d8SRobert Mustacchi 		SDEV_RELE(dv);
5909e5aa9d8SRobert Mustacchi 		dv = SDEV_FIRST_ENTRY(ddv);
591681d9761SEric Taylor 	}
592681d9761SEric Taylor 	rw_downgrade(&ddv->sdev_contents);
593681d9761SEric Taylor }
594681d9761SEric Taylor 
595dd9c3b29SJerry Jelinek /*
596dd9c3b29SJerry Jelinek  * This function is used to create a dir or dev inside a zone's /dev when the
597dd9c3b29SJerry Jelinek  * zone has a zvol that is dynamically created within the zone (i.e. inside
598dd9c3b29SJerry Jelinek  * of a delegated dataset.  Since there is no /devices tree within a zone,
599dd9c3b29SJerry Jelinek  * we create the chr/blk devices directly inside the zone's /dev instead of
600dd9c3b29SJerry Jelinek  * making symlinks.
601dd9c3b29SJerry Jelinek  */
602dd9c3b29SJerry Jelinek static int
devzvol_mk_ngz_node(struct sdev_node * parent,char * nm)603dd9c3b29SJerry Jelinek devzvol_mk_ngz_node(struct sdev_node *parent, char *nm)
604dd9c3b29SJerry Jelinek {
605dd9c3b29SJerry Jelinek 	struct vattr vattr;
606dd9c3b29SJerry Jelinek 	timestruc_t now;
607dd9c3b29SJerry Jelinek 	enum vtype expected_type = VDIR;
608dd9c3b29SJerry Jelinek 	dmu_objset_type_t do_type;
609dd9c3b29SJerry Jelinek 	struct sdev_node *dv = NULL;
610dd9c3b29SJerry Jelinek 	int res;
611dd9c3b29SJerry Jelinek 	char *dsname;
612dd9c3b29SJerry Jelinek 
613dd9c3b29SJerry Jelinek 	bzero(&vattr, sizeof (vattr));
614dd9c3b29SJerry Jelinek 	gethrestime(&now);
615dd9c3b29SJerry Jelinek 	vattr.va_mask = AT_TYPE|AT_MODE|AT_UID|AT_GID;
616dd9c3b29SJerry Jelinek 	vattr.va_uid = SDEV_UID_DEFAULT;
617dd9c3b29SJerry Jelinek 	vattr.va_gid = SDEV_GID_DEFAULT;
618dd9c3b29SJerry Jelinek 	vattr.va_type = VNON;
619dd9c3b29SJerry Jelinek 	vattr.va_atime = now;
620dd9c3b29SJerry Jelinek 	vattr.va_mtime = now;
621dd9c3b29SJerry Jelinek 	vattr.va_ctime = now;
622dd9c3b29SJerry Jelinek 
623dd9c3b29SJerry Jelinek 	if ((dsname = devzvol_make_dsname(parent->sdev_path, nm)) == NULL)
624dd9c3b29SJerry Jelinek 		return (ENOENT);
625dd9c3b29SJerry Jelinek 
626dd9c3b29SJerry Jelinek 	if (devzvol_objset_check(dsname, &do_type) != 0) {
62745b17475SAlex Wilson 		/*
62845b17475SAlex Wilson 		 * objset_check will succeed on any valid objset in the global
62945b17475SAlex Wilson 		 * zone, and any valid delegated dataset. It will fail, however,
63045b17475SAlex Wilson 		 * in non-global zones on explicitly whitelisted zvol devices
63145b17475SAlex Wilson 		 * that are outside any delegated dataset.
63245b17475SAlex Wilson 		 *
63345b17475SAlex Wilson 		 * The directories leading up to the zvol device itself will be
63445b17475SAlex Wilson 		 * created by prof for us in advance (and will always validate
63545b17475SAlex Wilson 		 * because of the matching check in devzvol_validate). The zvol
63645b17475SAlex Wilson 		 * device itself can't be created by prof though because in the
63745b17475SAlex Wilson 		 * GZ it's a symlink, and in the NGZ it is not. So, we create
63845b17475SAlex Wilson 		 * such zvol device files here.
63945b17475SAlex Wilson 		 */
64045b17475SAlex Wilson 		if (!(parent->sdev_flags & SDEV_GLOBAL) &&
64145b17475SAlex Wilson 		    parent->sdev_origin != NULL &&
64245b17475SAlex Wilson 		    prof_name_matched(nm, parent)) {
64345b17475SAlex Wilson 			do_type = DMU_OST_ZVOL;
64445b17475SAlex Wilson 		} else {
64545b17475SAlex Wilson 			kmem_free(dsname, strlen(dsname) + 1);
64645b17475SAlex Wilson 			return (ENOENT);
64745b17475SAlex Wilson 		}
648dd9c3b29SJerry Jelinek 	}
64945b17475SAlex Wilson 
650dd9c3b29SJerry Jelinek 	if (do_type == DMU_OST_ZVOL)
651dd9c3b29SJerry Jelinek 		expected_type = VBLK;
652dd9c3b29SJerry Jelinek 
653dd9c3b29SJerry Jelinek 	if (expected_type == VDIR) {
654dd9c3b29SJerry Jelinek 		vattr.va_type = VDIR;
655dd9c3b29SJerry Jelinek 		vattr.va_mode = SDEV_DIRMODE_DEFAULT;
656dd9c3b29SJerry Jelinek 	} else {
657dd9c3b29SJerry Jelinek 		minor_t minor;
658dd9c3b29SJerry Jelinek 		dev_t devnum;
659dd9c3b29SJerry Jelinek 		int rc;
660dd9c3b29SJerry Jelinek 
661dd9c3b29SJerry Jelinek 		rc = sdev_zvol_create_minor(dsname);
662dd9c3b29SJerry Jelinek 		if ((rc != 0 && rc != EEXIST && rc != EBUSY) ||
663dd9c3b29SJerry Jelinek 		    sdev_zvol_name2minor(dsname, &minor)) {
664dd9c3b29SJerry Jelinek 			kmem_free(dsname, strlen(dsname) + 1);
665dd9c3b29SJerry Jelinek 			return (ENOENT);
666dd9c3b29SJerry Jelinek 		}
667dd9c3b29SJerry Jelinek 
668dd9c3b29SJerry Jelinek 		devnum = makedevice(devzvol_major, minor);
669dd9c3b29SJerry Jelinek 		vattr.va_rdev = devnum;
670dd9c3b29SJerry Jelinek 
671dd9c3b29SJerry Jelinek 		if (strstr(parent->sdev_path, "/rdsk/") != NULL)
672dd9c3b29SJerry Jelinek 			vattr.va_type = VCHR;
673dd9c3b29SJerry Jelinek 		else
674dd9c3b29SJerry Jelinek 			vattr.va_type = VBLK;
675dd9c3b29SJerry Jelinek 		vattr.va_mode = SDEV_DEVMODE_DEFAULT;
676dd9c3b29SJerry Jelinek 	}
677dd9c3b29SJerry Jelinek 	kmem_free(dsname, strlen(dsname) + 1);
678dd9c3b29SJerry Jelinek 
679dd9c3b29SJerry Jelinek 	rw_enter(&parent->sdev_contents, RW_WRITER);
680dd9c3b29SJerry Jelinek 
681dd9c3b29SJerry Jelinek 	res = sdev_mknode(parent, nm, &dv, &vattr,
682dd9c3b29SJerry Jelinek 	    NULL, NULL, kcred, SDEV_READY);
683dd9c3b29SJerry Jelinek 	rw_exit(&parent->sdev_contents);
684dd9c3b29SJerry Jelinek 	if (res != 0)
685dd9c3b29SJerry Jelinek 		return (ENOENT);
686dd9c3b29SJerry Jelinek 
687dd9c3b29SJerry Jelinek 	SDEV_RELE(dv);
688dd9c3b29SJerry Jelinek 	return (0);
689dd9c3b29SJerry Jelinek }
690dd9c3b29SJerry Jelinek 
691681d9761SEric Taylor /*ARGSUSED*/
692681d9761SEric Taylor static int
devzvol_lookup(struct vnode * dvp,char * nm,struct vnode ** vpp,struct pathname * pnp,int flags,struct vnode * rdir,struct cred * cred,caller_context_t * ct,int * direntflags,pathname_t * realpnp)693681d9761SEric Taylor devzvol_lookup(struct vnode *dvp, char *nm, struct vnode **vpp,
694681d9761SEric Taylor     struct pathname *pnp, int flags, struct vnode *rdir, struct cred *cred,
695681d9761SEric Taylor     caller_context_t *ct, int *direntflags, pathname_t *realpnp)
696681d9761SEric Taylor {
697681d9761SEric Taylor 	enum vtype expected_type = VDIR;
698681d9761SEric Taylor 	struct sdev_node *parent = VTOSDEV(dvp);
699681d9761SEric Taylor 	char *dsname;
700681d9761SEric Taylor 	dmu_objset_type_t do_type;
701681d9761SEric Taylor 	int error;
702681d9761SEric Taylor 
703681d9761SEric Taylor 	sdcmn_err13(("devzvol_lookup '%s' '%s'", parent->sdev_path, nm));
704681d9761SEric Taylor 	*vpp = NULL;
705681d9761SEric Taylor 	/* execute access is required to search the directory */
706681d9761SEric Taylor 	if ((error = VOP_ACCESS(dvp, VEXEC, 0, cred, ct)) != 0)
707681d9761SEric Taylor 		return (error);
708681d9761SEric Taylor 
709681d9761SEric Taylor 	rw_enter(&parent->sdev_contents, RW_READER);
71045b17475SAlex Wilson 	if (!SDEV_IS_GLOBAL(parent)) {
711dd9c3b29SJerry Jelinek 		int res;
712dd9c3b29SJerry Jelinek 
713681d9761SEric Taylor 		rw_exit(&parent->sdev_contents);
714dd9c3b29SJerry Jelinek 
715dd9c3b29SJerry Jelinek 		/*
716dd9c3b29SJerry Jelinek 		 * If we're in the global zone and reach down into a non-global
717dd9c3b29SJerry Jelinek 		 * zone's /dev/zvol then this action could trigger the creation
718dd9c3b29SJerry Jelinek 		 * of all of the zvol devices for every zone into the non-global
719dd9c3b29SJerry Jelinek 		 * zone's /dev tree. This could be a big security hole. To
720dd9c3b29SJerry Jelinek 		 * prevent this, disallow the global zone from looking inside
721dd9c3b29SJerry Jelinek 		 * a non-global zones /dev/zvol. This behavior is similar to
722dd9c3b29SJerry Jelinek 		 * delegated datasets, which cannot be used by the global zone.
723dd9c3b29SJerry Jelinek 		 */
724dd9c3b29SJerry Jelinek 		if (getzoneid() == GLOBAL_ZONEID)
725dd9c3b29SJerry Jelinek 			return (EPERM);
726dd9c3b29SJerry Jelinek 
727dd9c3b29SJerry Jelinek 		res = prof_lookup(dvp, nm, vpp, cred);
728dd9c3b29SJerry Jelinek 
729dd9c3b29SJerry Jelinek 		/*
730dd9c3b29SJerry Jelinek 		 * We won't find a zvol that was dynamically created inside
731dd9c3b29SJerry Jelinek 		 * a NGZ, within a delegated dataset, in the zone's dev profile
732dd9c3b29SJerry Jelinek 		 * but prof_lookup will also find it via sdev_cache_lookup.
733dd9c3b29SJerry Jelinek 		 */
734dd9c3b29SJerry Jelinek 		if (res == ENOENT) {
735dd9c3b29SJerry Jelinek 			/*
736dd9c3b29SJerry Jelinek 			 * We have to create the sdev node for the dymamically
737dd9c3b29SJerry Jelinek 			 * created zvol.
738dd9c3b29SJerry Jelinek 			 */
739dd9c3b29SJerry Jelinek 			if (devzvol_mk_ngz_node(parent, nm) != 0)
740dd9c3b29SJerry Jelinek 				return (ENOENT);
741dd9c3b29SJerry Jelinek 			res = prof_lookup(dvp, nm, vpp, cred);
742dd9c3b29SJerry Jelinek 		}
743dd9c3b29SJerry Jelinek 
744dd9c3b29SJerry Jelinek 		return (res);
745681d9761SEric Taylor 	}
746681d9761SEric Taylor 
74745b17475SAlex Wilson 	/*
74845b17475SAlex Wilson 	 * Don't let the global-zone style lookup succeed here when we're not
74945b17475SAlex Wilson 	 * running in the global zone. This can happen because prof calls into
75045b17475SAlex Wilson 	 * us (in prof_filldir) trying to create an explicitly passed-through
75145b17475SAlex Wilson 	 * zvol device outside any delegated dataset.
75245b17475SAlex Wilson 	 *
75345b17475SAlex Wilson 	 * We have to stop this here or else we will create prof shadows of
75445b17475SAlex Wilson 	 * the global zone symlink, which will make no sense at all in the
75545b17475SAlex Wilson 	 * non-global zone (it has no /devices for the symlink to point at).
75645b17475SAlex Wilson 	 *
75745b17475SAlex Wilson 	 * These zvols will be created later (at access time) by mk_ngz_node
75845b17475SAlex Wilson 	 * instead. The dirs leading up to them will be created by prof
75945b17475SAlex Wilson 	 * internally.
76045b17475SAlex Wilson 	 *
76145b17475SAlex Wilson 	 * We have to return EPERM here, because ENOENT is given special
76245b17475SAlex Wilson 	 * meaning by prof in this context.
76345b17475SAlex Wilson 	 */
76445b17475SAlex Wilson 	if (getzoneid() != GLOBAL_ZONEID) {
76545b17475SAlex Wilson 		rw_exit(&parent->sdev_contents);
76645b17475SAlex Wilson 		return (EPERM);
76745b17475SAlex Wilson 	}
76845b17475SAlex Wilson 
769681d9761SEric Taylor 	dsname = devzvol_make_dsname(parent->sdev_path, nm);
770681d9761SEric Taylor 	rw_exit(&parent->sdev_contents);
771681d9761SEric Taylor 	sdcmn_err13(("rvp dsname %s", dsname ? dsname : "(null)"));
772681d9761SEric Taylor 	if (dsname) {
773681d9761SEric Taylor 		error = devzvol_objset_check(dsname, &do_type);
774681d9761SEric Taylor 		if (error != 0) {
775681d9761SEric Taylor 			error = ENOENT;
776681d9761SEric Taylor 			goto out;
777681d9761SEric Taylor 		}
778681d9761SEric Taylor 		if (do_type == DMU_OST_ZVOL)
779681d9761SEric Taylor 			expected_type = VLNK;
780681d9761SEric Taylor 	}
781681d9761SEric Taylor 	/*
782681d9761SEric Taylor 	 * the callbacks expect:
783681d9761SEric Taylor 	 *
784681d9761SEric Taylor 	 * parent->sdev_path		   nm
785681d9761SEric Taylor 	 * /dev/zvol			   {r}dsk
786681d9761SEric Taylor 	 * /dev/zvol/{r}dsk		   <pool name>
787681d9761SEric Taylor 	 * /dev/zvol/{r}dsk/<dataset name> <last ds component>
788681d9761SEric Taylor 	 *
789681d9761SEric Taylor 	 * sdev_name is always last path component of sdev_path
790681d9761SEric Taylor 	 */
791681d9761SEric Taylor 	if (expected_type == VDIR) {
792681d9761SEric Taylor 		error = devname_lookup_func(parent, nm, vpp, cred,
793681d9761SEric Taylor 		    devzvol_create_dir, SDEV_VATTR);
794681d9761SEric Taylor 	} else {
795681d9761SEric Taylor 		error = devname_lookup_func(parent, nm, vpp, cred,
796681d9761SEric Taylor 		    devzvol_create_link, SDEV_VLINK);
797681d9761SEric Taylor 	}
798681d9761SEric Taylor 	sdcmn_err13(("devzvol_lookup %d %d", expected_type, error));
799681d9761SEric Taylor 	ASSERT(error || ((*vpp)->v_type == expected_type));
800681d9761SEric Taylor out:
801681d9761SEric Taylor 	if (dsname)
802681d9761SEric Taylor 		kmem_free(dsname, strlen(dsname) + 1);
803681d9761SEric Taylor 	sdcmn_err13(("devzvol_lookup %d", error));
804681d9761SEric Taylor 	return (error);
805681d9761SEric Taylor }
806681d9761SEric Taylor 
807681d9761SEric Taylor /*
808681d9761SEric Taylor  * We allow create to find existing nodes
809681d9761SEric Taylor  *	- if the node doesn't exist - EROFS
810681d9761SEric Taylor  *	- creating an existing dir read-only succeeds, otherwise EISDIR
811681d9761SEric Taylor  *	- exclusive creates fail - EEXIST
812681d9761SEric Taylor  */
813681d9761SEric Taylor /*ARGSUSED2*/
814681d9761SEric Taylor static int
devzvol_create(struct vnode * dvp,char * nm,struct vattr * vap,vcexcl_t excl,int mode,struct vnode ** vpp,struct cred * cred,int flag,caller_context_t * ct,vsecattr_t * vsecp)815681d9761SEric Taylor devzvol_create(struct vnode *dvp, char *nm, struct vattr *vap, vcexcl_t excl,
816681d9761SEric Taylor     int mode, struct vnode **vpp, struct cred *cred, int flag,
817681d9761SEric Taylor     caller_context_t *ct, vsecattr_t *vsecp)
818681d9761SEric Taylor {
819681d9761SEric Taylor 	int error;
820681d9761SEric Taylor 	struct vnode *vp;
821681d9761SEric Taylor 
822681d9761SEric Taylor 	*vpp = NULL;
823681d9761SEric Taylor 
824681d9761SEric Taylor 	error = devzvol_lookup(dvp, nm, &vp, NULL, 0, NULL, cred, ct, NULL,
825681d9761SEric Taylor 	    NULL);
826681d9761SEric Taylor 	if (error == 0) {
827681d9761SEric Taylor 		if (excl == EXCL)
828681d9761SEric Taylor 			error = EEXIST;
829681d9761SEric Taylor 		else if (vp->v_type == VDIR && (mode & VWRITE))
830681d9761SEric Taylor 			error = EISDIR;
831681d9761SEric Taylor 		else
832681d9761SEric Taylor 			error = VOP_ACCESS(vp, mode, 0, cred, ct);
833681d9761SEric Taylor 
834681d9761SEric Taylor 		if (error) {
835681d9761SEric Taylor 			VN_RELE(vp);
836681d9761SEric Taylor 		} else
837681d9761SEric Taylor 			*vpp = vp;
838681d9761SEric Taylor 	} else if (error == ENOENT) {
839681d9761SEric Taylor 		error = EROFS;
840681d9761SEric Taylor 	}
841681d9761SEric Taylor 
842681d9761SEric Taylor 	return (error);
843681d9761SEric Taylor }
844681d9761SEric Taylor 
845681d9761SEric Taylor void sdev_iter_snapshots(struct vnode *dvp, char *name);
846681d9761SEric Taylor 
847681d9761SEric Taylor void
sdev_iter_datasets(struct vnode * dvp,int arg,char * name)848681d9761SEric Taylor sdev_iter_datasets(struct vnode *dvp, int arg, char *name)
849681d9761SEric Taylor {
850681d9761SEric Taylor 	zfs_cmd_t	*zc;
851681d9761SEric Taylor 	int rc;
852681d9761SEric Taylor 
853681d9761SEric Taylor 	sdcmn_err13(("iter name is '%s' (arg %x)", name, arg));
854681d9761SEric Taylor 	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
855681d9761SEric Taylor 	(void) strcpy(zc->zc_name, name);
856681d9761SEric Taylor 
857681d9761SEric Taylor 	while ((rc = devzvol_handle_ioctl(arg, zc, B_FALSE)) == 0) {
858681d9761SEric Taylor 		struct vnode *vpp;
859681d9761SEric Taylor 		char *ptr;
860681d9761SEric Taylor 
861681d9761SEric Taylor 		sdcmn_err13(("  name %s", zc->zc_name));
862681d9761SEric Taylor 		if (strchr(zc->zc_name, '$') || strchr(zc->zc_name, '%'))
863681d9761SEric Taylor 			goto skip;
864681d9761SEric Taylor 		ptr = strrchr(zc->zc_name, '/') + 1;
865681d9761SEric Taylor 		rc = devzvol_lookup(dvp, ptr, &vpp, NULL, 0, NULL,
866681d9761SEric Taylor 		    kcred, NULL, NULL, NULL);
867681d9761SEric Taylor 		if (rc == 0) {
868681d9761SEric Taylor 			VN_RELE(vpp);
869681d9761SEric Taylor 		} else if (rc == ENOENT) {
870681d9761SEric Taylor 			goto skip;
871681d9761SEric Taylor 		} else {
872dd9c3b29SJerry Jelinek 			/*
873dd9c3b29SJerry Jelinek 			 * EBUSY == problem with zvols's dmu holds?
874dd9c3b29SJerry Jelinek 			 * EPERM when in a NGZ and traversing up and out.
875dd9c3b29SJerry Jelinek 			 */
876681d9761SEric Taylor 			goto skip;
877681d9761SEric Taylor 		}
878681d9761SEric Taylor 		if (arg == ZFS_IOC_DATASET_LIST_NEXT &&
879470bc2d6SGeorge Wilson 		    zc->zc_objset_stats.dds_type == DMU_OST_ZVOL &&
880470bc2d6SGeorge Wilson 		    devzvol_snaps_allowed)
881681d9761SEric Taylor 			sdev_iter_snapshots(dvp, zc->zc_name);
882681d9761SEric Taylor skip:
883681d9761SEric Taylor 		(void) strcpy(zc->zc_name, name);
884681d9761SEric Taylor 	}
885681d9761SEric Taylor 	kmem_free(zc, sizeof (zfs_cmd_t));
886681d9761SEric Taylor }
887681d9761SEric Taylor 
888681d9761SEric Taylor void
sdev_iter_snapshots(struct vnode * dvp,char * name)889681d9761SEric Taylor sdev_iter_snapshots(struct vnode *dvp, char *name)
890681d9761SEric Taylor {
891681d9761SEric Taylor 	sdev_iter_datasets(dvp, ZFS_IOC_SNAPSHOT_LIST_NEXT, name);
892681d9761SEric Taylor }
893681d9761SEric Taylor 
894681d9761SEric Taylor /*ARGSUSED4*/
895681d9761SEric Taylor static int
devzvol_readdir(struct vnode * dvp,struct uio * uiop,struct cred * cred,int * eofp,caller_context_t * ct_unused,int flags_unused)896681d9761SEric Taylor devzvol_readdir(struct vnode *dvp, struct uio *uiop, struct cred *cred,
897681d9761SEric Taylor     int *eofp, caller_context_t *ct_unused, int flags_unused)
898681d9761SEric Taylor {
899681d9761SEric Taylor 	struct sdev_node *sdvp = VTOSDEV(dvp);
900681d9761SEric Taylor 	char *ptr;
901681d9761SEric Taylor 
902681d9761SEric Taylor 	sdcmn_err13(("zv readdir of '%s' %s'", sdvp->sdev_path,
903681d9761SEric Taylor 	    sdvp->sdev_name));
904681d9761SEric Taylor 
905681d9761SEric Taylor 	if (strcmp(sdvp->sdev_path, ZVOL_DIR) == 0) {
906681d9761SEric Taylor 		struct vnode *vp;
907681d9761SEric Taylor 
908681d9761SEric Taylor 		rw_exit(&sdvp->sdev_contents);
909681d9761SEric Taylor 		(void) devname_lookup_func(sdvp, "dsk", &vp, cred,
910681d9761SEric Taylor 		    devzvol_create_dir, SDEV_VATTR);
911681d9761SEric Taylor 		VN_RELE(vp);
912681d9761SEric Taylor 		(void) devname_lookup_func(sdvp, "rdsk", &vp, cred,
913681d9761SEric Taylor 		    devzvol_create_dir, SDEV_VATTR);
914681d9761SEric Taylor 		VN_RELE(vp);
915681d9761SEric Taylor 		rw_enter(&sdvp->sdev_contents, RW_READER);
916681d9761SEric Taylor 		return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
917681d9761SEric Taylor 	}
918681d9761SEric Taylor 	if (uiop->uio_offset == 0)
919681d9761SEric Taylor 		devzvol_prunedir(sdvp);
920681d9761SEric Taylor 	ptr = sdvp->sdev_path + strlen(ZVOL_DIR);
921681d9761SEric Taylor 	if ((strcmp(ptr, "/dsk") == 0) || (strcmp(ptr, "/rdsk") == 0)) {
922681d9761SEric Taylor 		rw_exit(&sdvp->sdev_contents);
923681d9761SEric Taylor 		devzvol_create_pool_dirs(dvp);
924681d9761SEric Taylor 		rw_enter(&sdvp->sdev_contents, RW_READER);
925681d9761SEric Taylor 		return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
926681d9761SEric Taylor 	}
927681d9761SEric Taylor 
928d6568684SRobert Mustacchi 	ptr = strchr(ptr + 1, '/');
929d6568684SRobert Mustacchi 	if (ptr == NULL)
930d6568684SRobert Mustacchi 		return (ENOENT);
931d6568684SRobert Mustacchi 	ptr++;
932681d9761SEric Taylor 	rw_exit(&sdvp->sdev_contents);
933681d9761SEric Taylor 	sdev_iter_datasets(dvp, ZFS_IOC_DATASET_LIST_NEXT, ptr);
934681d9761SEric Taylor 	rw_enter(&sdvp->sdev_contents, RW_READER);
935681d9761SEric Taylor 	return (devname_readdir_func(dvp, uiop, cred, eofp, 0));
936681d9761SEric Taylor }
937681d9761SEric Taylor 
938681d9761SEric Taylor const fs_operation_def_t devzvol_vnodeops_tbl[] = {
939681d9761SEric Taylor 	VOPNAME_READDIR,	{ .vop_readdir = devzvol_readdir },
940681d9761SEric Taylor 	VOPNAME_LOOKUP,		{ .vop_lookup = devzvol_lookup },
941681d9761SEric Taylor 	VOPNAME_CREATE,		{ .vop_create = devzvol_create },
942681d9761SEric Taylor 	VOPNAME_RENAME,		{ .error = fs_nosys },
943681d9761SEric Taylor 	VOPNAME_MKDIR,		{ .error = fs_nosys },
944681d9761SEric Taylor 	VOPNAME_RMDIR,		{ .error = fs_nosys },
945681d9761SEric Taylor 	VOPNAME_REMOVE,		{ .error = fs_nosys },
946681d9761SEric Taylor 	VOPNAME_SYMLINK,	{ .error = fs_nosys },
947681d9761SEric Taylor 	NULL,			NULL
948681d9761SEric Taylor };
949