xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_vfsops.c (revision da6c28aaf62fa55f0fdb8004aa40f88f23bf53f0)
1fa9e4066Sahrens /*
2fa9e4066Sahrens  * CDDL HEADER START
3fa9e4066Sahrens  *
4fa9e4066Sahrens  * The contents of this file are subject to the terms of the
5033f9833Sek  * Common Development and Distribution License (the "License").
6033f9833Sek  * You may not use this file except in compliance with the License.
7fa9e4066Sahrens  *
8fa9e4066Sahrens  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9fa9e4066Sahrens  * or http://www.opensolaris.org/os/licensing.
10fa9e4066Sahrens  * See the License for the specific language governing permissions
11fa9e4066Sahrens  * and limitations under the License.
12fa9e4066Sahrens  *
13fa9e4066Sahrens  * When distributing Covered Code, include this CDDL HEADER in each
14fa9e4066Sahrens  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15fa9e4066Sahrens  * If applicable, add the following below this CDDL HEADER, with the
16fa9e4066Sahrens  * fields enclosed by brackets "[]" replaced with your own identifying
17fa9e4066Sahrens  * information: Portions Copyright [yyyy] [name of copyright owner]
18fa9e4066Sahrens  *
19fa9e4066Sahrens  * CDDL HEADER END
20fa9e4066Sahrens  */
21fa9e4066Sahrens /*
22893a6d32Sahrens  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23fa9e4066Sahrens  * Use is subject to license terms.
24fa9e4066Sahrens  */
25fa9e4066Sahrens 
2678077464Sck #pragma ident	"%Z%%M%	%I%	%E% SMI"
27fa9e4066Sahrens 
28fa9e4066Sahrens #include <sys/types.h>
29fa9e4066Sahrens #include <sys/param.h>
30fa9e4066Sahrens #include <sys/systm.h>
31fa9e4066Sahrens #include <sys/sysmacros.h>
32fa9e4066Sahrens #include <sys/kmem.h>
33fa9e4066Sahrens #include <sys/pathname.h>
34fa9e4066Sahrens #include <sys/vnode.h>
35fa9e4066Sahrens #include <sys/vfs.h>
36aa59c4cbSrsb #include <sys/vfs_opreg.h>
37fa9e4066Sahrens #include <sys/mntent.h>
38fa9e4066Sahrens #include <sys/mount.h>
39fa9e4066Sahrens #include <sys/cmn_err.h>
40fa9e4066Sahrens #include "fs/fs_subr.h"
41fa9e4066Sahrens #include <sys/zfs_znode.h>
42893a6d32Sahrens #include <sys/zfs_dir.h>
43*da6c28aaSamw #include <sys/zfs_i18n.h>
44fa9e4066Sahrens #include <sys/zil.h>
45fa9e4066Sahrens #include <sys/fs/zfs.h>
46fa9e4066Sahrens #include <sys/dmu.h>
47fa9e4066Sahrens #include <sys/dsl_prop.h>
48b1b8ab34Slling #include <sys/dsl_dataset.h>
49ecd6cf80Smarks #include <sys/dsl_deleg.h>
50fa9e4066Sahrens #include <sys/spa.h>
51fa9e4066Sahrens #include <sys/zap.h>
52fa9e4066Sahrens #include <sys/varargs.h>
53fa9e4066Sahrens #include <sys/policy.h>
54fa9e4066Sahrens #include <sys/atomic.h>
55fa9e4066Sahrens #include <sys/mkdev.h>
56fa9e4066Sahrens #include <sys/modctl.h>
57ecd6cf80Smarks #include <sys/refstr.h>
58fa9e4066Sahrens #include <sys/zfs_ioctl.h>
59fa9e4066Sahrens #include <sys/zfs_ctldir.h>
60*da6c28aaSamw #include <sys/zfs_fuid.h>
61ea8dc4b6Seschrock #include <sys/bootconf.h>
62a0965f35Sbonwick #include <sys/sunddi.h>
63033f9833Sek #include <sys/dnlc.h>
64f18faf3fSek #include <sys/dmu_objset.h>
65fa9e4066Sahrens 
66fa9e4066Sahrens int zfsfstype;
67fa9e4066Sahrens vfsops_t *zfs_vfsops = NULL;
68a0965f35Sbonwick static major_t zfs_major;
69fa9e4066Sahrens static minor_t zfs_minor;
70fa9e4066Sahrens static kmutex_t	zfs_dev_mtx;
71fa9e4066Sahrens 
72fa9e4066Sahrens static int zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr);
73fa9e4066Sahrens static int zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr);
74ea8dc4b6Seschrock static int zfs_mountroot(vfs_t *vfsp, enum whymountroot);
75fa9e4066Sahrens static int zfs_root(vfs_t *vfsp, vnode_t **vpp);
76fa9e4066Sahrens static int zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp);
77fa9e4066Sahrens static int zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp);
78fa9e4066Sahrens static void zfs_freevfs(vfs_t *vfsp);
79fa9e4066Sahrens 
80fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_template[] = {
81aa59c4cbSrsb 	VFSNAME_MOUNT,		{ .vfs_mount = zfs_mount },
82aa59c4cbSrsb 	VFSNAME_MOUNTROOT,	{ .vfs_mountroot = zfs_mountroot },
83aa59c4cbSrsb 	VFSNAME_UNMOUNT,	{ .vfs_unmount = zfs_umount },
84aa59c4cbSrsb 	VFSNAME_ROOT,		{ .vfs_root = zfs_root },
85aa59c4cbSrsb 	VFSNAME_STATVFS,	{ .vfs_statvfs = zfs_statvfs },
86aa59c4cbSrsb 	VFSNAME_SYNC,		{ .vfs_sync = zfs_sync },
87aa59c4cbSrsb 	VFSNAME_VGET,		{ .vfs_vget = zfs_vget },
88aa59c4cbSrsb 	VFSNAME_FREEVFS,	{ .vfs_freevfs = zfs_freevfs },
89aa59c4cbSrsb 	NULL,			NULL
90fa9e4066Sahrens };
91fa9e4066Sahrens 
92fa9e4066Sahrens static const fs_operation_def_t zfs_vfsops_eio_template[] = {
93aa59c4cbSrsb 	VFSNAME_FREEVFS,	{ .vfs_freevfs =  zfs_freevfs },
94aa59c4cbSrsb 	NULL,			NULL
95fa9e4066Sahrens };
96fa9e4066Sahrens 
97fa9e4066Sahrens /*
98fa9e4066Sahrens  * We need to keep a count of active fs's.
99fa9e4066Sahrens  * This is necessary to prevent our module
100fa9e4066Sahrens  * from being unloaded after a umount -f
101fa9e4066Sahrens  */
102fa9e4066Sahrens static uint32_t	zfs_active_fs_count = 0;
103fa9e4066Sahrens 
104fa9e4066Sahrens static char *noatime_cancel[] = { MNTOPT_ATIME, NULL };
105fa9e4066Sahrens static char *atime_cancel[] = { MNTOPT_NOATIME, NULL };
1067b55fa8eSck static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };
1077b55fa8eSck static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
108fa9e4066Sahrens 
1097b55fa8eSck /*
110b510d378Slling  * MO_DEFAULT is not used since the default value is determined
111b510d378Slling  * by the equivalent property.
1127b55fa8eSck  */
113fa9e4066Sahrens static mntopt_t mntopts[] = {
1147b55fa8eSck 	{ MNTOPT_NOXATTR, noxattr_cancel, NULL, 0, NULL },
1157b55fa8eSck 	{ MNTOPT_XATTR, xattr_cancel, NULL, 0, NULL },
116b510d378Slling 	{ MNTOPT_NOATIME, noatime_cancel, NULL, 0, NULL },
117fa9e4066Sahrens 	{ MNTOPT_ATIME, atime_cancel, NULL, 0, NULL }
118fa9e4066Sahrens };
119fa9e4066Sahrens 
120fa9e4066Sahrens static mntopts_t zfs_mntopts = {
121fa9e4066Sahrens 	sizeof (mntopts) / sizeof (mntopt_t),
122fa9e4066Sahrens 	mntopts
123fa9e4066Sahrens };
124fa9e4066Sahrens 
125fa9e4066Sahrens /*ARGSUSED*/
126fa9e4066Sahrens int
127fa9e4066Sahrens zfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
128fa9e4066Sahrens {
129fa9e4066Sahrens 	/*
130fa9e4066Sahrens 	 * Data integrity is job one.  We don't want a compromised kernel
131fa9e4066Sahrens 	 * writing to the storage pool, so we never sync during panic.
132fa9e4066Sahrens 	 */
133fa9e4066Sahrens 	if (panicstr)
134fa9e4066Sahrens 		return (0);
135fa9e4066Sahrens 
136fa9e4066Sahrens 	/*
137fa9e4066Sahrens 	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
138fa9e4066Sahrens 	 * to sync metadata, which they would otherwise cache indefinitely.
139fa9e4066Sahrens 	 * Semantically, the only requirement is that the sync be initiated.
140fa9e4066Sahrens 	 * The DMU syncs out txgs frequently, so there's nothing to do.
141fa9e4066Sahrens 	 */
142fa9e4066Sahrens 	if (flag & SYNC_ATTR)
143fa9e4066Sahrens 		return (0);
144fa9e4066Sahrens 
145fa9e4066Sahrens 	if (vfsp != NULL) {
146fa9e4066Sahrens 		/*
147fa9e4066Sahrens 		 * Sync a specific filesystem.
148fa9e4066Sahrens 		 */
149fa9e4066Sahrens 		zfsvfs_t *zfsvfs = vfsp->vfs_data;
150fa9e4066Sahrens 
151fa9e4066Sahrens 		ZFS_ENTER(zfsvfs);
152fa9e4066Sahrens 		if (zfsvfs->z_log != NULL)
153b19a79ecSperrin 			zil_commit(zfsvfs->z_log, UINT64_MAX, 0);
154fa9e4066Sahrens 		else
155fa9e4066Sahrens 			txg_wait_synced(dmu_objset_pool(zfsvfs->z_os), 0);
156fa9e4066Sahrens 		ZFS_EXIT(zfsvfs);
157fa9e4066Sahrens 	} else {
158fa9e4066Sahrens 		/*
159fa9e4066Sahrens 		 * Sync all ZFS filesystems.  This is what happens when you
160fa9e4066Sahrens 		 * run sync(1M).  Unlike other filesystems, ZFS honors the
161fa9e4066Sahrens 		 * request by waiting for all pools to commit all dirty data.
162fa9e4066Sahrens 		 */
163fa9e4066Sahrens 		spa_sync_allpools();
164fa9e4066Sahrens 	}
165fa9e4066Sahrens 
166fa9e4066Sahrens 	return (0);
167fa9e4066Sahrens }
168fa9e4066Sahrens 
169ea8dc4b6Seschrock static int
170ea8dc4b6Seschrock zfs_create_unique_device(dev_t *dev)
171ea8dc4b6Seschrock {
172ea8dc4b6Seschrock 	major_t new_major;
173ea8dc4b6Seschrock 
174ea8dc4b6Seschrock 	do {
175ea8dc4b6Seschrock 		ASSERT3U(zfs_minor, <=, MAXMIN32);
176ea8dc4b6Seschrock 		minor_t start = zfs_minor;
177ea8dc4b6Seschrock 		do {
178ea8dc4b6Seschrock 			mutex_enter(&zfs_dev_mtx);
179ea8dc4b6Seschrock 			if (zfs_minor >= MAXMIN32) {
180ea8dc4b6Seschrock 				/*
181ea8dc4b6Seschrock 				 * If we're still using the real major
182ea8dc4b6Seschrock 				 * keep out of /dev/zfs and /dev/zvol minor
183ea8dc4b6Seschrock 				 * number space.  If we're using a getudev()'ed
184ea8dc4b6Seschrock 				 * major number, we can use all of its minors.
185ea8dc4b6Seschrock 				 */
186ea8dc4b6Seschrock 				if (zfs_major == ddi_name_to_major(ZFS_DRIVER))
187ea8dc4b6Seschrock 					zfs_minor = ZFS_MIN_MINOR;
188ea8dc4b6Seschrock 				else
189ea8dc4b6Seschrock 					zfs_minor = 0;
190ea8dc4b6Seschrock 			} else {
191ea8dc4b6Seschrock 				zfs_minor++;
192ea8dc4b6Seschrock 			}
193ea8dc4b6Seschrock 			*dev = makedevice(zfs_major, zfs_minor);
194ea8dc4b6Seschrock 			mutex_exit(&zfs_dev_mtx);
195ea8dc4b6Seschrock 		} while (vfs_devismounted(*dev) && zfs_minor != start);
196ea8dc4b6Seschrock 		if (zfs_minor == start) {
197ea8dc4b6Seschrock 			/*
198ea8dc4b6Seschrock 			 * We are using all ~262,000 minor numbers for the
199ea8dc4b6Seschrock 			 * current major number.  Create a new major number.
200ea8dc4b6Seschrock 			 */
201ea8dc4b6Seschrock 			if ((new_major = getudev()) == (major_t)-1) {
202ea8dc4b6Seschrock 				cmn_err(CE_WARN,
203ea8dc4b6Seschrock 				    "zfs_mount: Can't get unique major "
204ea8dc4b6Seschrock 				    "device number.");
205ea8dc4b6Seschrock 				return (-1);
206ea8dc4b6Seschrock 			}
207ea8dc4b6Seschrock 			mutex_enter(&zfs_dev_mtx);
208ea8dc4b6Seschrock 			zfs_major = new_major;
209ea8dc4b6Seschrock 			zfs_minor = 0;
210ea8dc4b6Seschrock 
211ea8dc4b6Seschrock 			mutex_exit(&zfs_dev_mtx);
212ea8dc4b6Seschrock 		} else {
213ea8dc4b6Seschrock 			break;
214ea8dc4b6Seschrock 		}
215ea8dc4b6Seschrock 		/* CONSTANTCONDITION */
216ea8dc4b6Seschrock 	} while (1);
217ea8dc4b6Seschrock 
218ea8dc4b6Seschrock 	return (0);
219ea8dc4b6Seschrock }
220ea8dc4b6Seschrock 
221fa9e4066Sahrens static void
222fa9e4066Sahrens atime_changed_cb(void *arg, uint64_t newval)
223fa9e4066Sahrens {
224fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
225fa9e4066Sahrens 
226fa9e4066Sahrens 	if (newval == TRUE) {
227fa9e4066Sahrens 		zfsvfs->z_atime = TRUE;
228fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME);
229fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_ATIME, NULL, 0);
230fa9e4066Sahrens 	} else {
231fa9e4066Sahrens 		zfsvfs->z_atime = FALSE;
232fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_ATIME);
233fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOATIME, NULL, 0);
234fa9e4066Sahrens 	}
235fa9e4066Sahrens }
236fa9e4066Sahrens 
2377b55fa8eSck static void
2387b55fa8eSck xattr_changed_cb(void *arg, uint64_t newval)
2397b55fa8eSck {
2407b55fa8eSck 	zfsvfs_t *zfsvfs = arg;
2417b55fa8eSck 
2427b55fa8eSck 	if (newval == TRUE) {
2437b55fa8eSck 		/* XXX locking on vfs_flag? */
2447b55fa8eSck 		zfsvfs->z_vfs->vfs_flag |= VFS_XATTR;
2457b55fa8eSck 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR);
2467b55fa8eSck 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_XATTR, NULL, 0);
2477b55fa8eSck 	} else {
2487b55fa8eSck 		/* XXX locking on vfs_flag? */
2497b55fa8eSck 		zfsvfs->z_vfs->vfs_flag &= ~VFS_XATTR;
2507b55fa8eSck 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_XATTR);
2517b55fa8eSck 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOXATTR, NULL, 0);
2527b55fa8eSck 	}
2537b55fa8eSck }
2547b55fa8eSck 
255fa9e4066Sahrens static void
256fa9e4066Sahrens blksz_changed_cb(void *arg, uint64_t newval)
257fa9e4066Sahrens {
258fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
259fa9e4066Sahrens 
260fa9e4066Sahrens 	if (newval < SPA_MINBLOCKSIZE ||
261fa9e4066Sahrens 	    newval > SPA_MAXBLOCKSIZE || !ISP2(newval))
262fa9e4066Sahrens 		newval = SPA_MAXBLOCKSIZE;
263fa9e4066Sahrens 
264fa9e4066Sahrens 	zfsvfs->z_max_blksz = newval;
265fa9e4066Sahrens 	zfsvfs->z_vfs->vfs_bsize = newval;
266fa9e4066Sahrens }
267fa9e4066Sahrens 
268fa9e4066Sahrens static void
269fa9e4066Sahrens readonly_changed_cb(void *arg, uint64_t newval)
270fa9e4066Sahrens {
271fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
272fa9e4066Sahrens 
273fa9e4066Sahrens 	if (newval) {
274fa9e4066Sahrens 		/* XXX locking on vfs_flag? */
275fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_RDONLY;
276fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RW);
277fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RO, NULL, 0);
278fa9e4066Sahrens 	} else {
279fa9e4066Sahrens 		/* XXX locking on vfs_flag? */
280fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
281fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_RO);
282fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_RW, NULL, 0);
283fa9e4066Sahrens 	}
284fa9e4066Sahrens }
285fa9e4066Sahrens 
286fa9e4066Sahrens static void
287fa9e4066Sahrens devices_changed_cb(void *arg, uint64_t newval)
288fa9e4066Sahrens {
289fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
290fa9e4066Sahrens 
291fa9e4066Sahrens 	if (newval == FALSE) {
292fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NODEVICES;
293fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES);
294fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES, NULL, 0);
295fa9e4066Sahrens 	} else {
296fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NODEVICES;
297fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NODEVICES);
298fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_DEVICES, NULL, 0);
299fa9e4066Sahrens 	}
300fa9e4066Sahrens }
301fa9e4066Sahrens 
302fa9e4066Sahrens static void
303fa9e4066Sahrens setuid_changed_cb(void *arg, uint64_t newval)
304fa9e4066Sahrens {
305fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
306fa9e4066Sahrens 
307fa9e4066Sahrens 	if (newval == FALSE) {
308fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NOSETUID;
309fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_SETUID);
310fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID, NULL, 0);
311fa9e4066Sahrens 	} else {
312fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOSETUID;
313fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOSETUID);
314fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_SETUID, NULL, 0);
315fa9e4066Sahrens 	}
316fa9e4066Sahrens }
317fa9e4066Sahrens 
318fa9e4066Sahrens static void
319fa9e4066Sahrens exec_changed_cb(void *arg, uint64_t newval)
320fa9e4066Sahrens {
321fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
322fa9e4066Sahrens 
323fa9e4066Sahrens 	if (newval == FALSE) {
324fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag |= VFS_NOEXEC;
325fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_EXEC);
326fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC, NULL, 0);
327fa9e4066Sahrens 	} else {
328fa9e4066Sahrens 		zfsvfs->z_vfs->vfs_flag &= ~VFS_NOEXEC;
329fa9e4066Sahrens 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NOEXEC);
330fa9e4066Sahrens 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_EXEC, NULL, 0);
331fa9e4066Sahrens 	}
332fa9e4066Sahrens }
333fa9e4066Sahrens 
334*da6c28aaSamw /*
335*da6c28aaSamw  * The nbmand mount option can be changed at mount time.
336*da6c28aaSamw  * We can't allow it to be toggled on live file systems or incorrect
337*da6c28aaSamw  * behavior may be seen from cifs clients
338*da6c28aaSamw  *
339*da6c28aaSamw  * This property isn't registered via dsl_prop_register(), but this callback
340*da6c28aaSamw  * will be called when a file system is first mounted
341*da6c28aaSamw  */
342*da6c28aaSamw static void
343*da6c28aaSamw nbmand_changed_cb(void *arg, uint64_t newval)
344*da6c28aaSamw {
345*da6c28aaSamw 	zfsvfs_t *zfsvfs = arg;
346*da6c28aaSamw 	if (newval == FALSE) {
347*da6c28aaSamw 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND);
348*da6c28aaSamw 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND, NULL, 0);
349*da6c28aaSamw 	} else {
350*da6c28aaSamw 		vfs_clearmntopt(zfsvfs->z_vfs, MNTOPT_NONBMAND);
351*da6c28aaSamw 		vfs_setmntopt(zfsvfs->z_vfs, MNTOPT_NBMAND, NULL, 0);
352*da6c28aaSamw 	}
353*da6c28aaSamw }
354*da6c28aaSamw 
355fa9e4066Sahrens static void
356fa9e4066Sahrens snapdir_changed_cb(void *arg, uint64_t newval)
357fa9e4066Sahrens {
358fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
359fa9e4066Sahrens 
360fa9e4066Sahrens 	zfsvfs->z_show_ctldir = newval;
361fa9e4066Sahrens }
362fa9e4066Sahrens 
363*da6c28aaSamw static void
364*da6c28aaSamw vscan_changed_cb(void *arg, uint64_t newval)
365*da6c28aaSamw {
366*da6c28aaSamw 	zfsvfs_t *zfsvfs = arg;
367*da6c28aaSamw 
368*da6c28aaSamw 	zfsvfs->z_vscan = newval;
369*da6c28aaSamw }
370*da6c28aaSamw 
371fa9e4066Sahrens static void
372fa9e4066Sahrens acl_mode_changed_cb(void *arg, uint64_t newval)
373fa9e4066Sahrens {
374fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
375fa9e4066Sahrens 
376fa9e4066Sahrens 	zfsvfs->z_acl_mode = newval;
377fa9e4066Sahrens }
378fa9e4066Sahrens 
379fa9e4066Sahrens static void
380fa9e4066Sahrens acl_inherit_changed_cb(void *arg, uint64_t newval)
381fa9e4066Sahrens {
382fa9e4066Sahrens 	zfsvfs_t *zfsvfs = arg;
383fa9e4066Sahrens 
384fa9e4066Sahrens 	zfsvfs->z_acl_inherit = newval;
385fa9e4066Sahrens }
386fa9e4066Sahrens 
387*da6c28aaSamw static int
388*da6c28aaSamw zfs_normalization_set(char *osname, zfsvfs_t *zfsvfs)
389*da6c28aaSamw {
390*da6c28aaSamw 	uint64_t pval;
391*da6c28aaSamw 	int error;
392*da6c28aaSamw 
393*da6c28aaSamw 	if (zfsvfs->z_version < ZPL_VERSION_FUID)
394*da6c28aaSamw 		return (0);
395*da6c28aaSamw 
396*da6c28aaSamw 	error = dsl_prop_get_integer(osname, "normalization", &pval, NULL);
397*da6c28aaSamw 	if (error)
398*da6c28aaSamw 		goto normquit;
399*da6c28aaSamw 	switch ((int)pval) {
400*da6c28aaSamw 	case ZFS_NORMALIZE_NONE:
401*da6c28aaSamw 		break;
402*da6c28aaSamw 	case ZFS_NORMALIZE_C:
403*da6c28aaSamw 		zfsvfs->z_norm |= U8_TEXTPREP_NFC;
404*da6c28aaSamw 		break;
405*da6c28aaSamw 	case ZFS_NORMALIZE_KC:
406*da6c28aaSamw 		zfsvfs->z_norm |= U8_TEXTPREP_NFKC;
407*da6c28aaSamw 		break;
408*da6c28aaSamw 	case ZFS_NORMALIZE_D:
409*da6c28aaSamw 		zfsvfs->z_norm |= U8_TEXTPREP_NFD;
410*da6c28aaSamw 		break;
411*da6c28aaSamw 	case ZFS_NORMALIZE_KD:
412*da6c28aaSamw 		zfsvfs->z_norm |= U8_TEXTPREP_NFKD;
413*da6c28aaSamw 		break;
414*da6c28aaSamw 	default:
415*da6c28aaSamw 		ASSERT(pval <= ZFS_NORMALIZE_KD);
416*da6c28aaSamw 		break;
417*da6c28aaSamw 	}
418*da6c28aaSamw 
419*da6c28aaSamw 	error = dsl_prop_get_integer(osname, "utf8only", &pval, NULL);
420*da6c28aaSamw 	if (error)
421*da6c28aaSamw 		goto normquit;
422*da6c28aaSamw 	if (pval)
423*da6c28aaSamw 		zfsvfs->z_case |= ZFS_UTF8_ONLY;
424*da6c28aaSamw 	else
425*da6c28aaSamw 		zfsvfs->z_case &= ~ZFS_UTF8_ONLY;
426*da6c28aaSamw 
427*da6c28aaSamw 	error = dsl_prop_get_integer(osname, "casesensitivity", &pval, NULL);
428*da6c28aaSamw 	if (error)
429*da6c28aaSamw 		goto normquit;
430*da6c28aaSamw 	vfs_set_feature(zfsvfs->z_vfs, VFSFT_DIRENTFLAGS);
431*da6c28aaSamw 	switch ((int)pval) {
432*da6c28aaSamw 	case ZFS_CASE_SENSITIVE:
433*da6c28aaSamw 		break;
434*da6c28aaSamw 	case ZFS_CASE_INSENSITIVE:
435*da6c28aaSamw 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
436*da6c28aaSamw 		zfsvfs->z_case |= ZFS_CI_ONLY;
437*da6c28aaSamw 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_CASEINSENSITIVE);
438*da6c28aaSamw 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_NOCASESENSITIVE);
439*da6c28aaSamw 		break;
440*da6c28aaSamw 	case ZFS_CASE_MIXED:
441*da6c28aaSamw 		zfsvfs->z_norm |= U8_TEXTPREP_TOUPPER;
442*da6c28aaSamw 		zfsvfs->z_case |= ZFS_CI_MIXD;
443*da6c28aaSamw 		vfs_set_feature(zfsvfs->z_vfs, VFSFT_CASEINSENSITIVE);
444*da6c28aaSamw 		break;
445*da6c28aaSamw 	default:
446*da6c28aaSamw 		ASSERT(pval <= ZFS_CASE_MIXED);
447*da6c28aaSamw 		break;
448*da6c28aaSamw 	}
449*da6c28aaSamw 
450*da6c28aaSamw normquit:
451*da6c28aaSamw 	return (error);
452*da6c28aaSamw }
453*da6c28aaSamw 
454ea8dc4b6Seschrock static int
455ea8dc4b6Seschrock zfs_register_callbacks(vfs_t *vfsp)
456ea8dc4b6Seschrock {
457ea8dc4b6Seschrock 	struct dsl_dataset *ds = NULL;
458ea8dc4b6Seschrock 	objset_t *os = NULL;
459ea8dc4b6Seschrock 	zfsvfs_t *zfsvfs = NULL;
460*da6c28aaSamw 	uint64_t nbmand;
461*da6c28aaSamw 	int readonly, do_readonly = B_FALSE;
462*da6c28aaSamw 	int setuid, do_setuid = B_FALSE;
463*da6c28aaSamw 	int exec, do_exec = B_FALSE;
464*da6c28aaSamw 	int devices, do_devices = B_FALSE;
465*da6c28aaSamw 	int xattr, do_xattr = B_FALSE;
466*da6c28aaSamw 	int atime, do_atime = B_FALSE;
467ea8dc4b6Seschrock 	int error = 0;
468ea8dc4b6Seschrock 
469ea8dc4b6Seschrock 	ASSERT(vfsp);
470ea8dc4b6Seschrock 	zfsvfs = vfsp->vfs_data;
471ea8dc4b6Seschrock 	ASSERT(zfsvfs);
472ea8dc4b6Seschrock 	os = zfsvfs->z_os;
473fa9e4066Sahrens 
474fa9e4066Sahrens 	/*
475ea8dc4b6Seschrock 	 * The act of registering our callbacks will destroy any mount
476ea8dc4b6Seschrock 	 * options we may have.  In order to enable temporary overrides
4777b55fa8eSck 	 * of mount options, we stash away the current values and
478ea8dc4b6Seschrock 	 * restore them after we register the callbacks.
479fa9e4066Sahrens 	 */
480ea8dc4b6Seschrock 	if (vfs_optionisset(vfsp, MNTOPT_RO, NULL)) {
481ea8dc4b6Seschrock 		readonly = B_TRUE;
482ea8dc4b6Seschrock 		do_readonly = B_TRUE;
483ea8dc4b6Seschrock 	} else if (vfs_optionisset(vfsp, MNTOPT_RW, NULL)) {
484ea8dc4b6Seschrock 		readonly = B_FALSE;
485ea8dc4b6Seschrock 		do_readonly = B_TRUE;
486ea8dc4b6Seschrock 	}
487ea8dc4b6Seschrock 	if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
488ea8dc4b6Seschrock 		devices = B_FALSE;
489ea8dc4b6Seschrock 		setuid = B_FALSE;
490ea8dc4b6Seschrock 		do_devices = B_TRUE;
491ea8dc4b6Seschrock 		do_setuid = B_TRUE;
492ea8dc4b6Seschrock 	} else {
493ea8dc4b6Seschrock 		if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL)) {
494ea8dc4b6Seschrock 			devices = B_FALSE;
495ea8dc4b6Seschrock 			do_devices = B_TRUE;
496b1b8ab34Slling 		} else if (vfs_optionisset(vfsp, MNTOPT_DEVICES, NULL)) {
497ea8dc4b6Seschrock 			devices = B_TRUE;
498ea8dc4b6Seschrock 			do_devices = B_TRUE;
499fa9e4066Sahrens 		}
500fa9e4066Sahrens 
501ea8dc4b6Seschrock 		if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL)) {
502ea8dc4b6Seschrock 			setuid = B_FALSE;
503ea8dc4b6Seschrock 			do_setuid = B_TRUE;
504ea8dc4b6Seschrock 		} else if (vfs_optionisset(vfsp, MNTOPT_SETUID, NULL)) {
505ea8dc4b6Seschrock 			setuid = B_TRUE;
506ea8dc4b6Seschrock 			do_setuid = B_TRUE;
507fa9e4066Sahrens 		}
508ea8dc4b6Seschrock 	}
509ea8dc4b6Seschrock 	if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL)) {
510ea8dc4b6Seschrock 		exec = B_FALSE;
511ea8dc4b6Seschrock 		do_exec = B_TRUE;
512ea8dc4b6Seschrock 	} else if (vfs_optionisset(vfsp, MNTOPT_EXEC, NULL)) {
513ea8dc4b6Seschrock 		exec = B_TRUE;
514ea8dc4b6Seschrock 		do_exec = B_TRUE;
515fa9e4066Sahrens 	}
5167b55fa8eSck 	if (vfs_optionisset(vfsp, MNTOPT_NOXATTR, NULL)) {
5177b55fa8eSck 		xattr = B_FALSE;
5187b55fa8eSck 		do_xattr = B_TRUE;
5197b55fa8eSck 	} else if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL)) {
5207b55fa8eSck 		xattr = B_TRUE;
5217b55fa8eSck 		do_xattr = B_TRUE;
5227b55fa8eSck 	}
523b510d378Slling 	if (vfs_optionisset(vfsp, MNTOPT_NOATIME, NULL)) {
524b510d378Slling 		atime = B_FALSE;
525b510d378Slling 		do_atime = B_TRUE;
526b510d378Slling 	} else if (vfs_optionisset(vfsp, MNTOPT_ATIME, NULL)) {
527b510d378Slling 		atime = B_TRUE;
528b510d378Slling 		do_atime = B_TRUE;
529b510d378Slling 	}
530fa9e4066Sahrens 
531*da6c28aaSamw 	/*
532*da6c28aaSamw 	 * nbmand is a special property.  It can only be changed at
533*da6c28aaSamw 	 * mount time.
534*da6c28aaSamw 	 *
535*da6c28aaSamw 	 * This is weird, but it is documented to only be changeable
536*da6c28aaSamw 	 * at mount time.
537*da6c28aaSamw 	 */
538*da6c28aaSamw 	if (vfs_optionisset(vfsp, MNTOPT_NONBMAND, NULL)) {
539*da6c28aaSamw 		nbmand = B_FALSE;
540*da6c28aaSamw 	} else if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL)) {
541*da6c28aaSamw 		nbmand = B_TRUE;
542*da6c28aaSamw 	} else {
543*da6c28aaSamw 		char osname[MAXNAMELEN];
544*da6c28aaSamw 
545*da6c28aaSamw 		dmu_objset_name(os, osname);
546*da6c28aaSamw 		if (error = dsl_prop_get_integer(osname, "nbmand", &nbmand,
547*da6c28aaSamw 		    NULL))
548*da6c28aaSamw 		return (error);
549*da6c28aaSamw 	}
550*da6c28aaSamw 
551fa9e4066Sahrens 	/*
552ea8dc4b6Seschrock 	 * Register property callbacks.
553ea8dc4b6Seschrock 	 *
554ea8dc4b6Seschrock 	 * It would probably be fine to just check for i/o error from
555ea8dc4b6Seschrock 	 * the first prop_register(), but I guess I like to go
556ea8dc4b6Seschrock 	 * overboard...
557fa9e4066Sahrens 	 */
558ea8dc4b6Seschrock 	ds = dmu_objset_ds(os);
559ea8dc4b6Seschrock 	error = dsl_prop_register(ds, "atime", atime_changed_cb, zfsvfs);
5607b55fa8eSck 	error = error ? error : dsl_prop_register(ds,
5617b55fa8eSck 	    "xattr", xattr_changed_cb, zfsvfs);
562ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
563ea8dc4b6Seschrock 	    "recordsize", blksz_changed_cb, zfsvfs);
564ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
565ea8dc4b6Seschrock 	    "readonly", readonly_changed_cb, zfsvfs);
566ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
567ea8dc4b6Seschrock 	    "devices", devices_changed_cb, zfsvfs);
568ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
569ea8dc4b6Seschrock 	    "setuid", setuid_changed_cb, zfsvfs);
570ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
571ea8dc4b6Seschrock 	    "exec", exec_changed_cb, zfsvfs);
572ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
573ea8dc4b6Seschrock 	    "snapdir", snapdir_changed_cb, zfsvfs);
574ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
575ea8dc4b6Seschrock 	    "aclmode", acl_mode_changed_cb, zfsvfs);
576ea8dc4b6Seschrock 	error = error ? error : dsl_prop_register(ds,
577ea8dc4b6Seschrock 	    "aclinherit", acl_inherit_changed_cb, zfsvfs);
578*da6c28aaSamw 	error = error ? error : dsl_prop_register(ds,
579*da6c28aaSamw 	    "vscan", vscan_changed_cb, zfsvfs);
580ea8dc4b6Seschrock 	if (error)
581ea8dc4b6Seschrock 		goto unregister;
582fa9e4066Sahrens 
583ea8dc4b6Seschrock 	/*
584ea8dc4b6Seschrock 	 * Invoke our callbacks to restore temporary mount options.
585ea8dc4b6Seschrock 	 */
586ea8dc4b6Seschrock 	if (do_readonly)
587ea8dc4b6Seschrock 		readonly_changed_cb(zfsvfs, readonly);
588ea8dc4b6Seschrock 	if (do_setuid)
589ea8dc4b6Seschrock 		setuid_changed_cb(zfsvfs, setuid);
590ea8dc4b6Seschrock 	if (do_exec)
591ea8dc4b6Seschrock 		exec_changed_cb(zfsvfs, exec);
592ea8dc4b6Seschrock 	if (do_devices)
593ea8dc4b6Seschrock 		devices_changed_cb(zfsvfs, devices);
5947b55fa8eSck 	if (do_xattr)
5957b55fa8eSck 		xattr_changed_cb(zfsvfs, xattr);
596b510d378Slling 	if (do_atime)
597b510d378Slling 		atime_changed_cb(zfsvfs, atime);
598fa9e4066Sahrens 
599*da6c28aaSamw 	nbmand_changed_cb(zfsvfs, nbmand);
600*da6c28aaSamw 
601ea8dc4b6Seschrock 	return (0);
602fa9e4066Sahrens 
603ea8dc4b6Seschrock unregister:
604fa9e4066Sahrens 	/*
605ea8dc4b6Seschrock 	 * We may attempt to unregister some callbacks that are not
606ea8dc4b6Seschrock 	 * registered, but this is OK; it will simply return ENOMSG,
607ea8dc4b6Seschrock 	 * which we will ignore.
608fa9e4066Sahrens 	 */
609ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "atime", atime_changed_cb, zfsvfs);
6107b55fa8eSck 	(void) dsl_prop_unregister(ds, "xattr", xattr_changed_cb, zfsvfs);
611ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "recordsize", blksz_changed_cb, zfsvfs);
612ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "readonly", readonly_changed_cb, zfsvfs);
613ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "devices", devices_changed_cb, zfsvfs);
614ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "setuid", setuid_changed_cb, zfsvfs);
615ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "exec", exec_changed_cb, zfsvfs);
616ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb, zfsvfs);
617ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb, zfsvfs);
618ea8dc4b6Seschrock 	(void) dsl_prop_unregister(ds, "aclinherit", acl_inherit_changed_cb,
619ea8dc4b6Seschrock 	    zfsvfs);
620*da6c28aaSamw 	(void) dsl_prop_unregister(ds, "vscan", vscan_changed_cb, zfsvfs);
621ea8dc4b6Seschrock 	return (error);
622ea8dc4b6Seschrock 
623ea8dc4b6Seschrock }
624ea8dc4b6Seschrock 
625f18faf3fSek static int
626f18faf3fSek zfsvfs_setup(zfsvfs_t *zfsvfs, boolean_t mounting)
627f18faf3fSek {
628f18faf3fSek 	uint_t readonly;
629f18faf3fSek 	int error;
630f18faf3fSek 
631f18faf3fSek 	error = zfs_register_callbacks(zfsvfs->z_vfs);
632f18faf3fSek 	if (error)
633f18faf3fSek 		return (error);
634f18faf3fSek 
635f18faf3fSek 	/*
636f18faf3fSek 	 * Set the objset user_ptr to track its zfsvfs.
637f18faf3fSek 	 */
638f18faf3fSek 	mutex_enter(&zfsvfs->z_os->os->os_user_ptr_lock);
639f18faf3fSek 	dmu_objset_set_user(zfsvfs->z_os, zfsvfs);
640f18faf3fSek 	mutex_exit(&zfsvfs->z_os->os->os_user_ptr_lock);
641f18faf3fSek 
642f18faf3fSek 	/*
643f18faf3fSek 	 * If we are not mounting (ie: online recv), then we don't
644f18faf3fSek 	 * have to worry about replaying the log as we blocked all
645f18faf3fSek 	 * operations out since we closed the ZIL.
646f18faf3fSek 	 */
647f18faf3fSek 	if (mounting) {
648f18faf3fSek 		/*
649f18faf3fSek 		 * During replay we remove the read only flag to
650f18faf3fSek 		 * allow replays to succeed.
651f18faf3fSek 		 */
652f18faf3fSek 		readonly = zfsvfs->z_vfs->vfs_flag & VFS_RDONLY;
653f18faf3fSek 		if (readonly != 0)
654f18faf3fSek 			zfsvfs->z_vfs->vfs_flag &= ~VFS_RDONLY;
655f18faf3fSek 		else
656f18faf3fSek 			zfs_unlinked_drain(zfsvfs);
657f18faf3fSek 
658f18faf3fSek 		/*
659f18faf3fSek 		 * Parse and replay the intent log.
660f18faf3fSek 		 *
661f18faf3fSek 		 * Because of ziltest, this must be done after
662f18faf3fSek 		 * zfs_unlinked_drain().  (Further note: ziltest doesn't
663f18faf3fSek 		 * use readonly mounts, where zfs_unlinked_drain() isn't
664f18faf3fSek 		 * called.)  This is because ziltest causes spa_sync()
665f18faf3fSek 		 * to think it's committed, but actually it is not, so
666f18faf3fSek 		 * the intent log contains many txg's worth of changes.
667f18faf3fSek 		 *
668f18faf3fSek 		 * In particular, if object N is in the unlinked set in
669f18faf3fSek 		 * the last txg to actually sync, then it could be
670f18faf3fSek 		 * actually freed in a later txg and then reallocated in
671f18faf3fSek 		 * a yet later txg.  This would write a "create object
672f18faf3fSek 		 * N" record to the intent log.  Normally, this would be
673f18faf3fSek 		 * fine because the spa_sync() would have written out
674f18faf3fSek 		 * the fact that object N is free, before we could write
675f18faf3fSek 		 * the "create object N" intent log record.
676f18faf3fSek 		 *
677f18faf3fSek 		 * But when we are in ziltest mode, we advance the "open
678f18faf3fSek 		 * txg" without actually spa_sync()-ing the changes to
679f18faf3fSek 		 * disk.  So we would see that object N is still
680f18faf3fSek 		 * allocated and in the unlinked set, and there is an
681f18faf3fSek 		 * intent log record saying to allocate it.
682f18faf3fSek 		 */
683f18faf3fSek 		zil_replay(zfsvfs->z_os, zfsvfs, &zfsvfs->z_assign,
684f18faf3fSek 		    zfs_replay_vector);
685f18faf3fSek 
686f18faf3fSek 		zfsvfs->z_vfs->vfs_flag |= readonly; /* restore readonly bit */
687f18faf3fSek 	}
688f18faf3fSek 
689f18faf3fSek 	if (!zil_disable)
690f18faf3fSek 		zfsvfs->z_log = zil_open(zfsvfs->z_os, zfs_get_data);
691f18faf3fSek 
692f18faf3fSek 	return (0);
693f18faf3fSek }
694f18faf3fSek 
695ea8dc4b6Seschrock static int
696ea8dc4b6Seschrock zfs_domount(vfs_t *vfsp, char *osname, cred_t *cr)
697ea8dc4b6Seschrock {
698ea8dc4b6Seschrock 	dev_t mount_dev;
699ea8dc4b6Seschrock 	uint64_t recordsize, readonly;
700ea8dc4b6Seschrock 	int error = 0;
701ea8dc4b6Seschrock 	int mode;
702ea8dc4b6Seschrock 	zfsvfs_t *zfsvfs;
703ea8dc4b6Seschrock 	znode_t *zp = NULL;
704ea8dc4b6Seschrock 
705ea8dc4b6Seschrock 	ASSERT(vfsp);
706ea8dc4b6Seschrock 	ASSERT(osname);
707fa9e4066Sahrens 
708fa9e4066Sahrens 	/*
709fa9e4066Sahrens 	 * Initialize the zfs-specific filesystem structure.
710fa9e4066Sahrens 	 * Should probably make this a kmem cache, shuffle fields,
711ea8dc4b6Seschrock 	 * and just bzero up to z_hold_mtx[].
712fa9e4066Sahrens 	 */
713fa9e4066Sahrens 	zfsvfs = kmem_zalloc(sizeof (zfsvfs_t), KM_SLEEP);
714fa9e4066Sahrens 	zfsvfs->z_vfs = vfsp;
715fa9e4066Sahrens 	zfsvfs->z_parent = zfsvfs;
716fa9e4066Sahrens 	zfsvfs->z_assign = TXG_NOWAIT;
717fa9e4066Sahrens 	zfsvfs->z_max_blksz = SPA_MAXBLOCKSIZE;
718a0965f35Sbonwick 	zfsvfs->z_show_ctldir = ZFS_SNAPDIR_VISIBLE;
719fa9e4066Sahrens 
720fa9e4066Sahrens 	mutex_init(&zfsvfs->z_znodes_lock, NULL, MUTEX_DEFAULT, NULL);
721fa9e4066Sahrens 	list_create(&zfsvfs->z_all_znodes, sizeof (znode_t),
722fa9e4066Sahrens 	    offsetof(znode_t, z_link_node));
723f18faf3fSek 	rrw_init(&zfsvfs->z_teardown_lock);
724f18faf3fSek 	rw_init(&zfsvfs->z_teardown_inactive_lock, NULL, RW_DEFAULT, NULL);
725fa9e4066Sahrens 
726ea8dc4b6Seschrock 	/* Initialize the generic filesystem structure. */
727fa9e4066Sahrens 	vfsp->vfs_bcount = 0;
728fa9e4066Sahrens 	vfsp->vfs_data = NULL;
729fa9e4066Sahrens 
730ea8dc4b6Seschrock 	if (zfs_create_unique_device(&mount_dev) == -1) {
731ea8dc4b6Seschrock 		error = ENODEV;
732ea8dc4b6Seschrock 		goto out;
733ea8dc4b6Seschrock 	}
734fa9e4066Sahrens 	ASSERT(vfs_devismounted(mount_dev) == 0);
735fa9e4066Sahrens 
736ea8dc4b6Seschrock 	if (error = dsl_prop_get_integer(osname, "recordsize", &recordsize,
737ea8dc4b6Seschrock 	    NULL))
738ea8dc4b6Seschrock 		goto out;
739fa9e4066Sahrens 
740fa9e4066Sahrens 	vfsp->vfs_dev = mount_dev;
741fa9e4066Sahrens 	vfsp->vfs_fstype = zfsfstype;
742fa9e4066Sahrens 	vfsp->vfs_bsize = recordsize;
743fa9e4066Sahrens 	vfsp->vfs_flag |= VFS_NOTRUNC;
744fa9e4066Sahrens 	vfsp->vfs_data = zfsvfs;
745fa9e4066Sahrens 
746ea8dc4b6Seschrock 	if (error = dsl_prop_get_integer(osname, "readonly", &readonly, NULL))
747fa9e4066Sahrens 		goto out;
748fa9e4066Sahrens 
749fa9e4066Sahrens 	if (readonly)
750fa9e4066Sahrens 		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
751fa9e4066Sahrens 	else
752fa9e4066Sahrens 		mode = DS_MODE_PRIMARY;
753fa9e4066Sahrens 
754fa9e4066Sahrens 	error = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
755fa9e4066Sahrens 	if (error == EROFS) {
756fa9e4066Sahrens 		mode = DS_MODE_PRIMARY | DS_MODE_READONLY;
757fa9e4066Sahrens 		error = dmu_objset_open(osname, DMU_OST_ZFS, mode,
758fa9e4066Sahrens 		    &zfsvfs->z_os);
759fa9e4066Sahrens 	}
760fa9e4066Sahrens 
761fa9e4066Sahrens 	if (error)
762fa9e4066Sahrens 		goto out;
763fa9e4066Sahrens 
764fa9e4066Sahrens 	if (error = zfs_init_fs(zfsvfs, &zp, cr))
765fa9e4066Sahrens 		goto out;
766fa9e4066Sahrens 
767ea8dc4b6Seschrock 	/* The call to zfs_init_fs leaves the vnode held, release it here. */
768ea8dc4b6Seschrock 	VN_RELE(ZTOV(zp));
769ea8dc4b6Seschrock 
770*da6c28aaSamw 	/*
771*da6c28aaSamw 	 * Set features for file system.
772*da6c28aaSamw 	 */
773*da6c28aaSamw 	zfsvfs->z_use_fuids = USE_FUIDS(zfsvfs->z_version, zfsvfs->z_os);
774*da6c28aaSamw 	if (zfsvfs->z_use_fuids) {
775*da6c28aaSamw 		vfs_set_feature(vfsp, VFSFT_XVATTR);
776*da6c28aaSamw 		vfs_set_feature(vfsp, VFSFT_ACEMASKONACCESS);
777*da6c28aaSamw 		vfs_set_feature(vfsp, VFSFT_ACLONCREATE);
778*da6c28aaSamw 	}
779*da6c28aaSamw 
780*da6c28aaSamw 	/*
781*da6c28aaSamw 	 * Set normalization regardless of whether or not the object
782*da6c28aaSamw 	 * set is a snapshot.  Snapshots and clones need to have
783*da6c28aaSamw 	 * identical normalization as did the file system they
784*da6c28aaSamw 	 * originated from.
785*da6c28aaSamw 	 */
786*da6c28aaSamw 	if ((error = zfs_normalization_set(osname, zfsvfs)) != 0)
787*da6c28aaSamw 		goto out;
788*da6c28aaSamw 
789ea8dc4b6Seschrock 	if (dmu_objset_is_snapshot(zfsvfs->z_os)) {
790*da6c28aaSamw 		uint64_t pval;
7917b55fa8eSck 
792fa9e4066Sahrens 		ASSERT(mode & DS_MODE_READONLY);
793fa9e4066Sahrens 		atime_changed_cb(zfsvfs, B_FALSE);
794fa9e4066Sahrens 		readonly_changed_cb(zfsvfs, B_TRUE);
795*da6c28aaSamw 		if (error = dsl_prop_get_integer(osname, "xattr", &pval, NULL))
7967b55fa8eSck 			goto out;
797*da6c28aaSamw 		xattr_changed_cb(zfsvfs, pval);
798fa9e4066Sahrens 		zfsvfs->z_issnap = B_TRUE;
799fa9e4066Sahrens 	} else {
800f18faf3fSek 		error = zfsvfs_setup(zfsvfs, B_TRUE);
801ea8dc4b6Seschrock 	}
802fa9e4066Sahrens 
803ea8dc4b6Seschrock 	if (!zfsvfs->z_issnap)
804ea8dc4b6Seschrock 		zfsctl_create(zfsvfs);
805ea8dc4b6Seschrock out:
806ea8dc4b6Seschrock 	if (error) {
807ea8dc4b6Seschrock 		if (zfsvfs->z_os)
808ea8dc4b6Seschrock 			dmu_objset_close(zfsvfs->z_os);
809c25056deSgw 		mutex_destroy(&zfsvfs->z_znodes_lock);
810c25056deSgw 		list_destroy(&zfsvfs->z_all_znodes);
811f18faf3fSek 		rrw_destroy(&zfsvfs->z_teardown_lock);
812f18faf3fSek 		rw_destroy(&zfsvfs->z_teardown_inactive_lock);
813ea8dc4b6Seschrock 		kmem_free(zfsvfs, sizeof (zfsvfs_t));
814ea8dc4b6Seschrock 	} else {
815ea8dc4b6Seschrock 		atomic_add_32(&zfs_active_fs_count, 1);
816ea8dc4b6Seschrock 	}
817fa9e4066Sahrens 
818ea8dc4b6Seschrock 	return (error);
819ea8dc4b6Seschrock }
820ea8dc4b6Seschrock 
821ea8dc4b6Seschrock void
822ea8dc4b6Seschrock zfs_unregister_callbacks(zfsvfs_t *zfsvfs)
823ea8dc4b6Seschrock {
824ea8dc4b6Seschrock 	objset_t *os = zfsvfs->z_os;
825ea8dc4b6Seschrock 	struct dsl_dataset *ds;
826ea8dc4b6Seschrock 
827ea8dc4b6Seschrock 	/*
828ea8dc4b6Seschrock 	 * Unregister properties.
829ea8dc4b6Seschrock 	 */
830ea8dc4b6Seschrock 	if (!dmu_objset_is_snapshot(os)) {
831fa9e4066Sahrens 		ds = dmu_objset_ds(os);
832ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "atime", atime_changed_cb,
833fa9e4066Sahrens 		    zfsvfs) == 0);
834fa9e4066Sahrens 
8357b55fa8eSck 		VERIFY(dsl_prop_unregister(ds, "xattr", xattr_changed_cb,
8367b55fa8eSck 		    zfsvfs) == 0);
8377b55fa8eSck 
838ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "recordsize", blksz_changed_cb,
839fa9e4066Sahrens 		    zfsvfs) == 0);
840fa9e4066Sahrens 
841ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "readonly", readonly_changed_cb,
842fa9e4066Sahrens 		    zfsvfs) == 0);
843fa9e4066Sahrens 
844ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "devices", devices_changed_cb,
845fa9e4066Sahrens 		    zfsvfs) == 0);
846fa9e4066Sahrens 
847ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "setuid", setuid_changed_cb,
848fa9e4066Sahrens 		    zfsvfs) == 0);
849fa9e4066Sahrens 
850ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "exec", exec_changed_cb,
851fa9e4066Sahrens 		    zfsvfs) == 0);
852fa9e4066Sahrens 
853ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "snapdir", snapdir_changed_cb,
854fa9e4066Sahrens 		    zfsvfs) == 0);
855fa9e4066Sahrens 
856ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "aclmode", acl_mode_changed_cb,
857fa9e4066Sahrens 		    zfsvfs) == 0);
858fa9e4066Sahrens 
859ea8dc4b6Seschrock 		VERIFY(dsl_prop_unregister(ds, "aclinherit",
860fa9e4066Sahrens 		    acl_inherit_changed_cb, zfsvfs) == 0);
861*da6c28aaSamw 
862*da6c28aaSamw 		VERIFY(dsl_prop_unregister(ds, "vscan",
863*da6c28aaSamw 		    vscan_changed_cb, zfsvfs) == 0);
864ea8dc4b6Seschrock 	}
865ea8dc4b6Seschrock }
866fa9e4066Sahrens 
/*
 * Convert a decimal digit string to a uint64_t integer.
 *
 * On success, stores the value in *objnum and returns 0.  Returns EINVAL,
 * leaving *objnum untouched, if the string contains anything other than
 * the digits '0'-'9' or if the value does not fit in 64 bits.  An empty
 * string converts to 0.
 */
static int
str_to_uint64(char *str, uint64_t *objnum)
{
	const uint64_t max = ~(uint64_t)0;
	uint64_t num = 0;

	while (*str != '\0') {
		uint64_t digit;

		if (*str < '0' || *str > '9')
			return (EINVAL);
		digit = (uint64_t)(*str++ - '0');

		/*
		 * Reject the input rather than wrapping: an overflowed
		 * number would silently name a different object.
		 */
		if (num > (max - digit) / 10)
			return (EINVAL);

		num = num * 10 + digit;
	}

	*objnum = num;
	return (0);
}
885b1b8ab34Slling 
/*
 * The boot loader hands us a boot path of the form
 * "rootpool-name/root-filesystem-object-number".  Translate it into the
 * dataset name "rootpool-name/root-filesystem-name", written to 'outpath'.
 *
 * A path with no '/' is copied to 'outpath' as is (just the pool name).
 * Returns EINVAL for an empty path or one that begins with '/'; otherwise
 * returns 0 or the error from str_to_uint64() / dsl_dsobj_to_dsname().
 * 'bpath' is modified temporarily but restored before returning.
 */
static int
parse_bootpath(char *bpath, char *outpath)
{
	char *sep;
	uint64_t dsobj;
	int err;

	if (bpath[0] == '\0' || bpath[0] == '/')
		return (EINVAL);

	sep = strchr(bpath, '/');
	if (sep == NULL) {
		/* no '/', so the whole thing is the pool name */
		(void) strcpy(outpath, bpath);
		return (0);
	}

	err = str_to_uint64(sep + 1, &dsobj);
	if (err != 0)
		return (err);

	/* Cut the string at the '/' so that bpath names only the pool. */
	*sep = '\0';
	err = dsl_dsobj_to_dsname(bpath, dsobj, outpath);
	*sep = '/';

	return (err);
}
918b1b8ab34Slling 
919ea8dc4b6Seschrock static int
920ea8dc4b6Seschrock zfs_mountroot(vfs_t *vfsp, enum whymountroot why)
921ea8dc4b6Seschrock {
922ea8dc4b6Seschrock 	int error = 0;
923ea8dc4b6Seschrock 	int ret = 0;
924ea8dc4b6Seschrock 	static int zfsrootdone = 0;
925ea8dc4b6Seschrock 	zfsvfs_t *zfsvfs = NULL;
926ea8dc4b6Seschrock 	znode_t *zp = NULL;
927ea8dc4b6Seschrock 	vnode_t *vp = NULL;
928b1b8ab34Slling 	char *zfs_bootpath;
929ea8dc4b6Seschrock 
930ea8dc4b6Seschrock 	ASSERT(vfsp);
931ea8dc4b6Seschrock 
932ea8dc4b6Seschrock 	/*
933b1b8ab34Slling 	 * The filesystem that we mount as root is defined in the
934b1b8ab34Slling 	 * "zfs-bootfs" property.
935ea8dc4b6Seschrock 	 */
936ea8dc4b6Seschrock 	if (why == ROOT_INIT) {
937ea8dc4b6Seschrock 		if (zfsrootdone++)
938ea8dc4b6Seschrock 			return (EBUSY);
939fa9e4066Sahrens 
940b1b8ab34Slling 		if (ddi_prop_lookup_string(DDI_DEV_T_ANY, ddi_root_node(),
941b1b8ab34Slling 		    DDI_PROP_DONTPASS, "zfs-bootfs", &zfs_bootpath) !=
942b1b8ab34Slling 		    DDI_SUCCESS)
943b1b8ab34Slling 			return (EIO);
944b1b8ab34Slling 
945b1b8ab34Slling 		error = parse_bootpath(zfs_bootpath, rootfs.bo_name);
946b1b8ab34Slling 		ddi_prop_free(zfs_bootpath);
947b1b8ab34Slling 
948b1b8ab34Slling 		if (error)
949b1b8ab34Slling 			return (error);
950fa9e4066Sahrens 
951ea8dc4b6Seschrock 		if (error = vfs_lock(vfsp))
952ea8dc4b6Seschrock 			return (error);
953fa9e4066Sahrens 
954b1b8ab34Slling 		if (error = zfs_domount(vfsp, rootfs.bo_name, CRED()))
955ea8dc4b6Seschrock 			goto out;
956ea8dc4b6Seschrock 
957ea8dc4b6Seschrock 		zfsvfs = (zfsvfs_t *)vfsp->vfs_data;
958ea8dc4b6Seschrock 		ASSERT(zfsvfs);
959ea8dc4b6Seschrock 		if (error = zfs_zget(zfsvfs, zfsvfs->z_root, &zp))
960ea8dc4b6Seschrock 			goto out;
961ea8dc4b6Seschrock 
962ea8dc4b6Seschrock 		vp = ZTOV(zp);
963ea8dc4b6Seschrock 		mutex_enter(&vp->v_lock);
964ea8dc4b6Seschrock 		vp->v_flag |= VROOT;
965ea8dc4b6Seschrock 		mutex_exit(&vp->v_lock);
966ea8dc4b6Seschrock 		rootvp = vp;
967ea8dc4b6Seschrock 
968ea8dc4b6Seschrock 		/*
969ea8dc4b6Seschrock 		 * The zfs_zget call above returns with a hold on vp, we release
970ea8dc4b6Seschrock 		 * it here.
971ea8dc4b6Seschrock 		 */
972fa9e4066Sahrens 		VN_RELE(vp);
973ea8dc4b6Seschrock 
974ea8dc4b6Seschrock 		/*
975ea8dc4b6Seschrock 		 * Mount root as readonly initially, it will be remouted
976ea8dc4b6Seschrock 		 * read/write by /lib/svc/method/fs-usr.
977ea8dc4b6Seschrock 		 */
978ea8dc4b6Seschrock 		readonly_changed_cb(vfsp->vfs_data, B_TRUE);
979ea8dc4b6Seschrock 		vfs_add((struct vnode *)0, vfsp,
980ea8dc4b6Seschrock 		    (vfsp->vfs_flag & VFS_RDONLY) ? MS_RDONLY : 0);
981ea8dc4b6Seschrock out:
982ea8dc4b6Seschrock 		vfs_unlock(vfsp);
983ea8dc4b6Seschrock 		ret = (error) ? error : 0;
984ea8dc4b6Seschrock 		return (ret);
985ea8dc4b6Seschrock 	} else if (why == ROOT_REMOUNT) {
986ea8dc4b6Seschrock 		readonly_changed_cb(vfsp->vfs_data, B_FALSE);
987ea8dc4b6Seschrock 		vfsp->vfs_flag |= VFS_REMOUNT;
988b510d378Slling 
989b510d378Slling 		/* refresh mount options */
990b510d378Slling 		zfs_unregister_callbacks(vfsp->vfs_data);
991b510d378Slling 		return (zfs_register_callbacks(vfsp));
992b510d378Slling 
993ea8dc4b6Seschrock 	} else if (why == ROOT_UNMOUNT) {
994ea8dc4b6Seschrock 		zfs_unregister_callbacks((zfsvfs_t *)vfsp->vfs_data);
995ea8dc4b6Seschrock 		(void) zfs_sync(vfsp, 0, 0);
996ea8dc4b6Seschrock 		return (0);
997ea8dc4b6Seschrock 	}
998ea8dc4b6Seschrock 
999ea8dc4b6Seschrock 	/*
1000ea8dc4b6Seschrock 	 * if "why" is equal to anything else other than ROOT_INIT,
1001ea8dc4b6Seschrock 	 * ROOT_REMOUNT, or ROOT_UNMOUNT, we do not support it.
1002ea8dc4b6Seschrock 	 */
1003ea8dc4b6Seschrock 	return (ENOTSUP);
1004ea8dc4b6Seschrock }
1005ea8dc4b6Seschrock 
1006ea8dc4b6Seschrock /*ARGSUSED*/
1007ea8dc4b6Seschrock static int
1008ea8dc4b6Seschrock zfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
1009ea8dc4b6Seschrock {
1010ea8dc4b6Seschrock 	char		*osname;
1011ea8dc4b6Seschrock 	pathname_t	spn;
1012ea8dc4b6Seschrock 	int		error = 0;
1013ea8dc4b6Seschrock 	uio_seg_t	fromspace = (uap->flags & MS_SYSSPACE) ?
1014b1b8ab34Slling 	    UIO_SYSSPACE : UIO_USERSPACE;
1015ea8dc4b6Seschrock 	int		canwrite;
1016ea8dc4b6Seschrock 
1017ea8dc4b6Seschrock 	if (mvp->v_type != VDIR)
1018ea8dc4b6Seschrock 		return (ENOTDIR);
1019ea8dc4b6Seschrock 
1020ea8dc4b6Seschrock 	mutex_enter(&mvp->v_lock);
1021ea8dc4b6Seschrock 	if ((uap->flags & MS_REMOUNT) == 0 &&
1022ea8dc4b6Seschrock 	    (uap->flags & MS_OVERLAY) == 0 &&
1023ea8dc4b6Seschrock 	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
1024ea8dc4b6Seschrock 		mutex_exit(&mvp->v_lock);
1025ea8dc4b6Seschrock 		return (EBUSY);
1026ea8dc4b6Seschrock 	}
1027ea8dc4b6Seschrock 	mutex_exit(&mvp->v_lock);
1028ea8dc4b6Seschrock 
1029ea8dc4b6Seschrock 	/*
1030ea8dc4b6Seschrock 	 * ZFS does not support passing unparsed data in via MS_DATA.
1031ea8dc4b6Seschrock 	 * Users should use the MS_OPTIONSTR interface; this means
1032ea8dc4b6Seschrock 	 * that all option parsing is already done and the options struct
1033ea8dc4b6Seschrock 	 * can be interrogated.
1034ea8dc4b6Seschrock 	 */
1035ea8dc4b6Seschrock 	if ((uap->flags & MS_DATA) && uap->datalen > 0)
1036ea8dc4b6Seschrock 		return (EINVAL);
1037ea8dc4b6Seschrock 
1038ea8dc4b6Seschrock 	/*
1039ea8dc4b6Seschrock 	 * Get the objset name (the "special" mount argument).
1040ea8dc4b6Seschrock 	 */
1041ea8dc4b6Seschrock 	if (error = pn_get(uap->spec, fromspace, &spn))
1042ea8dc4b6Seschrock 		return (error);
1043ea8dc4b6Seschrock 
1044ea8dc4b6Seschrock 	osname = spn.pn_path;
1045ea8dc4b6Seschrock 
1046ecd6cf80Smarks 	/*
1047ecd6cf80Smarks 	 * Check for mount privilege?
1048ecd6cf80Smarks 	 *
1049ecd6cf80Smarks 	 * If we don't have privilege then see if
1050ecd6cf80Smarks 	 * we have local permission to allow it
1051ecd6cf80Smarks 	 */
1052ecd6cf80Smarks 	error = secpolicy_fs_mount(cr, mvp, vfsp);
1053ecd6cf80Smarks 	if (error) {
1054ecd6cf80Smarks 		error = dsl_deleg_access(osname, ZFS_DELEG_PERM_MOUNT, cr);
1055ecd6cf80Smarks 		if (error == 0) {
1056ecd6cf80Smarks 			vattr_t		vattr;
1057ecd6cf80Smarks 
1058ecd6cf80Smarks 			/*
1059ecd6cf80Smarks 			 * Make sure user is the owner of the mount point
1060ecd6cf80Smarks 			 * or has sufficient privileges.
1061ecd6cf80Smarks 			 */
1062ecd6cf80Smarks 
1063ecd6cf80Smarks 			vattr.va_mask = AT_UID;
1064ecd6cf80Smarks 
1065*da6c28aaSamw 			if (error = VOP_GETATTR(mvp, &vattr, 0, cr, NULL)) {
1066ecd6cf80Smarks 				goto out;
1067ecd6cf80Smarks 			}
1068ecd6cf80Smarks 
1069ecd6cf80Smarks 			if (error = secpolicy_vnode_owner(cr, vattr.va_uid)) {
1070ecd6cf80Smarks 				goto out;
1071ecd6cf80Smarks 			}
1072ecd6cf80Smarks 
1073*da6c28aaSamw 			if (error = VOP_ACCESS(mvp, VWRITE, 0, cr, NULL)) {
1074ecd6cf80Smarks 				goto out;
1075ecd6cf80Smarks 			}
1076ecd6cf80Smarks 
1077ecd6cf80Smarks 			secpolicy_fs_mount_clearopts(cr, vfsp);
1078ecd6cf80Smarks 		} else {
1079ecd6cf80Smarks 			goto out;
1080ecd6cf80Smarks 		}
1081ecd6cf80Smarks 	}
1082ea8dc4b6Seschrock 
1083ea8dc4b6Seschrock 	/*
1084ea8dc4b6Seschrock 	 * Refuse to mount a filesystem if we are in a local zone and the
1085ea8dc4b6Seschrock 	 * dataset is not visible.
1086ea8dc4b6Seschrock 	 */
1087ea8dc4b6Seschrock 	if (!INGLOBALZONE(curproc) &&
1088ea8dc4b6Seschrock 	    (!zone_dataset_visible(osname, &canwrite) || !canwrite)) {
1089ea8dc4b6Seschrock 		error = EPERM;
1090ea8dc4b6Seschrock 		goto out;
1091ea8dc4b6Seschrock 	}
1092ea8dc4b6Seschrock 
1093b510d378Slling 	/*
1094b510d378Slling 	 * When doing a remount, we simply refresh our temporary properties
1095b510d378Slling 	 * according to those options set in the current VFS options.
1096b510d378Slling 	 */
1097b510d378Slling 	if (uap->flags & MS_REMOUNT) {
1098b510d378Slling 		/* refresh mount options */
1099b510d378Slling 		zfs_unregister_callbacks(vfsp->vfs_data);
1100b510d378Slling 		error = zfs_register_callbacks(vfsp);
1101b510d378Slling 		goto out;
1102b510d378Slling 	}
1103b510d378Slling 
1104ea8dc4b6Seschrock 	error = zfs_domount(vfsp, osname, cr);
1105ea8dc4b6Seschrock 
1106ea8dc4b6Seschrock out:
1107fa9e4066Sahrens 	pn_free(&spn);
1108fa9e4066Sahrens 	return (error);
1109fa9e4066Sahrens }
1110fa9e4066Sahrens 
1111fa9e4066Sahrens static int
1112fa9e4066Sahrens zfs_statvfs(vfs_t *vfsp, struct statvfs64 *statp)
1113fa9e4066Sahrens {
1114fa9e4066Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1115fa9e4066Sahrens 	dev32_t d32;
1116a2eea2e1Sahrens 	uint64_t refdbytes, availbytes, usedobjs, availobjs;
1117fa9e4066Sahrens 
1118fa9e4066Sahrens 	ZFS_ENTER(zfsvfs);
1119fa9e4066Sahrens 
1120a2eea2e1Sahrens 	dmu_objset_space(zfsvfs->z_os,
1121a2eea2e1Sahrens 	    &refdbytes, &availbytes, &usedobjs, &availobjs);
1122fa9e4066Sahrens 
1123fa9e4066Sahrens 	/*
1124fa9e4066Sahrens 	 * The underlying storage pool actually uses multiple block sizes.
1125fa9e4066Sahrens 	 * We report the fragsize as the smallest block size we support,
1126fa9e4066Sahrens 	 * and we report our blocksize as the filesystem's maximum blocksize.
1127fa9e4066Sahrens 	 */
1128fa9e4066Sahrens 	statp->f_frsize = 1UL << SPA_MINBLOCKSHIFT;
1129fa9e4066Sahrens 	statp->f_bsize = zfsvfs->z_max_blksz;
1130fa9e4066Sahrens 
1131fa9e4066Sahrens 	/*
1132fa9e4066Sahrens 	 * The following report "total" blocks of various kinds in the
1133fa9e4066Sahrens 	 * file system, but reported in terms of f_frsize - the
1134fa9e4066Sahrens 	 * "fragment" size.
1135fa9e4066Sahrens 	 */
1136fa9e4066Sahrens 
1137a2eea2e1Sahrens 	statp->f_blocks = (refdbytes + availbytes) >> SPA_MINBLOCKSHIFT;
1138a2eea2e1Sahrens 	statp->f_bfree = availbytes >> SPA_MINBLOCKSHIFT;
1139fa9e4066Sahrens 	statp->f_bavail = statp->f_bfree; /* no root reservation */
1140fa9e4066Sahrens 
1141fa9e4066Sahrens 	/*
1142fa9e4066Sahrens 	 * statvfs() should really be called statufs(), because it assumes
1143fa9e4066Sahrens 	 * static metadata.  ZFS doesn't preallocate files, so the best
1144fa9e4066Sahrens 	 * we can do is report the max that could possibly fit in f_files,
1145fa9e4066Sahrens 	 * and that minus the number actually used in f_ffree.
1146fa9e4066Sahrens 	 * For f_ffree, report the smaller of the number of object available
1147fa9e4066Sahrens 	 * and the number of blocks (each object will take at least a block).
1148fa9e4066Sahrens 	 */
1149a2eea2e1Sahrens 	statp->f_ffree = MIN(availobjs, statp->f_bfree);
1150fa9e4066Sahrens 	statp->f_favail = statp->f_ffree;	/* no "root reservation" */
1151a2eea2e1Sahrens 	statp->f_files = statp->f_ffree + usedobjs;
1152fa9e4066Sahrens 
1153fa9e4066Sahrens 	(void) cmpldev(&d32, vfsp->vfs_dev);
1154fa9e4066Sahrens 	statp->f_fsid = d32;
1155fa9e4066Sahrens 
1156fa9e4066Sahrens 	/*
1157fa9e4066Sahrens 	 * We're a zfs filesystem.
1158fa9e4066Sahrens 	 */
1159fa9e4066Sahrens 	(void) strcpy(statp->f_basetype, vfssw[vfsp->vfs_fstype].vsw_name);
1160fa9e4066Sahrens 
1161a5be7ebbSmarks 	statp->f_flag = vf_to_stf(vfsp->vfs_flag);
1162fa9e4066Sahrens 
1163fa9e4066Sahrens 	statp->f_namemax = ZFS_MAXNAMELEN;
1164fa9e4066Sahrens 
1165fa9e4066Sahrens 	/*
1166fa9e4066Sahrens 	 * We have all of 32 characters to stuff a string here.
1167fa9e4066Sahrens 	 * Is there anything useful we could/should provide?
1168fa9e4066Sahrens 	 */
1169fa9e4066Sahrens 	bzero(statp->f_fstr, sizeof (statp->f_fstr));
1170fa9e4066Sahrens 
1171fa9e4066Sahrens 	ZFS_EXIT(zfsvfs);
1172fa9e4066Sahrens 	return (0);
1173fa9e4066Sahrens }
1174fa9e4066Sahrens 
1175fa9e4066Sahrens static int
1176fa9e4066Sahrens zfs_root(vfs_t *vfsp, vnode_t **vpp)
1177fa9e4066Sahrens {
1178fa9e4066Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1179fa9e4066Sahrens 	znode_t *rootzp;
1180fa9e4066Sahrens 	int error;
1181fa9e4066Sahrens 
1182fa9e4066Sahrens 	ZFS_ENTER(zfsvfs);
1183fa9e4066Sahrens 
1184fa9e4066Sahrens 	error = zfs_zget(zfsvfs, zfsvfs->z_root, &rootzp);
1185fa9e4066Sahrens 	if (error == 0)
1186fa9e4066Sahrens 		*vpp = ZTOV(rootzp);
1187fa9e4066Sahrens 
1188fa9e4066Sahrens 	ZFS_EXIT(zfsvfs);
1189fa9e4066Sahrens 	return (error);
1190fa9e4066Sahrens }
1191fa9e4066Sahrens 
1192f18faf3fSek /*
1193f18faf3fSek  * Teardown the zfsvfs::z_os.
1194f18faf3fSek  *
1195f18faf3fSek  * Note, if 'unmounting' if FALSE, we return with the 'z_teardown_lock'
1196f18faf3fSek  * and 'z_teardown_inactive_lock' held.
1197f18faf3fSek  */
1198f18faf3fSek static int
1199f18faf3fSek zfsvfs_teardown(zfsvfs_t *zfsvfs, boolean_t unmounting)
1200f18faf3fSek {
1201f18faf3fSek 	objset_t *os = zfsvfs->z_os;
1202f18faf3fSek 	znode_t	*zp, *nextzp;
1203f18faf3fSek 	znode_t markerzp;
1204f18faf3fSek 
1205f18faf3fSek 	rrw_enter(&zfsvfs->z_teardown_lock, RW_WRITER, FTAG);
1206f18faf3fSek 
1207f18faf3fSek 	if (!unmounting) {
1208f18faf3fSek 		/*
1209f18faf3fSek 		 * We purge the parent filesystem's vfsp as the parent
1210f18faf3fSek 		 * filesystem and all of its snapshots have their vnode's
1211f18faf3fSek 		 * v_vfsp set to the parent's filesystem's vfsp.  Note,
1212f18faf3fSek 		 * 'z_parent' is self referential for non-snapshots.
1213f18faf3fSek 		 */
1214f18faf3fSek 		(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1215f18faf3fSek 	}
1216f18faf3fSek 
1217f18faf3fSek 	/*
1218f18faf3fSek 	 * Close the zil. NB: Can't close the zil while zfs_inactive
1219f18faf3fSek 	 * threads are blocked as zil_close can call zfs_inactive.
1220f18faf3fSek 	 */
1221f18faf3fSek 	if (zfsvfs->z_log) {
1222f18faf3fSek 		zil_close(zfsvfs->z_log);
1223f18faf3fSek 		zfsvfs->z_log = NULL;
1224f18faf3fSek 	}
1225f18faf3fSek 
1226f18faf3fSek 	rw_enter(&zfsvfs->z_teardown_inactive_lock, RW_WRITER);
1227f18faf3fSek 
1228f18faf3fSek 	/*
1229f18faf3fSek 	 * If we are not unmounting (ie: online recv) and someone already
1230f18faf3fSek 	 * unmounted this file system while we were doing the switcheroo,
1231f18faf3fSek 	 * or a reopen of z_os failed then just bail out now.
1232f18faf3fSek 	 */
1233f18faf3fSek 	if (!unmounting && (zfsvfs->z_unmounted || zfsvfs->z_os == NULL)) {
1234f18faf3fSek 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
1235f18faf3fSek 		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1236f18faf3fSek 		return (EIO);
1237f18faf3fSek 	}
1238f18faf3fSek 
1239f18faf3fSek 	/*
1240f18faf3fSek 	 * At this point there are no vops active, and any new vops will
1241f18faf3fSek 	 * fail with EIO since we have z_teardown_lock for writer (only
1242f18faf3fSek 	 * relavent for forced unmount).
1243f18faf3fSek 	 *
1244f18faf3fSek 	 * Release all holds on dbufs.
1245f18faf3fSek 	 * Note, the dmu can still callback via znode_pageout_func()
1246f18faf3fSek 	 * which can zfs_znode_free() the znode.  So we lock
1247f18faf3fSek 	 * z_all_znodes; search the list for a held dbuf; drop the lock
1248f18faf3fSek 	 * (we know zp can't disappear if we hold a dbuf lock) then
1249f18faf3fSek 	 * regrab the lock and restart.
1250f18faf3fSek 	 *
1251f18faf3fSek 	 * Since we have to restart the search after finding each held dbuf,
1252f18faf3fSek 	 * we do two things to speed up searching: we insert a dummy znode
1253f18faf3fSek 	 * ('markerzp') to detect the original tail of the list, and move
1254f18faf3fSek 	 * non-held znodes to the end of the list.  Once we hit 'markerzp',
1255f18faf3fSek 	 * we know we've looked at each znode and can break out.
1256f18faf3fSek 	 */
1257f18faf3fSek 	mutex_enter(&zfsvfs->z_znodes_lock);
1258f18faf3fSek 	list_insert_tail(&zfsvfs->z_all_znodes, &markerzp);
1259f18faf3fSek 	for (zp = list_head(&zfsvfs->z_all_znodes); zp != &markerzp;
1260f18faf3fSek 	    zp = nextzp) {
1261f18faf3fSek 		nextzp = list_next(&zfsvfs->z_all_znodes, zp);
1262f18faf3fSek 		if (zp->z_dbuf_held) {
1263f18faf3fSek 			/* dbufs should only be held when force unmounting */
1264f18faf3fSek 			zp->z_dbuf_held = 0;
1265f18faf3fSek 			mutex_exit(&zfsvfs->z_znodes_lock);
1266f18faf3fSek 			dmu_buf_rele(zp->z_dbuf, NULL);
1267f18faf3fSek 			/* Start again */
1268f18faf3fSek 			mutex_enter(&zfsvfs->z_znodes_lock);
1269f18faf3fSek 			nextzp = list_head(&zfsvfs->z_all_znodes);
1270f18faf3fSek 		} else {
1271f18faf3fSek 			list_remove(&zfsvfs->z_all_znodes, zp);
1272f18faf3fSek 			list_insert_tail(&zfsvfs->z_all_znodes, zp);
1273f18faf3fSek 		}
1274f18faf3fSek 	}
1275f18faf3fSek 	list_remove(&zfsvfs->z_all_znodes, &markerzp);
1276f18faf3fSek 	mutex_exit(&zfsvfs->z_znodes_lock);
1277f18faf3fSek 
1278f18faf3fSek 	/*
1279f18faf3fSek 	 * If we are unmounting, set the unmounted flag and let new vops
1280f18faf3fSek 	 * unblock.  zfs_inactive will have the unmounted behavior, and all
1281f18faf3fSek 	 * other vops will fail with EIO.
1282f18faf3fSek 	 */
1283f18faf3fSek 	if (unmounting) {
1284f18faf3fSek 		zfsvfs->z_unmounted = B_TRUE;
1285f18faf3fSek 		rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1286f18faf3fSek 		rw_exit(&zfsvfs->z_teardown_inactive_lock);
1287f18faf3fSek 	}
1288f18faf3fSek 
1289f18faf3fSek 	/*
1290f18faf3fSek 	 * z_os will be NULL if there was an error in attempting to reopen
1291f18faf3fSek 	 * zfsvfs, so just return as the properties had already been
1292f18faf3fSek 	 * unregistered and cached data had been evicted before.
1293f18faf3fSek 	 */
1294f18faf3fSek 	if (zfsvfs->z_os == NULL)
1295f18faf3fSek 		return (0);
1296f18faf3fSek 
1297f18faf3fSek 	/*
1298f18faf3fSek 	 * Unregister properties.
1299f18faf3fSek 	 */
1300f18faf3fSek 	zfs_unregister_callbacks(zfsvfs);
1301f18faf3fSek 
1302f18faf3fSek 	/*
1303f18faf3fSek 	 * Evict cached data
1304f18faf3fSek 	 */
1305f18faf3fSek 	(void) dmu_objset_evict_dbufs(os);
1306f18faf3fSek 
1307f18faf3fSek 	return (0);
1308f18faf3fSek }
1309f18faf3fSek 
1310fa9e4066Sahrens /*ARGSUSED*/
1311fa9e4066Sahrens static int
1312fa9e4066Sahrens zfs_umount(vfs_t *vfsp, int fflag, cred_t *cr)
1313fa9e4066Sahrens {
1314fa9e4066Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1315f18faf3fSek 	objset_t *os;
1316fa9e4066Sahrens 	int ret;
1317fa9e4066Sahrens 
1318ecd6cf80Smarks 	ret = secpolicy_fs_unmount(cr, vfsp);
1319ecd6cf80Smarks 	if (ret) {
1320ecd6cf80Smarks 		ret = dsl_deleg_access((char *)refstr_value(vfsp->vfs_resource),
1321ecd6cf80Smarks 		    ZFS_DELEG_PERM_MOUNT, cr);
1322ecd6cf80Smarks 		if (ret)
1323ecd6cf80Smarks 			return (ret);
1324ecd6cf80Smarks 	}
1325033f9833Sek 
1326ed097989Sek 	/*
1327ed097989Sek 	 * We purge the parent filesystem's vfsp as the parent filesystem
1328ed097989Sek 	 * and all of its snapshots have their vnode's v_vfsp set to the
1329ed097989Sek 	 * parent's filesystem's vfsp.  Note, 'z_parent' is self
1330ed097989Sek 	 * referential for non-snapshots.
1331ed097989Sek 	 */
1332ed097989Sek 	(void) dnlc_purge_vfsp(zfsvfs->z_parent->z_vfs, 0);
1333033f9833Sek 
1334fa9e4066Sahrens 	/*
1335fa9e4066Sahrens 	 * Unmount any snapshots mounted under .zfs before unmounting the
1336fa9e4066Sahrens 	 * dataset itself.
1337fa9e4066Sahrens 	 */
1338fa9e4066Sahrens 	if (zfsvfs->z_ctldir != NULL &&
1339ecd6cf80Smarks 	    (ret = zfsctl_umount_snapshots(vfsp, fflag, cr)) != 0) {
1340fa9e4066Sahrens 		return (ret);
1341ecd6cf80Smarks 	}
1342fa9e4066Sahrens 
134391ebeef5Sahrens 	if (!(fflag & MS_FORCE)) {
1344fa9e4066Sahrens 		/*
134591ebeef5Sahrens 		 * Check the number of active vnodes in the file system.
134691ebeef5Sahrens 		 * Our count is maintained in the vfs structure, but the
134791ebeef5Sahrens 		 * number is off by 1 to indicate a hold on the vfs
134891ebeef5Sahrens 		 * structure itself.
134991ebeef5Sahrens 		 *
135091ebeef5Sahrens 		 * The '.zfs' directory maintains a reference of its
135191ebeef5Sahrens 		 * own, and any active references underneath are
135291ebeef5Sahrens 		 * reflected in the vnode count.
1353fa9e4066Sahrens 		 */
135491ebeef5Sahrens 		if (zfsvfs->z_ctldir == NULL) {
135591ebeef5Sahrens 			if (vfsp->vfs_count > 1)
135691ebeef5Sahrens 				return (EBUSY);
135791ebeef5Sahrens 		} else {
135891ebeef5Sahrens 			if (vfsp->vfs_count > 2 ||
1359f18faf3fSek 			    zfsvfs->z_ctldir->v_count > 1)
136091ebeef5Sahrens 				return (EBUSY);
1361fa9e4066Sahrens 		}
136291ebeef5Sahrens 	}
1363fa9e4066Sahrens 
136491ebeef5Sahrens 	vfsp->vfs_flag |= VFS_UNMOUNTED;
136591ebeef5Sahrens 
1366f18faf3fSek 	VERIFY(zfsvfs_teardown(zfsvfs, B_TRUE) == 0);
1367f18faf3fSek 	os = zfsvfs->z_os;
136891ebeef5Sahrens 
136991ebeef5Sahrens 	/*
1370f18faf3fSek 	 * z_os will be NULL if there was an error in
1371f18faf3fSek 	 * attempting to reopen zfsvfs.
137291ebeef5Sahrens 	 */
1373f18faf3fSek 	if (os != NULL) {
1374f18faf3fSek 		/*
1375f18faf3fSek 		 * Unset the objset user_ptr.
1376f18faf3fSek 		 */
1377f18faf3fSek 		mutex_enter(&os->os->os_user_ptr_lock);
1378f18faf3fSek 		dmu_objset_set_user(os, NULL);
1379f18faf3fSek 		mutex_exit(&os->os->os_user_ptr_lock);
138091ebeef5Sahrens 
1381f18faf3fSek 		/*
1382f18faf3fSek 		 * Finally close the objset
1383f18faf3fSek 		 */
1384f18faf3fSek 		dmu_objset_close(os);
138591ebeef5Sahrens 	}
138691ebeef5Sahrens 
138791ebeef5Sahrens 	/*
138891ebeef5Sahrens 	 * We can now safely destroy the '.zfs' directory node.
138991ebeef5Sahrens 	 */
139091ebeef5Sahrens 	if (zfsvfs->z_ctldir != NULL)
139191ebeef5Sahrens 		zfsctl_destroy(zfsvfs);
1392fa9e4066Sahrens 
1393fa9e4066Sahrens 	return (0);
1394fa9e4066Sahrens }
1395fa9e4066Sahrens 
1396fa9e4066Sahrens static int
1397fa9e4066Sahrens zfs_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
1398fa9e4066Sahrens {
1399fa9e4066Sahrens 	zfsvfs_t	*zfsvfs = vfsp->vfs_data;
1400fa9e4066Sahrens 	znode_t		*zp;
1401fa9e4066Sahrens 	uint64_t	object = 0;
1402fa9e4066Sahrens 	uint64_t	fid_gen = 0;
1403fa9e4066Sahrens 	uint64_t	gen_mask;
1404fa9e4066Sahrens 	uint64_t	zp_gen;
1405fa9e4066Sahrens 	int 		i, err;
1406fa9e4066Sahrens 
1407fa9e4066Sahrens 	*vpp = NULL;
1408fa9e4066Sahrens 
1409fa9e4066Sahrens 	ZFS_ENTER(zfsvfs);
1410fa9e4066Sahrens 
1411fa9e4066Sahrens 	if (fidp->fid_len == LONG_FID_LEN) {
1412fa9e4066Sahrens 		zfid_long_t	*zlfid = (zfid_long_t *)fidp;
1413fa9e4066Sahrens 		uint64_t	objsetid = 0;
1414fa9e4066Sahrens 		uint64_t	setgen = 0;
1415fa9e4066Sahrens 
1416fa9e4066Sahrens 		for (i = 0; i < sizeof (zlfid->zf_setid); i++)
1417fa9e4066Sahrens 			objsetid |= ((uint64_t)zlfid->zf_setid[i]) << (8 * i);
1418fa9e4066Sahrens 
1419fa9e4066Sahrens 		for (i = 0; i < sizeof (zlfid->zf_setgen); i++)
1420fa9e4066Sahrens 			setgen |= ((uint64_t)zlfid->zf_setgen[i]) << (8 * i);
1421fa9e4066Sahrens 
1422fa9e4066Sahrens 		ZFS_EXIT(zfsvfs);
1423fa9e4066Sahrens 
1424fa9e4066Sahrens 		err = zfsctl_lookup_objset(vfsp, objsetid, &zfsvfs);
1425fa9e4066Sahrens 		if (err)
1426fa9e4066Sahrens 			return (EINVAL);
1427fa9e4066Sahrens 		ZFS_ENTER(zfsvfs);
1428fa9e4066Sahrens 	}
1429fa9e4066Sahrens 
1430fa9e4066Sahrens 	if (fidp->fid_len == SHORT_FID_LEN || fidp->fid_len == LONG_FID_LEN) {
1431fa9e4066Sahrens 		zfid_short_t	*zfid = (zfid_short_t *)fidp;
1432fa9e4066Sahrens 
1433fa9e4066Sahrens 		for (i = 0; i < sizeof (zfid->zf_object); i++)
1434fa9e4066Sahrens 			object |= ((uint64_t)zfid->zf_object[i]) << (8 * i);
1435fa9e4066Sahrens 
1436fa9e4066Sahrens 		for (i = 0; i < sizeof (zfid->zf_gen); i++)
1437fa9e4066Sahrens 			fid_gen |= ((uint64_t)zfid->zf_gen[i]) << (8 * i);
1438fa9e4066Sahrens 	} else {
1439fa9e4066Sahrens 		ZFS_EXIT(zfsvfs);
1440fa9e4066Sahrens 		return (EINVAL);
1441fa9e4066Sahrens 	}
1442fa9e4066Sahrens 
1443fa9e4066Sahrens 	/* A zero fid_gen means we are in the .zfs control directories */
1444fa9e4066Sahrens 	if (fid_gen == 0 &&
1445fa9e4066Sahrens 	    (object == ZFSCTL_INO_ROOT || object == ZFSCTL_INO_SNAPDIR)) {
1446fa9e4066Sahrens 		*vpp = zfsvfs->z_ctldir;
1447fa9e4066Sahrens 		ASSERT(*vpp != NULL);
1448fa9e4066Sahrens 		if (object == ZFSCTL_INO_SNAPDIR) {
1449fa9e4066Sahrens 			VERIFY(zfsctl_root_lookup(*vpp, "snapshot", vpp, NULL,
1450*da6c28aaSamw 			    0, NULL, NULL, NULL, NULL, NULL) == 0);
1451fa9e4066Sahrens 		} else {
1452fa9e4066Sahrens 			VN_HOLD(*vpp);
1453fa9e4066Sahrens 		}
1454fa9e4066Sahrens 		ZFS_EXIT(zfsvfs);
1455fa9e4066Sahrens 		return (0);
1456fa9e4066Sahrens 	}
1457fa9e4066Sahrens 
1458fa9e4066Sahrens 	gen_mask = -1ULL >> (64 - 8 * i);
1459fa9e4066Sahrens 
1460fa9e4066Sahrens 	dprintf("getting %llu [%u mask %llx]\n", object, fid_gen, gen_mask);
1461fa9e4066Sahrens 	if (err = zfs_zget(zfsvfs, object, &zp)) {
1462fa9e4066Sahrens 		ZFS_EXIT(zfsvfs);
1463fa9e4066Sahrens 		return (err);
1464fa9e4066Sahrens 	}
1465fa9e4066Sahrens 	zp_gen = zp->z_phys->zp_gen & gen_mask;
1466fa9e4066Sahrens 	if (zp_gen == 0)
1467fa9e4066Sahrens 		zp_gen = 1;
1468893a6d32Sahrens 	if (zp->z_unlinked || zp_gen != fid_gen) {
1469fa9e4066Sahrens 		dprintf("znode gen (%u) != fid gen (%u)\n", zp_gen, fid_gen);
1470fa9e4066Sahrens 		VN_RELE(ZTOV(zp));
1471fa9e4066Sahrens 		ZFS_EXIT(zfsvfs);
1472fa9e4066Sahrens 		return (EINVAL);
1473fa9e4066Sahrens 	}
1474fa9e4066Sahrens 
1475fa9e4066Sahrens 	*vpp = ZTOV(zp);
1476fa9e4066Sahrens 	ZFS_EXIT(zfsvfs);
1477fa9e4066Sahrens 	return (0);
1478fa9e4066Sahrens }
1479fa9e4066Sahrens 
1480f18faf3fSek /*
1481f18faf3fSek  * Block out VOPs and close zfsvfs_t::z_os
1482f18faf3fSek  *
1483f18faf3fSek  * Note, if successful, then we return with the 'z_teardown_lock' and
1484f18faf3fSek  * 'z_teardown_inactive_lock' write held.
1485f18faf3fSek  */
1486f18faf3fSek int
1487f18faf3fSek zfs_suspend_fs(zfsvfs_t *zfsvfs, char *name, int *mode)
1488f18faf3fSek {
1489f18faf3fSek 	int error;
1490f18faf3fSek 
1491f18faf3fSek 	if ((error = zfsvfs_teardown(zfsvfs, B_FALSE)) != 0)
1492f18faf3fSek 		return (error);
1493f18faf3fSek 
1494f18faf3fSek 	*mode = zfsvfs->z_os->os_mode;
1495f18faf3fSek 	dmu_objset_name(zfsvfs->z_os, name);
1496f18faf3fSek 	dmu_objset_close(zfsvfs->z_os);
1497f18faf3fSek 
1498f18faf3fSek 	return (0);
1499f18faf3fSek }
1500f18faf3fSek 
1501f18faf3fSek /*
1502f18faf3fSek  * Reopen zfsvfs_t::z_os and release VOPs.
1503f18faf3fSek  */
1504f18faf3fSek int
1505f18faf3fSek zfs_resume_fs(zfsvfs_t *zfsvfs, const char *osname, int mode)
1506f18faf3fSek {
1507f18faf3fSek 	int err;
1508f18faf3fSek 
1509f18faf3fSek 	ASSERT(RRW_WRITE_HELD(&zfsvfs->z_teardown_lock));
1510f18faf3fSek 	ASSERT(RW_WRITE_HELD(&zfsvfs->z_teardown_inactive_lock));
1511f18faf3fSek 
1512f18faf3fSek 	err = dmu_objset_open(osname, DMU_OST_ZFS, mode, &zfsvfs->z_os);
1513f18faf3fSek 	if (err) {
1514f18faf3fSek 		zfsvfs->z_os = NULL;
1515f18faf3fSek 	} else {
1516f18faf3fSek 		znode_t *zp;
1517f18faf3fSek 
1518f18faf3fSek 		VERIFY(zfsvfs_setup(zfsvfs, B_FALSE) == 0);
1519f18faf3fSek 
1520f18faf3fSek 		/*
1521f18faf3fSek 		 * Attempt to re-establish all the active znodes with
1522f18faf3fSek 		 * their dbufs.  If a zfs_rezget() fails, then we'll let
1523f18faf3fSek 		 * any potential callers discover that via ZFS_ENTER_VERIFY_VP
1524f18faf3fSek 		 * when they try to use their znode.
1525f18faf3fSek 		 */
1526f18faf3fSek 		mutex_enter(&zfsvfs->z_znodes_lock);
1527f18faf3fSek 		for (zp = list_head(&zfsvfs->z_all_znodes); zp;
1528f18faf3fSek 		    zp = list_next(&zfsvfs->z_all_znodes, zp)) {
1529f18faf3fSek 			ASSERT(!zp->z_dbuf_held);
1530f18faf3fSek 			(void) zfs_rezget(zp);
1531f18faf3fSek 		}
1532f18faf3fSek 		mutex_exit(&zfsvfs->z_znodes_lock);
1533f18faf3fSek 
1534f18faf3fSek 	}
1535f18faf3fSek 
1536f18faf3fSek 	/* release the VOPs */
1537f18faf3fSek 	rw_exit(&zfsvfs->z_teardown_inactive_lock);
1538f18faf3fSek 	rrw_exit(&zfsvfs->z_teardown_lock, FTAG);
1539f18faf3fSek 
1540f18faf3fSek 	if (err) {
1541f18faf3fSek 		/*
1542f18faf3fSek 		 * Since we couldn't reopen zfsvfs::z_os, force
1543f18faf3fSek 		 * unmount this file system.
1544f18faf3fSek 		 */
1545f18faf3fSek 		if (vn_vfswlock(zfsvfs->z_vfs->vfs_vnodecovered) == 0)
1546f18faf3fSek 			(void) dounmount(zfsvfs->z_vfs, MS_FORCE, CRED());
1547f18faf3fSek 	}
1548f18faf3fSek 	return (err);
1549f18faf3fSek }
1550f18faf3fSek 
1551fa9e4066Sahrens static void
1552fa9e4066Sahrens zfs_freevfs(vfs_t *vfsp)
1553fa9e4066Sahrens {
1554fa9e4066Sahrens 	zfsvfs_t *zfsvfs = vfsp->vfs_data;
1555c25056deSgw 	int i;
1556c25056deSgw 
1557c25056deSgw 	for (i = 0; i != ZFS_OBJ_MTX_SZ; i++)
1558c25056deSgw 		mutex_destroy(&zfsvfs->z_hold_mtx[i]);
1559fa9e4066Sahrens 
156091ebeef5Sahrens 	mutex_destroy(&zfsvfs->z_znodes_lock);
1561c25056deSgw 	list_destroy(&zfsvfs->z_all_znodes);
1562f18faf3fSek 	rrw_destroy(&zfsvfs->z_teardown_lock);
1563f18faf3fSek 	rw_destroy(&zfsvfs->z_teardown_inactive_lock);
1564*da6c28aaSamw 	zfs_fuid_destroy(zfsvfs);
1565fa9e4066Sahrens 	kmem_free(zfsvfs, sizeof (zfsvfs_t));
1566fa9e4066Sahrens 
1567fa9e4066Sahrens 	atomic_add_32(&zfs_active_fs_count, -1);
1568fa9e4066Sahrens }
1569fa9e4066Sahrens 
1570fa9e4066Sahrens /*
1571fa9e4066Sahrens  * VFS_INIT() initialization.  Note that there is no VFS_FINI(),
1572fa9e4066Sahrens  * so we can't safely do any non-idempotent initialization here.
1573fa9e4066Sahrens  * Leave that to zfs_init() and zfs_fini(), which are called
1574fa9e4066Sahrens  * from the module's _init() and _fini() entry points.
1575fa9e4066Sahrens  */
1576fa9e4066Sahrens /*ARGSUSED*/
1577fa9e4066Sahrens static int
1578fa9e4066Sahrens zfs_vfsinit(int fstype, char *name)
1579fa9e4066Sahrens {
1580fa9e4066Sahrens 	int error;
1581fa9e4066Sahrens 
1582fa9e4066Sahrens 	zfsfstype = fstype;
1583fa9e4066Sahrens 
1584fa9e4066Sahrens 	/*
1585fa9e4066Sahrens 	 * Setup vfsops and vnodeops tables.
1586fa9e4066Sahrens 	 */
1587fa9e4066Sahrens 	error = vfs_setfsops(fstype, zfs_vfsops_template, &zfs_vfsops);
1588fa9e4066Sahrens 	if (error != 0) {
1589fa9e4066Sahrens 		cmn_err(CE_WARN, "zfs: bad vfs ops template");
1590fa9e4066Sahrens 	}
1591fa9e4066Sahrens 
1592fa9e4066Sahrens 	error = zfs_create_op_tables();
1593fa9e4066Sahrens 	if (error) {
1594fa9e4066Sahrens 		zfs_remove_op_tables();
1595fa9e4066Sahrens 		cmn_err(CE_WARN, "zfs: bad vnode ops template");
1596fa9e4066Sahrens 		(void) vfs_freevfsops_by_type(zfsfstype);
1597fa9e4066Sahrens 		return (error);
1598fa9e4066Sahrens 	}
1599fa9e4066Sahrens 
1600fa9e4066Sahrens 	mutex_init(&zfs_dev_mtx, NULL, MUTEX_DEFAULT, NULL);
1601fa9e4066Sahrens 
1602fa9e4066Sahrens 	/*
1603a0965f35Sbonwick 	 * Unique major number for all zfs mounts.
1604a0965f35Sbonwick 	 * If we run out of 32-bit minors, we'll getudev() another major.
1605fa9e4066Sahrens 	 */
1606a0965f35Sbonwick 	zfs_major = ddi_name_to_major(ZFS_DRIVER);
1607a0965f35Sbonwick 	zfs_minor = ZFS_MIN_MINOR;
1608fa9e4066Sahrens 
1609fa9e4066Sahrens 	return (0);
1610fa9e4066Sahrens }
1611fa9e4066Sahrens 
/*
 * Module-load initialization for the ZPL.  Sets up, in this order:
 *   1. the .zfs directory structures (zfsctl_init), and
 *   2. the znode cache, vnode ops, etc. (zfs_znode_init).
 */
void
zfs_init(void)
{
	zfsctl_init();
	zfs_znode_init();
}
1625fa9e4066Sahrens 
/*
 * Counterpart of zfs_init(): tears down the .zfs control directory
 * state and then the znode layer.
 */
void
zfs_fini(void)
{
	zfsctl_fini();	/* .zfs directory structures */
	zfs_znode_fini();	/* znode layer */
}
1632fa9e4066Sahrens 
1633fa9e4066Sahrens int
1634fa9e4066Sahrens zfs_busy(void)
1635fa9e4066Sahrens {
1636fa9e4066Sahrens 	return (zfs_active_fs_count != 0);
1637fa9e4066Sahrens }
1638fa9e4066Sahrens 
1639e7437265Sahrens int
1640bd00f61bSrm zfs_get_version(objset_t *os, uint64_t *version)
1641e7437265Sahrens {
1642e7437265Sahrens 	int error;
1643e7437265Sahrens 
1644bd00f61bSrm 	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1, version);
1645e7437265Sahrens 	return (error);
1646e7437265Sahrens }
1647e7437265Sahrens 
1648e7437265Sahrens int
1649e7437265Sahrens zfs_set_version(const char *name, uint64_t newvers)
1650e7437265Sahrens {
1651e7437265Sahrens 	int error;
1652e7437265Sahrens 	objset_t *os;
1653e7437265Sahrens 	dmu_tx_t *tx;
1654e7437265Sahrens 	uint64_t curvers;
1655e7437265Sahrens 
1656e7437265Sahrens 	/*
1657e7437265Sahrens 	 * XXX for now, require that the filesystem be unmounted.  Would
1658e7437265Sahrens 	 * be nice to find the zfsvfs_t and just update that if
1659e7437265Sahrens 	 * possible.
1660e7437265Sahrens 	 */
1661e7437265Sahrens 
1662e7437265Sahrens 	if (newvers < ZPL_VERSION_INITIAL || newvers > ZPL_VERSION)
1663e7437265Sahrens 		return (EINVAL);
1664e7437265Sahrens 
1665e7437265Sahrens 	error = dmu_objset_open(name, DMU_OST_ZFS, DS_MODE_PRIMARY, &os);
1666e7437265Sahrens 	if (error)
1667e7437265Sahrens 		return (error);
1668e7437265Sahrens 
1669e7437265Sahrens 	error = zap_lookup(os, MASTER_NODE_OBJ, ZPL_VERSION_STR,
1670e7437265Sahrens 	    8, 1, &curvers);
1671e7437265Sahrens 	if (error)
1672e7437265Sahrens 		goto out;
1673e7437265Sahrens 	if (newvers < curvers) {
1674e7437265Sahrens 		error = EINVAL;
1675e7437265Sahrens 		goto out;
1676e7437265Sahrens 	}
1677e7437265Sahrens 
1678e7437265Sahrens 	tx = dmu_tx_create(os);
1679e7437265Sahrens 	dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, 0, ZPL_VERSION_STR);
1680e7437265Sahrens 	error = dmu_tx_assign(tx, TXG_WAIT);
1681e7437265Sahrens 	if (error) {
1682e7437265Sahrens 		dmu_tx_abort(tx);
1683e7437265Sahrens 		goto out;
1684e7437265Sahrens 	}
1685e7437265Sahrens 	error = zap_update(os, MASTER_NODE_OBJ, ZPL_VERSION_STR, 8, 1,
1686e7437265Sahrens 	    &newvers, tx);
1687e7437265Sahrens 
1688e7437265Sahrens 	spa_history_internal_log(LOG_DS_UPGRADE,
1689e7437265Sahrens 	    dmu_objset_spa(os), tx, CRED(),
1690e7437265Sahrens 	    "oldver=%llu newver=%llu dataset = %llu", curvers, newvers,
1691e7437265Sahrens 	    dmu_objset_id(os));
1692e7437265Sahrens 	dmu_tx_commit(tx);
1693e7437265Sahrens 
1694e7437265Sahrens out:
1695e7437265Sahrens 	dmu_objset_close(os);
1696e7437265Sahrens 	return (error);
1697e7437265Sahrens }
1698e7437265Sahrens 
1699fa9e4066Sahrens static vfsdef_t vfw = {
1700fa9e4066Sahrens 	VFSDEF_VERSION,
1701fa9e4066Sahrens 	MNTTYPE_ZFS,
1702fa9e4066Sahrens 	zfs_vfsinit,
1703*da6c28aaSamw 	VSW_HASPROTO|VSW_CANRWRO|VSW_CANREMOUNT|VSW_VOLATILEDEV|VSW_STATS|
1704*da6c28aaSamw 	    VSW_XID,
1705fa9e4066Sahrens 	&zfs_mntopts
1706fa9e4066Sahrens };
1707fa9e4066Sahrens 
1708fa9e4066Sahrens struct modlfs zfs_modlfs = {
1709e7437265Sahrens 	&mod_fsops, "ZFS filesystem version " SPA_VERSION_STRING, &vfw
1710fa9e4066Sahrens };
1711