1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1988, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2016 Joyent, Inc.
25 * Copyright 2016 Toomas Soome <tsoome@me.com>
26 * Copyright (c) 2016 by Delphix. All rights reserved.
27 * Copyright 2017 RackTop Systems.
28 * Copyright 2018 Nexenta Systems, Inc.
29 */
30
31/*	Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T	*/
32/*	  All Rights Reserved	*/
33
34/*
35 * University Copyright- Copyright (c) 1982, 1986, 1988
36 * The Regents of the University of California
37 * All Rights Reserved
38 *
39 * University Acknowledgment- Portions of this document are derived from
40 * software developed by the University of California, Berkeley, and its
41 * contributors.
42 */
43
44/*
45 * This file contains those functions from fs/vfs.c that can be
46 * used with relatively little change.  Functions that differ
47 * significantly from that are in other files.
48 */
49
50#include <sys/types.h>
51#include <sys/t_lock.h>
52#include <sys/param.h>
53#include <sys/errno.h>
54#include <sys/user.h>
55#include <sys/fstyp.h>
56#include <sys/kmem.h>
57#include <sys/systm.h>
58#include <sys/proc.h>
59#include <sys/mount.h>
60#include <sys/vfs.h>
61#include <sys/vfs_opreg.h>
62#include <sys/fem.h>
63#include <sys/mntent.h>
64#include <sys/stat.h>
65#include <sys/statvfs.h>
66#include <sys/statfs.h>
67#include <sys/cred.h>
68#include <sys/vnode.h>
69#include <sys/rwstlock.h>
70#include <sys/dnlc.h>
71#include <sys/file.h>
72#include <sys/time.h>
73#include <sys/atomic.h>
74#include <sys/cmn_err.h>
75#include <sys/buf.h>
76#include <sys/debug.h>
77#include <sys/vnode.h>
78#include <sys/ddi.h>
79#include <sys/pathname.h>
80#include <sys/poll.h>
81#include <sys/sunddi.h>
82#include <sys/sysmacros.h>
83#include <sys/zone.h>
84#include <sys/policy.h>
85#include <sys/attr.h>
86#include <fs/fs_subr.h>
87
88#include <libfksmbfs.h>
89
/*
 * Forward declarations for file-local helpers that are referenced
 * before their definitions.  The prototypes that are commented out
 * are not needed as forward declarations in this file.
 */
static void vfs_clearmntopt_nolock(mntopts_t *, const char *, int);
static void vfs_setmntopt_nolock(mntopts_t *, const char *,
    const char *, int, int);
static int  vfs_optionisset_nolock(const mntopts_t *, const char *, char **);
// static void vfs_freemnttab(struct vfs *);
static void vfs_freeopt(mntopt_t *);
static void vfs_swapopttbl_nolock(mntopts_t *, mntopts_t *);
static void vfs_swapopttbl(mntopts_t *, mntopts_t *);
static void vfs_copyopttbl_extend(const mntopts_t *, mntopts_t *, int);
// static void vfs_createopttbl_extend(mntopts_t *, const char *,
//    const mntopts_t *);
// static char **vfs_copycancelopt_extend(char **const, int);
static void vfs_freecancelopt(char **);
103
/*
 * VFS global data.
 */
vnode_t *rootdir;		/* pointer to root inode vnode. */
struct vfs *rootvfs = NULL;	/* pointer to root vfs; head of VFS list. */
static krwlock_t vfslist;
struct vfs	*zone_vfslist;	/* list of FS's mounted in zone */

/* from os/vfs_conf.c */
/*
 * nfstype is the upper bound used when validating an fstype index into
 * vfssw[] (see vfs_setfsops() and vfs_freevfsops_by_type()).  The array
 * is declared with 10 slots, but only the first five have initializers.
 */
const int nfstype = 5;
struct vfssw vfssw[10] = {
	{ "BADVFS" },				/* 0:invalid */
	{ "" },					/* reserved for loadable fs */
	{ "" },
	{ "" },
	{ "" },
};
121
/*
 * Table for generic options recognized in the VFS layer and acted
 * on at this level before parsing file system specific options.
 * The nosuid option is stronger than any of the devices and setuid
 * options, so those are canceled when nosuid is seen.
 *
 * All options which are added here need to be added to the
 * list of standard options in usr/src/cmd/fs.d/fslib.c as well.
 */
/*
 * VFS Mount options table
 *
 * Each <opt>_cancel array below is a NULL-terminated list of the option
 * names that are canceled by <opt>; the arrays are referenced from the
 * mntopts[] table that follows.
 */
static char *ro_cancel[] = { MNTOPT_RW, NULL };
static char *rw_cancel[] = { MNTOPT_RO, NULL };
static char *suid_cancel[] = { MNTOPT_NOSUID, NULL };
static char *nosuid_cancel[] = { MNTOPT_SUID, MNTOPT_DEVICES, MNTOPT_NODEVICES,
    MNTOPT_NOSETUID, MNTOPT_SETUID, NULL };
static char *devices_cancel[] = { MNTOPT_NODEVICES, NULL };
static char *nodevices_cancel[] = { MNTOPT_DEVICES, NULL };
static char *setuid_cancel[] = { MNTOPT_NOSETUID, NULL };
static char *nosetuid_cancel[] = { MNTOPT_SETUID, NULL };
static char *nbmand_cancel[] = { MNTOPT_NONBMAND, NULL };
static char *nonbmand_cancel[] = { MNTOPT_NBMAND, NULL };
static char *exec_cancel[] = { MNTOPT_NOEXEC, NULL };
static char *noexec_cancel[] = { MNTOPT_EXEC, NULL };
147
/*
 * Generic mount options table.  Every entry has five initializers; the
 * last one is always a null pointer here (presumably the option's
 * private data field -- confirm against the mntopt_t definition).
 */
static const mntopt_t mntopts[] = {
/*
 *	option name		cancel options		default arg	flags
 */
	{ MNTOPT_REMOUNT,	NULL,			NULL,
		MO_NODISPLAY, (void *)0 },
	{ MNTOPT_RO,		ro_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_RW,		rw_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_SUID,		suid_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NOSUID,	nosuid_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_DEVICES,	devices_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NODEVICES,	nodevices_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_SETUID,	setuid_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NOSETUID,	nosetuid_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_NBMAND,	nbmand_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NONBMAND,	nonbmand_cancel,	NULL,		0,
		(void *)0 },
	{ MNTOPT_EXEC,		exec_cancel,		NULL,		0,
		(void *)0 },
	{ MNTOPT_NOEXEC,	noexec_cancel,		NULL,		0,
		(void *)0 },
};

/*
 * Exported descriptor for the table above: the entry count followed by
 * a pointer to the first entry (const is cast away to fit mntopts_t).
 */
const mntopts_t vfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	(mntopt_t *)&mntopts[0]
};
184
185/*
186 * File system operation dispatch functions.
187 */
188
189int
190fsop_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
191{
192	return (*(vfsp)->vfs_op->vfs_mount)(vfsp, mvp, uap, cr);
193}
194
195int
196fsop_unmount(vfs_t *vfsp, int flag, cred_t *cr)
197{
198	return (*(vfsp)->vfs_op->vfs_unmount)(vfsp, flag, cr);
199}
200
201int
202fsop_root(vfs_t *vfsp, vnode_t **vpp)
203{
204	return ((*(vfsp)->vfs_op->vfs_root)(vfsp, vpp));
205}
206
207int
208fsop_statfs(vfs_t *vfsp, statvfs64_t *sp)
209{
210	return (*(vfsp)->vfs_op->vfs_statvfs)(vfsp, sp);
211}
212
213int
214fsop_sync(vfs_t *vfsp, short flag, cred_t *cr)
215{
216	return (*(vfsp)->vfs_op->vfs_sync)(vfsp, flag, cr);
217}
218
219int
220fsop_vget(vfs_t *vfsp, vnode_t **vpp, fid_t *fidp)
221{
222	return (*(vfsp)->vfs_op->vfs_vget)(vfsp, vpp, fidp);
223}
224
225int
226fsop_mountroot(vfs_t *vfsp, enum whymountroot reason)
227{
228	return (*(vfsp)->vfs_op->vfs_mountroot)(vfsp, reason);
229}
230
231void
232fsop_freefs(vfs_t *vfsp)
233{
234	(*(vfsp)->vfs_op->vfs_freevfs)(vfsp);
235}
236
237int
238fsop_vnstate(vfs_t *vfsp, vnode_t *vp, vntrans_t nstate)
239{
240	return ((*(vfsp)->vfs_op->vfs_vnstate)(vfsp, vp, nstate));
241}
242
243int
244fsop_sync_by_kind(int fstype, short flag, cred_t *cr)
245{
246	ASSERT((fstype >= 0) && (fstype < nfstype));
247
248	if (ALLOCATED_VFSSW(&vfssw[fstype]) && VFS_INSTALLED(&vfssw[fstype]))
249		return (*vfssw[fstype].vsw_vfsops.vfs_sync) (NULL, flag, cr);
250	else
251		return (ENOTSUP);
252}
253
254/*
255 * File system initialization.  vfs_setfsops() must be called from a file
256 * system's init routine.
257 */
258
/*
 * Fill in the vfsops vector "actual" from the caller-supplied
 * operation template by handing both, together with the translation
 * table below, to fs_build_vector().
 *
 * Each table entry names an operation, gives the offset of its slot
 * in vfsops_t, and supplies two fallback functions (presumably the
 * default and the error handler -- confirm against the definition of
 * fs_operation_trans_def_t).  The table ends with an all-NULL entry.
 *
 * The return value, and the count stored through unused_ops, are
 * whatever fs_build_vector() produces.
 */
static int
fs_copyfsops(const fs_operation_def_t *template, vfsops_t *actual,
    int *unused_ops)
{
	static const fs_operation_trans_def_t vfs_ops_table[] = {
		VFSNAME_MOUNT, offsetof(vfsops_t, vfs_mount),
			fs_nosys, fs_nosys,

		VFSNAME_UNMOUNT, offsetof(vfsops_t, vfs_unmount),
			fs_nosys, fs_nosys,

		VFSNAME_ROOT, offsetof(vfsops_t, vfs_root),
			fs_nosys, fs_nosys,

		VFSNAME_STATVFS, offsetof(vfsops_t, vfs_statvfs),
			fs_nosys, fs_nosys,

		/* sync falls back to fs_sync rather than fs_nosys */
		VFSNAME_SYNC, offsetof(vfsops_t, vfs_sync),
			(fs_generic_func_p) fs_sync,
			(fs_generic_func_p) fs_sync,	/* No errors allowed */

		VFSNAME_VGET, offsetof(vfsops_t, vfs_vget),
			fs_nosys, fs_nosys,

		VFSNAME_MOUNTROOT, offsetof(vfsops_t, vfs_mountroot),
			fs_nosys, fs_nosys,

		/* freevfs falls back to fs_freevfs rather than fs_nosys */
		VFSNAME_FREEVFS, offsetof(vfsops_t, vfs_freevfs),
			(fs_generic_func_p)(uintptr_t)fs_freevfs,
			(fs_generic_func_p)(uintptr_t)
			fs_freevfs,	/* Shouldn't fail */

		VFSNAME_VNSTATE, offsetof(vfsops_t, vfs_vnstate),
			(fs_generic_func_p)fs_nosys,
			(fs_generic_func_p)fs_nosys,

		/* table terminator */
		NULL, 0, NULL, NULL
	};

	return (fs_build_vector(actual, unused_ops, vfs_ops_table, template));
}
300
301/* zfs_boot_init() */
302
303int
304vfs_setfsops(int fstype, const fs_operation_def_t *template, vfsops_t **actual)
305{
306	int error;
307	int unused_ops;
308
309	/*
310	 * Verify that fstype refers to a valid fs.  Note that
311	 * 0 is valid since it's used to set "stray" ops.
312	 */
313	if ((fstype < 0) || (fstype >= nfstype))
314		return (EINVAL);
315
316	if (!ALLOCATED_VFSSW(&vfssw[fstype]))
317		return (EINVAL);
318
319	/* Set up the operations vector. */
320
321	error = fs_copyfsops(template, &vfssw[fstype].vsw_vfsops, &unused_ops);
322
323	if (error != 0)
324		return (error);
325
326	vfssw[fstype].vsw_flag |= VSW_INSTALLED;
327
328	if (actual != NULL)
329		*actual = &vfssw[fstype].vsw_vfsops;
330
331#if DEBUG
332	if (unused_ops != 0)
333		cmn_err(CE_WARN, "vfs_setfsops: %s: %d operations supplied "
334		    "but not used", vfssw[fstype].vsw_name, unused_ops);
335#endif
336
337	return (0);
338}
339
340int
341vfs_makefsops(const fs_operation_def_t *template, vfsops_t **actual)
342{
343	int error;
344	int unused_ops;
345
346	*actual = (vfsops_t *)kmem_alloc(sizeof (vfsops_t), KM_SLEEP);
347
348	error = fs_copyfsops(template, *actual, &unused_ops);
349	if (error != 0) {
350		kmem_free(*actual, sizeof (vfsops_t));
351		*actual = NULL;
352		return (error);
353	}
354
355	return (0);
356}
357
358/*
359 * Free a vfsops structure created as a result of vfs_makefsops().
360 * NOTE: For a vfsops structure initialized by vfs_setfsops(), use
361 * vfs_freevfsops_by_type().
362 */
363void
364vfs_freevfsops(vfsops_t *vfsops)
365{
366	kmem_free(vfsops, sizeof (vfsops_t));
367}
368
369/*
370 * Since the vfsops structure is part of the vfssw table and wasn't
371 * really allocated, we're not really freeing anything.  We keep
372 * the name for consistency with vfs_freevfsops().  We do, however,
373 * need to take care of a little bookkeeping.
374 * NOTE: For a vfsops structure created by vfs_setfsops(), use
375 * vfs_freevfsops_by_type().
376 */
377int
378vfs_freevfsops_by_type(int fstype)
379{
380
381	/* Verify that fstype refers to a loaded fs (and not fsid 0). */
382	if ((fstype <= 0) || (fstype >= nfstype))
383		return (EINVAL);
384
385	WLOCK_VFSSW();
386	if ((vfssw[fstype].vsw_flag & VSW_INSTALLED) == 0) {
387		WUNLOCK_VFSSW();
388		return (EINVAL);
389	}
390
391	vfssw[fstype].vsw_flag &= ~VSW_INSTALLED;
392	WUNLOCK_VFSSW();
393
394	return (0);
395}
396
397/* Support routines used to reference vfs_op */
398
399/* Set the operations vector for a vfs */
400void
401vfs_setops(vfs_t *vfsp, vfsops_t *vfsops)
402{
403
404	ASSERT(vfsp != NULL);
405	ASSERT(vfsops != NULL);
406
407	vfsp->vfs_op = vfsops;
408}
409
410/* Retrieve the operations vector for a vfs */
411vfsops_t *
412vfs_getops(vfs_t *vfsp)
413{
414
415	ASSERT(vfsp != NULL);
416
417	return (vfsp->vfs_op);
418}
419
420/*
421 * Returns non-zero (1) if the vfsops matches that of the vfs.
422 * Returns zero (0) if not.
423 */
424int
425vfs_matchops(vfs_t *vfsp, vfsops_t *vfsops)
426{
427	return (vfs_getops(vfsp) == vfsops);
428}
429
430/*
431 * Returns non-zero (1) if the file system has installed a non-default,
432 * non-error vfs_sync routine.  Returns zero (0) otherwise.
433 */
434int
435vfs_can_sync(vfs_t *vfsp)
436{
437	/* vfs_sync() routine is not the default/error function */
438	return (vfs_getops(vfsp)->vfs_sync != fs_sync);
439}
440
441/*
442 * Initialize a vfs structure.
443 */
444void
445vfs_init(vfs_t *vfsp, vfsops_t *op, void *data)
446{
447	/* Always do full init, like vfs_alloc() */
448	bzero(vfsp, sizeof (vfs_t));
449	vfsp->vfs_count = 0;
450	vfsp->vfs_next = vfsp;
451	vfsp->vfs_prev = vfsp;
452	vfsp->vfs_zone_next = vfsp;
453	vfsp->vfs_zone_prev = vfsp;
454	vfsp->vfs_lofi_id = 0;
455	sema_init(&vfsp->vfs_reflock, 1, NULL, SEMA_DEFAULT, NULL);
456	vfsimpl_setup(vfsp);
457	vfsp->vfs_data = (data);
458	vfs_setops((vfsp), (op));
459}
460
461/*
462 * Allocate and initialize the vfs implementation private data
463 * structure, vfs_impl_t.
464 */
465void
466vfsimpl_setup(vfs_t *vfsp)
467{
468	int i;
469
470	if (vfsp->vfs_implp != NULL) {
471		return;
472	}
473
474	vfsp->vfs_implp = kmem_alloc(sizeof (vfs_impl_t), KM_SLEEP);
475	/* Note that these are #define'd in vfs.h */
476	vfsp->vfs_vskap = NULL;
477	vfsp->vfs_fstypevsp = NULL;
478
479	/* Set size of counted array, then zero the array */
480	vfsp->vfs_featureset[0] = VFS_FEATURE_MAXSZ - 1;
481	for (i = 1; i <  VFS_FEATURE_MAXSZ; i++) {
482		vfsp->vfs_featureset[i] = 0;
483	}
484}
485
486/*
487 * Release the vfs_impl_t structure, if it exists. Some unbundled
488 * filesystems may not use the newer version of vfs and thus
489 * would not contain this implementation private data structure.
490 */
491void
492vfsimpl_teardown(vfs_t *vfsp)
493{
494	vfs_impl_t	*vip = vfsp->vfs_implp;
495
496	if (vip == NULL)
497		return;
498
499	kmem_free(vfsp->vfs_implp, sizeof (vfs_impl_t));
500	vfsp->vfs_implp = NULL;
501}
502
503/*
504 * VFS system calls: mount, umount, syssync, statfs, fstatfs, statvfs,
505 * fstatvfs, and sysfs moved to common/syscall.
506 */
507
508// vfs_sync, sync
509
510/*
511 * External routines.
512 */
513
krwlock_t vfssw_lock;	/* lock accesses to vfssw */

/*
 * Lock for accessing the vfs linked list.  Initialized in vfs_mountroot(),
 * but otherwise should be accessed only via vfs_list_lock() and
 * vfs_list_unlock().  Also used to protect the timestamp for mods to the list.
 *
 * NOTE(review): vfs_mountroot() is listed below among the routines left
 * out of this file, so the initialization presumably happens elsewhere
 * in the test jig -- confirm.  This line repeats the file-scope
 * declaration of vfslist made with the other VFS globals near the top
 * of the file.
 */
static krwlock_t vfslist;
522
523// vfs_mountdevices(void)
524// vfs_mountdev1(void)
525// vfs_mountfs()
526// vfs_mountroot()
527// lofi_add, lofi_remove
528
529
530/*
531 * Mount the FS for the test jig.  Based on domount()
532 */
533int
534fake_domount(char *fsname, struct mounta *uap, struct vfs **vfspp)
535{
536	vnode_t		*vp;
537	struct cred	*credp;
538	struct vfssw	*vswp;
539	vfsops_t	*vfsops;
540	struct vfs	*vfsp = NULL;
541	mntopts_t	mnt_mntopts;
542	int		error = 0;
543	int		copyout_error = 0;
544	char		*opts = uap->optptr;
545	char		*inargs = opts;
546	int		optlen = uap->optlen;
547
548	credp = CRED();
549
550	/*
551	 * Test jig specific: mount on rootdir
552	 */
553	if (rootvfs != NULL)
554		return (EBUSY);
555	vp = rootdir;
556
557	/*
558	 * The v_flag value for the mount point vp is permanently set
559	 * to VVFSLOCK so that no one bypasses the vn_vfs*locks routine
560	 * for mount point locking.
561	 */
562	mutex_enter(&vp->v_lock);
563	vp->v_flag |= VVFSLOCK;
564	mutex_exit(&vp->v_lock);
565
566	mnt_mntopts.mo_count = 0;
567
568	/*
569	 * Find the ops vector to use to invoke the file system-specific mount
570	 * method.  If the fsname argument is non-NULL, use it directly.
571	 */
572	if ((vswp = vfs_getvfssw(fsname)) == NULL) {
573		return (EINVAL);
574	}
575	if (!VFS_INSTALLED(vswp))
576		return (EINVAL);
577
578	// secpolicy_fs_allowed_mount(fsname)
579
580	vfsops = &vswp->vsw_vfsops;
581
582	vfs_copyopttbl(&vswp->vsw_optproto, &mnt_mntopts);
583
584	/*
585	 * Fetch mount options and parse them for generic vfs options
586	 */
587	if (uap->flags & MS_OPTIONSTR) {
588		/*
589		 * Limit the buffer size
590		 */
591		if (optlen < 0 || optlen > MAX_MNTOPT_STR) {
592			error = EINVAL;
593			goto errout;
594		}
595		if ((uap->flags & MS_SYSSPACE) == 0) {
596			inargs = kmem_alloc(MAX_MNTOPT_STR, KM_SLEEP);
597			inargs[0] = '\0';
598			if (optlen) {
599				error = copyinstr(opts, inargs, (size_t)optlen,
600				    NULL);
601				if (error) {
602					goto errout;
603				}
604			}
605		}
606		vfs_parsemntopts(&mnt_mntopts, inargs, 0);
607	}
608	/*
609	 * Flag bits override the options string.
610	 */
611	if (uap->flags & MS_REMOUNT)
612		vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_REMOUNT, NULL, 0, 0);
613	if (uap->flags & MS_RDONLY)
614		vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_RO, NULL, 0, 0);
615	if (uap->flags & MS_NOSUID)
616		vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
617
618	/*
619	 * Check if this is a remount; must be set in the option string and
620	 * the file system must support a remount option.
621	 */
622	if (vfs_optionisset_nolock(&mnt_mntopts,
623	    MNTOPT_REMOUNT, NULL)) {
624		/* disallow here */
625		error = ENOTSUP;
626		goto errout;
627	}
628
629	/*
630	 * uap->flags and vfs_optionisset() should agree.
631	 */
632	if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_RO, NULL)) {
633		uap->flags |= MS_RDONLY;
634	}
635	if (vfs_optionisset_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL)) {
636		uap->flags |= MS_NOSUID;
637	}
638	// nbmand ...
639
640	/*
641	 * If we are splicing the fs into the namespace,
642	 * perform mount point checks...
643	 * (always splice=0 here)
644	 */
645
646	if ((uap->flags & (MS_DATA | MS_OPTIONSTR)) == 0) {
647		uap->dataptr = NULL;
648		uap->datalen = 0;
649	}
650
651	/*
652	 * If this is a remount, ... (never here)
653	 */
654	vfsp = vfs_alloc(KM_SLEEP);
655	VFS_INIT(vfsp, vfsops, NULL);
656
657	VFS_HOLD(vfsp);
658
659	// lofi_add(fsname, vfsp, &mnt_mntopts, uap)
660
661	/*
662	 * PRIV_SYS_MOUNT doesn't mean you can become root.
663	 */
664	uap->flags |= MS_NOSUID;
665	vfs_setmntopt_nolock(&mnt_mntopts, MNTOPT_NOSUID, NULL, 0, 0);
666
667	/*
668	 * The vfs_reflock...
669	 */
670
671	/*
672	 * Lock the vfs...
673	 */
674	if ((error = vfs_lock(vfsp)) != 0) {
675		vfs_free(vfsp);
676		vfsp = NULL;
677		goto errout;
678	}
679
680	/*
681	 * Add device to mount in progress table...
682	 */
683	/*
684	 * Invalidate cached entry for the mount point.
685	 */
686
687	/*
688	 * If have an option string but the filesystem doesn't supply a
689	 * prototype options table, create a table...
690	 */
691
692	/*
693	 * Serialize with zone state transitions...
694	 */
695
696	// mount_in_progress(zone);
697
698	/*
699	 * Instantiate (or reinstantiate) the file system...
700	 */
701	vfs_swapopttbl(&mnt_mntopts, &vfsp->vfs_mntopts);
702
703	vfs_setresource(vfsp, uap->spec, 0);
704	vfs_setmntpoint(vfsp, uap->dir, 0);
705
706	/*
707	 * going to mount on this vnode, so notify.
708	 */
709	// vnevent_mountedover(vp, NULL);
710	error = VFS_MOUNT(vfsp, vp, uap, credp);
711
712	if (uap->flags & MS_RDONLY)
713		vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
714	if (uap->flags & MS_NOSUID)
715		vfs_setmntopt(vfsp, MNTOPT_NOSUID, NULL, 0);
716	if (uap->flags & MS_GLOBAL)
717		vfs_setmntopt(vfsp, MNTOPT_GLOBAL, NULL, 0);
718
719	if (error) {
720		// lofi_remove(vfsp);
721
722		// (remount == 0)
723		vfs_unlock(vfsp);
724		// vfs_freemnttab(vfsp);
725		vfs_free(vfsp);
726		vfsp = NULL;
727	} else {
728		/*
729		 * Set the mount time to now
730		 */
731		// vfsp->vfs_mtime = ddi_get_time();
732		// if (remount) ...
733		// else if (splice) vfs_add(vp, vfsp, flags)
734		// else VFS_HOLD(vfsp);
735
736		/*
737		 * Test jig specific:
738		 * Do sort of like vfs_add for vp=rootdir
739		 * Already have hold on vp.
740		 */
741		vfsp->vfs_vnodecovered = vp;
742		vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES);
743		VFS_HOLD(vfsp);
744		rootvfs = vfsp;
745
746		/*
747		 * Set flags for global options encountered
748		 */
749		if (vfs_optionisset(vfsp, MNTOPT_RO, NULL))
750			vfsp->vfs_flag |= VFS_RDONLY;
751		else
752			vfsp->vfs_flag &= ~VFS_RDONLY;
753		if (vfs_optionisset(vfsp, MNTOPT_NOSUID, NULL)) {
754			vfsp->vfs_flag |= (VFS_NOSETUID|VFS_NODEVICES);
755		} else {
756			if (vfs_optionisset(vfsp, MNTOPT_NODEVICES, NULL))
757				vfsp->vfs_flag |= VFS_NODEVICES;
758			else
759				vfsp->vfs_flag &= ~VFS_NODEVICES;
760			if (vfs_optionisset(vfsp, MNTOPT_NOSETUID, NULL))
761				vfsp->vfs_flag |= VFS_NOSETUID;
762			else
763				vfsp->vfs_flag &= ~VFS_NOSETUID;
764		}
765		if (vfs_optionisset(vfsp, MNTOPT_NBMAND, NULL))
766			vfsp->vfs_flag |= VFS_NBMAND;
767		else
768			vfsp->vfs_flag &= ~VFS_NBMAND;
769
770		if (vfs_optionisset(vfsp, MNTOPT_XATTR, NULL))
771			vfsp->vfs_flag |= VFS_XATTR;
772		else
773			vfsp->vfs_flag &= ~VFS_XATTR;
774
775		if (vfs_optionisset(vfsp, MNTOPT_NOEXEC, NULL))
776			vfsp->vfs_flag |= VFS_NOEXEC;
777		else
778			vfsp->vfs_flag &= ~VFS_NOEXEC;
779
780		/*
781		 * Now construct the output option string of options
782		 * we recognized.
783		 */
784		if (uap->flags & MS_OPTIONSTR) {
785			vfs_list_read_lock();
786			copyout_error = vfs_buildoptionstr(
787			    &vfsp->vfs_mntopts, inargs, optlen);
788			vfs_list_unlock();
789			if (copyout_error == 0 &&
790			    (uap->flags & MS_SYSSPACE) == 0) {
791				copyout_error = copyout(inargs, opts, optlen);
792			}
793		}
794
795		/*
796		 * If this isn't a remount, set up the vopstats...
797		 */
798		if (vswp->vsw_flag & VSW_XID)
799			vfsp->vfs_flag |= VFS_XID;
800
801		vfs_unlock(vfsp);
802
803		/*
804		 * Test jig specicific:
805		 * Replace rootdir with the mounted root.
806		 */
807		error = VFS_ROOT(vfsp, &rootdir);
808		if (error != 0) {
809			panic("fake_domount, get root %d\n", error);
810		}
811	}
812	// mount_completed(zone);
813	// zone_rele(zone);
814
815	// if (splice)
816	//	vn_vfsunlock(vp);
817
818	if ((error == 0) && (copyout_error == 0)) {
819		/* get_vskstat_anchor() */
820		/* Return vfsp to caller. */
821		*vfspp = vfsp;
822	}
823errout:
824	vfs_freeopttbl(&mnt_mntopts);
825	/* resource, mountpt not allocated */
826	/* no addmip, delmip */
827	ASSERT(vswp != NULL);
828	vfs_unrefvfssw(vswp);
829	if (inargs != opts)
830		kmem_free(inargs, MAX_MNTOPT_STR);
831	if (copyout_error) {
832		if (vfsp != NULL) {
833			// lofi_remove(vfsp);
834			VFS_RELE(vfsp);
835		}
836		error = copyout_error;
837	}
838	return (error);
839}
840
841
842static void
843vfs_setpath(
844    struct vfs *vfsp,		/* vfs being updated */
845    refstr_t **refp,		/* Ref-count string to contain the new path */
846    const char *newpath,	/* Path to add to refp (above) */
847    uint32_t flag)		/* flag */
848{
849	// size_t len;
850	refstr_t *ref;
851	// char *sp;
852	int have_list_lock = 0;
853
854	ASSERT(!VFS_ON_LIST(vfsp) || vfs_lock_held(vfsp));
855
856	/*
857	 * New path must be less than MAXPATHLEN because mntfs
858	 * will only display up to MAXPATHLEN bytes. This is currently
859	 * safe, because domount() uses pn_get(), and other callers
860	 * similarly cap the size to fewer than MAXPATHLEN bytes.
861	 */
862
863	ASSERT(strlen(newpath) < MAXPATHLEN);
864
865	/* mntfs requires consistency while vfs list lock is held */
866
867	if (VFS_ON_LIST(vfsp)) {
868		have_list_lock = 1;
869		vfs_list_lock();
870	}
871
872	if (*refp != NULL)
873		refstr_rele(*refp);
874
875	/*
876	 * If we are in a non-global zone... (do something else)
877	 */
878	ref = refstr_alloc(newpath);
879	*refp = ref;
880
881	if (have_list_lock) {
882		vfs_mnttab_modtimeupd();
883		vfs_list_unlock();
884	}
885}
886
887/*
888 * Record a mounted resource name in a vfs structure.
889 * If vfsp is already mounted, caller must hold the vfs lock.
890 */
891void
892vfs_setresource(struct vfs *vfsp, const char *resource, uint32_t flag)
893{
894	if (resource == NULL || resource[0] == '\0')
895		resource = VFS_NORESOURCE;
896	vfs_setpath(vfsp, &vfsp->vfs_resource, resource, flag);
897}
898
899/*
900 * Record a mount point name in a vfs structure.
901 * If vfsp is already mounted, caller must hold the vfs lock.
902 */
903void
904vfs_setmntpoint(struct vfs *vfsp, const char *mntpt, uint32_t flag)
905{
906	if (mntpt == NULL || mntpt[0] == '\0')
907		mntpt = VFS_NOMNTPT;
908	vfs_setpath(vfsp, &vfsp->vfs_mntpt, mntpt, flag);
909}
910
911/* Returns the vfs_resource. Caller must call refstr_rele() when finished. */
912
913refstr_t *
914vfs_getresource(const struct vfs *vfsp)
915{
916	refstr_t *resource;
917
918	vfs_list_read_lock();
919	resource = vfsp->vfs_resource;
920	refstr_hold(resource);
921	vfs_list_unlock();
922
923	return (resource);
924}
925
926/* Returns the vfs_mntpt. Caller must call refstr_rele() when finished. */
927
928refstr_t *
929vfs_getmntpoint(const struct vfs *vfsp)
930{
931	refstr_t *mntpt;
932
933	vfs_list_read_lock();
934	mntpt = vfsp->vfs_mntpt;
935	refstr_hold(mntpt);
936	vfs_list_unlock();
937
938	return (mntpt);
939}
940
941// vfs_createopttbl_extend
942// vfs_createopttbl
943
944/*
945 * Swap two mount options tables
946 */
947static void
948vfs_swapopttbl_nolock(mntopts_t *optbl1, mntopts_t *optbl2)
949{
950	uint_t tmpcnt;
951	mntopt_t *tmplist;
952
953	tmpcnt = optbl2->mo_count;
954	tmplist = optbl2->mo_list;
955	optbl2->mo_count = optbl1->mo_count;
956	optbl2->mo_list = optbl1->mo_list;
957	optbl1->mo_count = tmpcnt;
958	optbl1->mo_list = tmplist;
959}
960
961static void
962vfs_swapopttbl(mntopts_t *optbl1, mntopts_t *optbl2)
963{
964	vfs_list_lock();
965	vfs_swapopttbl_nolock(optbl1, optbl2);
966	vfs_mnttab_modtimeupd();
967	vfs_list_unlock();
968}
969
970static char **
971vfs_copycancelopt_extend(char **const moc, int extend)
972{
973	int i = 0;
974	int j;
975	char **result;
976
977	if (moc != NULL) {
978		for (; moc[i] != NULL; i++)
979			/* count number of options to cancel */;
980	}
981
982	if (i + extend == 0)
983		return (NULL);
984
985	result = kmem_alloc((i + extend + 1) * sizeof (char *), KM_SLEEP);
986
987	for (j = 0; j < i; j++) {
988		result[j] = kmem_alloc(strlen(moc[j]) + 1, KM_SLEEP);
989		(void) strcpy(result[j], moc[j]);
990	}
991	for (; j <= i + extend; j++)
992		result[j] = NULL;
993
994	return (result);
995}
996
997static void
998vfs_copyopt(const mntopt_t *s, mntopt_t *d)
999{
1000	char *sp, *dp;
1001
1002	d->mo_flags = s->mo_flags;
1003	d->mo_data = s->mo_data;
1004	sp = s->mo_name;
1005	if (sp != NULL) {
1006		dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP);
1007		(void) strcpy(dp, sp);
1008		d->mo_name = dp;
1009	} else {
1010		d->mo_name = NULL; /* should never happen */
1011	}
1012
1013	d->mo_cancel = vfs_copycancelopt_extend(s->mo_cancel, 0);
1014
1015	sp = s->mo_arg;
1016	if (sp != NULL) {
1017		dp = kmem_alloc(strlen(sp) + 1, KM_SLEEP);
1018		(void) strcpy(dp, sp);
1019		d->mo_arg = dp;
1020	} else {
1021		d->mo_arg = NULL;
1022	}
1023}
1024
1025// vfs_copyopttbl_extend
1026// vfs_copyopttbl
1027
1028/*
1029 * Copy a mount options table, possibly allocating some spare
1030 * slots at the end.  It is permissible to copy_extend the NULL table.
1031 */
1032static void
1033vfs_copyopttbl_extend(const mntopts_t *smo, mntopts_t *dmo, int extra)
1034{
1035	uint_t i, count;
1036	mntopt_t *motbl;
1037
1038	/*
1039	 * Clear out any existing stuff in the options table being initialized
1040	 */
1041	vfs_freeopttbl(dmo);
1042	count = (smo == NULL) ? 0 : smo->mo_count;
1043	if ((count + extra) == 0)	/* nothing to do */
1044		return;
1045	dmo->mo_count = count + extra;
1046	motbl = kmem_zalloc((count + extra) * sizeof (mntopt_t), KM_SLEEP);
1047	dmo->mo_list = motbl;
1048	for (i = 0; i < count; i++) {
1049		vfs_copyopt(&smo->mo_list[i], &motbl[i]);
1050	}
1051	for (i = count; i < count + extra; i++) {
1052		motbl[i].mo_flags = MO_EMPTY;
1053	}
1054}
1055
1056/*
1057 * Copy a mount options table.
1058 *
1059 * This function is *not* for general use by filesystems.
1060 *
1061 * Note: caller is responsible for locking the vfs list, if needed,
1062 *       to protect smo and dmo.
1063 */
1064void
1065vfs_copyopttbl(const mntopts_t *smo, mntopts_t *dmo)
1066{
1067	vfs_copyopttbl_extend(smo, dmo, 0);
1068}
1069
1070static char **
1071vfs_mergecancelopts(const mntopt_t *mop1, const mntopt_t *mop2)
1072{
1073	int c1 = 0;
1074	int c2 = 0;
1075	char **result;
1076	char **sp1, **sp2, **dp;
1077
1078	/*
1079	 * First we count both lists of cancel options.
1080	 * If either is NULL or has no elements, we return a copy of
1081	 * the other.
1082	 */
1083	if (mop1->mo_cancel != NULL) {
1084		for (; mop1->mo_cancel[c1] != NULL; c1++)
1085			/* count cancel options in mop1 */;
1086	}
1087
1088	if (c1 == 0)
1089		return (vfs_copycancelopt_extend(mop2->mo_cancel, 0));
1090
1091	if (mop2->mo_cancel != NULL) {
1092		for (; mop2->mo_cancel[c2] != NULL; c2++)
1093			/* count cancel options in mop2 */;
1094	}
1095
1096	result = vfs_copycancelopt_extend(mop1->mo_cancel, c2);
1097
1098	if (c2 == 0)
1099		return (result);
1100
1101	/*
1102	 * When we get here, we've got two sets of cancel options;
1103	 * we need to merge the two sets.  We know that the result
1104	 * array has "c1+c2+1" entries and in the end we might shrink
1105	 * it.
1106	 * Result now has a copy of the c1 entries from mop1; we'll
1107	 * now lookup all the entries of mop2 in mop1 and copy it if
1108	 * it is unique.
1109	 * This operation is O(n^2) but it's only called once per
1110	 * filesystem per duplicate option.  This is a situation
1111	 * which doesn't arise with the filesystems in ON and
1112	 * n is generally 1.
1113	 */
1114
1115	dp = &result[c1];
1116	for (sp2 = mop2->mo_cancel; *sp2 != NULL; sp2++) {
1117		for (sp1 = mop1->mo_cancel; *sp1 != NULL; sp1++) {
1118			if (strcmp(*sp1, *sp2) == 0)
1119				break;
1120		}
1121		if (*sp1 == NULL) {
1122			/*
1123			 * Option *sp2 not found in mop1, so copy it.
1124			 * The calls to vfs_copycancelopt_extend()
1125			 * guarantee that there's enough room.
1126			 */
1127			*dp = kmem_alloc(strlen(*sp2) + 1, KM_SLEEP);
1128			(void) strcpy(*dp++, *sp2);
1129		}
1130	}
1131	if (dp != &result[c1+c2]) {
1132		size_t bytes = (dp - result + 1) * sizeof (char *);
1133		char **nres = kmem_alloc(bytes, KM_SLEEP);
1134
1135		bcopy(result, nres, bytes);
1136		kmem_free(result, (c1 + c2 + 1) * sizeof (char *));
1137		result = nres;
1138	}
1139	return (result);
1140}
1141
/*
 * Merge two mount option tables (outer and inner) into one.  This is very
 * similar to "merging" global variables and automatic variables in C.
 *
 * The result, stored in dmo, holds a copy of every outer option followed
 * by the non-empty inner options whose names do not appear in the outer
 * table.  An inner option whose name does appear in the outer table
 * replaces the outer copy in place.  dmo's previous contents are
 * overwritten without being freed.
 *
 * This isn't (and doesn't have to be) fast.
 *
 * This function is *not* for general use by filesystems.
 *
 * Note: caller is responsible for locking the vfs list, if needed,
 *       to protect omo, imo & dmo.
 */
void
vfs_mergeopttbl(const mntopts_t *omo, const mntopts_t *imo, mntopts_t *dmo)
{
	uint_t i, count;
	mntopt_t *mop, *motbl;
	uint_t freeidx;

	/*
	 * First determine how much space we need to allocate.
	 */
	count = omo->mo_count;
	for (i = 0; i < imo->mo_count; i++) {
		if (imo->mo_list[i].mo_flags & MO_EMPTY)
			continue;
		if (vfs_hasopt(omo, imo->mo_list[i].mo_name) == NULL)
			count++;
	}
	ASSERT(count >= omo->mo_count &&
	    count <= omo->mo_count + imo->mo_count);
	motbl = kmem_alloc(count * sizeof (mntopt_t), KM_SLEEP);
	/* The outer options keep their positions in the merged table. */
	for (i = 0; i < omo->mo_count; i++)
		vfs_copyopt(&omo->mo_list[i], &motbl[i]);
	/* New inner options are appended starting at this index. */
	freeidx = omo->mo_count;
	for (i = 0; i < imo->mo_count; i++) {
		if (imo->mo_list[i].mo_flags & MO_EMPTY)
			continue;
		if ((mop = vfs_hasopt(omo, imo->mo_list[i].mo_name)) != NULL) {
			char **newcanp;
			/* position of the matching option in the outer table */
			uint_t index = mop - omo->mo_list;

			/*
			 * NOTE(review): both arguments here derive from the
			 * outer table (motbl[index] was copied from it above),
			 * and the inner option's own cancel list is discarded
			 * by the vfs_freecancelopt() call below -- confirm
			 * that this is the intended merge.
			 */
			newcanp = vfs_mergecancelopts(mop, &motbl[index]);

			/* Replace the outer copy with the inner option ... */
			vfs_freeopt(&motbl[index]);
			vfs_copyopt(&imo->mo_list[i], &motbl[index]);

			/* ... but install the merged cancel list. */
			vfs_freecancelopt(motbl[index].mo_cancel);
			motbl[index].mo_cancel = newcanp;
		} else {
			/*
			 * If it's a new option, just copy it over to the first
			 * free location.
			 */
			vfs_copyopt(&imo->mo_list[i], &motbl[freeidx++]);
		}
	}
	dmo->mo_count = count;
	dmo->mo_list = motbl;
}
1201
1202/*
1203 * Functions to set and clear mount options in a mount options table.
1204 */
1205
1206/*
1207 * Clear a mount option, if it exists.
1208 *
1209 * The update_mnttab arg indicates whether mops is part of a vfs that is on
1210 * the vfs list.
1211 */
1212static void
1213vfs_clearmntopt_nolock(mntopts_t *mops, const char *opt, int update_mnttab)
1214{
1215	struct mntopt *mop;
1216	uint_t i, count;
1217
1218	ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));
1219
1220	count = mops->mo_count;
1221	for (i = 0; i < count; i++) {
1222		mop = &mops->mo_list[i];
1223
1224		if (mop->mo_flags & MO_EMPTY)
1225			continue;
1226		if (strcmp(opt, mop->mo_name))
1227			continue;
1228		mop->mo_flags &= ~MO_SET;
1229		if (mop->mo_arg != NULL) {
1230			kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1);
1231		}
1232		mop->mo_arg = NULL;
1233		if (update_mnttab)
1234			vfs_mnttab_modtimeupd();
1235		break;
1236	}
1237}
1238
1239void
1240vfs_clearmntopt(struct vfs *vfsp, const char *opt)
1241{
1242	int gotlock = 0;
1243
1244	if (VFS_ON_LIST(vfsp)) {
1245		gotlock = 1;
1246		vfs_list_lock();
1247	}
1248	vfs_clearmntopt_nolock(&vfsp->vfs_mntopts, opt, gotlock);
1249	if (gotlock)
1250		vfs_list_unlock();
1251}
1252
1253
1254/*
1255 * Set a mount option on...
1256 */
1257static void
1258vfs_setmntopt_nolock(mntopts_t *mops, const char *opt,
1259    const char *arg, int flags, int update_mnttab)
1260{
1261	mntopt_t *mop;
1262	uint_t i, count;
1263	char *sp;
1264
1265	ASSERT(!update_mnttab || RW_WRITE_HELD(&vfslist));
1266
1267	if (flags & VFS_CREATEOPT) {
1268		if (vfs_hasopt(mops, opt) != NULL) {
1269			flags &= ~VFS_CREATEOPT;
1270		}
1271	}
1272	count = mops->mo_count;
1273	for (i = 0; i < count; i++) {
1274		mop = &mops->mo_list[i];
1275
1276		if (mop->mo_flags & MO_EMPTY) {
1277			if ((flags & VFS_CREATEOPT) == 0)
1278				continue;
1279			sp = kmem_alloc(strlen(opt) + 1, KM_SLEEP);
1280			(void) strcpy(sp, opt);
1281			mop->mo_name = sp;
1282			if (arg != NULL)
1283				mop->mo_flags = MO_HASVALUE;
1284			else
1285				mop->mo_flags = 0;
1286		} else if (strcmp(opt, mop->mo_name)) {
1287			continue;
1288		}
1289		if ((mop->mo_flags & MO_IGNORE) && (flags & VFS_NOFORCEOPT))
1290			break;
1291		if (arg != NULL && (mop->mo_flags & MO_HASVALUE) != 0) {
1292			sp = kmem_alloc(strlen(arg) + 1, KM_SLEEP);
1293			(void) strcpy(sp, arg);
1294		} else {
1295			sp = NULL;
1296		}
1297		if (mop->mo_arg != NULL)
1298			kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1);
1299		mop->mo_arg = sp;
1300		if (flags & VFS_DISPLAY)
1301			mop->mo_flags &= ~MO_NODISPLAY;
1302		if (flags & VFS_NODISPLAY)
1303			mop->mo_flags |= MO_NODISPLAY;
1304		mop->mo_flags |= MO_SET;
1305		if (mop->mo_cancel != NULL) {
1306			char **cp;
1307
1308			for (cp = mop->mo_cancel; *cp != NULL; cp++)
1309				vfs_clearmntopt_nolock(mops, *cp, 0);
1310		}
1311		if (update_mnttab)
1312			vfs_mnttab_modtimeupd();
1313		break;
1314	}
1315}
1316
1317void
1318vfs_setmntopt(struct vfs *vfsp, const char *opt, const char *arg, int flags)
1319{
1320	int gotlock = 0;
1321
1322	if (VFS_ON_LIST(vfsp)) {
1323		gotlock = 1;
1324		vfs_list_lock();
1325	}
1326	vfs_setmntopt_nolock(&vfsp->vfs_mntopts, opt, arg, flags, gotlock);
1327	if (gotlock)
1328		vfs_list_unlock();
1329}
1330
1331// vfs_addtag
1332// vfs_settag
1333// vfs_clrtag
1334
1335/*
1336 * Function to parse an option string and fill in a mount options table.
1337 * Unknown options are silently ignored.  The input option string is modified
1338 * by replacing separators with nulls.  If the create flag is set, options
1339 * not found in the table are just added on the fly.  The table must have
1340 * an option slot marked MO_EMPTY to add an option on the fly.
1341 *
1342 * This function is *not* for general use by filesystems.
1343 *
1344 * Note: caller is responsible for locking the vfs list, if needed,
1345 *       to protect mops..
1346 */
1347void
1348vfs_parsemntopts(mntopts_t *mops, char *osp, int create)
1349{
1350	char *s = osp, *p, *nextop, *valp, *cp, *ep = NULL;
1351	int setflg = VFS_NOFORCEOPT;
1352
1353	if (osp == NULL)
1354		return;
1355	while (*s != '\0') {
1356		p = strchr(s, ',');	/* find next option */
1357		if (p == NULL) {
1358			cp = NULL;
1359			p = s + strlen(s);
1360		} else {
1361			cp = p;		/* save location of comma */
1362			*p++ = '\0';	/* mark end and point to next option */
1363		}
1364		nextop = p;
1365		p = strchr(s, '=');	/* look for value */
1366		if (p == NULL) {
1367			valp = NULL;	/* no value supplied */
1368			ep = NULL;
1369		} else {
1370			ep = p;		/* save location of equals */
1371			*p++ = '\0';	/* end option and point to value */
1372			valp = p;
1373		}
1374		/*
1375		 * set option into options table
1376		 */
1377		if (create)
1378			setflg |= VFS_CREATEOPT;
1379		vfs_setmntopt_nolock(mops, s, valp, setflg, 0);
1380		if (cp != NULL)
1381			*cp = ',';	/* restore the comma */
1382		if (valp != NULL)
1383			*ep = '=';	/* restore the equals */
1384		s = nextop;
1385	}
1386}
1387
1388/*
1389 * Function to inquire if an option exists in a mount options table.
1390 * Returns a pointer to the option if it exists, else NULL.
1391 */
1392struct mntopt *
1393vfs_hasopt(const mntopts_t *mops, const char *opt)
1394{
1395	struct mntopt *mop;
1396	uint_t i, count;
1397
1398	count = mops->mo_count;
1399	for (i = 0; i < count; i++) {
1400		mop = &mops->mo_list[i];
1401
1402		if (mop->mo_flags & MO_EMPTY)
1403			continue;
1404		if (strcmp(opt, mop->mo_name) == 0)
1405			return (mop);
1406	}
1407	return (NULL);
1408}
1409
1410/*
1411 * Function to inquire if an option is set in a mount options table.
1412 * Returns non-zero if set and fills in the arg pointer with a pointer to
1413 * the argument string or NULL if there is no argument string.
1414 */
1415static int
1416vfs_optionisset_nolock(const mntopts_t *mops, const char *opt, char **argp)
1417{
1418	struct mntopt *mop;
1419	uint_t i, count;
1420
1421	count = mops->mo_count;
1422	for (i = 0; i < count; i++) {
1423		mop = &mops->mo_list[i];
1424
1425		if (mop->mo_flags & MO_EMPTY)
1426			continue;
1427		if (strcmp(opt, mop->mo_name))
1428			continue;
1429		if ((mop->mo_flags & MO_SET) == 0)
1430			return (0);
1431		if (argp != NULL && (mop->mo_flags & MO_HASVALUE) != 0)
1432			*argp = mop->mo_arg;
1433		return (1);
1434	}
1435	return (0);
1436}
1437
1438
1439int
1440vfs_optionisset(const struct vfs *vfsp, const char *opt, char **argp)
1441{
1442	int ret;
1443
1444	vfs_list_read_lock();
1445	ret = vfs_optionisset_nolock(&vfsp->vfs_mntopts, opt, argp);
1446	vfs_list_unlock();
1447	return (ret);
1448}
1449
1450
1451/*
1452 * Construct a comma separated string of the options set in the given
1453 * mount table, return the string in the given buffer.  Return non-zero if
1454 * the buffer would overflow.
1455 *
1456 * This function is *not* for general use by filesystems.
1457 *
1458 * Note: caller is responsible for locking the vfs list, if needed,
1459 *       to protect mp.
1460 */
1461int
1462vfs_buildoptionstr(const mntopts_t *mp, char *buf, int len)
1463{
1464	char *cp;
1465	uint_t i;
1466
1467	buf[0] = '\0';
1468	cp = buf;
1469	for (i = 0; i < mp->mo_count; i++) {
1470		struct mntopt *mop;
1471
1472		mop = &mp->mo_list[i];
1473		if (mop->mo_flags & MO_SET) {
1474			int optlen, comma = 0;
1475
1476			if (buf[0] != '\0')
1477				comma = 1;
1478			optlen = strlen(mop->mo_name);
1479			if (strlen(buf) + comma + optlen + 1 > len)
1480				goto err;
1481			if (comma)
1482				*cp++ = ',';
1483			(void) strcpy(cp, mop->mo_name);
1484			cp += optlen;
1485			/*
1486			 * Append option value if there is one
1487			 */
1488			if (mop->mo_arg != NULL) {
1489				int arglen;
1490
1491				arglen = strlen(mop->mo_arg);
1492				if (strlen(buf) + arglen + 2 > len)
1493					goto err;
1494				*cp++ = '=';
1495				(void) strcpy(cp, mop->mo_arg);
1496				cp += arglen;
1497			}
1498		}
1499	}
1500	return (0);
1501err:
1502	return (EOVERFLOW);
1503}
1504
/*
 * Free a NULL terminated array of cancel option names: each string first,
 * then the array itself (whose size includes the terminating NULL slot).
 * A NULL array is accepted and ignored.
 */
static void
vfs_freecancelopt(char **moc)
{
	size_t nent;

	if (moc == NULL)
		return;

	for (nent = 0; moc[nent] != NULL; nent++)
		kmem_free(moc[nent], strlen(moc[nent]) + 1);

	kmem_free(moc, (nent + 1) * sizeof (char *));
}
1519
1520static void
1521vfs_freeopt(mntopt_t *mop)
1522{
1523	if (mop->mo_name != NULL)
1524		kmem_free(mop->mo_name, strlen(mop->mo_name) + 1);
1525
1526	vfs_freecancelopt(mop->mo_cancel);
1527
1528	if (mop->mo_arg != NULL)
1529		kmem_free(mop->mo_arg, strlen(mop->mo_arg) + 1);
1530}
1531
1532/*
1533 * Free a mount options table
1534 *
1535 * This function is *not* for general use by filesystems.
1536 *
1537 * Note: caller is responsible for locking the vfs list, if needed,
1538 *       to protect mp.
1539 */
1540void
1541vfs_freeopttbl(mntopts_t *mp)
1542{
1543	uint_t i, count;
1544
1545	count = mp->mo_count;
1546	for (i = 0; i < count; i++) {
1547		vfs_freeopt(&mp->mo_list[i]);
1548	}
1549	if (count) {
1550		kmem_free(mp->mo_list, sizeof (mntopt_t) * count);
1551		mp->mo_count = 0;
1552		mp->mo_list = NULL;
1553	}
1554}
1555
1556// vfs_mntdummyread
1557// vfs_mntdummywrite
1558// vfs_mntdummygetattr
1559// vfs_mnttabvp_setup
1560// vfs_mnttab_rwop
1561// vfs_mnttab_writeop
1562// vfs_mnttab_readop
1563// vfs_freemnttab
1564// vfs_mnttab_modtime
1565// vfs_mnttab_poll
1566// vfs_mono_time
1567
/*
 * Hook for updating the mnttab modification time.  In this file it is an
 * empty stub: callers may invoke it freely, nothing is recorded.
 */
void
vfs_mnttab_modtimeupd()
{
	/* intentionally empty */
}
1575
1576/*
1577 * Unlike the real dounmount, we don't have
1578 * vn_vfswlock_held(coveredvp)
1579 */
1580int
1581fake_dounmount(struct vfs *vfsp, int flag)
1582{
1583	cred_t *cr = CRED();
1584	vnode_t *coveredvp;
1585	int error;
1586
1587	/*
1588	 * Get covered vnode. This will be NULL if the vfs is not linked
1589	 * into the file system name space (i.e., domount() with MNT_NOSPICE).
1590	 */
1591	coveredvp = vfsp->vfs_vnodecovered;
1592
1593	/* For forcible umount, skip VFS_SYNC() since it may hang */
1594	if ((flag & MS_FORCE) == 0)
1595		(void) VFS_SYNC(vfsp, 0, cr);
1596
1597	/*
1598	 * Test-jig specific:
1599	 * Need to release rootdir before unmount or VFS_UNMOUNT
1600	 * may fail due to that node being active.
1601	 */
1602	if (rootdir != NULL) {
1603		ASSERT(rootdir != coveredvp);
1604		VN_RELE(rootdir);
1605		rootdir = NULL;
1606	}
1607
1608	/*
1609	 * Lock the vfs to maintain fs status quo during unmount.  This
1610	 * has to be done after the sync because ufs_update tries to acquire
1611	 * the vfs_reflock.
1612	 */
1613	vfs_lock_wait(vfsp);
1614
1615	if ((error = VFS_UNMOUNT(vfsp, flag, cr)) != 0) {
1616		int err2;
1617		vfs_unlock(vfsp);
1618		/* Get rootdir back */
1619		err2 = VFS_ROOT(vfsp, &rootdir);
1620		if (err2 != 0) {
1621			panic("fake_dounmount, get root %d\n", err2);
1622		}
1623	} else {
1624		/*
1625		 * Real dounmount does vfs_remove.
1626		 *
1627		 * Test-jig specific:
1628		 * Restore the covered rootdir,
1629		 * release the rootvfs hold and clear.
1630		 */
1631		if (coveredvp != NULL) {
1632			// vfs_list_remove(vfsp);
1633			vfsp->vfs_vnodecovered = NULL;
1634			rootdir = coveredvp;
1635		}
1636		if (rootvfs == vfsp) {
1637			VFS_RELE(vfsp);
1638			rootvfs = NULL;
1639		}
1640
1641		/*
1642		 * Release the (final) reference to vfs
1643		 */
1644		vfs_unlock(vfsp);
1645		VFS_RELE(vfsp);
1646	}
1647	return (error);
1648}
1649
1650// vfs_unmountall(void)
1651// vfs_addmip
1652// vfs_delmip
1653// vfs_add
1654// vfs_remove
1655
/*
 * The one reader/writer lock behind vfs_lock(), vfs_rlock(),
 * vfs_lock_wait(), vfs_rlock_wait(), vfs_unlock() and vfs_lock_held().
 * Those functions ignore their vfs_t argument, so every vfs shares it.
 */
static krwlock_t vpvfsentry_ve_lock;
1657
1658/*
1659 * Lock a filesystem to prevent access to it while mounting,
1660 * unmounting and syncing.  Return EBUSY immediately if lock
1661 * can't be acquired.
1662 */
1663int
1664vfs_lock(vfs_t *vfsp)
1665{
1666
1667	if (rw_tryenter(&vpvfsentry_ve_lock, RW_WRITER))
1668		return (0);
1669
1670	return (EBUSY);
1671}
1672
1673int
1674vfs_rlock(vfs_t *vfsp)
1675{
1676
1677	if (rw_tryenter(&vpvfsentry_ve_lock, RW_READER))
1678		return (0);
1679
1680	return (EBUSY);
1681}
1682
1683void
1684vfs_lock_wait(vfs_t *vfsp)
1685{
1686
1687	rw_enter(&vpvfsentry_ve_lock, RW_WRITER);
1688}
1689
1690void
1691vfs_rlock_wait(vfs_t *vfsp)
1692{
1693	rw_enter(&vpvfsentry_ve_lock, RW_READER);
1694}
1695
1696/*
1697 * Unlock a locked filesystem.
1698 */
1699void
1700vfs_unlock(vfs_t *vfsp)
1701{
1702
1703	rw_exit(&vpvfsentry_ve_lock);
1704}
1705
1706/*
1707 * Utility routine that allows a filesystem to construct its
1708 * fsid in "the usual way" - by munging some underlying dev_t and
1709 * the filesystem type number into the 64-bit fsid. ...
1710 */
1711void
1712vfs_make_fsid(fsid_t *fsi, dev_t dev, int val)
1713{
1714	if (!cmpldev((dev32_t *)&fsi->val[0], dev))
1715		panic("device number too big for fsid!");
1716	fsi->val[1] = val;
1717}
1718
1719int
1720vfs_lock_held(vfs_t *vfsp)
1721{
1722	int held;
1723
1724	held = rw_write_held(&vpvfsentry_ve_lock);
1725
1726	return (held);
1727}
1728
1729// vfs_lock_owner
1730
1731/*
1732 * vfs list locking.
1733 */
1734
1735void
1736vfs_list_lock()
1737{
1738	rw_enter(&vfslist, RW_WRITER);
1739}
1740
1741void
1742vfs_list_read_lock()
1743{
1744	rw_enter(&vfslist, RW_READER);
1745}
1746
1747void
1748vfs_list_unlock()
1749{
1750	rw_exit(&vfslist);
1751}
1752
1753/*
1754 * Low level worker routines for adding entries to and removing entries from
1755 * the vfs list.
1756 */
1757
1758// vfs_hash_add
1759// vfs_hash_remove
1760// vfs_list_add
1761// vfs_list_remove
1762// getvfs
1763// vfs_devmounting
1764
/*
 * Ask whether a device is mounted.  The contract is to return 1 if an
 * entry for the device is found on the vfs list and 0 if not; this stub
 * does no search and always answers 0.
 */
int
vfs_devismounted(dev_t dev)
{
	(void) dev;

	return (0);
}
1775
1776// vfs_dev2vfsp
1777// vfs_mntpoint2vfsp
1778
1779/*
1780 * Search the vfs list for a specified vfsops.
1781 * if vfs entry is found then return 1, else 0.
1782 */
1783int
1784vfs_opsinuse(vfsops_t *ops)
1785{
1786	return (0);
1787}
1788
1789/*
1790 * Allocate an entry in vfssw for a file system type
1791 */
1792struct vfssw *
1793allocate_vfssw(const char *type)
1794{
1795	struct vfssw *vswp;
1796
1797	if (type[0] == '\0' || strlen(type) + 1 > _ST_FSTYPSZ) {
1798		/*
1799		 * The vfssw table uses the empty string to identify an
1800		 * available entry; we cannot add any type which has
1801		 * a leading NUL. The string length is limited to
1802		 * the size of the st_fstype array in struct stat.
1803		 */
1804		return (NULL);
1805	}
1806
1807	ASSERT(VFSSW_WRITE_LOCKED());
1808	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++)
1809		if (!ALLOCATED_VFSSW(vswp)) {
1810			vswp->vsw_name = kmem_alloc(strlen(type) + 1, KM_SLEEP);
1811			(void) strcpy(vswp->vsw_name, type);
1812			ASSERT(vswp->vsw_count == 0);
1813			vswp->vsw_count = 1;
1814			mutex_init(&vswp->vsw_lock, NULL, MUTEX_DEFAULT, NULL);
1815			return (vswp);
1816		}
1817	return (NULL);
1818}
1819
1820// vfs_to_modname
1821// vfs_getvfssw
1822
1823/*
1824 * Find a vfssw entry given a file system type name.
1825 */
1826struct vfssw *
1827vfs_getvfssw(const char *type)
1828{
1829	struct vfssw *vswp;
1830
1831	if (type == NULL || *type == '\0')
1832		return (NULL);
1833
1834	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
1835		if (strcmp(type, vswp->vsw_name) == 0) {
1836			return (vswp);
1837		}
1838	}
1839
1840	return (NULL);
1841
1842}
1843
1844/*
1845 * Find a vfssw entry given a file system type name.
1846 */
1847struct vfssw *
1848vfs_getvfsswbyname(const char *type)
1849{
1850	struct vfssw *vswp;
1851
1852	ASSERT(VFSSW_LOCKED());
1853	if (type == NULL || *type == '\0')
1854		return (NULL);
1855
1856	for (vswp = &vfssw[1]; vswp < &vfssw[nfstype]; vswp++) {
1857		if (strcmp(type, vswp->vsw_name) == 0) {
1858			vfs_refvfssw(vswp);
1859			return (vswp);
1860		}
1861	}
1862
1863	return (NULL);
1864}
1865
1866// vfs_getvfsswbyvfsops
1867
1868/*
1869 * Reference a vfssw entry.
1870 */
1871void
1872vfs_refvfssw(struct vfssw *vswp)
1873{
1874
1875	mutex_enter(&vswp->vsw_lock);
1876	vswp->vsw_count++;
1877	mutex_exit(&vswp->vsw_lock);
1878}
1879
1880/*
1881 * Unreference a vfssw entry.
1882 */
1883void
1884vfs_unrefvfssw(struct vfssw *vswp)
1885{
1886
1887	mutex_enter(&vswp->vsw_lock);
1888	vswp->vsw_count--;
1889	mutex_exit(&vswp->vsw_lock);
1890}
1891
1892// vfs_syncall
1893
1894/*
1895 * Map VFS flags to statvfs flags.  These shouldn't really be separate
1896 * flags at all.
1897 */
1898uint_t
1899vf_to_stf(uint_t vf)
1900{
1901	uint_t stf = 0;
1902
1903	if (vf & VFS_RDONLY)
1904		stf |= ST_RDONLY;
1905	if (vf & VFS_NOSETUID)
1906		stf |= ST_NOSUID;
1907	if (vf & VFS_NOTRUNC)
1908		stf |= ST_NOTRUNC;
1909
1910	return (stf);
1911}
1912
1913// vfsstray_sync
1914// vfsstray
1915// vfs_EIO
1916// vfs_EIO_sync
1917// EIO_vfs
1918// EIO_vfsops
1919
1920#pragma init(vfsinit)
1921
1922/*
1923 * Called from startup() to initialize all loaded vfs's
1924 */
1925void
1926vfsinit(void)
1927{
1928	vn_create_cache();
1929
1930	/* Temporary, until we mount root */
1931	rootdir = vn_alloc(KM_SLEEP);
1932	rootdir->v_type = VDIR;
1933}
1934
1935vfs_t *
1936vfs_alloc(int kmflag)
1937{
1938	vfs_t *vfsp;
1939
1940	vfsp = kmem_alloc(sizeof (struct vfs), kmflag);
1941
1942	/*
1943	 * Do the simplest initialization here.
1944	 * Everything else gets done in vfs_init()
1945	 */
1946	bzero(vfsp, sizeof (vfs_t));
1947	return (vfsp);
1948}
1949
1950void
1951vfs_free(vfs_t *vfsp)
1952{
1953	/*
1954	 * One would be tempted to assert that "vfsp->vfs_count == 0".
1955	 * Don't.  See fs/vfs.c
1956	 */
1957
1958	/* If FEM was in use, make sure everything gets cleaned up */
1959
1960	if (vfsp->vfs_implp)
1961		vfsimpl_teardown(vfsp);
1962	sema_destroy(&vfsp->vfs_reflock);
1963	kmem_free(vfsp, sizeof (struct vfs));
1964}
1965
1966/*
1967 * Increments the vfs reference count by one atomically.
1968 */
1969void
1970vfs_hold(vfs_t *vfsp)
1971{
1972	atomic_inc_32(&vfsp->vfs_count);
1973	ASSERT(vfsp->vfs_count != 0);
1974}
1975
1976/*
1977 * Decrements the vfs reference count by one atomically. When
1978 * vfs reference count becomes zero, it calls the file system
1979 * specific vfs_freevfs() to free up the resources.
1980 */
1981void
1982vfs_rele(vfs_t *vfsp)
1983{
1984	ASSERT(vfsp->vfs_count != 0);
1985	if (atomic_dec_32_nv(&vfsp->vfs_count) == 0) {
1986		VFS_FREEVFS(vfsp);
1987		// lofi_remove(vfsp);
1988		// zone_rele_ref...
1989		// vfs_freemnttab(vfsp);
1990		vfs_free(vfsp);
1991	}
1992}
1993
1994/*
1995 * Generic operations vector support.
1996 */
1997
1998int
1999fs_build_vector(void *vector, int *unused_ops,
2000    const fs_operation_trans_def_t *translation,
2001    const fs_operation_def_t *operations)
2002{
2003	int i, num_trans, num_ops, used;
2004
2005	/*
2006	 * Count the number of translations and the number of supplied
2007	 * operations.
2008	 */
2009
2010	{
2011		const fs_operation_trans_def_t *p;
2012
2013		for (num_trans = 0, p = translation;
2014		    p->name != NULL;
2015		    num_trans++, p++)
2016			;
2017	}
2018
2019	{
2020		const fs_operation_def_t *p;
2021
2022		for (num_ops = 0, p = operations;
2023		    p->name != NULL;
2024		    num_ops++, p++)
2025			;
2026	}
2027
2028	/* Walk through each operation known to our caller.  There will be */
2029	/* one entry in the supplied "translation table" for each. */
2030
2031	used = 0;
2032
2033	for (i = 0; i < num_trans; i++) {
2034		int j, found;
2035		char *curname;
2036		fs_generic_func_p result;
2037		fs_generic_func_p *location;
2038
2039		curname = translation[i].name;
2040
2041		/* Look for a matching operation in the list supplied by the */
2042		/* file system. */
2043
2044		found = 0;
2045
2046		for (j = 0; j < num_ops; j++) {
2047			if (strcmp(operations[j].name, curname) == 0) {
2048				used++;
2049				found = 1;
2050				break;
2051			}
2052		}
2053
2054		/*
2055		 * If the file system is using a "placeholder" for default
2056		 * or error functions, grab the appropriate function out of
2057		 * the translation table.  If the file system didn't supply
2058		 * this operation at all, use the default function.
2059		 */
2060
2061		if (found) {
2062			result = operations[j].func.fs_generic;
2063			if (result == fs_default) {
2064				result = translation[i].defaultFunc;
2065			} else if (result == fs_error) {
2066				result = translation[i].errorFunc;
2067			} else if (result == NULL) {
2068				/* Null values are PROHIBITED */
2069				return (EINVAL);
2070			}
2071		} else {
2072			result = translation[i].defaultFunc;
2073		}
2074
2075		/* Now store the function into the operations vector. */
2076
2077		/* LINTED E_BAD_PTR_CAST_ALIGN */
2078		location = (fs_generic_func_p *)
2079		    (((char *)vector) + translation[i].offset);
2080
2081		*location = result;
2082	}
2083
2084	*unused_ops = num_ops - used;
2085
2086	return (0);
2087}
2088
2089/* Placeholder functions, should never be called. */
2090
2091int
2092fs_error(void)
2093{
2094	cmn_err(CE_PANIC, "fs_error called");
2095	return (0);
2096}
2097
2098int
2099fs_default(void)
2100{
2101	cmn_err(CE_PANIC, "fs_default called");
2102	return (0);
2103}
2104
2105// rootconf
2106// getfsname
2107// getrootfs
2108
2109/*
2110 * VFS feature routines
2111 */
2112
/*
 * A feature value is split in two by these macros: bits 32 and up select
 * the word of the feature set array (VFTINDEX), and the low 32 bits are
 * the mask applied within that word (VFTBITS).
 */
#define	VFTINDEX(feature)	(((feature) >> 32) & 0xFFFFFFFF)
#define	VFTBITS(feature)	((feature) & 0xFFFFFFFFLL)
2115
2116/* Register a feature in the vfs */
2117void
2118vfs_set_feature(vfs_t *vfsp, vfs_feature_t feature)
2119{
2120	/* Note that vfs_featureset[] is found in *vfsp->vfs_implp */
2121	if (vfsp->vfs_implp == NULL)
2122		return;
2123
2124	vfsp->vfs_featureset[VFTINDEX(feature)] |= VFTBITS(feature);
2125}
2126
2127void
2128vfs_clear_feature(vfs_t *vfsp, vfs_feature_t feature)
2129{
2130	/* Note that vfs_featureset[] is found in *vfsp->vfs_implp */
2131	if (vfsp->vfs_implp == NULL)
2132		return;
2133	vfsp->vfs_featureset[VFTINDEX(feature)] &= VFTBITS(~feature);
2134}
2135
2136/*
2137 * Query a vfs for a feature.
2138 * Returns 1 if feature is present, 0 if not
2139 */
2140int
2141vfs_has_feature(vfs_t *vfsp, vfs_feature_t feature)
2142{
2143	int	ret = 0;
2144
2145	/* Note that vfs_featureset[] is found in *vfsp->vfs_implp */
2146	if (vfsp->vfs_implp == NULL)
2147		return (ret);
2148
2149	if (vfsp->vfs_featureset[VFTINDEX(feature)] & VFTBITS(feature))
2150		ret = 1;
2151
2152	return (ret);
2153}
2154
2155// vfs_propagate_features
2156// vfs_get_lofi
2157