xref: /illumos-gate/usr/src/uts/common/fs/smbclnt/smbfs/smbfs_vfsops.c (revision 4e72ade1d48747d1105e26d42fc4787278f8f35e)
1 /*
2  * Copyright (c) 2000-2001, Boris Popov
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *    This product includes software developed by Boris Popov.
16  * 4. Neither the name of the author nor the names of any co-contributors
17  *    may be used to endorse or promote products derived from this software
18  *    without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  *
32  * $Id: smbfs_vfsops.c,v 1.73.64.1 2005/05/27 02:35:28 lindak Exp $
33  */
34 
35 /*
36  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
37  * Copyright 2012 Nexenta Systems, Inc.  All rights reserved.
38  * Copyright 2013, Joyent, Inc. All rights reserved.
39  * Copyright (c) 2016 by Delphix. All rights reserved.
40  */
41 
42 #include <sys/systm.h>
43 #include <sys/cred.h>
44 #include <sys/time.h>
45 #include <sys/vfs.h>
46 #include <sys/vnode.h>
47 #include <fs/fs_subr.h>
48 #include <sys/sysmacros.h>
49 #include <sys/kmem.h>
50 #include <sys/mkdev.h>
51 #include <sys/mount.h>
52 #include <sys/statvfs.h>
53 #include <sys/errno.h>
54 #include <sys/debug.h>
55 #include <sys/disp.h>
56 #include <sys/cmn_err.h>
57 #include <sys/modctl.h>
58 #include <sys/policy.h>
59 #include <sys/atomic.h>
60 #include <sys/zone.h>
61 #include <sys/vfs_opreg.h>
62 #include <sys/mntent.h>
63 #include <sys/priv.h>
64 #include <sys/taskq.h>
65 #include <sys/tsol/label.h>
66 #include <sys/tsol/tndb.h>
67 #include <inet/ip.h>
68 
69 #include <netsmb/smb_osdep.h>
70 #include <netsmb/smb.h>
71 #include <netsmb/smb_conn.h>
72 #include <netsmb/smb_subr.h>
73 #include <netsmb/smb_dev.h>
74 
75 #include <smbfs/smbfs.h>
76 #include <smbfs/smbfs_node.h>
77 #include <smbfs/smbfs_subr.h>
78 
/*
 * Should smbfs mount enable "-o acl" by default?  There are good
 * arguments for both.  The most common use case is individual users
 * accessing files on some SMB server, for which "noacl" is the more
 * convenient default.  A less common use case is data migration,
 * where the "acl" option might be a desirable default.  We'll make
 * the common use case the default.  This default can be changed via
 * /etc/system, and/or set per-mount via the "acl" mount option.
 *
 * Consulted by smbfs_mount(): a non-zero value turns on SMI_ACL
 * unless the "noacl" option was given or the share lacks ACL support.
 */
int smbfs_default_opt_acl = 0;

/*
 * How many taskq threads per-mount should we use.
 * Just one is fine (until we do more async work).
 *
 * smbfs_mount() passes this to taskq_create_proc() when it creates
 * the per-mount taskq.
 */
int smbfs_tq_nthread = 1;
95 
96 /*
97  * Local functions definitions.
98  */
99 int		smbfsinit(int fstyp, char *name);
100 void		smbfsfini();
101 static int	smbfs_mount_label_policy(vfs_t *, void *, int, cred_t *);
102 
/*
 * SMBFS Mount options table for MS_OPTIONSTR
 * Note: These are not all the options.
 * Some options come in via MS_DATA.
 * Others are generic (see vfs.c)
 */

/*
 * Cancel lists: each NULL-terminated array names the option that is
 * the opposite of the entry it is attached to in mntopts[] below
 * (e.g. "intr" cancels "nointr").
 */
static char *intr_cancel[] = { MNTOPT_NOINTR, NULL };
static char *nointr_cancel[] = { MNTOPT_INTR, NULL };
static char *acl_cancel[] = { MNTOPT_NOACL, NULL };
static char *noacl_cancel[] = { MNTOPT_ACL, NULL };
static char *xattr_cancel[] = { MNTOPT_NOXATTR, NULL };
static char *noxattr_cancel[] = { MNTOPT_XATTR, NULL };

/*
 * The options themselves.  "intr" and "xattr" carry MO_DEFAULT;
 * neither "acl" nor "noacl" does, because the ACL default is decided
 * in smbfs_mount() from smbfs_default_opt_acl.
 */
static mntopt_t mntopts[] = {
/*
 *	option name		cancel option	default arg	flags
 *		ufs arg flag
 */
	{ MNTOPT_INTR,		intr_cancel,	NULL,	MO_DEFAULT, 0 },
	{ MNTOPT_NOINTR,	nointr_cancel,	NULL,	0,	0 },
	{ MNTOPT_ACL,		acl_cancel,	NULL,	0,	0 },
	{ MNTOPT_NOACL,		noacl_cancel,	NULL,	0,	0 },
	{ MNTOPT_XATTR,		xattr_cancel,	NULL,	MO_DEFAULT, 0 },
	{ MNTOPT_NOXATTR,	noxattr_cancel, NULL,	0,	0 }
};

/* Option table prototype (entry count + table), referenced from vfw. */
static mntopts_t smbfs_mntopts = {
	sizeof (mntopts) / sizeof (mntopt_t),
	mntopts
};
133 
/*
 * File system type name.  Sized FSTYPSZ because smbfs_statvfs()
 * copies exactly FSTYPSZ bytes of it into f_basetype.
 */
static const char fs_type_name[FSTYPSZ] = "smbfs";

/*
 * File system definition: names the type, its init routine
 * (smbfsinit) and the mount option table prototype.
 */
static vfsdef_t vfw = {
	VFSDEF_VERSION,
	(char *)fs_type_name,
	smbfsinit,		/* init routine */
	VSW_HASPROTO|VSW_NOTZONESAFE,	/* flags */
	&smbfs_mntopts			/* mount options table prototype */
};

/* Module linkage element wrapping the file system definition above. */
static struct modlfs modlfs = {
	&mod_fsops,
	"SMBFS filesystem",
	&vfw
};

/*
 * Module linkage handed to mod_install(), mod_remove() and mod_info()
 * by _init(), _fini() and _info() respectively.
 */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlfs, NULL
};
153 
/*
 * Mutex to protect the following variables:
 *	  smbfs_major
 *	  smbfs_minor
 *
 * All three are defined outside this file.  smbfs_mount() takes the
 * lock while it picks the next unused minor number for a new mount.
 */
extern	kmutex_t	smbfs_minor_lock;
extern	int		smbfs_major;
extern	int		smbfs_minor;

/*
 * Prevent unloads while we have mounts
 *
 * Incremented (atomically) in smbfs_mount() and decremented in
 * smbfs_freevfs(); _fini() returns EBUSY while it is non-zero.
 */
uint32_t	smbfs_mountcount;
167 
/*
 * smbfs vfs operations.
 *
 * Forward declarations; these are the functions installed through
 * smbfs_vfsops_template[] further down.
 */
static int	smbfs_mount(vfs_t *, vnode_t *, struct mounta *, cred_t *);
static int	smbfs_unmount(vfs_t *, int, cred_t *);
static int	smbfs_root(vfs_t *, vnode_t **);
static int	smbfs_statvfs(vfs_t *, statvfs64_t *);
static int	smbfs_sync(vfs_t *, short, cred_t *);
static void	smbfs_freevfs(vfs_t *);
177 
178 /*
179  * Module loading
180  */
181 
182 /*
183  * This routine is invoked automatically when the kernel module
184  * containing this routine is loaded.  This allows module specific
185  * initialization to be done when the module is loaded.
186  */
187 int
188 _init(void)
189 {
190 	int		error;
191 
192 	/*
193 	 * Check compiled-in version of "nsmb"
194 	 * that we're linked with.  (paranoid)
195 	 */
196 	if (nsmb_version != NSMB_VERSION) {
197 		cmn_err(CE_WARN, "_init: nsmb version mismatch");
198 		return (ENOTTY);
199 	}
200 
201 	smbfs_mountcount = 0;
202 
203 	/*
204 	 * NFS calls these two in _clntinit
205 	 * Easier to follow this way.
206 	 */
207 	if ((error = smbfs_subrinit()) != 0) {
208 		cmn_err(CE_WARN, "_init: smbfs_subrinit failed");
209 		return (error);
210 	}
211 
212 	if ((error = smbfs_vfsinit()) != 0) {
213 		cmn_err(CE_WARN, "_init: smbfs_vfsinit failed");
214 		smbfs_subrfini();
215 		return (error);
216 	}
217 
218 	if ((error = smbfs_clntinit()) != 0) {
219 		cmn_err(CE_WARN, "_init: smbfs_clntinit failed");
220 		smbfs_vfsfini();
221 		smbfs_subrfini();
222 		return (error);
223 	}
224 
225 	error = mod_install((struct modlinkage *)&modlinkage);
226 	return (error);
227 }
228 
229 /*
230  * Free kernel module resources that were allocated in _init
231  * and remove the linkage information into the kernel
232  */
233 int
234 _fini(void)
235 {
236 	int	error;
237 
238 	/*
239 	 * If a forcedly unmounted instance is still hanging around,
240 	 * we cannot allow the module to be unloaded because that would
241 	 * cause panics once the VFS framework decides it's time to call
242 	 * into VFS_FREEVFS().
243 	 */
244 	if (smbfs_mountcount)
245 		return (EBUSY);
246 
247 	error = mod_remove(&modlinkage);
248 	if (error)
249 		return (error);
250 
251 	/*
252 	 * Free the allocated smbnodes, etc.
253 	 */
254 	smbfs_clntfini();
255 
256 	/* NFS calls these two in _clntfini */
257 	smbfs_vfsfini();
258 	smbfs_subrfini();
259 
260 	/*
261 	 * Free the ops vectors
262 	 */
263 	smbfsfini();
264 	return (0);
265 }
266 
267 /*
268  * Return information about the module
269  */
270 int
271 _info(struct modinfo *modinfop)
272 {
273 	return (mod_info((struct modlinkage *)&modlinkage, modinfop));
274 }
275 
/*
 * Initialize the vfs structure
 *
 * smbfsfstyp records the file system type index handed to smbfsinit();
 * smbfs_vfsops is filled in by vfs_setfsops() there and cleared again
 * by smbfsfini().
 */

int smbfsfstyp;
vfsops_t *smbfs_vfsops = NULL;

/*
 * Template from which the vfs ops vector is built.  VGET and
 * MOUNTROOT are routed to fs_nosys; the list is terminated by the
 * { NULL, NULL } entry.
 */
static const fs_operation_def_t smbfs_vfsops_template[] = {
	{ VFSNAME_MOUNT, { .vfs_mount = smbfs_mount } },
	{ VFSNAME_UNMOUNT, { .vfs_unmount = smbfs_unmount } },
	{ VFSNAME_ROOT,	{ .vfs_root = smbfs_root } },
	{ VFSNAME_STATVFS, { .vfs_statvfs = smbfs_statvfs } },
	{ VFSNAME_SYNC,	{ .vfs_sync = smbfs_sync } },
	{ VFSNAME_VGET,	{ .error = fs_nosys } },
	{ VFSNAME_MOUNTROOT, { .error = fs_nosys } },
	{ VFSNAME_FREEVFS, { .vfs_freevfs = smbfs_freevfs } },
	{ NULL, NULL }
};
294 
295 int
296 smbfsinit(int fstyp, char *name)
297 {
298 	int		error;
299 
300 	error = vfs_setfsops(fstyp, smbfs_vfsops_template, &smbfs_vfsops);
301 	if (error != 0) {
302 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
303 		    "smbfsinit: bad vfs ops template");
304 		return (error);
305 	}
306 
307 	error = vn_make_ops(name, smbfs_vnodeops_template, &smbfs_vnodeops);
308 	if (error != 0) {
309 		(void) vfs_freevfsops_by_type(fstyp);
310 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
311 		    "smbfsinit: bad vnode ops template");
312 		return (error);
313 	}
314 
315 	smbfsfstyp = fstyp;
316 
317 	return (0);
318 }
319 
320 void
321 smbfsfini()
322 {
323 	if (smbfs_vfsops) {
324 		(void) vfs_freevfsops_by_type(smbfsfstyp);
325 		smbfs_vfsops = NULL;
326 	}
327 	if (smbfs_vnodeops) {
328 		vn_freevnodeops(smbfs_vnodeops);
329 		smbfs_vnodeops = NULL;
330 	}
331 }
332 
333 void
334 smbfs_free_smi(smbmntinfo_t *smi)
335 {
336 	if (smi == NULL)
337 		return;
338 
339 	if (smi->smi_zone_ref.zref_zone != NULL)
340 		zone_rele_ref(&smi->smi_zone_ref, ZONE_REF_SMBFS);
341 
342 	if (smi->smi_share != NULL)
343 		smb_share_rele(smi->smi_share);
344 
345 	avl_destroy(&smi->smi_hash_avl);
346 	rw_destroy(&smi->smi_hash_lk);
347 	cv_destroy(&smi->smi_statvfs_cv);
348 	mutex_destroy(&smi->smi_lock);
349 
350 	kmem_free(smi, sizeof (smbmntinfo_t));
351 }
352 
/*
 * smbfs mount vfsop
 * Set up mount info record and attach it to vfs struct.
 *
 * Arguments:
 *	vfsp	the vfs being mounted
 *	mvp	vnode of the mount point; must be a directory
 *	uap	mount arguments; uap->dataptr refers to a user-space
 *		smbfs_args structure of uap->datalen bytes
 *	cr	credentials of the caller
 *
 * Returns 0 on success.  Failures visible here are: the error from
 * secpolicy_fs_mount(), ENOTDIR (mount point is not a directory),
 * EFAULT (copyin of the arguments failed), EINVAL (mount program
 * version mismatch), ENOTSUP (MS_REMOUNT), EBUSY (mount point busy,
 * wrong zone, or zone shutting down), the error from smb_dev2share(),
 * and a positive error from smbfs_mount_label_policy().
 *
 * All failure exits come before the mount record is allocated and
 * before smbfs_mountcount is incremented.
 */
static int
smbfs_mount(vfs_t *vfsp, vnode_t *mvp, struct mounta *uap, cred_t *cr)
{
	char		*data = uap->dataptr;
	int		error;
	smbnode_t	*rtnp = NULL;	/* root of this fs */
	smbmntinfo_t	*smi = NULL;
	dev_t		smbfs_dev;
	int		version;
	int		devfd;
	zone_t		*zone = curproc->p_zone;
	zone_t		*mntzone = NULL;
	smb_share_t	*ssp = NULL;
	smb_cred_t	scred;
	int		flags, sec;

	STRUCT_DECL(smbfs_args, args);		/* smbfs mount arguments */

	if ((error = secpolicy_fs_mount(cr, mvp, vfsp)) != 0)
		return (error);

	if (mvp->v_type != VDIR)
		return (ENOTDIR);

	/*
	 * get arguments
	 *
	 * uap->datalen might be different from sizeof (args)
	 * in a compatible situation.
	 *
	 * The buffer is zeroed first, so a short copyin leaves the
	 * remaining fields as zero rather than as stack garbage.
	 */
	STRUCT_INIT(args, get_udatamodel());
	bzero(STRUCT_BUF(args), SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE));
	if (copyin(data, STRUCT_BUF(args), MIN(uap->datalen,
	    SIZEOF_STRUCT(smbfs_args, DATAMODEL_NATIVE))))
		return (EFAULT);

	/*
	 * Check mount program version
	 */
	version = STRUCT_FGET(args, version);
	if (version != SMBFS_VERSION) {
		cmn_err(CE_WARN, "mount version mismatch:"
		    " kernel=%d, mount=%d\n",
		    SMBFS_VERSION, version);
		return (EINVAL);
	}

	/*
	 * Deal with re-mount requests.
	 */
	if (uap->flags & MS_REMOUNT) {
		cmn_err(CE_WARN, "MS_REMOUNT not implemented");
		return (ENOTSUP);
	}

	/*
	 * Check for busy
	 *
	 * Unless this is an overlay mount, the mount point must have
	 * no other holders and must not itself be a file system root.
	 */
	mutex_enter(&mvp->v_lock);
	if (!(uap->flags & MS_OVERLAY) &&
	    (mvp->v_count != 1 || (mvp->v_flag & VROOT))) {
		mutex_exit(&mvp->v_lock);
		return (EBUSY);
	}
	mutex_exit(&mvp->v_lock);

	/*
	 * Get the "share" from the netsmb driver (ssp).
	 * It is returned with a "ref" (hold) for us.
	 * Release this hold: at errout below, or in
	 * smbfs_freevfs().
	 */
	devfd = STRUCT_FGET(args, devfd);
	error = smb_dev2share(devfd, &ssp);
	if (error) {
		cmn_err(CE_WARN, "invalid device handle %d (%d)\n",
		    devfd, error);
		return (error);
	}

	/*
	 * Use "goto errout" from here on.
	 * See: ssp, smi, rtnp, mntzone
	 */

	/*
	 * Determine the zone we're being mounted into.
	 *
	 * A caller in the global zone may only mount onto a path that
	 * also belongs to the global zone; anything else is EBUSY.
	 */
	zone_hold(mntzone = zone);		/* start with this assumption */
	if (getzoneid() == GLOBAL_ZONEID) {
		zone_rele(mntzone);
		mntzone = zone_find_by_path(refstr_value(vfsp->vfs_mntpt));
		ASSERT(mntzone != NULL);
		if (mntzone != zone) {
			error = EBUSY;
			goto errout;
		}
	}

	/*
	 * Stop the mount from going any further if the zone is going away.
	 */
	if (zone_status_get(mntzone) >= ZONE_IS_SHUTTING_DOWN) {
		error = EBUSY;
		goto errout;
	}

	/*
	 * On a Trusted Extensions client, we may have to force read-only
	 * for read-down mounts.
	 */
	if (is_system_labeled()) {
		void *addr;
		int ipvers = 0;
		struct smb_vc *vcp;

		vcp = SSTOVC(ssp);
		addr = smb_vc_getipaddr(vcp, &ipvers);
		error = smbfs_mount_label_policy(vfsp, addr, ipvers, cr);

		/* positive: errno; -1: read-only allowed; 0: allowed */
		if (error > 0)
			goto errout;

		if (error == -1) {
			/* change mount to read-only to prevent write-down */
			vfs_setmntopt(vfsp, MNTOPT_RO, NULL, 0);
		}
	}

	/* Prevent unload. */
	atomic_inc_32(&smbfs_mountcount);

	/*
	 * Create a mount record and link it to the vfs struct.
	 * No more possibilities for errors from here on.
	 * Tear-down of this stuff is in smbfs_free_smi()
	 *
	 * Compare with NFS: nfsrootvp()
	 */
	smi = kmem_zalloc(sizeof (*smi), KM_SLEEP);

	mutex_init(&smi->smi_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&smi->smi_statvfs_cv, NULL, CV_DEFAULT, NULL);

	rw_init(&smi->smi_hash_lk, NULL, RW_DEFAULT, NULL);
	smbfs_init_hash_avl(&smi->smi_hash_avl);

	/* The share hold now belongs to the mount record. */
	smi->smi_share = ssp;
	ssp = NULL;

	/*
	 * Convert the anonymous zone hold acquired via zone_hold() above
	 * into a zone reference.
	 */
	zone_init_ref(&smi->smi_zone_ref);
	zone_hold_ref(mntzone, &smi->smi_zone_ref, ZONE_REF_SMBFS);
	zone_rele(mntzone);
	mntzone = NULL;

	/*
	 * Initialize option defaults
	 */
	smi->smi_flags	= SMI_LLOCK;
	smi->smi_acregmin = SEC2HR(SMBFS_ACREGMIN);
	smi->smi_acregmax = SEC2HR(SMBFS_ACREGMAX);
	smi->smi_acdirmin = SEC2HR(SMBFS_ACDIRMIN);
	smi->smi_acdirmax = SEC2HR(SMBFS_ACDIRMAX);

	/*
	 * All "generic" mount options have already been
	 * handled in vfs.c:domount() - see mntopts stuff.
	 * Query generic options using vfs_optionisset().
	 * Give ACL an adjustable system-wide default.
	 *
	 * An explicit "noacl" is tested last so that it wins over
	 * the system-wide default.
	 */
	if (smbfs_default_opt_acl ||
	    vfs_optionisset(vfsp, MNTOPT_ACL, NULL))
		smi->smi_flags |= SMI_ACL;
	if (vfs_optionisset(vfsp, MNTOPT_NOACL, NULL))
		smi->smi_flags &= ~SMI_ACL;
	if (vfs_optionisset(vfsp, MNTOPT_INTR, NULL))
		smi->smi_flags |= SMI_INT;

	/*
	 * Get the mount options that come in as smbfs_args,
	 * starting with args.flags (SMBFS_MF_xxx)
	 */
	flags = STRUCT_FGET(args, flags);
	smi->smi_uid	= STRUCT_FGET(args, uid);
	smi->smi_gid	= STRUCT_FGET(args, gid);
	smi->smi_fmode	= STRUCT_FGET(args, file_mode) & 0777;
	smi->smi_dmode	= STRUCT_FGET(args, dir_mode) & 0777;

	/*
	 * Handle the SMBFS_MF_xxx flags.
	 *
	 * Each attribute cache time is taken from the arguments only
	 * when its flag is set; values that are negative or above the
	 * limit are replaced by the limit itself.
	 */
	if (flags & SMBFS_MF_NOAC)
		smi->smi_flags |= SMI_NOAC;
	if (flags & SMBFS_MF_ACREGMIN) {
		sec = STRUCT_FGET(args, acregmin);
		if (sec < 0 || sec > SMBFS_ACMINMAX)
			sec = SMBFS_ACMINMAX;
		smi->smi_acregmin = SEC2HR(sec);
	}
	if (flags & SMBFS_MF_ACREGMAX) {
		sec = STRUCT_FGET(args, acregmax);
		if (sec < 0 || sec > SMBFS_ACMAXMAX)
			sec = SMBFS_ACMAXMAX;
		smi->smi_acregmax = SEC2HR(sec);
	}
	if (flags & SMBFS_MF_ACDIRMIN) {
		sec = STRUCT_FGET(args, acdirmin);
		if (sec < 0 || sec > SMBFS_ACMINMAX)
			sec = SMBFS_ACMINMAX;
		smi->smi_acdirmin = SEC2HR(sec);
	}
	if (flags & SMBFS_MF_ACDIRMAX) {
		sec = STRUCT_FGET(args, acdirmax);
		if (sec < 0 || sec > SMBFS_ACMAXMAX)
			sec = SMBFS_ACMAXMAX;
		smi->smi_acdirmax = SEC2HR(sec);
	}

	/*
	 * Get attributes of the remote file system,
	 * i.e. ACL support, named streams, etc.
	 *
	 * A failure here is only logged (debug); the mount proceeds.
	 * NOTE(review): smi_fsattr below presumably aliases a field of
	 * smi_fsa; if so, a failed query leaves it zero (the record was
	 * zero-allocated) and both xattr and ACL get disabled - confirm.
	 */
	smb_credinit(&scred, cr);
	error = smbfs_smb_qfsattr(smi->smi_share, &smi->smi_fsa, &scred);
	smb_credrele(&scred);
	if (error) {
		SMBVDEBUG("smbfs_smb_qfsattr error %d\n", error);
	}

	/*
	 * We enable XATTR by default (via smbfs_mntopts)
	 * but if the share does not support named streams,
	 * force the NOXATTR option (also clears XATTR).
	 * Caller will set or clear VFS_XATTR after this.
	 */
	if ((smi->smi_fsattr & FILE_NAMED_STREAMS) == 0)
		vfs_setmntopt(vfsp, MNTOPT_NOXATTR, NULL, 0);

	/*
	 * Ditto ACLs (disable if not supported on this share)
	 */
	if ((smi->smi_fsattr & FILE_PERSISTENT_ACLS) == 0) {
		vfs_setmntopt(vfsp, MNTOPT_NOACL, NULL, 0);
		smi->smi_flags &= ~SMI_ACL;
	}

	/*
	 * Assign a unique device id to the mount
	 *
	 * Step the shared minor number (wrapping at MAXMIN32) until
	 * the resulting device is not already mounted.
	 */
	mutex_enter(&smbfs_minor_lock);
	do {
		smbfs_minor = (smbfs_minor + 1) & MAXMIN32;
		smbfs_dev = makedevice(smbfs_major, smbfs_minor);
	} while (vfs_devismounted(smbfs_dev));
	mutex_exit(&smbfs_minor_lock);

	vfsp->vfs_dev	= smbfs_dev;
	vfs_make_fsid(&vfsp->vfs_fsid, smbfs_dev, smbfsfstyp);
	vfsp->vfs_data	= (caddr_t)smi;
	vfsp->vfs_fstype = smbfsfstyp;
	vfsp->vfs_bsize = MAXBSIZE;
	vfsp->vfs_bcount = 0;

	smi->smi_vfsp	= vfsp;
	smbfs_zonelist_add(smi);	/* undo in smbfs_freevfs */

	/* PSARC 2007/227 VFS Feature Registration */
	vfs_set_feature(vfsp, VFSFT_XVATTR);
	vfs_set_feature(vfsp, VFSFT_SYSATTR_VIEWS);

	/*
	 * Create the root vnode, which we need in unmount
	 * for the call to smbfs_check_table(), etc.
	 * Release this hold in smbfs_unmount.
	 */
	rtnp = smbfs_node_findcreate(smi, "\\", 1, NULL, 0, 0,
	    &smbfs_fattr0);
	ASSERT(rtnp != NULL);
	rtnp->r_vnode->v_type = VDIR;
	rtnp->r_vnode->v_flag |= VROOT;
	smi->smi_root = rtnp;

	/*
	 * Create a taskq for async work (i.e. putpage)
	 * Destroyed in smbfs_unmount().
	 */
	smi->smi_taskq = taskq_create_proc("smbfs",
	    smbfs_tq_nthread, minclsyspri,
	    smbfs_tq_nthread, smbfs_tq_nthread * 2,
	    zone->zone_zsched, TASKQ_PREPOPULATE);

	/*
	 * NFS does other stuff here too:
	 *   async worker threads
	 *   init kstats
	 *
	 * End of code from NFS nfsrootvp()
	 */
	return (0);

errout:
	/*
	 * Common failure exit: release whatever was acquired.  Every
	 * jump here happens before smi is allocated, so in practice
	 * only the zone hold and the share hold need releasing.
	 */
	vfsp->vfs_data = NULL;
	if (smi != NULL)
		smbfs_free_smi(smi);

	if (mntzone != NULL)
		zone_rele(mntzone);

	if (ssp != NULL)
		smb_share_rele(ssp);

	return (error);
}
673 
674 /*
675  * vfs operations
676  */
677 static int
678 smbfs_unmount(vfs_t *vfsp, int flag, cred_t *cr)
679 {
680 	smbmntinfo_t	*smi;
681 	smbnode_t	*rtnp;
682 
683 	smi = VFTOSMI(vfsp);
684 
685 	if (secpolicy_fs_unmount(cr, vfsp) != 0)
686 		return (EPERM);
687 
688 	if ((flag & MS_FORCE) == 0) {
689 		smbfs_rflush(vfsp, cr);
690 
691 		/*
692 		 * If there are any active vnodes on this file system,
693 		 * (other than the root vnode) then the file system is
694 		 * busy and can't be umounted.
695 		 */
696 		if (smbfs_check_table(vfsp, smi->smi_root))
697 			return (EBUSY);
698 
699 		/*
700 		 * We normally hold a ref to the root vnode, so
701 		 * check for references beyond the one we expect:
702 		 *   smbmntinfo_t -> smi_root
703 		 * Note that NFS does not hold the root vnode.
704 		 */
705 		if (smi->smi_root &&
706 		    smi->smi_root->r_vnode->v_count > 1)
707 			return (EBUSY);
708 	}
709 
710 	/*
711 	 * common code for both forced and non-forced
712 	 *
713 	 * Setting VFS_UNMOUNTED prevents new operations.
714 	 * Operations already underway may continue,
715 	 * but not for long.
716 	 */
717 	vfsp->vfs_flag |= VFS_UNMOUNTED;
718 
719 	/*
720 	 * If we hold the root VP (and we normally do)
721 	 * then it's safe to release it now.
722 	 */
723 	if (smi->smi_root) {
724 		rtnp = smi->smi_root;
725 		smi->smi_root = NULL;
726 		VN_RELE(rtnp->r_vnode);	/* release root vnode */
727 	}
728 
729 	/*
730 	 * Remove all nodes from the node hash tables.
731 	 * This (indirectly) calls: smbfs_addfree, smbinactive,
732 	 * which will try to flush dirty pages, etc. so
733 	 * don't destroy the underlying share just yet.
734 	 *
735 	 * Also, with a forced unmount, some nodes may
736 	 * remain active, and those will get cleaned up
737 	 * after their last vn_rele.
738 	 */
739 	smbfs_destroy_table(vfsp);
740 
741 	/*
742 	 * Shutdown any outstanding I/O requests on this share,
743 	 * and force a tree disconnect.  The share object will
744 	 * continue to hang around until smb_share_rele().
745 	 * This should also cause most active nodes to be
746 	 * released as their operations fail with EIO.
747 	 */
748 	smb_share_kill(smi->smi_share);
749 
750 	/*
751 	 * Any async taskq work should be giving up.
752 	 * Wait for those to exit.
753 	 */
754 	taskq_destroy(smi->smi_taskq);
755 
756 	/*
757 	 * Delete our kstats...
758 	 *
759 	 * Doing it here, rather than waiting until
760 	 * smbfs_freevfs so these are not visible
761 	 * after the unmount.
762 	 */
763 	if (smi->smi_io_kstats) {
764 		kstat_delete(smi->smi_io_kstats);
765 		smi->smi_io_kstats = NULL;
766 	}
767 	if (smi->smi_ro_kstats) {
768 		kstat_delete(smi->smi_ro_kstats);
769 		smi->smi_ro_kstats = NULL;
770 	}
771 
772 	/*
773 	 * The rest happens in smbfs_freevfs()
774 	 */
775 	return (0);
776 }
777 
778 
779 /*
780  * find root of smbfs
781  */
782 static int
783 smbfs_root(vfs_t *vfsp, vnode_t **vpp)
784 {
785 	smbmntinfo_t	*smi;
786 	vnode_t		*vp;
787 
788 	smi = VFTOSMI(vfsp);
789 
790 	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
791 		return (EPERM);
792 
793 	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
794 		return (EIO);
795 
796 	/*
797 	 * The root vp is created in mount and held
798 	 * until unmount, so this is paranoia.
799 	 */
800 	if (smi->smi_root == NULL)
801 		return (EIO);
802 
803 	/* Just take a reference and return it. */
804 	vp = SMBTOV(smi->smi_root);
805 	VN_HOLD(vp);
806 	*vpp = vp;
807 
808 	return (0);
809 }
810 
811 /*
812  * Get file system statistics.
813  */
814 static int
815 smbfs_statvfs(vfs_t *vfsp, statvfs64_t *sbp)
816 {
817 	int		error;
818 	smbmntinfo_t	*smi = VFTOSMI(vfsp);
819 	smb_share_t	*ssp = smi->smi_share;
820 	statvfs64_t	stvfs;
821 	hrtime_t now;
822 	smb_cred_t	scred;
823 
824 	if (curproc->p_zone != smi->smi_zone_ref.zref_zone)
825 		return (EPERM);
826 
827 	if (smi->smi_flags & SMI_DEAD || vfsp->vfs_flag & VFS_UNMOUNTED)
828 		return (EIO);
829 
830 	mutex_enter(&smi->smi_lock);
831 
832 	/*
833 	 * Use cached result if still valid.
834 	 */
835 recheck:
836 	now = gethrtime();
837 	if (now < smi->smi_statfstime) {
838 		error = 0;
839 		goto cache_hit;
840 	}
841 
842 	/*
843 	 * FS attributes are stale, so someone
844 	 * needs to do an OTW call to get them.
845 	 * Serialize here so only one thread
846 	 * does the OTW call.
847 	 */
848 	if (smi->smi_status & SM_STATUS_STATFS_BUSY) {
849 		smi->smi_status |= SM_STATUS_STATFS_WANT;
850 		if (!cv_wait_sig(&smi->smi_statvfs_cv, &smi->smi_lock)) {
851 			mutex_exit(&smi->smi_lock);
852 			return (EINTR);
853 		}
854 		/* Hope status is valid now. */
855 		goto recheck;
856 	}
857 	smi->smi_status |= SM_STATUS_STATFS_BUSY;
858 	mutex_exit(&smi->smi_lock);
859 
860 	/*
861 	 * Do the OTW call.  Note: lock NOT held.
862 	 */
863 	smb_credinit(&scred, NULL);
864 	bzero(&stvfs, sizeof (stvfs));
865 	error = smbfs_smb_statfs(ssp, &stvfs, &scred);
866 	smb_credrele(&scred);
867 	if (error) {
868 		SMBVDEBUG("statfs error=%d\n", error);
869 	} else {
870 
871 		/*
872 		 * Set a few things the OTW call didn't get.
873 		 */
874 		stvfs.f_frsize = stvfs.f_bsize;
875 		stvfs.f_favail = stvfs.f_ffree;
876 		stvfs.f_fsid = (unsigned long)vfsp->vfs_fsid.val[0];
877 		bcopy(fs_type_name, stvfs.f_basetype, FSTYPSZ);
878 		stvfs.f_flag	= vf_to_stf(vfsp->vfs_flag);
879 		stvfs.f_namemax	= smi->smi_fsa.fsa_maxname;
880 
881 		/*
882 		 * Save the result, update lifetime
883 		 */
884 		now = gethrtime();
885 		smi->smi_statfstime = now +
886 		    (SM_MAX_STATFSTIME * (hrtime_t)NANOSEC);
887 		smi->smi_statvfsbuf = stvfs; /* struct assign! */
888 	}
889 
890 	mutex_enter(&smi->smi_lock);
891 	if (smi->smi_status & SM_STATUS_STATFS_WANT)
892 		cv_broadcast(&smi->smi_statvfs_cv);
893 	smi->smi_status &= ~(SM_STATUS_STATFS_BUSY | SM_STATUS_STATFS_WANT);
894 
895 	/*
896 	 * Copy the statvfs data to caller's buf.
897 	 * Note: struct assignment
898 	 */
899 cache_hit:
900 	if (error == 0)
901 		*sbp = smi->smi_statvfsbuf;
902 	mutex_exit(&smi->smi_lock);
903 	return (error);
904 }
905 
906 /*
907  * Flush dirty smbfs files for file system vfsp.
908  * If vfsp == NULL, all smbfs files are flushed.
909  */
910 /*ARGSUSED*/
911 static int
912 smbfs_sync(vfs_t *vfsp, short flag, cred_t *cr)
913 {
914 
915 	/*
916 	 * SYNC_ATTR is used by fsflush() to force old filesystems like UFS
917 	 * to sync metadata, which they would otherwise cache indefinitely.
918 	 * Semantically, the only requirement is that the sync be initiated.
919 	 * Assume the server-side takes care of attribute sync.
920 	 */
921 	if (flag & SYNC_ATTR)
922 		return (0);
923 
924 	if (vfsp == NULL) {
925 		/*
926 		 * Flush ALL smbfs mounts in this zone.
927 		 */
928 		smbfs_flushall(cr);
929 		return (0);
930 	}
931 
932 	smbfs_rflush(vfsp, cr);
933 
934 	return (0);
935 }
936 
/*
 * One-time setup hook for the VFS layer of smbfs, called from
 * _init().  There is currently nothing to set up, so it simply
 * reports success (0).
 */
int
smbfs_vfsinit(void)
{
	return (0);
}
945 
/*
 * One-time teardown hook for the VFS layer of smbfs; the counterpart
 * of smbfs_vfsinit().  Called from _fini(), and from _init() when a
 * later setup step fails.  Nothing to undo at present.
 */
void
smbfs_vfsfini(void)
{
	/* intentionally empty */
}
953 
954 void
955 smbfs_freevfs(vfs_t *vfsp)
956 {
957 	smbmntinfo_t    *smi;
958 
959 	/* free up the resources */
960 	smi = VFTOSMI(vfsp);
961 
962 	/*
963 	 * By this time we should have already deleted the
964 	 * smi kstats in the unmount code.  If they are still around
965 	 * something is wrong
966 	 */
967 	ASSERT(smi->smi_io_kstats == NULL);
968 
969 	smbfs_zonelist_remove(smi);
970 
971 	smbfs_free_smi(smi);
972 
973 	/*
974 	 * Allow _fini() to succeed now, if so desired.
975 	 */
976 	atomic_dec_32(&smbfs_mountcount);
977 }
978 
979 /*
980  * smbfs_mount_label_policy:
981  *	Determine whether the mount is allowed according to MAC check,
982  *	by comparing (where appropriate) label of the remote server
983  *	against the label of the zone being mounted into.
984  *
985  *	Returns:
986  *		 0 :	access allowed
987  *		-1 :	read-only access allowed (i.e., read-down)
988  *		>0 :	error code, such as EACCES
989  *
990  * NB:
991  * NFS supports Cipso labels by parsing the vfs_resource
992  * to see what the Solaris server global zone has shared.
993  * We can't support that for CIFS since resource names
994  * contain share names, not paths.
995  */
996 static int
997 smbfs_mount_label_policy(vfs_t *vfsp, void *ipaddr, int addr_type, cred_t *cr)
998 {
999 	bslabel_t	*server_sl, *mntlabel;
1000 	zone_t		*mntzone = NULL;
1001 	ts_label_t	*zlabel;
1002 	tsol_tpc_t	*tp;
1003 	ts_label_t	*tsl = NULL;
1004 	int		retv;
1005 
1006 	/*
1007 	 * Get the zone's label.  Each zone on a labeled system has a label.
1008 	 */
1009 	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
1010 	zlabel = mntzone->zone_slabel;
1011 	ASSERT(zlabel != NULL);
1012 	label_hold(zlabel);
1013 
1014 	retv = EACCES;				/* assume the worst */
1015 
1016 	/*
1017 	 * Next, get the assigned label of the remote server.
1018 	 */
1019 	tp = find_tpc(ipaddr, addr_type, B_FALSE);
1020 	if (tp == NULL)
1021 		goto out;			/* error getting host entry */
1022 
1023 	if (tp->tpc_tp.tp_doi != zlabel->tsl_doi)
1024 		goto rel_tpc;			/* invalid domain */
1025 	if ((tp->tpc_tp.host_type != UNLABELED))
1026 		goto rel_tpc;			/* invalid hosttype */
1027 
1028 	server_sl = &tp->tpc_tp.tp_def_label;
1029 	mntlabel = label2bslabel(zlabel);
1030 
1031 	/*
1032 	 * Now compare labels to complete the MAC check.  If the labels
1033 	 * are equal or if the requestor is in the global zone and has
1034 	 * NET_MAC_AWARE, then allow read-write access.   (Except for
1035 	 * mounts into the global zone itself; restrict these to
1036 	 * read-only.)
1037 	 *
1038 	 * If the requestor is in some other zone, but their label
1039 	 * dominates the server, then allow read-down.
1040 	 *
1041 	 * Otherwise, access is denied.
1042 	 */
1043 	if (blequal(mntlabel, server_sl) ||
1044 	    (crgetzoneid(cr) == GLOBAL_ZONEID &&
1045 	    getpflags(NET_MAC_AWARE, cr) != 0)) {
1046 		if ((mntzone == global_zone) ||
1047 		    !blequal(mntlabel, server_sl))
1048 			retv = -1;		/* read-only */
1049 		else
1050 			retv = 0;		/* access OK */
1051 	} else if (bldominates(mntlabel, server_sl)) {
1052 		retv = -1;			/* read-only */
1053 	} else {
1054 		retv = EACCES;
1055 	}
1056 
1057 	if (tsl != NULL)
1058 		label_rele(tsl);
1059 
1060 rel_tpc:
1061 	/*LINTED*/
1062 	TPC_RELE(tp);
1063 out:
1064 	if (mntzone)
1065 		zone_rele(mntzone);
1066 	label_rele(zlabel);
1067 	return (retv);
1068 }
1069