xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_lockfs.c (revision 7c478bd9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/t_lock.h>
31 #include <sys/param.h>
32 #include <sys/time.h>
33 #include <sys/systm.h>
34 #include <sys/sysmacros.h>
35 #include <sys/resource.h>
36 #include <sys/signal.h>
37 #include <sys/cred.h>
38 #include <sys/user.h>
39 #include <sys/buf.h>
40 #include <sys/vfs.h>
41 #include <sys/vnode.h>
42 #include <sys/proc.h>
43 #include <sys/disp.h>
44 #include <sys/file.h>
45 #include <sys/fcntl.h>
46 #include <sys/flock.h>
47 #include <sys/kmem.h>
48 #include <sys/uio.h>
49 #include <sys/conf.h>
50 #include <sys/mman.h>
51 #include <sys/pathname.h>
52 #include <sys/debug.h>
53 #include <sys/vmmeter.h>
54 #include <sys/vmsystm.h>
55 #include <sys/cmn_err.h>
56 #include <sys/vtrace.h>
57 #include <sys/acct.h>
58 #include <sys/dnlc.h>
59 #include <sys/swap.h>
60 
61 #include <sys/fs/ufs_fs.h>
62 #include <sys/fs/ufs_inode.h>
63 #include <sys/fs/ufs_fsdir.h>
64 #include <sys/fs/ufs_trans.h>
65 #include <sys/fs/ufs_panic.h>
66 #include <sys/fs/ufs_mount.h>
67 #include <sys/fs/ufs_bio.h>
68 #include <sys/fs/ufs_log.h>
69 #include <sys/fs/ufs_quota.h>
70 #include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
71 #include <sys/errno.h>
72 #include <sys/sysinfo.h>
73 
74 #include <vm/hat.h>
75 #include <vm/pvn.h>
76 #include <vm/as.h>
77 #include <vm/seg.h>
78 #include <vm/seg_map.h>
79 #include <vm/seg_vn.h>
80 #include <vm/rm.h>
81 #include <vm/anon.h>
82 #include <sys/swap.h>
83 #include <sys/dnlc.h>
84 
85 extern struct vnode *common_specvp(struct vnode *vp);
86 
/*
 * Error-lock transition codes.  ufs__fiolfs() works out which of these
 * applies to a request and passes it to the reconcile routines and to
 * its own cleanup code.
 */
#define	NO_ERRLCK	0	/* no error lock involved */
#define	SET_ERRLCK	1	/* error lock newly applied */
#define	RE_ERRLCK	2	/* error lock applied over an error lock */
#define	UN_ERRLCK	(-1)	/* error lock being released */
92 
93 /*
94  * Index to be used in TSD for storing lockfs data
95  */
96 uint_t ufs_lockfs_key;
97 
98 typedef struct _ulockfs_info {
99 	struct _ulockfs_info *next;
100 	struct ulockfs *ulp;
101 } ulockfs_info_t;
102 
/*
 * IS_REC_VOP(found, head, target, slot)
 *
 * Check in TSD whether we are already doing a VOP on this file system.
 *
 * Walks the ulockfs_info_t list starting at `head'.  `found' is set to 1
 * if an element whose ulp equals `target' is seen and to 0 otherwise.
 * `slot' is set to the first element with a NULL ulp that is encountered
 * before the walk stops, or to NULL if there is none; the walk stops at
 * the first match.
 *
 * `found' and `slot' must be assignable; `head' and `target' may be
 * evaluated more than once.  The parameter names deliberately differ
 * from the structure member names so that the argument text can never
 * be substituted into a member access, and the body is wrapped in
 * do/while so the macro behaves as a single statement.
 */
#define	IS_REC_VOP(found, head, target, slot)			\
do {								\
	ulockfs_info_t *_irv_curr;				\
								\
	(found) = 0;						\
	(slot) = NULL;						\
	for (_irv_curr = (head); _irv_curr != NULL;		\
	    _irv_curr = _irv_curr->next) {			\
		if (((slot) == NULL) &&				\
		    (_irv_curr->ulp == NULL))			\
			(slot) = _irv_curr;			\
		if (_irv_curr->ulp == (target)) {		\
			(found) = 1;				\
			break;					\
		}						\
	}							\
} while (0)
121 
/*
 * SEARCH_ULOCKFSP(head, target, info)
 *
 * Get the lockfs data from TSD so that lockfs handles the recursive VOP
 * properly.
 *
 * Walks the ulockfs_info_t list starting at `head' and stores in `info'
 * the first element whose ulp equals `target', or NULL if no element
 * matches.
 *
 * `info' must be assignable; `head' and `target' may be evaluated more
 * than once.  The parameter names deliberately differ from the structure
 * member names so that the argument text can never be substituted into a
 * member access, and the body is wrapped in do/while so the macro
 * behaves as a single statement.
 */
#define	SEARCH_ULOCKFSP(head, target, info)			\
do {								\
	ulockfs_info_t *_sul_curr;				\
								\
	for (_sul_curr = (head); _sul_curr != NULL;		\
	    _sul_curr = _sul_curr->next) {			\
		if (_sul_curr->ulp == (target))			\
			break;					\
	}							\
								\
	(info) = _sul_curr;					\
} while (0)
139 
140 /*
141  * Validate lockfs request
142  */
143 static int
144 ufs_getlfd(
145 	struct lockfs *lockfsp,		/* new lock request */
146 	struct lockfs *ul_lockfsp)	/* old lock state */
147 {
148 	int	error = 0;
149 
150 	/*
151 	 * no input flags defined
152 	 */
153 	if (lockfsp->lf_flags != 0) {
154 		error = EINVAL;
155 		goto errout;
156 	}
157 
158 	/*
159 	 * check key
160 	 */
161 	if (!LOCKFS_IS_ULOCK(ul_lockfsp))
162 		if (lockfsp->lf_key != ul_lockfsp->lf_key) {
163 			error = EINVAL;
164 			goto errout;
165 	}
166 
167 	lockfsp->lf_key = ul_lockfsp->lf_key + 1;
168 
169 errout:
170 	return (error);
171 }
172 
173 /*
174  * ufs_checkaccton
175  *	check if accounting is turned on on this fs
176  */
177 
178 int
179 ufs_checkaccton(struct vnode *vp)
180 {
181 	if (acct_fs_in_use(vp))
182 		return (EDEADLK);
183 	return (0);
184 }
185 
186 /*
187  * ufs_checkswapon
188  *	check if local swapping is to file on this fs
189  */
190 int
191 ufs_checkswapon(struct vnode *vp)
192 {
193 	struct swapinfo	*sip;
194 
195 	mutex_enter(&swapinfo_lock);
196 	for (sip = swapinfo; sip; sip = sip->si_next)
197 		if (sip->si_vp->v_vfsp == vp->v_vfsp) {
198 			mutex_exit(&swapinfo_lock);
199 			return (EDEADLK);
200 		}
201 	mutex_exit(&swapinfo_lock);
202 	return (0);
203 }
204 
205 /*
206  * ufs_freeze
207  *	pend future accesses for current lock and desired lock
208  */
209 void
210 ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
211 {
212 	/*
213 	 * set to new lock type
214 	 */
215 	ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
216 	ulp->ul_lockfs.lf_key = lockfsp->lf_key;
217 	ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
218 	ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;
219 
220 	ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
221 }
222 
223 /*
224  * ufs_quiesce
225  *	wait for outstanding accesses to finish
226  */
227 int
228 ufs_quiesce(struct ulockfs *ulp)
229 {
230 	int error = 0;
231 
232 	/*
233 	 * Set a softlock to suspend future ufs_vnops so that
234 	 * this lockfs request will not be starved
235 	 */
236 	ULOCKFS_SET_SLOCK(ulp);
237 
238 	/* check if there is any outstanding ufs vnodeops calls */
239 	while (ulp->ul_vnops_cnt)
240 		if (!cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock)) {
241 			error = EINTR;
242 			goto out;
243 		}
244 
245 out:
246 	/*
247 	 * unlock the soft lock
248 	 */
249 	ULOCKFS_CLR_SLOCK(ulp);
250 
251 	return (error);
252 }
253 /*
254  * ufs_flush_inode
255  */
256 int
257 ufs_flush_inode(struct inode *ip, void *arg)
258 {
259 	int	error;
260 	int	saverror	= 0;
261 
262 	/*
263 	 * wrong file system; keep looking
264 	 */
265 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
266 		return (0);
267 
268 	/*
269 	 * asynchronously push all the dirty pages
270 	 */
271 	if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
272 	    (error != EAGAIN))
273 		saverror = error;
274 	/*
275 	 * wait for io and discard all mappings
276 	 */
277 	if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
278 		saverror = error;
279 
280 	if (ITOV(ip)->v_type == VDIR) {
281 		dnlc_dir_purge(&ip->i_danchor);
282 	}
283 
284 	return (saverror);
285 }
286 
287 /*
288  * ufs_flush
289  *	Flush everything that is currently dirty; this includes invalidating
290  *	any mappings.
291  */
292 int
293 ufs_flush(struct vfs *vfsp)
294 {
295 	int		error;
296 	int		saverror = 0;
297 	struct ufsvfs	*ufsvfsp	= (struct ufsvfs *)vfsp->vfs_data;
298 	struct fs	*fs		= ufsvfsp->vfs_fs;
299 
300 	ASSERT(vfs_lock_held(vfsp));
301 
302 	/*
303 	 * purge dnlc
304 	 */
305 	(void) dnlc_purge_vfsp(vfsp, 0);
306 
307 	/*
308 	 * drain the delete and idle threads
309 	 */
310 	ufs_delete_drain(vfsp, 0, 0);
311 	ufs_idle_drain(vfsp);
312 
313 	/*
314 	 * flush and invalidate quota records
315 	 */
316 	(void) qsync(ufsvfsp);
317 
318 	/*
319 	 * flush w/invalidate the inodes for vfsp
320 	 */
321 	if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
322 		saverror = error;
323 
324 	/*
325 	 * synchronously flush superblock and summary info
326 	 */
327 	if (fs->fs_ronly == 0 && fs->fs_fmod) {
328 		fs->fs_fmod = 0;
329 		TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
330 	}
331 	/*
332 	 * flush w/invalidate block device pages and buf cache
333 	 */
334 	if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp),
335 	    (offset_t)0, 0, B_INVAL, CRED())) > 0)
336 		saverror = error;
337 
338 	(void) bflush((dev_t)vfsp->vfs_dev);
339 	(void) bfinval((dev_t)vfsp->vfs_dev, 0);
340 
341 	/*
342 	 * drain the delete and idle threads again
343 	 */
344 	ufs_delete_drain(vfsp, 0, 0);
345 	ufs_idle_drain(vfsp);
346 
347 	/*
348 	 * play with the clean flag
349 	 */
350 	if (saverror == 0)
351 		ufs_checkclean(vfsp);
352 
353 	/*
354 	 * flush any outstanding transactions and roll the log
355 	 */
356 	if (TRANS_ISTRANS(ufsvfsp)) {
357 		curthread->t_flag |= T_DONTBLOCK;
358 		TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE,
359 		    error);
360 		if (!error) {
361 			TRANS_END_SYNC(ufsvfsp, saverror, TOP_COMMIT_FLUSH,
362 			    TOP_COMMIT_SIZE);
363 		}
364 		curthread->t_flag &= ~T_DONTBLOCK;
365 
366 		logmap_roll_dev(ufsvfsp->vfs_log); /* fully roll the log */
367 	}
368 
369 	return (saverror);
370 }
371 
372 /*
373  * ufs_thaw_wlock
374  *	special processing when thawing down to wlock
375  */
376 static int
377 ufs_thaw_wlock(struct inode *ip, void *arg)
378 {
379 	/*
380 	 * wrong file system; keep looking
381 	 */
382 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
383 		return (0);
384 
385 	/*
386 	 * iupdat refuses to clear flags if the fs is read only.  The fs
387 	 * may become read/write during the lock and we wouldn't want
388 	 * these inodes being written to disk.  So clear the flags.
389 	 */
390 	rw_enter(&ip->i_contents, RW_WRITER);
391 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
392 	rw_exit(&ip->i_contents);
393 
394 	/*
395 	 * pages are mlocked -- fail wlock
396 	 */
397 	if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
398 		return (EBUSY);
399 
400 	return (0);
401 }
402 
403 /*
404  * ufs_thaw_hlock
405  *	special processing when thawing down to hlock or elock
406  */
407 static int
408 ufs_thaw_hlock(struct inode *ip, void *arg)
409 {
410 	struct vnode	*vp	= ITOV(ip);
411 
412 	/*
413 	 * wrong file system; keep looking
414 	 */
415 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
416 		return (0);
417 
418 	/*
419 	 * blow away all pages - even if they are mlocked
420 	 */
421 	do {
422 		(void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
423 	} while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
424 	rw_enter(&ip->i_contents, RW_WRITER);
425 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
426 	rw_exit(&ip->i_contents);
427 
428 	return (0);
429 }
430 
431 /*
432  * ufs_thaw
433  *	thaw file system lock down to current value
434  */
435 int
436 ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
437 {
438 	int		error	= 0;
439 	int		noidel	= (int)(ulp->ul_flag & ULOCKFS_NOIDEL);
440 
441 	/*
442 	 * if wlock or hlock or elock
443 	 */
444 	if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
445 	    ULOCKFS_IS_ELOCK(ulp)) {
446 
447 		/*
448 		 * don't keep access times
449 		 * don't free deleted files
450 		 * if superblock writes are allowed, limit them to me for now
451 		 */
452 		ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
453 		if (ulp->ul_sbowner != (kthread_id_t)-1)
454 			ulp->ul_sbowner = curthread;
455 
456 		/*
457 		 * wait for writes for deleted files and superblock updates
458 		 */
459 		(void) ufs_flush(vfsp);
460 
461 		/*
462 		 * now make sure the quota file is up-to-date
463 		 *	expensive; but effective
464 		 */
465 		error = ufs_flush(vfsp);
466 		/*
467 		 * no one can write the superblock
468 		 */
469 		ulp->ul_sbowner = (kthread_id_t)-1;
470 
471 		/*
472 		 * special processing for wlock/hlock/elock
473 		 */
474 		if (ULOCKFS_IS_WLOCK(ulp)) {
475 			if (error)
476 				goto errout;
477 			error = bfinval(ufsvfsp->vfs_dev, 0);
478 			if (error)
479 				goto errout;
480 			error = ufs_scan_inodes(0, ufs_thaw_wlock,
481 					(void *)ufsvfsp, ufsvfsp);
482 			if (error)
483 				goto errout;
484 		}
485 		if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
486 			error = 0;
487 			(void) ufs_scan_inodes(0, ufs_thaw_hlock,
488 					(void *)ufsvfsp, ufsvfsp);
489 			(void) bfinval(ufsvfsp->vfs_dev, 1);
490 		}
491 	} else {
492 
493 		/*
494 		 * okay to keep access times
495 		 * okay to free deleted files
496 		 * okay to write the superblock
497 		 */
498 		ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
499 		ulp->ul_sbowner = NULL;
500 
501 		/*
502 		 * flush in case deleted files are in memory
503 		 */
504 		if (noidel) {
505 			if (error = ufs_flush(vfsp))
506 				goto errout;
507 		}
508 	}
509 
510 errout:
511 	cv_broadcast(&ulp->ul_cv);
512 	return (error);
513 }
514 
515 /*
516  * ufs_reconcile_fs
517  *	reconcile incore superblock with ondisk superblock
518  */
519 int
520 ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
521 {
522 	struct fs	*mfs; 	/* in-memory superblock */
523 	struct fs	*dfs;	/* on-disk   superblock */
524 	struct buf	*bp;	/* on-disk   superblock buf */
525 	int		 needs_unlock;
526 	char		 finished_fsclean;
527 
528 	mfs = ufsvfsp->vfs_fs;
529 
530 	/*
531 	 * get the on-disk copy of the superblock
532 	 */
533 	bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
534 	bp->b_flags |= (B_STALE|B_AGE);
535 	if (bp->b_flags & B_ERROR) {
536 		brelse(bp);
537 		return (EIO);
538 	}
539 	dfs = bp->b_un.b_fs;
540 
541 	/* error locks may only unlock after the fs has been made consistent */
542 	if (errlck == UN_ERRLCK) {
543 		if (dfs->fs_clean == FSFIX) {	/* being repaired */
544 			brelse(bp);
545 			return (EAGAIN);
546 		}
547 		/* repair not yet started? */
548 		finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN;
549 		if (dfs->fs_clean != finished_fsclean) {
550 			brelse(bp);
551 			return (EBUSY);
552 		}
553 	}
554 
555 	/*
556 	 * if superblock has changed too much, abort
557 	 */
558 	if ((mfs->fs_sblkno		!= dfs->fs_sblkno) ||
559 	    (mfs->fs_cblkno		!= dfs->fs_cblkno) ||
560 	    (mfs->fs_iblkno		!= dfs->fs_iblkno) ||
561 	    (mfs->fs_dblkno		!= dfs->fs_dblkno) ||
562 	    (mfs->fs_cgoffset		!= dfs->fs_cgoffset) ||
563 	    (mfs->fs_cgmask		!= dfs->fs_cgmask) ||
564 	    (mfs->fs_bsize		!= dfs->fs_bsize) ||
565 	    (mfs->fs_fsize		!= dfs->fs_fsize) ||
566 	    (mfs->fs_frag		!= dfs->fs_frag) ||
567 	    (mfs->fs_bmask		!= dfs->fs_bmask) ||
568 	    (mfs->fs_fmask		!= dfs->fs_fmask) ||
569 	    (mfs->fs_bshift		!= dfs->fs_bshift) ||
570 	    (mfs->fs_fshift		!= dfs->fs_fshift) ||
571 	    (mfs->fs_fragshift		!= dfs->fs_fragshift) ||
572 	    (mfs->fs_fsbtodb		!= dfs->fs_fsbtodb) ||
573 	    (mfs->fs_sbsize		!= dfs->fs_sbsize) ||
574 	    (mfs->fs_nindir		!= dfs->fs_nindir) ||
575 	    (mfs->fs_nspf		!= dfs->fs_nspf) ||
576 	    (mfs->fs_trackskew		!= dfs->fs_trackskew) ||
577 	    (mfs->fs_cgsize		!= dfs->fs_cgsize) ||
578 	    (mfs->fs_ntrak		!= dfs->fs_ntrak) ||
579 	    (mfs->fs_nsect		!= dfs->fs_nsect) ||
580 	    (mfs->fs_spc		!= dfs->fs_spc) ||
581 	    (mfs->fs_cpg		!= dfs->fs_cpg) ||
582 	    (mfs->fs_ipg		!= dfs->fs_ipg) ||
583 	    (mfs->fs_fpg		!= dfs->fs_fpg) ||
584 	    (mfs->fs_postblformat	!= dfs->fs_postblformat) ||
585 	    (mfs->fs_magic		!= dfs->fs_magic)) {
586 		brelse(bp);
587 		return (EACCES);
588 	}
589 	if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
590 		if (mfs->fs_clean == FSLOG) {
591 			brelse(bp);
592 			return (EACCES);
593 		}
594 
595 	/*
596 	 * get new summary info
597 	 */
598 	if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
599 		brelse(bp);
600 		return (EIO);
601 	}
602 
603 	/*
604 	 * release old summary info and update in-memory superblock
605 	 */
606 	kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
607 	mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */
608 
609 	/*
610 	 * update fields allowed to change
611 	 */
612 	mfs->fs_size		= dfs->fs_size;
613 	mfs->fs_dsize		= dfs->fs_dsize;
614 	mfs->fs_ncg		= dfs->fs_ncg;
615 	mfs->fs_minfree		= dfs->fs_minfree;
616 	mfs->fs_rotdelay	= dfs->fs_rotdelay;
617 	mfs->fs_rps		= dfs->fs_rps;
618 	mfs->fs_maxcontig	= dfs->fs_maxcontig;
619 	mfs->fs_maxbpg		= dfs->fs_maxbpg;
620 	mfs->fs_csmask		= dfs->fs_csmask;
621 	mfs->fs_csshift		= dfs->fs_csshift;
622 	mfs->fs_optim		= dfs->fs_optim;
623 	mfs->fs_csaddr		= dfs->fs_csaddr;
624 	mfs->fs_cssize		= dfs->fs_cssize;
625 	mfs->fs_ncyl		= dfs->fs_ncyl;
626 	mfs->fs_cstotal		= dfs->fs_cstotal;
627 	mfs->fs_reclaim		= dfs->fs_reclaim;
628 
629 	if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
630 		mfs->fs_reclaim &= ~FS_RECLAIM;
631 		mfs->fs_reclaim |=  FS_RECLAIMING;
632 		ufs_thread_start(&ufsvfsp->vfs_reclaim,
633 			ufs_thread_reclaim, vfsp);
634 	}
635 
636 	/* XXX What to do about sparecon? */
637 
638 	/* XXX need to copy volume label */
639 
640 	/*
641 	 * ondisk clean flag overrides inmemory clean flag iff == FSBAD
642 	 * or if error-locked and ondisk is now clean
643 	 */
644 	needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
645 	if (needs_unlock)
646 		mutex_enter(&ufsvfsp->vfs_lock);
647 
648 	if (errlck == UN_ERRLCK) {
649 		if (finished_fsclean == dfs->fs_clean)
650 			mfs->fs_clean = finished_fsclean;
651 		else
652 			mfs->fs_clean = FSBAD;
653 		mfs->fs_state = FSOKAY - dfs->fs_time;
654 	}
655 
656 	if (FSOKAY != dfs->fs_state + dfs->fs_time ||
657 	    (dfs->fs_clean == FSBAD))
658 		mfs->fs_clean = FSBAD;
659 
660 	if (needs_unlock)
661 		mutex_exit(&ufsvfsp->vfs_lock);
662 
663 	brelse(bp);
664 
665 	return (0);
666 }
667 
668 /*
669  * ufs_reconcile_inode
670  *	reconcile ondisk inode with incore inode
671  */
672 static int
673 ufs_reconcile_inode(struct inode *ip, void *arg)
674 {
675 	int		i;
676 	int		ndaddr;
677 	int		niaddr;
678 	struct dinode	*dp;		/* ondisk inode */
679 	struct buf	*bp	= NULL;
680 	uid_t		d_uid;
681 	gid_t		d_gid;
682 	int		error = 0;
683 	struct fs	*fs;
684 
685 	/*
686 	 * not an inode we care about
687 	 */
688 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
689 		return (0);
690 
691 	fs = ip->i_fs;
692 
693 	/*
694 	 * Inode reconciliation fails: we made the filesystem quiescent
695 	 * and we did a ufs_flush() before calling ufs_reconcile_inode()
696 	 * and thus the inode should not have been changed inbetween.
697 	 * Any discrepancies indicate a logic error and a pretty
698 	 * significant run-state inconsistency we should complain about.
699 	 */
700 	if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
701 		cmn_err(CE_WARN, "%s: Inode reconciliation failed for"
702 		    "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
703 		return (EINVAL);
704 	}
705 
706 	/*
707 	 * get the dinode
708 	 */
709 	bp = UFS_BREAD(ip->i_ufsvfs,
710 			ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
711 	    (int)fs->fs_bsize);
712 	if (bp->b_flags & B_ERROR) {
713 		brelse(bp);
714 		return (EIO);
715 	}
716 	dp  = bp->b_un.b_dino;
717 	dp += itoo(fs, ip->i_number);
718 
719 	/*
720 	 * handle Sun's implementation of EFT
721 	 */
722 	d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
723 	d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid;
724 
725 	rw_enter(&ip->i_contents, RW_WRITER);
726 
727 	/*
728 	 * some fields are not allowed to change
729 	 */
730 	if ((ip->i_mode  != dp->di_mode) ||
731 	    (ip->i_gen   != dp->di_gen) ||
732 	    (ip->i_uid   != d_uid) ||
733 	    (ip->i_gid   != d_gid)) {
734 		error = EACCES;
735 		goto out;
736 	}
737 
738 	/*
739 	 * and some are allowed to change
740 	 */
741 	ip->i_size		= dp->di_size;
742 	ip->i_ic.ic_flags	= dp->di_ic.ic_flags;
743 	ip->i_blocks		= dp->di_blocks;
744 	ip->i_nlink		= dp->di_nlink;
745 	if (ip->i_flag & IFASTSYMLNK) {
746 		ndaddr = 1;
747 		niaddr = 0;
748 	} else {
749 		ndaddr = NDADDR;
750 		niaddr = NIADDR;
751 	}
752 	for (i = 0; i < ndaddr; ++i)
753 		ip->i_db[i] = dp->di_db[i];
754 	for (i = 0; i < niaddr; ++i)
755 		ip->i_ib[i] = dp->di_ib[i];
756 
757 out:
758 	rw_exit(&ip->i_contents);
759 	brelse(bp);
760 	return (error);
761 }
762 
763 /*
764  * ufs_reconcile
765  *	reconcile ondisk superblock/inodes with any incore
766  */
767 static int
768 ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
769 {
770 	int	error = 0;
771 
772 	/*
773 	 * get rid of as much inmemory data as possible
774 	 */
775 	(void) ufs_flush(vfsp);
776 
777 	/*
778 	 * reconcile the superblock and inodes
779 	 */
780 	if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
781 		return (error);
782 	if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
783 		return (error);
784 	/*
785 	 * allocation blocks may be incorrect; get rid of them
786 	 */
787 	(void) ufs_flush(vfsp);
788 
789 	return (error);
790 }
791 
/*
 * ufs_fiolfs
 *	File system locking.  Thin wrapper that forwards to ufs__fiolfs()
 *	with from_user set to 1.
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
	const int	from_user = 1;

	return (ufs__fiolfs(vp, lockfsp, from_user, from_log));
}
800 
801 /* kernel-internal interface, also used by fix-on-panic */
802 int
803 ufs__fiolfs(
804 	struct vnode *vp,
805 	struct lockfs *lockfsp,
806 	int from_user,
807 	int from_log)
808 {
809 	struct ulockfs	*ulp;
810 	struct lockfs	lfs;
811 	int		error;
812 	struct vfs	*vfsp;
813 	struct ufsvfs	*ufsvfsp;
814 	int		 errlck		= NO_ERRLCK;
815 	int		 poll_events	= POLLPRI;
816 	extern struct pollhead ufs_pollhd;
817 
818 	/* check valid lock type */
819 	if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
820 		return (EINVAL);
821 
822 	if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
823 		return (EIO);
824 
825 	vfsp = vp->v_vfsp;
826 	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
827 	ulp = &ufsvfsp->vfs_ulockfs;
828 
829 	/*
830 	 * Suspend both the reclaim thread and the delete thread.
831 	 * This must be done outside the lockfs locking protocol.
832 	 */
833 	ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
834 	ufs_thread_suspend(&ufsvfsp->vfs_delete);
835 
836 	/*
837 	 * Acquire vfs_reflock around ul_lock to avoid deadlock with
838 	 * umount/remount/sync.
839 	 */
840 	vfs_lock_wait(vfsp);
841 	mutex_enter(&ulp->ul_lock);
842 
843 	/*
844 	 * Quit if there is another lockfs request in progress
845 	 * that is waiting for existing ufs_vnops to complete.
846 	 */
847 	if (ULOCKFS_IS_BUSY(ulp)) {
848 		error = EBUSY;
849 		goto errexit;
850 	}
851 
852 	/* cannot ulocked or downgrade a hard-lock */
853 	if (ULOCKFS_IS_HLOCK(ulp)) {
854 		error = EIO;
855 		goto errexit;
856 	}
857 
858 	/* an error lock may be unlocked or relocked, only */
859 	if (ULOCKFS_IS_ELOCK(ulp)) {
860 		if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
861 			error = EBUSY;
862 			goto errexit;
863 		}
864 	}
865 
866 	/*
867 	 * a read-only error lock may only be upgraded to an
868 	 * error lock or hard lock
869 	 */
870 	if (ULOCKFS_IS_ROELOCK(ulp)) {
871 		if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
872 			error = EBUSY;
873 			goto errexit;
874 		}
875 	}
876 
877 	/*
878 	 * until read-only error locks are fully implemented
879 	 * just return EINVAL
880 	 */
881 	if (LOCKFS_IS_ROELOCK(lockfsp)) {
882 		error = EINVAL;
883 		goto errexit;
884 	}
885 
886 	/*
887 	 * an error lock may only be applied if the file system is
888 	 * unlocked or already error locked.
889 	 * (this is to prevent the case where a fs gets changed out from
890 	 * underneath a fs that is locked for backup,
891 	 * that is, name/delete/write-locked.)
892 	 */
893 	if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
894 	    !ULOCKFS_IS_ROELOCK(ulp)) &&
895 	    (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
896 		error = EBUSY;
897 		goto errexit;
898 	}
899 
900 	/* get and validate the input lockfs request */
901 	if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
902 		goto errexit;
903 
904 	/*
905 	 * save current ulockfs struct
906 	 */
907 	bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));
908 
909 	/*
910 	 * Freeze the file system (pend future accesses)
911 	 */
912 	ufs_freeze(ulp, lockfsp);
913 
914 	/*
915 	 * Set locking in progress because ufs_quiesce may free the
916 	 * ul_lock mutex.
917 	 */
918 	ULOCKFS_SET_BUSY(ulp);
919 	/* update the ioctl copy */
920 	LOCKFS_SET_BUSY(&ulp->ul_lockfs);
921 
922 	/*
923 	 * Quiesce (wait for outstanding accesses to finish)
924 	 */
925 	if (error = ufs_quiesce(ulp))
926 		goto errout;
927 
928 	/*
929 	 * can't wlock or (ro)elock fs with accounting or local swap file
930 	 */
931 	if ((ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp) ||
932 	    ULOCKFS_IS_ROELOCK(ulp)) && !from_log) {
933 		if (error = ufs_checkaccton(vp))
934 			goto errout;
935 		if (error = ufs_checkswapon(vp))
936 			goto errout;
937 	}
938 
939 	/*
940 	 * save error lock status to pass down to reconcilation
941 	 * routines and for later cleanup
942 	 */
943 	if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
944 		errlck = UN_ERRLCK;
945 
946 	if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
947 		int needs_unlock;
948 		int needs_sbwrite;
949 
950 		poll_events |= POLLERR;
951 		errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs)?
952 							RE_ERRLCK: SET_ERRLCK;
953 
954 		needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
955 		if (needs_unlock)
956 			mutex_enter(&ufsvfsp->vfs_lock);
957 
958 		/* disable delayed i/o */
959 		needs_sbwrite = 0;
960 
961 		if (errlck == SET_ERRLCK) {
962 			ufsvfsp->vfs_fs->fs_clean = FSBAD;
963 			needs_sbwrite = 1;
964 		}
965 
966 		needs_sbwrite |= ufsvfsp->vfs_dio;
967 		ufsvfsp->vfs_dio = 0;
968 
969 		if (needs_unlock)
970 			mutex_exit(&ufsvfsp->vfs_lock);
971 
972 		if (needs_sbwrite) {
973 			ulp->ul_sbowner = curthread;
974 			TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);
975 
976 			if (needs_unlock)
977 				mutex_enter(&ufsvfsp->vfs_lock);
978 
979 			ufsvfsp->vfs_fs->fs_fmod = 0;
980 
981 			if (needs_unlock)
982 				mutex_exit(&ufsvfsp->vfs_lock);
983 		}
984 	}
985 
986 	/*
987 	 * reconcile superblock and inodes if was wlocked
988 	 */
989 	if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
990 		if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
991 			goto errout;
992 		/*
993 		 * in case the fs grew; reset the metadata map for logging tests
994 		 */
995 		TRANS_MATA_UMOUNT(ufsvfsp);
996 		TRANS_MATA_MOUNT(ufsvfsp);
997 		TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
998 	}
999 
1000 	/*
1001 	 * At least everything *currently* dirty goes out.
1002 	 */
1003 
1004 	if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
1005 	    !ULOCKFS_IS_ELOCK(ulp))
1006 		goto errout;
1007 
1008 	/*
1009 	 * thaw file system and wakeup pended processes
1010 	 */
1011 	if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
1012 		if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
1013 			goto errout;
1014 
1015 	/*
1016 	 * reset modified flag if not already write locked
1017 	 */
1018 	if (!LOCKFS_IS_WLOCK(&lfs))
1019 		ULOCKFS_CLR_MOD(ulp);
1020 
1021 	/*
1022 	 * idle the lock struct
1023 	 */
1024 	ULOCKFS_CLR_BUSY(ulp);
1025 	/* update the ioctl copy */
1026 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
1027 
1028 	/*
1029 	 * free current comment
1030 	 */
1031 	if (lfs.lf_comment && lfs.lf_comlen != 0) {
1032 		kmem_free(lfs.lf_comment, lfs.lf_comlen);
1033 		lfs.lf_comment = NULL;
1034 		lfs.lf_comlen = 0;
1035 	}
1036 
1037 	/* do error lock cleanup */
1038 	if (errlck == UN_ERRLCK)
1039 		ufsfx_unlockfs(ufsvfsp);
1040 
1041 	else if (errlck == RE_ERRLCK)
1042 		ufsfx_lockfs(ufsvfsp);
1043 
1044 	/* don't allow error lock from user to invoke panic */
1045 	else if (from_user && errlck == SET_ERRLCK &&
1046 		!(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
1047 		(void) ufs_fault(ufsvfsp->vfs_root,
1048 		    ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
1049 		    ulp->ul_lockfs.lf_comment: "user-applied error lock");
1050 
1051 	mutex_exit(&ulp->ul_lock);
1052 	vfs_unlock(vfsp);
1053 
1054 	if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
1055 		poll_events |= POLLERR;
1056 
1057 	pollwakeup(&ufs_pollhd, poll_events);
1058 
1059 	/*
1060 	 * Allow both the delete thread and the reclaim thread to
1061 	 * continue.
1062 	 */
1063 	ufs_thread_continue(&ufsvfsp->vfs_delete);
1064 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
1065 
1066 	return (0);
1067 
1068 errout:
1069 	/*
1070 	 * Lock failed. Reset the old lock in ufsvfs if not hard locked.
1071 	 */
1072 	if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
1073 		bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
1074 		ulp->ul_fs_lock = (1 << lfs.lf_lock);
1075 	}
1076 	(void) ufs_thaw(vfsp, ufsvfsp, ulp);
1077 	ULOCKFS_CLR_BUSY(ulp);
1078 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
1079 
1080 errexit:
1081 	mutex_exit(&ulp->ul_lock);
1082 	vfs_unlock(vfsp);
1083 
1084 	/*
1085 	 * Allow both the delete thread and the reclaim thread to
1086 	 * continue.
1087 	 */
1088 	ufs_thread_continue(&ufsvfsp->vfs_delete);
1089 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
1090 
1091 	return (error);
1092 }
1093 
1094 /*
1095  * fiolfss
1096  * 	return the current file system locking state info
1097  */
1098 int
1099 ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
1100 {
1101 	struct ulockfs	*ulp;
1102 
1103 	if (!vp || !vp->v_vfsp || !VTOI(vp))
1104 		return (EINVAL);
1105 
1106 	/* file system has been forcibly unmounted */
1107 	if (VTOI(vp)->i_ufsvfs == NULL)
1108 		return (EIO);
1109 
1110 	ulp = VTOUL(vp);
1111 
1112 	if (ULOCKFS_IS_HLOCK(ulp)) {
1113 		*lockfsp = ulp->ul_lockfs;	/* structure assignment */
1114 		return (0);
1115 	}
1116 
1117 	mutex_enter(&ulp->ul_lock);
1118 
1119 	*lockfsp = ulp->ul_lockfs;	/* structure assignment */
1120 
1121 	if (ULOCKFS_IS_MOD(ulp))
1122 		lockfsp->lf_flags |= LOCKFS_MOD;
1123 
1124 	mutex_exit(&ulp->ul_lock);
1125 
1126 	return (0);
1127 }
1128 
1129 /*
1130  * ufs_check_lockfs
1131  *	check whether a ufs_vnops conflicts with the file system lock
1132  */
1133 int
1134 ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
1135 {
1136 	k_sigset_t	smask;
1137 	int		sig, slock;
1138 
1139 	ASSERT(MUTEX_HELD(&ulp->ul_lock));
1140 
1141 	while (ulp->ul_fs_lock & mask) {
1142 		slock = (int)ULOCKFS_IS_SLOCK(ulp);
1143 		if ((curthread->t_flag & T_DONTPEND) && !slock) {
1144 			curthread->t_flag |= T_WOULDBLOCK;
1145 			return (EAGAIN);
1146 		}
1147 		curthread->t_flag &= ~T_WOULDBLOCK;
1148 
1149 		if (ULOCKFS_IS_HLOCK(ulp))
1150 			return (EIO);
1151 
1152 		/*
1153 		 * wait for lock status to change
1154 		 */
1155 		if (slock || ufsvfsp->vfs_nointr) {
1156 			cv_wait(&ulp->ul_cv, &ulp->ul_lock);
1157 		} else {
1158 			sigintr(&smask, 1);
1159 			sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
1160 			sigunintr(&smask);
1161 			if ((!sig && (ulp->ul_fs_lock & mask)) ||
1162 				ufsvfsp->vfs_dontblock)
1163 				return (EINTR);
1164 		}
1165 	}
1166 	ulp->ul_vnops_cnt++;
1167 	return (0);
1168 }
1169 
1170 /*
1171  * Check whether we came across the handcrafted lockfs protocol path. We can't
1172  * simply check for T_DONTBLOCK here as one would assume since this can also
1173  * falsely catch recursive VOP's going to a different filesystem, instead we
1174  * check if we already hold the ulockfs->ul_lock mutex.
1175  */
1176 static int
1177 ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
1178 {
1179 	return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
1180 }
1181 
1182 /*
1183  * ufs_lockfs_begin - start the lockfs locking protocol
1184  */
1185 int
1186 ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
1187 {
1188 	int 		error;
1189 	int		rec_vop;
1190 	struct ulockfs *ulp;
1191 	ulockfs_info_t	*ulockfs_info;
1192 	ulockfs_info_t	*ulockfs_info_free;
1193 	ulockfs_info_t	*ulockfs_info_temp;
1194 
1195 	/*
1196 	 * file system has been forcibly unmounted
1197 	 */
1198 	if (ufsvfsp == NULL)
1199 		return (EIO);
1200 
1201 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
1202 
1203 	/*
1204 	 * Do lockfs protocol
1205 	 */
1206 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
1207 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
1208 
1209 	/*
1210 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
1211 	 * path and bail out in that case.
1212 	 */
1213 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
1214 		*ulpp = NULL;
1215 		return (0);
1216 	} else {
1217 		if (ulockfs_info_free == NULL) {
1218 			if ((ulockfs_info_temp = (ulockfs_info_t *)
1219 			    kmem_zalloc(sizeof (ulockfs_info_t),
1220 			    KM_NOSLEEP)) == NULL) {
1221 				*ulpp = NULL;
1222 				return (ENOMEM);
1223 			}
1224 		}
1225 	}
1226 
1227 	/*
1228 	 * First time VOP call
1229 	 */
1230 	mutex_enter(&ulp->ul_lock);
1231 	if (ULOCKFS_IS_JUSTULOCK(ulp))
1232 		ulp->ul_vnops_cnt++;
1233 	else {
1234 		if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) {
1235 			mutex_exit(&ulp->ul_lock);
1236 			if (ulockfs_info_free == NULL)
1237 				kmem_free(ulockfs_info_temp,
1238 				    sizeof (ulockfs_info_t));
1239 			return (error);
1240 		}
1241 	}
1242 	mutex_exit(&ulp->ul_lock);
1243 
1244 	if (ulockfs_info_free != NULL) {
1245 		ulockfs_info_free->ulp = ulp;
1246 	} else {
1247 		ulockfs_info_temp->ulp = ulp;
1248 		ulockfs_info_temp->next = ulockfs_info;
1249 		ASSERT(ufs_lockfs_key != 0);
1250 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
1251 	}
1252 
1253 	curthread->t_flag |= T_DONTBLOCK;
1254 	return (0);
1255 }
1256 
1257 /*
1258  * Check whether we are returning from the top level VOP.
1259  */
1260 static int
1261 ufs_lockfs_top_vop_return(ulockfs_info_t *head)
1262 {
1263 	ulockfs_info_t *info;
1264 	int result = 1;
1265 
1266 	for (info = head; info != NULL; info = info->next) {
1267 		if (info->ulp != NULL) {
1268 			result = 0;
1269 			break;
1270 		}
1271 	}
1272 
1273 	return (result);
1274 }
1275 
1276 /*
1277  * ufs_lockfs_end - terminate the lockfs locking protocol
1278  */
1279 void
1280 ufs_lockfs_end(struct ulockfs *ulp)
1281 {
1282 	ulockfs_info_t *info;
1283 	ulockfs_info_t *head;
1284 
1285 	/*
1286 	 * end-of-VOP protocol
1287 	 */
1288 	if (ulp == NULL)
1289 		return;
1290 
1291 	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
1292 	SEARCH_ULOCKFSP(head, ulp, info);
1293 
1294 	/*
1295 	 * If we're called from a first level VOP, we have to have a
1296 	 * valid ulockfs record in the TSD.
1297 	 */
1298 	ASSERT(info != NULL);
1299 
1300 	/*
1301 	 * Invalidate the ulockfs record.
1302 	 */
1303 	info->ulp = NULL;
1304 
1305 	if (ufs_lockfs_top_vop_return(head))
1306 		curthread->t_flag &= ~T_DONTBLOCK;
1307 
1308 	mutex_enter(&ulp->ul_lock);
1309 
1310 	if (--ulp->ul_vnops_cnt == 0)
1311 		cv_broadcast(&ulp->ul_cv);
1312 
1313 	mutex_exit(&ulp->ul_lock);
1314 }
1315 
1316 /*
1317  * specialized version of ufs_lockfs_begin() called by ufs_getpage().
1318  */
1319 int
1320 ufs_lockfs_begin_getpage(
1321 	struct ufsvfs	*ufsvfsp,
1322 	struct ulockfs	**ulpp,
1323 	struct seg	*seg,
1324 	int		read_access,
1325 	uint_t		*protp)
1326 {
1327 	ulong_t			mask;
1328 	int 			error;
1329 	int			rec_vop;
1330 	struct ulockfs		*ulp;
1331 	ulockfs_info_t		*ulockfs_info;
1332 	ulockfs_info_t		*ulockfs_info_free;
1333 	ulockfs_info_t		*ulockfs_info_temp;
1334 
1335 	/*
1336 	 * file system has been forcibly unmounted
1337 	 */
1338 	if (ufsvfsp == NULL)
1339 		return (EIO);
1340 
1341 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
1342 
1343 	/*
1344 	 * Do lockfs protocol
1345 	 */
1346 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
1347 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
1348 
1349 	/*
1350 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
1351 	 * path and bail out in that case.
1352 	 */
1353 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
1354 		*ulpp = NULL;
1355 		return (0);
1356 	} else {
1357 		if (ulockfs_info_free == NULL) {
1358 			if ((ulockfs_info_temp = (ulockfs_info_t *)
1359 			    kmem_zalloc(sizeof (ulockfs_info_t),
1360 			    KM_NOSLEEP)) == NULL) {
1361 				*ulpp = NULL;
1362 				return (ENOMEM);
1363 			}
1364 		}
1365 	}
1366 
1367 	/*
1368 	 * First time VOP call
1369 	 */
1370 	mutex_enter(&ulp->ul_lock);
1371 	if (ULOCKFS_IS_JUSTULOCK(ulp))
1372 		/*
1373 		 * fs is not locked, simply inc the active-ops counter
1374 		 */
1375 		ulp->ul_vnops_cnt++;
1376 	else {
1377 		if (seg->s_ops == &segvn_ops &&
1378 		    ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
1379 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
1380 		} else if (protp && read_access) {
1381 			/*
1382 			 * Restrict the mapping to readonly.
1383 			 * Writes to this mapping will cause
1384 			 * another fault which will then
1385 			 * be suspended if fs is write locked
1386 			 */
1387 			*protp &= ~PROT_WRITE;
1388 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
1389 		} else
1390 			mask = (ulong_t)ULOCKFS_GETWRITE_MASK;
1391 
1392 		/*
1393 		 * will sleep if this fs is locked against this VOP
1394 		 */
1395 		if (error = ufs_check_lockfs(ufsvfsp, ulp, mask)) {
1396 			mutex_exit(&ulp->ul_lock);
1397 			if (ulockfs_info_free == NULL)
1398 				kmem_free(ulockfs_info_temp,
1399 				    sizeof (ulockfs_info_t));
1400 			return (error);
1401 		}
1402 	}
1403 	mutex_exit(&ulp->ul_lock);
1404 
1405 	if (ulockfs_info_free != NULL) {
1406 		ulockfs_info_free->ulp = ulp;
1407 	} else {
1408 		ulockfs_info_temp->ulp = ulp;
1409 		ulockfs_info_temp->next = ulockfs_info;
1410 		ASSERT(ufs_lockfs_key != 0);
1411 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
1412 	}
1413 
1414 	curthread->t_flag |= T_DONTBLOCK;
1415 	return (0);
1416 }
1417 
1418 void
1419 ufs_lockfs_tsd_destructor(void *head)
1420 {
1421 	ulockfs_info_t *curr = (ulockfs_info_t *)head;
1422 	ulockfs_info_t *temp;
1423 
1424 	for (; curr != NULL; ) {
1425 		/*
1426 		 * The TSD destructor is being called when the thread exits
1427 		 * (via thread_exit()). At that time it must have cleaned up
1428 		 * all VOPs via ufs_lockfs_end() and there must not be a
1429 		 * valid ulockfs record exist while a thread is exiting.
1430 		 */
1431 		temp = curr;
1432 		curr = curr->next;
1433 		ASSERT(temp->ulp == NULL);
1434 		kmem_free(temp, sizeof (ulockfs_info_t));
1435 	}
1436 }
1437