xref: /illumos-gate/usr/src/uts/common/fs/ufs/ufs_lockfs.c (revision 13237b7e)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
566c9f83dSowenr  * Common Development and Distribution License (the "License").
666c9f83dSowenr  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*13237b7eSbatschul  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/types.h>
277c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
287c478bd9Sstevel@tonic-gate #include <sys/param.h>
297c478bd9Sstevel@tonic-gate #include <sys/time.h>
307c478bd9Sstevel@tonic-gate #include <sys/systm.h>
317c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
327c478bd9Sstevel@tonic-gate #include <sys/resource.h>
337c478bd9Sstevel@tonic-gate #include <sys/signal.h>
347c478bd9Sstevel@tonic-gate #include <sys/cred.h>
357c478bd9Sstevel@tonic-gate #include <sys/user.h>
367c478bd9Sstevel@tonic-gate #include <sys/buf.h>
377c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
387c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
397c478bd9Sstevel@tonic-gate #include <sys/proc.h>
407c478bd9Sstevel@tonic-gate #include <sys/disp.h>
417c478bd9Sstevel@tonic-gate #include <sys/file.h>
427c478bd9Sstevel@tonic-gate #include <sys/fcntl.h>
437c478bd9Sstevel@tonic-gate #include <sys/flock.h>
44bc69f433Saguzovsk #include <sys/atomic.h>
457c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
467c478bd9Sstevel@tonic-gate #include <sys/uio.h>
477c478bd9Sstevel@tonic-gate #include <sys/conf.h>
487c478bd9Sstevel@tonic-gate #include <sys/mman.h>
497c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
507c478bd9Sstevel@tonic-gate #include <sys/debug.h>
517c478bd9Sstevel@tonic-gate #include <sys/vmmeter.h>
527c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
537c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
547c478bd9Sstevel@tonic-gate #include <sys/acct.h>
557c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
567c478bd9Sstevel@tonic-gate #include <sys/swap.h>
577c478bd9Sstevel@tonic-gate 
587c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fs.h>
597c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_inode.h>
607c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_fsdir.h>
617c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_trans.h>
627c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_panic.h>
637c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_mount.h>
647c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_bio.h>
657c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_log.h>
667c478bd9Sstevel@tonic-gate #include <sys/fs/ufs_quota.h>
677c478bd9Sstevel@tonic-gate #include <sys/dirent.h>		/* must be AFTER <sys/fs/fsdir.h>! */
687c478bd9Sstevel@tonic-gate #include <sys/errno.h>
697c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h>
707c478bd9Sstevel@tonic-gate 
717c478bd9Sstevel@tonic-gate #include <vm/hat.h>
727c478bd9Sstevel@tonic-gate #include <vm/pvn.h>
737c478bd9Sstevel@tonic-gate #include <vm/as.h>
747c478bd9Sstevel@tonic-gate #include <vm/seg.h>
757c478bd9Sstevel@tonic-gate #include <vm/seg_map.h>
767c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
777c478bd9Sstevel@tonic-gate #include <vm/rm.h>
787c478bd9Sstevel@tonic-gate #include <vm/anon.h>
797c478bd9Sstevel@tonic-gate #include <sys/swap.h>
807c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
817c478bd9Sstevel@tonic-gate 
/*
 * Provided outside this file; used by ufs_flush() to obtain the vnode on
 * which the block device pages are flushed.
 */
struct vnode *common_specvp(struct vnode *vp);

/*
 * Error lock status values, listed in numeric order.
 * NOTE(review): these appear to be the values carried by the `errlck'
 * argument of ufs_reconcile_fs() -- confirm against the callers.
 */
#define	UN_ERRLCK	(-1)
#define	NO_ERRLCK	0
#define	SET_ERRLCK	1
#define	RE_ERRLCK	2
897c478bd9Sstevel@tonic-gate 
907c478bd9Sstevel@tonic-gate /*
917c478bd9Sstevel@tonic-gate  * Index to be used in TSD for storing lockfs data
927c478bd9Sstevel@tonic-gate  */
937c478bd9Sstevel@tonic-gate uint_t ufs_lockfs_key;
947c478bd9Sstevel@tonic-gate 
957c478bd9Sstevel@tonic-gate typedef struct _ulockfs_info {
967c478bd9Sstevel@tonic-gate 	struct _ulockfs_info *next;
977c478bd9Sstevel@tonic-gate 	struct ulockfs *ulp;
98303bf60bSsdebnath 	uint_t flags;
997c478bd9Sstevel@tonic-gate } ulockfs_info_t;
1007c478bd9Sstevel@tonic-gate 
101303bf60bSsdebnath #define	ULOCK_INFO_FALLOCATE	0x00000001	/* fallocate thread */
102303bf60bSsdebnath 
/*
 * IS_REC_VOP(found, head, lockp, free)
 *
 * Check in TSD whether we are already doing a VOP on this filesystem.
 *
 * Walks the ulockfs_info_t list starting at `head' looking for the entry
 * whose ulp member equals `lockp'.
 *
 *	found	(out) set to 1 if such an entry exists, 0 otherwise
 *	free	(out) set to the first entry with a NULL ulp that was seen
 *		before the walk stopped, or NULL if none was seen
 *
 * The walk stops at the first match, so free slots that sit after the
 * matching entry are not reported.
 *
 * The lock argument is deliberately not named `ulp': a macro parameter
 * spelled like the structure member would also rewrite `_curr->ulp' during
 * expansion, which only compiles when the caller's argument happens to be
 * an identifier called `ulp'.  All arguments are parenthesized so that
 * arbitrary expressions may be passed.  Arguments must not refer to a
 * variable named `_curr', which is declared inside the expansion.
 */
#define	IS_REC_VOP(found, head, lockp, free)			\
{								\
	ulockfs_info_t *_curr;					\
								\
	for ((found) = 0, (free) = NULL, _curr = (head);	\
	    _curr != NULL; _curr = _curr->next) {		\
		if (((free) == NULL) &&				\
		    (_curr->ulp == NULL))			\
			(free) = _curr;				\
		if (_curr->ulp == (lockp)) {			\
			(found) = 1;				\
			break;					\
		}						\
	}							\
}
1217c478bd9Sstevel@tonic-gate 
/*
 * SEARCH_ULOCKFSP(head, lockp, info)
 *
 * Get the lockfs data from TSD so that lockfs handles recursive VOPs
 * properly.
 *
 * Walks the ulockfs_info_t list starting at `head' and stores in `info'
 * the first entry whose ulp member equals `lockp', or NULL when the list
 * is empty or holds no such entry.
 *
 * The lock argument is deliberately not named `ulp': a macro parameter
 * spelled like the structure member would also rewrite `_curr->ulp' during
 * expansion, which only compiles when the caller's argument happens to be
 * an identifier called `ulp'.  All arguments are parenthesized so that
 * arbitrary expressions may be passed.  Arguments must not refer to a
 * variable named `_curr', which is declared inside the expansion.
 */
#define	SEARCH_ULOCKFSP(head, lockp, info)			\
{								\
	ulockfs_info_t *_curr;					\
								\
	for (_curr = (head); _curr != NULL;			\
	    _curr = _curr->next) {				\
		if (_curr->ulp == (lockp)) {			\
			break;					\
		}						\
	}							\
								\
	(info) = _curr;						\
}
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate /*
1417c478bd9Sstevel@tonic-gate  * Validate lockfs request
1427c478bd9Sstevel@tonic-gate  */
1437c478bd9Sstevel@tonic-gate static int
1447c478bd9Sstevel@tonic-gate ufs_getlfd(
1457c478bd9Sstevel@tonic-gate 	struct lockfs *lockfsp,		/* new lock request */
1467c478bd9Sstevel@tonic-gate 	struct lockfs *ul_lockfsp)	/* old lock state */
1477c478bd9Sstevel@tonic-gate {
1487c478bd9Sstevel@tonic-gate 	int	error = 0;
1497c478bd9Sstevel@tonic-gate 
1507c478bd9Sstevel@tonic-gate 	/*
1517c478bd9Sstevel@tonic-gate 	 * no input flags defined
1527c478bd9Sstevel@tonic-gate 	 */
1537c478bd9Sstevel@tonic-gate 	if (lockfsp->lf_flags != 0) {
1547c478bd9Sstevel@tonic-gate 		error = EINVAL;
1557c478bd9Sstevel@tonic-gate 		goto errout;
1567c478bd9Sstevel@tonic-gate 	}
1577c478bd9Sstevel@tonic-gate 
1587c478bd9Sstevel@tonic-gate 	/*
1597c478bd9Sstevel@tonic-gate 	 * check key
1607c478bd9Sstevel@tonic-gate 	 */
1617c478bd9Sstevel@tonic-gate 	if (!LOCKFS_IS_ULOCK(ul_lockfsp))
1627c478bd9Sstevel@tonic-gate 		if (lockfsp->lf_key != ul_lockfsp->lf_key) {
1637c478bd9Sstevel@tonic-gate 			error = EINVAL;
1647c478bd9Sstevel@tonic-gate 			goto errout;
1657c478bd9Sstevel@tonic-gate 	}
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate 	lockfsp->lf_key = ul_lockfsp->lf_key + 1;
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate errout:
1707c478bd9Sstevel@tonic-gate 	return (error);
1717c478bd9Sstevel@tonic-gate }
1727c478bd9Sstevel@tonic-gate 
/*
 * ufs_checkaccton
 *	Check if accounting is turned on for the fs that vp belongs to.
 *
 *	Returns EDEADLK when acct_fs_in_use(vp) reports true, 0 otherwise.
 */
int
ufs_checkaccton(struct vnode *vp)
{
	return (acct_fs_in_use(vp) ? EDEADLK : 0);
}
1857c478bd9Sstevel@tonic-gate 
1867c478bd9Sstevel@tonic-gate /*
1877c478bd9Sstevel@tonic-gate  * ufs_checkswapon
1887c478bd9Sstevel@tonic-gate  *	check if local swapping is to file on this fs
1897c478bd9Sstevel@tonic-gate  */
1907c478bd9Sstevel@tonic-gate int
1917c478bd9Sstevel@tonic-gate ufs_checkswapon(struct vnode *vp)
1927c478bd9Sstevel@tonic-gate {
1937c478bd9Sstevel@tonic-gate 	struct swapinfo	*sip;
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate 	mutex_enter(&swapinfo_lock);
1967c478bd9Sstevel@tonic-gate 	for (sip = swapinfo; sip; sip = sip->si_next)
1977c478bd9Sstevel@tonic-gate 		if (sip->si_vp->v_vfsp == vp->v_vfsp) {
1987c478bd9Sstevel@tonic-gate 			mutex_exit(&swapinfo_lock);
1997c478bd9Sstevel@tonic-gate 			return (EDEADLK);
2007c478bd9Sstevel@tonic-gate 		}
2017c478bd9Sstevel@tonic-gate 	mutex_exit(&swapinfo_lock);
2027c478bd9Sstevel@tonic-gate 	return (0);
2037c478bd9Sstevel@tonic-gate }
2047c478bd9Sstevel@tonic-gate 
2057c478bd9Sstevel@tonic-gate /*
2067c478bd9Sstevel@tonic-gate  * ufs_freeze
2077c478bd9Sstevel@tonic-gate  *	pend future accesses for current lock and desired lock
2087c478bd9Sstevel@tonic-gate  */
2097c478bd9Sstevel@tonic-gate void
2107c478bd9Sstevel@tonic-gate ufs_freeze(struct ulockfs *ulp, struct lockfs *lockfsp)
2117c478bd9Sstevel@tonic-gate {
2127c478bd9Sstevel@tonic-gate 	/*
2137c478bd9Sstevel@tonic-gate 	 * set to new lock type
2147c478bd9Sstevel@tonic-gate 	 */
2157c478bd9Sstevel@tonic-gate 	ulp->ul_lockfs.lf_lock = lockfsp->lf_lock;
2167c478bd9Sstevel@tonic-gate 	ulp->ul_lockfs.lf_key = lockfsp->lf_key;
2177c478bd9Sstevel@tonic-gate 	ulp->ul_lockfs.lf_comlen = lockfsp->lf_comlen;
2187c478bd9Sstevel@tonic-gate 	ulp->ul_lockfs.lf_comment = lockfsp->lf_comment;
2197c478bd9Sstevel@tonic-gate 
2207c478bd9Sstevel@tonic-gate 	ulp->ul_fs_lock = (1 << ulp->ul_lockfs.lf_lock);
2217c478bd9Sstevel@tonic-gate }
2227c478bd9Sstevel@tonic-gate 
223bc69f433Saguzovsk /*
224bc69f433Saguzovsk  * All callers of ufs_quiesce() atomically increment ufs_quiesce_pend before
225bc69f433Saguzovsk  * starting ufs_quiesce() protocol and decrement it only when a file system no
226bc69f433Saguzovsk  * longer has to be in quiescent state. This allows ufs_pageio() to detect
227bc69f433Saguzovsk  * that another thread wants to quiesce a file system. See more comments in
228bc69f433Saguzovsk  * ufs_pageio().
229bc69f433Saguzovsk  */
230bc69f433Saguzovsk ulong_t ufs_quiesce_pend = 0;
231bc69f433Saguzovsk 
2327c478bd9Sstevel@tonic-gate /*
2337c478bd9Sstevel@tonic-gate  * ufs_quiesce
2347c478bd9Sstevel@tonic-gate  *	wait for outstanding accesses to finish
2357c478bd9Sstevel@tonic-gate  */
2367c478bd9Sstevel@tonic-gate int
2377c478bd9Sstevel@tonic-gate ufs_quiesce(struct ulockfs *ulp)
2387c478bd9Sstevel@tonic-gate {
2397c478bd9Sstevel@tonic-gate 	int error = 0;
240303bf60bSsdebnath 	ulockfs_info_t *head;
241303bf60bSsdebnath 	ulockfs_info_t *info;
242303bf60bSsdebnath 
243303bf60bSsdebnath 	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
244303bf60bSsdebnath 	SEARCH_ULOCKFSP(head, ulp, info);
2457c478bd9Sstevel@tonic-gate 
2467c478bd9Sstevel@tonic-gate 	/*
2477c478bd9Sstevel@tonic-gate 	 * Set a softlock to suspend future ufs_vnops so that
2487c478bd9Sstevel@tonic-gate 	 * this lockfs request will not be starved
2497c478bd9Sstevel@tonic-gate 	 */
2507c478bd9Sstevel@tonic-gate 	ULOCKFS_SET_SLOCK(ulp);
251bc69f433Saguzovsk 	ASSERT(ufs_quiesce_pend);
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate 	/* check if there is any outstanding ufs vnodeops calls */
254303bf60bSsdebnath 	while (ulp->ul_vnops_cnt || ulp->ul_falloc_cnt) {
255bc69f433Saguzovsk 		/*
256bc69f433Saguzovsk 		 * use timed version of cv_wait_sig() to make sure we don't
257bc69f433Saguzovsk 		 * miss a wake up call from ufs_pageio() when it doesn't use
258bc69f433Saguzovsk 		 * ul_lock.
259303bf60bSsdebnath 		 *
260303bf60bSsdebnath 		 * when a fallocate thread comes in, the only way it returns
261303bf60bSsdebnath 		 * from this function is if there are no other vnode operations
262303bf60bSsdebnath 		 * going on (remember fallocate threads are tracked using
263303bf60bSsdebnath 		 * ul_falloc_cnt not ul_vnops_cnt), and another fallocate thread
264303bf60bSsdebnath 		 * hasn't already grabbed the fs write lock.
265bc69f433Saguzovsk 		 */
266303bf60bSsdebnath 		if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
267303bf60bSsdebnath 			if (!ulp->ul_vnops_cnt && !ULOCKFS_IS_FWLOCK(ulp))
268303bf60bSsdebnath 				goto out;
269303bf60bSsdebnath 		}
270bc69f433Saguzovsk 		if (!cv_timedwait_sig(&ulp->ul_cv, &ulp->ul_lock, lbolt + hz)) {
2717c478bd9Sstevel@tonic-gate 			error = EINTR;
2727c478bd9Sstevel@tonic-gate 			goto out;
2737c478bd9Sstevel@tonic-gate 		}
274303bf60bSsdebnath 	}
2757c478bd9Sstevel@tonic-gate 
2767c478bd9Sstevel@tonic-gate out:
2777c478bd9Sstevel@tonic-gate 	/*
2787c478bd9Sstevel@tonic-gate 	 * unlock the soft lock
2797c478bd9Sstevel@tonic-gate 	 */
2807c478bd9Sstevel@tonic-gate 	ULOCKFS_CLR_SLOCK(ulp);
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate 	return (error);
2837c478bd9Sstevel@tonic-gate }
284303bf60bSsdebnath 
2857c478bd9Sstevel@tonic-gate /*
2867c478bd9Sstevel@tonic-gate  * ufs_flush_inode
2877c478bd9Sstevel@tonic-gate  */
2887c478bd9Sstevel@tonic-gate int
2897c478bd9Sstevel@tonic-gate ufs_flush_inode(struct inode *ip, void *arg)
2907c478bd9Sstevel@tonic-gate {
2917c478bd9Sstevel@tonic-gate 	int	error;
2927c478bd9Sstevel@tonic-gate 	int	saverror	= 0;
2937c478bd9Sstevel@tonic-gate 
2947c478bd9Sstevel@tonic-gate 	/*
2957c478bd9Sstevel@tonic-gate 	 * wrong file system; keep looking
2967c478bd9Sstevel@tonic-gate 	 */
2977c478bd9Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
2987c478bd9Sstevel@tonic-gate 		return (0);
2997c478bd9Sstevel@tonic-gate 
3007c478bd9Sstevel@tonic-gate 	/*
3017c478bd9Sstevel@tonic-gate 	 * asynchronously push all the dirty pages
3027c478bd9Sstevel@tonic-gate 	 */
3037c478bd9Sstevel@tonic-gate 	if (((error = TRANS_SYNCIP(ip, B_ASYNC, 0, TOP_SYNCIP_FLUSHI)) != 0) &&
3047c478bd9Sstevel@tonic-gate 	    (error != EAGAIN))
3057c478bd9Sstevel@tonic-gate 		saverror = error;
3067c478bd9Sstevel@tonic-gate 	/*
3077c478bd9Sstevel@tonic-gate 	 * wait for io and discard all mappings
3087c478bd9Sstevel@tonic-gate 	 */
3097c478bd9Sstevel@tonic-gate 	if (error = TRANS_SYNCIP(ip, B_INVAL, 0, TOP_SYNCIP_FLUSHI))
3107c478bd9Sstevel@tonic-gate 		saverror = error;
3117c478bd9Sstevel@tonic-gate 
3127c478bd9Sstevel@tonic-gate 	if (ITOV(ip)->v_type == VDIR) {
3137c478bd9Sstevel@tonic-gate 		dnlc_dir_purge(&ip->i_danchor);
3147c478bd9Sstevel@tonic-gate 	}
3157c478bd9Sstevel@tonic-gate 
3167c478bd9Sstevel@tonic-gate 	return (saverror);
3177c478bd9Sstevel@tonic-gate }
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate /*
3207c478bd9Sstevel@tonic-gate  * ufs_flush
3217c478bd9Sstevel@tonic-gate  *	Flush everything that is currently dirty; this includes invalidating
3227c478bd9Sstevel@tonic-gate  *	any mappings.
3237c478bd9Sstevel@tonic-gate  */
3247c478bd9Sstevel@tonic-gate int
3257c478bd9Sstevel@tonic-gate ufs_flush(struct vfs *vfsp)
3267c478bd9Sstevel@tonic-gate {
3277c478bd9Sstevel@tonic-gate 	int		error;
3287c478bd9Sstevel@tonic-gate 	int		saverror = 0;
3297c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp	= (struct ufsvfs *)vfsp->vfs_data;
3307c478bd9Sstevel@tonic-gate 	struct fs	*fs		= ufsvfsp->vfs_fs;
33146ac4468Smishra 	int		tdontblock = 0;
3327c478bd9Sstevel@tonic-gate 
3337c478bd9Sstevel@tonic-gate 	ASSERT(vfs_lock_held(vfsp));
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate 	/*
3367c478bd9Sstevel@tonic-gate 	 * purge dnlc
3377c478bd9Sstevel@tonic-gate 	 */
3387c478bd9Sstevel@tonic-gate 	(void) dnlc_purge_vfsp(vfsp, 0);
3397c478bd9Sstevel@tonic-gate 
3407c478bd9Sstevel@tonic-gate 	/*
3417c478bd9Sstevel@tonic-gate 	 * drain the delete and idle threads
3427c478bd9Sstevel@tonic-gate 	 */
3437c478bd9Sstevel@tonic-gate 	ufs_delete_drain(vfsp, 0, 0);
3447c478bd9Sstevel@tonic-gate 	ufs_idle_drain(vfsp);
3457c478bd9Sstevel@tonic-gate 
3467c478bd9Sstevel@tonic-gate 	/*
3477c478bd9Sstevel@tonic-gate 	 * flush and invalidate quota records
3487c478bd9Sstevel@tonic-gate 	 */
3497c478bd9Sstevel@tonic-gate 	(void) qsync(ufsvfsp);
3507c478bd9Sstevel@tonic-gate 
3517c478bd9Sstevel@tonic-gate 	/*
3527c478bd9Sstevel@tonic-gate 	 * flush w/invalidate the inodes for vfsp
3537c478bd9Sstevel@tonic-gate 	 */
3547c478bd9Sstevel@tonic-gate 	if (error = ufs_scan_inodes(0, ufs_flush_inode, ufsvfsp, ufsvfsp))
3557c478bd9Sstevel@tonic-gate 		saverror = error;
3567c478bd9Sstevel@tonic-gate 
3577c478bd9Sstevel@tonic-gate 	/*
3587c478bd9Sstevel@tonic-gate 	 * synchronously flush superblock and summary info
3597c478bd9Sstevel@tonic-gate 	 */
3607c478bd9Sstevel@tonic-gate 	if (fs->fs_ronly == 0 && fs->fs_fmod) {
3617c478bd9Sstevel@tonic-gate 		fs->fs_fmod = 0;
3627c478bd9Sstevel@tonic-gate 		TRANS_SBUPDATE(ufsvfsp, vfsp, TOP_SBUPDATE_FLUSH);
3637c478bd9Sstevel@tonic-gate 	}
3647c478bd9Sstevel@tonic-gate 	/*
3657c478bd9Sstevel@tonic-gate 	 * flush w/invalidate block device pages and buf cache
3667c478bd9Sstevel@tonic-gate 	 */
3677c478bd9Sstevel@tonic-gate 	if ((error = VOP_PUTPAGE(common_specvp(ufsvfsp->vfs_devvp),
368da6c28aaSamw 	    (offset_t)0, 0, B_INVAL, CRED(), NULL)) > 0)
3697c478bd9Sstevel@tonic-gate 		saverror = error;
3707c478bd9Sstevel@tonic-gate 
3717c478bd9Sstevel@tonic-gate 	(void) bflush((dev_t)vfsp->vfs_dev);
3727c478bd9Sstevel@tonic-gate 	(void) bfinval((dev_t)vfsp->vfs_dev, 0);
3737c478bd9Sstevel@tonic-gate 
3747c478bd9Sstevel@tonic-gate 	/*
3757c478bd9Sstevel@tonic-gate 	 * drain the delete and idle threads again
3767c478bd9Sstevel@tonic-gate 	 */
3777c478bd9Sstevel@tonic-gate 	ufs_delete_drain(vfsp, 0, 0);
3787c478bd9Sstevel@tonic-gate 	ufs_idle_drain(vfsp);
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 	/*
3817c478bd9Sstevel@tonic-gate 	 * play with the clean flag
3827c478bd9Sstevel@tonic-gate 	 */
3837c478bd9Sstevel@tonic-gate 	if (saverror == 0)
3847c478bd9Sstevel@tonic-gate 		ufs_checkclean(vfsp);
3857c478bd9Sstevel@tonic-gate 
3867c478bd9Sstevel@tonic-gate 	/*
38714c932c0Sbatschul 	 * Flush any outstanding transactions and roll the log
38814c932c0Sbatschul 	 * only if we are supposed to do, i.e. LDL_NOROLL not set.
38914c932c0Sbatschul 	 * We can not simply check for fs_ronly here since fsck also may
39014c932c0Sbatschul 	 * use this code to roll the log on a read-only filesystem, e.g.
39114c932c0Sbatschul 	 * root during early stages of boot, if other then a sanity check is
39214c932c0Sbatschul 	 * done, it will clear LDL_NOROLL before.
39314c932c0Sbatschul 	 * In addition we assert that the deltamap does not contain any deltas
39414c932c0Sbatschul 	 * in case LDL_NOROLL is set since this is not supposed to happen.
3957c478bd9Sstevel@tonic-gate 	 */
3967c478bd9Sstevel@tonic-gate 	if (TRANS_ISTRANS(ufsvfsp)) {
39714c932c0Sbatschul 		ml_unit_t	*ul	= ufsvfsp->vfs_log;
39814c932c0Sbatschul 		mt_map_t	*mtm	= ul->un_deltamap;
39914c932c0Sbatschul 
40014c932c0Sbatschul 		if (ul->un_flags & LDL_NOROLL) {
40114c932c0Sbatschul 			ASSERT(mtm->mtm_nme == 0);
40214c932c0Sbatschul 		} else {
40346ac4468Smishra 			/*
40446ac4468Smishra 			 * Do not set T_DONTBLOCK if there is a
40546ac4468Smishra 			 * transaction opened by caller.
40646ac4468Smishra 			 */
40746ac4468Smishra 			if (curthread->t_flag & T_DONTBLOCK)
40846ac4468Smishra 				tdontblock = 1;
40946ac4468Smishra 			else
41046ac4468Smishra 				curthread->t_flag |= T_DONTBLOCK;
41146ac4468Smishra 
41214c932c0Sbatschul 			TRANS_BEGIN_SYNC(ufsvfsp, TOP_COMMIT_FLUSH,
41314c932c0Sbatschul 			    TOP_COMMIT_SIZE, error);
41446ac4468Smishra 
41514c932c0Sbatschul 			if (!error) {
41614c932c0Sbatschul 				TRANS_END_SYNC(ufsvfsp, saverror,
41714c932c0Sbatschul 				    TOP_COMMIT_FLUSH, TOP_COMMIT_SIZE);
41814c932c0Sbatschul 			}
41946ac4468Smishra 
42046ac4468Smishra 			if (tdontblock == 0)
42146ac4468Smishra 				curthread->t_flag &= ~T_DONTBLOCK;
42246ac4468Smishra 
42314c932c0Sbatschul 			logmap_roll_dev(ufsvfsp->vfs_log);
4247c478bd9Sstevel@tonic-gate 		}
4257c478bd9Sstevel@tonic-gate 	}
4267c478bd9Sstevel@tonic-gate 
4277c478bd9Sstevel@tonic-gate 	return (saverror);
4287c478bd9Sstevel@tonic-gate }
4297c478bd9Sstevel@tonic-gate 
4307c478bd9Sstevel@tonic-gate /*
4317c478bd9Sstevel@tonic-gate  * ufs_thaw_wlock
4327c478bd9Sstevel@tonic-gate  *	special processing when thawing down to wlock
4337c478bd9Sstevel@tonic-gate  */
4347c478bd9Sstevel@tonic-gate static int
4357c478bd9Sstevel@tonic-gate ufs_thaw_wlock(struct inode *ip, void *arg)
4367c478bd9Sstevel@tonic-gate {
4377c478bd9Sstevel@tonic-gate 	/*
4387c478bd9Sstevel@tonic-gate 	 * wrong file system; keep looking
4397c478bd9Sstevel@tonic-gate 	 */
4407c478bd9Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
4417c478bd9Sstevel@tonic-gate 		return (0);
4427c478bd9Sstevel@tonic-gate 
4437c478bd9Sstevel@tonic-gate 	/*
4447c478bd9Sstevel@tonic-gate 	 * iupdat refuses to clear flags if the fs is read only.  The fs
4457c478bd9Sstevel@tonic-gate 	 * may become read/write during the lock and we wouldn't want
4467c478bd9Sstevel@tonic-gate 	 * these inodes being written to disk.  So clear the flags.
4477c478bd9Sstevel@tonic-gate 	 */
4487c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
4497c478bd9Sstevel@tonic-gate 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4507c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4517c478bd9Sstevel@tonic-gate 
4527c478bd9Sstevel@tonic-gate 	/*
4537c478bd9Sstevel@tonic-gate 	 * pages are mlocked -- fail wlock
4547c478bd9Sstevel@tonic-gate 	 */
4557c478bd9Sstevel@tonic-gate 	if (ITOV(ip)->v_type != VCHR && vn_has_cached_data(ITOV(ip)))
4567c478bd9Sstevel@tonic-gate 		return (EBUSY);
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate 	return (0);
4597c478bd9Sstevel@tonic-gate }
4607c478bd9Sstevel@tonic-gate 
4617c478bd9Sstevel@tonic-gate /*
4627c478bd9Sstevel@tonic-gate  * ufs_thaw_hlock
4637c478bd9Sstevel@tonic-gate  *	special processing when thawing down to hlock or elock
4647c478bd9Sstevel@tonic-gate  */
4657c478bd9Sstevel@tonic-gate static int
4667c478bd9Sstevel@tonic-gate ufs_thaw_hlock(struct inode *ip, void *arg)
4677c478bd9Sstevel@tonic-gate {
4687c478bd9Sstevel@tonic-gate 	struct vnode	*vp	= ITOV(ip);
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate 	/*
4717c478bd9Sstevel@tonic-gate 	 * wrong file system; keep looking
4727c478bd9Sstevel@tonic-gate 	 */
4737c478bd9Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
4747c478bd9Sstevel@tonic-gate 		return (0);
4757c478bd9Sstevel@tonic-gate 
4767c478bd9Sstevel@tonic-gate 	/*
4777c478bd9Sstevel@tonic-gate 	 * blow away all pages - even if they are mlocked
4787c478bd9Sstevel@tonic-gate 	 */
4797c478bd9Sstevel@tonic-gate 	do {
4807c478bd9Sstevel@tonic-gate 		(void) TRANS_SYNCIP(ip, B_INVAL | B_FORCE, 0, TOP_SYNCIP_HLOCK);
4817c478bd9Sstevel@tonic-gate 	} while ((vp->v_type != VCHR) && vn_has_cached_data(vp));
4827c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
4837c478bd9Sstevel@tonic-gate 	ip->i_flag &= ~(IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG);
4847c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
4857c478bd9Sstevel@tonic-gate 
4867c478bd9Sstevel@tonic-gate 	return (0);
4877c478bd9Sstevel@tonic-gate }
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate /*
4907c478bd9Sstevel@tonic-gate  * ufs_thaw
4917c478bd9Sstevel@tonic-gate  *	thaw file system lock down to current value
4927c478bd9Sstevel@tonic-gate  */
4937c478bd9Sstevel@tonic-gate int
4947c478bd9Sstevel@tonic-gate ufs_thaw(struct vfs *vfsp, struct ufsvfs *ufsvfsp, struct ulockfs *ulp)
4957c478bd9Sstevel@tonic-gate {
4967c478bd9Sstevel@tonic-gate 	int		error	= 0;
4977c478bd9Sstevel@tonic-gate 	int		noidel	= (int)(ulp->ul_flag & ULOCKFS_NOIDEL);
4987c478bd9Sstevel@tonic-gate 
4997c478bd9Sstevel@tonic-gate 	/*
5007c478bd9Sstevel@tonic-gate 	 * if wlock or hlock or elock
5017c478bd9Sstevel@tonic-gate 	 */
5027c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_WLOCK(ulp) || ULOCKFS_IS_HLOCK(ulp) ||
5037c478bd9Sstevel@tonic-gate 	    ULOCKFS_IS_ELOCK(ulp)) {
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate 		/*
5067c478bd9Sstevel@tonic-gate 		 * don't keep access times
5077c478bd9Sstevel@tonic-gate 		 * don't free deleted files
5087c478bd9Sstevel@tonic-gate 		 * if superblock writes are allowed, limit them to me for now
5097c478bd9Sstevel@tonic-gate 		 */
5107c478bd9Sstevel@tonic-gate 		ulp->ul_flag |= (ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
5117c478bd9Sstevel@tonic-gate 		if (ulp->ul_sbowner != (kthread_id_t)-1)
5127c478bd9Sstevel@tonic-gate 			ulp->ul_sbowner = curthread;
5137c478bd9Sstevel@tonic-gate 
5147c478bd9Sstevel@tonic-gate 		/*
5157c478bd9Sstevel@tonic-gate 		 * wait for writes for deleted files and superblock updates
5167c478bd9Sstevel@tonic-gate 		 */
5177c478bd9Sstevel@tonic-gate 		(void) ufs_flush(vfsp);
5187c478bd9Sstevel@tonic-gate 
5197c478bd9Sstevel@tonic-gate 		/*
5207c478bd9Sstevel@tonic-gate 		 * now make sure the quota file is up-to-date
5217c478bd9Sstevel@tonic-gate 		 *	expensive; but effective
5227c478bd9Sstevel@tonic-gate 		 */
5237c478bd9Sstevel@tonic-gate 		error = ufs_flush(vfsp);
5247c478bd9Sstevel@tonic-gate 		/*
5257c478bd9Sstevel@tonic-gate 		 * no one can write the superblock
5267c478bd9Sstevel@tonic-gate 		 */
5277c478bd9Sstevel@tonic-gate 		ulp->ul_sbowner = (kthread_id_t)-1;
5287c478bd9Sstevel@tonic-gate 
5297c478bd9Sstevel@tonic-gate 		/*
5307c478bd9Sstevel@tonic-gate 		 * special processing for wlock/hlock/elock
5317c478bd9Sstevel@tonic-gate 		 */
5327c478bd9Sstevel@tonic-gate 		if (ULOCKFS_IS_WLOCK(ulp)) {
5337c478bd9Sstevel@tonic-gate 			if (error)
5347c478bd9Sstevel@tonic-gate 				goto errout;
5357c478bd9Sstevel@tonic-gate 			error = bfinval(ufsvfsp->vfs_dev, 0);
5367c478bd9Sstevel@tonic-gate 			if (error)
5377c478bd9Sstevel@tonic-gate 				goto errout;
5387c478bd9Sstevel@tonic-gate 			error = ufs_scan_inodes(0, ufs_thaw_wlock,
53980d34432Sfrankho 			    (void *)ufsvfsp, ufsvfsp);
5407c478bd9Sstevel@tonic-gate 			if (error)
5417c478bd9Sstevel@tonic-gate 				goto errout;
5427c478bd9Sstevel@tonic-gate 		}
5437c478bd9Sstevel@tonic-gate 		if (ULOCKFS_IS_HLOCK(ulp) || ULOCKFS_IS_ELOCK(ulp)) {
5447c478bd9Sstevel@tonic-gate 			error = 0;
5457c478bd9Sstevel@tonic-gate 			(void) ufs_scan_inodes(0, ufs_thaw_hlock,
54680d34432Sfrankho 			    (void *)ufsvfsp, ufsvfsp);
5477c478bd9Sstevel@tonic-gate 			(void) bfinval(ufsvfsp->vfs_dev, 1);
5487c478bd9Sstevel@tonic-gate 		}
5497c478bd9Sstevel@tonic-gate 	} else {
5507c478bd9Sstevel@tonic-gate 
5517c478bd9Sstevel@tonic-gate 		/*
5527c478bd9Sstevel@tonic-gate 		 * okay to keep access times
5537c478bd9Sstevel@tonic-gate 		 * okay to free deleted files
5547c478bd9Sstevel@tonic-gate 		 * okay to write the superblock
5557c478bd9Sstevel@tonic-gate 		 */
5567c478bd9Sstevel@tonic-gate 		ulp->ul_flag &= ~(ULOCKFS_NOIACC|ULOCKFS_NOIDEL);
5577c478bd9Sstevel@tonic-gate 		ulp->ul_sbowner = NULL;
5587c478bd9Sstevel@tonic-gate 
5597c478bd9Sstevel@tonic-gate 		/*
5607c478bd9Sstevel@tonic-gate 		 * flush in case deleted files are in memory
5617c478bd9Sstevel@tonic-gate 		 */
5627c478bd9Sstevel@tonic-gate 		if (noidel) {
5637c478bd9Sstevel@tonic-gate 			if (error = ufs_flush(vfsp))
5647c478bd9Sstevel@tonic-gate 				goto errout;
5657c478bd9Sstevel@tonic-gate 		}
5667c478bd9Sstevel@tonic-gate 	}
5677c478bd9Sstevel@tonic-gate 
5687c478bd9Sstevel@tonic-gate errout:
5697c478bd9Sstevel@tonic-gate 	cv_broadcast(&ulp->ul_cv);
5707c478bd9Sstevel@tonic-gate 	return (error);
5717c478bd9Sstevel@tonic-gate }
5727c478bd9Sstevel@tonic-gate 
5737c478bd9Sstevel@tonic-gate /*
5747c478bd9Sstevel@tonic-gate  * ufs_reconcile_fs
5757c478bd9Sstevel@tonic-gate  *	reconcile incore superblock with ondisk superblock
5767c478bd9Sstevel@tonic-gate  */
5777c478bd9Sstevel@tonic-gate int
5787c478bd9Sstevel@tonic-gate ufs_reconcile_fs(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
5797c478bd9Sstevel@tonic-gate {
5807c478bd9Sstevel@tonic-gate 	struct fs	*mfs; 	/* in-memory superblock */
5817c478bd9Sstevel@tonic-gate 	struct fs	*dfs;	/* on-disk   superblock */
5827c478bd9Sstevel@tonic-gate 	struct buf	*bp;	/* on-disk   superblock buf */
5837c478bd9Sstevel@tonic-gate 	int		 needs_unlock;
5847c478bd9Sstevel@tonic-gate 	char		 finished_fsclean;
5857c478bd9Sstevel@tonic-gate 
5867c478bd9Sstevel@tonic-gate 	mfs = ufsvfsp->vfs_fs;
5877c478bd9Sstevel@tonic-gate 
5887c478bd9Sstevel@tonic-gate 	/*
5897c478bd9Sstevel@tonic-gate 	 * get the on-disk copy of the superblock
5907c478bd9Sstevel@tonic-gate 	 */
5917c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ufsvfsp, vfsp->vfs_dev, SBLOCK, SBSIZE);
5927c478bd9Sstevel@tonic-gate 	bp->b_flags |= (B_STALE|B_AGE);
5937c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
5947c478bd9Sstevel@tonic-gate 		brelse(bp);
5957c478bd9Sstevel@tonic-gate 		return (EIO);
5967c478bd9Sstevel@tonic-gate 	}
5977c478bd9Sstevel@tonic-gate 	dfs = bp->b_un.b_fs;
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 	/* error locks may only unlock after the fs has been made consistent */
6007c478bd9Sstevel@tonic-gate 	if (errlck == UN_ERRLCK) {
6017c478bd9Sstevel@tonic-gate 		if (dfs->fs_clean == FSFIX) {	/* being repaired */
6027c478bd9Sstevel@tonic-gate 			brelse(bp);
6037c478bd9Sstevel@tonic-gate 			return (EAGAIN);
6047c478bd9Sstevel@tonic-gate 		}
6057c478bd9Sstevel@tonic-gate 		/* repair not yet started? */
6067c478bd9Sstevel@tonic-gate 		finished_fsclean = TRANS_ISTRANS(ufsvfsp)? FSLOG: FSCLEAN;
6077c478bd9Sstevel@tonic-gate 		if (dfs->fs_clean != finished_fsclean) {
6087c478bd9Sstevel@tonic-gate 			brelse(bp);
6097c478bd9Sstevel@tonic-gate 			return (EBUSY);
6107c478bd9Sstevel@tonic-gate 		}
6117c478bd9Sstevel@tonic-gate 	}
6127c478bd9Sstevel@tonic-gate 
6137c478bd9Sstevel@tonic-gate 	/*
6147c478bd9Sstevel@tonic-gate 	 * if superblock has changed too much, abort
6157c478bd9Sstevel@tonic-gate 	 */
6167c478bd9Sstevel@tonic-gate 	if ((mfs->fs_sblkno		!= dfs->fs_sblkno) ||
6177c478bd9Sstevel@tonic-gate 	    (mfs->fs_cblkno		!= dfs->fs_cblkno) ||
6187c478bd9Sstevel@tonic-gate 	    (mfs->fs_iblkno		!= dfs->fs_iblkno) ||
6197c478bd9Sstevel@tonic-gate 	    (mfs->fs_dblkno		!= dfs->fs_dblkno) ||
6207c478bd9Sstevel@tonic-gate 	    (mfs->fs_cgoffset		!= dfs->fs_cgoffset) ||
6217c478bd9Sstevel@tonic-gate 	    (mfs->fs_cgmask		!= dfs->fs_cgmask) ||
6227c478bd9Sstevel@tonic-gate 	    (mfs->fs_bsize		!= dfs->fs_bsize) ||
6237c478bd9Sstevel@tonic-gate 	    (mfs->fs_fsize		!= dfs->fs_fsize) ||
6247c478bd9Sstevel@tonic-gate 	    (mfs->fs_frag		!= dfs->fs_frag) ||
6257c478bd9Sstevel@tonic-gate 	    (mfs->fs_bmask		!= dfs->fs_bmask) ||
6267c478bd9Sstevel@tonic-gate 	    (mfs->fs_fmask		!= dfs->fs_fmask) ||
6277c478bd9Sstevel@tonic-gate 	    (mfs->fs_bshift		!= dfs->fs_bshift) ||
6287c478bd9Sstevel@tonic-gate 	    (mfs->fs_fshift		!= dfs->fs_fshift) ||
6297c478bd9Sstevel@tonic-gate 	    (mfs->fs_fragshift		!= dfs->fs_fragshift) ||
6307c478bd9Sstevel@tonic-gate 	    (mfs->fs_fsbtodb		!= dfs->fs_fsbtodb) ||
6317c478bd9Sstevel@tonic-gate 	    (mfs->fs_sbsize		!= dfs->fs_sbsize) ||
6327c478bd9Sstevel@tonic-gate 	    (mfs->fs_nindir		!= dfs->fs_nindir) ||
6337c478bd9Sstevel@tonic-gate 	    (mfs->fs_nspf		!= dfs->fs_nspf) ||
6347c478bd9Sstevel@tonic-gate 	    (mfs->fs_trackskew		!= dfs->fs_trackskew) ||
6357c478bd9Sstevel@tonic-gate 	    (mfs->fs_cgsize		!= dfs->fs_cgsize) ||
6367c478bd9Sstevel@tonic-gate 	    (mfs->fs_ntrak		!= dfs->fs_ntrak) ||
6377c478bd9Sstevel@tonic-gate 	    (mfs->fs_nsect		!= dfs->fs_nsect) ||
6387c478bd9Sstevel@tonic-gate 	    (mfs->fs_spc		!= dfs->fs_spc) ||
6397c478bd9Sstevel@tonic-gate 	    (mfs->fs_cpg		!= dfs->fs_cpg) ||
6407c478bd9Sstevel@tonic-gate 	    (mfs->fs_ipg		!= dfs->fs_ipg) ||
6417c478bd9Sstevel@tonic-gate 	    (mfs->fs_fpg		!= dfs->fs_fpg) ||
6427c478bd9Sstevel@tonic-gate 	    (mfs->fs_postblformat	!= dfs->fs_postblformat) ||
6437c478bd9Sstevel@tonic-gate 	    (mfs->fs_magic		!= dfs->fs_magic)) {
6447c478bd9Sstevel@tonic-gate 		brelse(bp);
6457c478bd9Sstevel@tonic-gate 		return (EACCES);
6467c478bd9Sstevel@tonic-gate 	}
6477c478bd9Sstevel@tonic-gate 	if (dfs->fs_clean == FSBAD || FSOKAY != dfs->fs_state + dfs->fs_time)
6487c478bd9Sstevel@tonic-gate 		if (mfs->fs_clean == FSLOG) {
6497c478bd9Sstevel@tonic-gate 			brelse(bp);
6507c478bd9Sstevel@tonic-gate 			return (EACCES);
6517c478bd9Sstevel@tonic-gate 		}
6527c478bd9Sstevel@tonic-gate 
6537c478bd9Sstevel@tonic-gate 	/*
6547c478bd9Sstevel@tonic-gate 	 * get new summary info
6557c478bd9Sstevel@tonic-gate 	 */
6567c478bd9Sstevel@tonic-gate 	if (ufs_getsummaryinfo(vfsp->vfs_dev, ufsvfsp, dfs)) {
6577c478bd9Sstevel@tonic-gate 		brelse(bp);
6587c478bd9Sstevel@tonic-gate 		return (EIO);
6597c478bd9Sstevel@tonic-gate 	}
6607c478bd9Sstevel@tonic-gate 
6617c478bd9Sstevel@tonic-gate 	/*
6627c478bd9Sstevel@tonic-gate 	 * release old summary info and update in-memory superblock
6637c478bd9Sstevel@tonic-gate 	 */
6647c478bd9Sstevel@tonic-gate 	kmem_free(mfs->fs_u.fs_csp, mfs->fs_cssize);
6657c478bd9Sstevel@tonic-gate 	mfs->fs_u.fs_csp = dfs->fs_u.fs_csp;	/* Only entry 0 used */
6667c478bd9Sstevel@tonic-gate 
6677c478bd9Sstevel@tonic-gate 	/*
6687c478bd9Sstevel@tonic-gate 	 * update fields allowed to change
6697c478bd9Sstevel@tonic-gate 	 */
6707c478bd9Sstevel@tonic-gate 	mfs->fs_size		= dfs->fs_size;
6717c478bd9Sstevel@tonic-gate 	mfs->fs_dsize		= dfs->fs_dsize;
6727c478bd9Sstevel@tonic-gate 	mfs->fs_ncg		= dfs->fs_ncg;
6737c478bd9Sstevel@tonic-gate 	mfs->fs_minfree		= dfs->fs_minfree;
6747c478bd9Sstevel@tonic-gate 	mfs->fs_rotdelay	= dfs->fs_rotdelay;
6757c478bd9Sstevel@tonic-gate 	mfs->fs_rps		= dfs->fs_rps;
6767c478bd9Sstevel@tonic-gate 	mfs->fs_maxcontig	= dfs->fs_maxcontig;
6777c478bd9Sstevel@tonic-gate 	mfs->fs_maxbpg		= dfs->fs_maxbpg;
6787c478bd9Sstevel@tonic-gate 	mfs->fs_csmask		= dfs->fs_csmask;
6797c478bd9Sstevel@tonic-gate 	mfs->fs_csshift		= dfs->fs_csshift;
6807c478bd9Sstevel@tonic-gate 	mfs->fs_optim		= dfs->fs_optim;
6817c478bd9Sstevel@tonic-gate 	mfs->fs_csaddr		= dfs->fs_csaddr;
6827c478bd9Sstevel@tonic-gate 	mfs->fs_cssize		= dfs->fs_cssize;
6837c478bd9Sstevel@tonic-gate 	mfs->fs_ncyl		= dfs->fs_ncyl;
6847c478bd9Sstevel@tonic-gate 	mfs->fs_cstotal		= dfs->fs_cstotal;
6857c478bd9Sstevel@tonic-gate 	mfs->fs_reclaim		= dfs->fs_reclaim;
6867c478bd9Sstevel@tonic-gate 
6877c478bd9Sstevel@tonic-gate 	if (mfs->fs_reclaim & (FS_RECLAIM|FS_RECLAIMING)) {
6887c478bd9Sstevel@tonic-gate 		mfs->fs_reclaim &= ~FS_RECLAIM;
6897c478bd9Sstevel@tonic-gate 		mfs->fs_reclaim |=  FS_RECLAIMING;
6907c478bd9Sstevel@tonic-gate 		ufs_thread_start(&ufsvfsp->vfs_reclaim,
69180d34432Sfrankho 		    ufs_thread_reclaim, vfsp);
6927c478bd9Sstevel@tonic-gate 	}
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate 	/* XXX What to do about sparecon? */
6957c478bd9Sstevel@tonic-gate 
6967c478bd9Sstevel@tonic-gate 	/* XXX need to copy volume label */
6977c478bd9Sstevel@tonic-gate 
6987c478bd9Sstevel@tonic-gate 	/*
6997c478bd9Sstevel@tonic-gate 	 * ondisk clean flag overrides inmemory clean flag iff == FSBAD
7007c478bd9Sstevel@tonic-gate 	 * or if error-locked and ondisk is now clean
7017c478bd9Sstevel@tonic-gate 	 */
7027c478bd9Sstevel@tonic-gate 	needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
7037c478bd9Sstevel@tonic-gate 	if (needs_unlock)
7047c478bd9Sstevel@tonic-gate 		mutex_enter(&ufsvfsp->vfs_lock);
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	if (errlck == UN_ERRLCK) {
7077c478bd9Sstevel@tonic-gate 		if (finished_fsclean == dfs->fs_clean)
7087c478bd9Sstevel@tonic-gate 			mfs->fs_clean = finished_fsclean;
7097c478bd9Sstevel@tonic-gate 		else
7107c478bd9Sstevel@tonic-gate 			mfs->fs_clean = FSBAD;
7117c478bd9Sstevel@tonic-gate 		mfs->fs_state = FSOKAY - dfs->fs_time;
7127c478bd9Sstevel@tonic-gate 	}
7137c478bd9Sstevel@tonic-gate 
7147c478bd9Sstevel@tonic-gate 	if (FSOKAY != dfs->fs_state + dfs->fs_time ||
7157c478bd9Sstevel@tonic-gate 	    (dfs->fs_clean == FSBAD))
7167c478bd9Sstevel@tonic-gate 		mfs->fs_clean = FSBAD;
7177c478bd9Sstevel@tonic-gate 
7187c478bd9Sstevel@tonic-gate 	if (needs_unlock)
7197c478bd9Sstevel@tonic-gate 		mutex_exit(&ufsvfsp->vfs_lock);
7207c478bd9Sstevel@tonic-gate 
7217c478bd9Sstevel@tonic-gate 	brelse(bp);
7227c478bd9Sstevel@tonic-gate 
7237c478bd9Sstevel@tonic-gate 	return (0);
7247c478bd9Sstevel@tonic-gate }
7257c478bd9Sstevel@tonic-gate 
7267c478bd9Sstevel@tonic-gate /*
7277c478bd9Sstevel@tonic-gate  * ufs_reconcile_inode
7287c478bd9Sstevel@tonic-gate  *	reconcile ondisk inode with incore inode
7297c478bd9Sstevel@tonic-gate  */
7307c478bd9Sstevel@tonic-gate static int
7317c478bd9Sstevel@tonic-gate ufs_reconcile_inode(struct inode *ip, void *arg)
7327c478bd9Sstevel@tonic-gate {
7337c478bd9Sstevel@tonic-gate 	int		i;
7347c478bd9Sstevel@tonic-gate 	int		ndaddr;
7357c478bd9Sstevel@tonic-gate 	int		niaddr;
7367c478bd9Sstevel@tonic-gate 	struct dinode	*dp;		/* ondisk inode */
7377c478bd9Sstevel@tonic-gate 	struct buf	*bp	= NULL;
7387c478bd9Sstevel@tonic-gate 	uid_t		d_uid;
7397c478bd9Sstevel@tonic-gate 	gid_t		d_gid;
7407c478bd9Sstevel@tonic-gate 	int		error = 0;
7417c478bd9Sstevel@tonic-gate 	struct fs	*fs;
7427c478bd9Sstevel@tonic-gate 
7437c478bd9Sstevel@tonic-gate 	/*
7447c478bd9Sstevel@tonic-gate 	 * not an inode we care about
7457c478bd9Sstevel@tonic-gate 	 */
7467c478bd9Sstevel@tonic-gate 	if (ip->i_ufsvfs != (struct ufsvfs *)arg)
7477c478bd9Sstevel@tonic-gate 		return (0);
7487c478bd9Sstevel@tonic-gate 
7497c478bd9Sstevel@tonic-gate 	fs = ip->i_fs;
7507c478bd9Sstevel@tonic-gate 
7517c478bd9Sstevel@tonic-gate 	/*
7527c478bd9Sstevel@tonic-gate 	 * Inode reconciliation fails: we made the filesystem quiescent
7537c478bd9Sstevel@tonic-gate 	 * and we did a ufs_flush() before calling ufs_reconcile_inode()
7547c478bd9Sstevel@tonic-gate 	 * and thus the inode should not have been changed inbetween.
7557c478bd9Sstevel@tonic-gate 	 * Any discrepancies indicate a logic error and a pretty
7567c478bd9Sstevel@tonic-gate 	 * significant run-state inconsistency we should complain about.
7577c478bd9Sstevel@tonic-gate 	 */
7587c478bd9Sstevel@tonic-gate 	if (ip->i_flag & (IMOD|IMODACC|IACC|IUPD|ICHG|IATTCHG)) {
7597c478bd9Sstevel@tonic-gate 		cmn_err(CE_WARN, "%s: Inode reconciliation failed for"
7607c478bd9Sstevel@tonic-gate 		    "inode %llu", fs->fs_fsmnt, (u_longlong_t)ip->i_number);
7617c478bd9Sstevel@tonic-gate 		return (EINVAL);
7627c478bd9Sstevel@tonic-gate 	}
7637c478bd9Sstevel@tonic-gate 
7647c478bd9Sstevel@tonic-gate 	/*
7657c478bd9Sstevel@tonic-gate 	 * get the dinode
7667c478bd9Sstevel@tonic-gate 	 */
7677c478bd9Sstevel@tonic-gate 	bp = UFS_BREAD(ip->i_ufsvfs,
76880d34432Sfrankho 	    ip->i_dev, (daddr_t)fsbtodb(fs, itod(fs, ip->i_number)),
7697c478bd9Sstevel@tonic-gate 	    (int)fs->fs_bsize);
7707c478bd9Sstevel@tonic-gate 	if (bp->b_flags & B_ERROR) {
7717c478bd9Sstevel@tonic-gate 		brelse(bp);
7727c478bd9Sstevel@tonic-gate 		return (EIO);
7737c478bd9Sstevel@tonic-gate 	}
7747c478bd9Sstevel@tonic-gate 	dp  = bp->b_un.b_dino;
7757c478bd9Sstevel@tonic-gate 	dp += itoo(fs, ip->i_number);
7767c478bd9Sstevel@tonic-gate 
7777c478bd9Sstevel@tonic-gate 	/*
7787c478bd9Sstevel@tonic-gate 	 * handle Sun's implementation of EFT
7797c478bd9Sstevel@tonic-gate 	 */
7807c478bd9Sstevel@tonic-gate 	d_uid = (dp->di_suid == UID_LONG) ? dp->di_uid : (uid_t)dp->di_suid;
7817c478bd9Sstevel@tonic-gate 	d_gid = (dp->di_sgid == GID_LONG) ? dp->di_gid : (uid_t)dp->di_sgid;
7827c478bd9Sstevel@tonic-gate 
7837c478bd9Sstevel@tonic-gate 	rw_enter(&ip->i_contents, RW_WRITER);
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate 	/*
7867c478bd9Sstevel@tonic-gate 	 * some fields are not allowed to change
7877c478bd9Sstevel@tonic-gate 	 */
7887c478bd9Sstevel@tonic-gate 	if ((ip->i_mode  != dp->di_mode) ||
7897c478bd9Sstevel@tonic-gate 	    (ip->i_gen   != dp->di_gen) ||
7907c478bd9Sstevel@tonic-gate 	    (ip->i_uid   != d_uid) ||
7917c478bd9Sstevel@tonic-gate 	    (ip->i_gid   != d_gid)) {
7927c478bd9Sstevel@tonic-gate 		error = EACCES;
7937c478bd9Sstevel@tonic-gate 		goto out;
7947c478bd9Sstevel@tonic-gate 	}
7957c478bd9Sstevel@tonic-gate 
7967c478bd9Sstevel@tonic-gate 	/*
7977c478bd9Sstevel@tonic-gate 	 * and some are allowed to change
7987c478bd9Sstevel@tonic-gate 	 */
7997c478bd9Sstevel@tonic-gate 	ip->i_size		= dp->di_size;
8007c478bd9Sstevel@tonic-gate 	ip->i_ic.ic_flags	= dp->di_ic.ic_flags;
8017c478bd9Sstevel@tonic-gate 	ip->i_blocks		= dp->di_blocks;
8027c478bd9Sstevel@tonic-gate 	ip->i_nlink		= dp->di_nlink;
8037c478bd9Sstevel@tonic-gate 	if (ip->i_flag & IFASTSYMLNK) {
8047c478bd9Sstevel@tonic-gate 		ndaddr = 1;
8057c478bd9Sstevel@tonic-gate 		niaddr = 0;
8067c478bd9Sstevel@tonic-gate 	} else {
8077c478bd9Sstevel@tonic-gate 		ndaddr = NDADDR;
8087c478bd9Sstevel@tonic-gate 		niaddr = NIADDR;
8097c478bd9Sstevel@tonic-gate 	}
8107c478bd9Sstevel@tonic-gate 	for (i = 0; i < ndaddr; ++i)
8117c478bd9Sstevel@tonic-gate 		ip->i_db[i] = dp->di_db[i];
8127c478bd9Sstevel@tonic-gate 	for (i = 0; i < niaddr; ++i)
8137c478bd9Sstevel@tonic-gate 		ip->i_ib[i] = dp->di_ib[i];
8147c478bd9Sstevel@tonic-gate 
8157c478bd9Sstevel@tonic-gate out:
8167c478bd9Sstevel@tonic-gate 	rw_exit(&ip->i_contents);
8177c478bd9Sstevel@tonic-gate 	brelse(bp);
8187c478bd9Sstevel@tonic-gate 	return (error);
8197c478bd9Sstevel@tonic-gate }
8207c478bd9Sstevel@tonic-gate 
8217c478bd9Sstevel@tonic-gate /*
8227c478bd9Sstevel@tonic-gate  * ufs_reconcile
8237c478bd9Sstevel@tonic-gate  *	reconcile ondisk superblock/inodes with any incore
8247c478bd9Sstevel@tonic-gate  */
8257c478bd9Sstevel@tonic-gate static int
8267c478bd9Sstevel@tonic-gate ufs_reconcile(struct vfs *vfsp, struct ufsvfs *ufsvfsp, int errlck)
8277c478bd9Sstevel@tonic-gate {
8287c478bd9Sstevel@tonic-gate 	int	error = 0;
8297c478bd9Sstevel@tonic-gate 
8307c478bd9Sstevel@tonic-gate 	/*
8317c478bd9Sstevel@tonic-gate 	 * get rid of as much inmemory data as possible
8327c478bd9Sstevel@tonic-gate 	 */
8337c478bd9Sstevel@tonic-gate 	(void) ufs_flush(vfsp);
8347c478bd9Sstevel@tonic-gate 
8357c478bd9Sstevel@tonic-gate 	/*
8367c478bd9Sstevel@tonic-gate 	 * reconcile the superblock and inodes
8377c478bd9Sstevel@tonic-gate 	 */
8387c478bd9Sstevel@tonic-gate 	if (error = ufs_reconcile_fs(vfsp, ufsvfsp, errlck))
8397c478bd9Sstevel@tonic-gate 		return (error);
8407c478bd9Sstevel@tonic-gate 	if (error = ufs_scan_inodes(0, ufs_reconcile_inode, ufsvfsp, ufsvfsp))
8417c478bd9Sstevel@tonic-gate 		return (error);
8427c478bd9Sstevel@tonic-gate 	/*
8437c478bd9Sstevel@tonic-gate 	 * allocation blocks may be incorrect; get rid of them
8447c478bd9Sstevel@tonic-gate 	 */
8457c478bd9Sstevel@tonic-gate 	(void) ufs_flush(vfsp);
8467c478bd9Sstevel@tonic-gate 
8477c478bd9Sstevel@tonic-gate 	return (error);
8487c478bd9Sstevel@tonic-gate }
8497c478bd9Sstevel@tonic-gate 
/*
 * File system locking
 *
 *	Forwards the request to ufs__fiolfs(), always flagging it as
 *	coming from the user; from_log is passed through untouched.
 */
int
ufs_fiolfs(struct vnode *vp, struct lockfs *lockfsp, int from_log)
{
	int	from_user = 1;

	return (ufs__fiolfs(vp, lockfsp, from_user, from_log));
}
8587c478bd9Sstevel@tonic-gate 
8597c478bd9Sstevel@tonic-gate /* kernel-internal interface, also used by fix-on-panic */
8607c478bd9Sstevel@tonic-gate int
8617c478bd9Sstevel@tonic-gate ufs__fiolfs(
8627c478bd9Sstevel@tonic-gate 	struct vnode *vp,
8637c478bd9Sstevel@tonic-gate 	struct lockfs *lockfsp,
8647c478bd9Sstevel@tonic-gate 	int from_user,
8657c478bd9Sstevel@tonic-gate 	int from_log)
8667c478bd9Sstevel@tonic-gate {
8677c478bd9Sstevel@tonic-gate 	struct ulockfs	*ulp;
8687c478bd9Sstevel@tonic-gate 	struct lockfs	lfs;
8697c478bd9Sstevel@tonic-gate 	int		error;
8707c478bd9Sstevel@tonic-gate 	struct vfs	*vfsp;
8717c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp;
8727c478bd9Sstevel@tonic-gate 	int		 errlck		= NO_ERRLCK;
8737c478bd9Sstevel@tonic-gate 	int		 poll_events	= POLLPRI;
8747c478bd9Sstevel@tonic-gate 	extern struct pollhead ufs_pollhd;
875303bf60bSsdebnath 	ulockfs_info_t *head;
876303bf60bSsdebnath 	ulockfs_info_t *info;
87746ac4468Smishra 	int signal = 0;
8787c478bd9Sstevel@tonic-gate 
8797c478bd9Sstevel@tonic-gate 	/* check valid lock type */
8807c478bd9Sstevel@tonic-gate 	if (!lockfsp || lockfsp->lf_lock > LOCKFS_MAXLOCK)
8817c478bd9Sstevel@tonic-gate 		return (EINVAL);
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate 	if (!vp || !vp->v_vfsp || !vp->v_vfsp->vfs_data)
8847c478bd9Sstevel@tonic-gate 		return (EIO);
8857c478bd9Sstevel@tonic-gate 
886*13237b7eSbatschul 	vfsp = vp->v_vfsp;
887*13237b7eSbatschul 
888*13237b7eSbatschul 	if (vfsp->vfs_flag & VFS_UNMOUNTED) /* has been unmounted */
88902ffed0eSjr 		return (EIO);
89002ffed0eSjr 
89102ffed0eSjr 	/* take the lock and check again */
892*13237b7eSbatschul 	vfs_lock_wait(vfsp);
893*13237b7eSbatschul 	if (vfsp->vfs_flag & VFS_UNMOUNTED) {
894*13237b7eSbatschul 		vfs_unlock(vfsp);
89502ffed0eSjr 		return (EIO);
89602ffed0eSjr 	}
89702ffed0eSjr 
898*13237b7eSbatschul 	/*
899*13237b7eSbatschul 	 * Can't wlock or ro/elock fs with accounting or local swap file
900*13237b7eSbatschul 	 * We need to check for this before we grab the ul_lock to avoid
901*13237b7eSbatschul 	 * deadlocks with the accounting framework.
902*13237b7eSbatschul 	 */
903*13237b7eSbatschul 	if ((LOCKFS_IS_WLOCK(lockfsp) || LOCKFS_IS_ELOCK(lockfsp) ||
904*13237b7eSbatschul 	    LOCKFS_IS_ROELOCK(lockfsp)) && !from_log) {
905*13237b7eSbatschul 		if (ufs_checkaccton(vp) || ufs_checkswapon(vp)) {
906*13237b7eSbatschul 			vfs_unlock(vfsp);
907*13237b7eSbatschul 			return (EDEADLK);
908*13237b7eSbatschul 		}
909*13237b7eSbatschul 	}
910*13237b7eSbatschul 
9117c478bd9Sstevel@tonic-gate 	ufsvfsp = (struct ufsvfs *)vfsp->vfs_data;
9127c478bd9Sstevel@tonic-gate 	ulp = &ufsvfsp->vfs_ulockfs;
913303bf60bSsdebnath 	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
914303bf60bSsdebnath 	SEARCH_ULOCKFSP(head, ulp, info);
915303bf60bSsdebnath 
9167c478bd9Sstevel@tonic-gate 	/*
9177c478bd9Sstevel@tonic-gate 	 * Suspend both the reclaim thread and the delete thread.
9187c478bd9Sstevel@tonic-gate 	 * This must be done outside the lockfs locking protocol.
9197c478bd9Sstevel@tonic-gate 	 */
9207c478bd9Sstevel@tonic-gate 	ufs_thread_suspend(&ufsvfsp->vfs_reclaim);
9217c478bd9Sstevel@tonic-gate 	ufs_thread_suspend(&ufsvfsp->vfs_delete);
9227c478bd9Sstevel@tonic-gate 
9237c478bd9Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
924bc69f433Saguzovsk 	atomic_add_long(&ufs_quiesce_pend, 1);
9257c478bd9Sstevel@tonic-gate 
9267c478bd9Sstevel@tonic-gate 	/*
9277c478bd9Sstevel@tonic-gate 	 * Quit if there is another lockfs request in progress
9287c478bd9Sstevel@tonic-gate 	 * that is waiting for existing ufs_vnops to complete.
9297c478bd9Sstevel@tonic-gate 	 */
9307c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_BUSY(ulp)) {
9317c478bd9Sstevel@tonic-gate 		error = EBUSY;
9327c478bd9Sstevel@tonic-gate 		goto errexit;
9337c478bd9Sstevel@tonic-gate 	}
9347c478bd9Sstevel@tonic-gate 
9357c478bd9Sstevel@tonic-gate 	/* cannot ulocked or downgrade a hard-lock */
9367c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_HLOCK(ulp)) {
9377c478bd9Sstevel@tonic-gate 		error = EIO;
9387c478bd9Sstevel@tonic-gate 		goto errexit;
9397c478bd9Sstevel@tonic-gate 	}
9407c478bd9Sstevel@tonic-gate 
9417c478bd9Sstevel@tonic-gate 	/* an error lock may be unlocked or relocked, only */
9427c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_ELOCK(ulp)) {
9437c478bd9Sstevel@tonic-gate 		if (!LOCKFS_IS_ULOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
9447c478bd9Sstevel@tonic-gate 			error = EBUSY;
9457c478bd9Sstevel@tonic-gate 			goto errexit;
9467c478bd9Sstevel@tonic-gate 		}
9477c478bd9Sstevel@tonic-gate 	}
9487c478bd9Sstevel@tonic-gate 
9497c478bd9Sstevel@tonic-gate 	/*
9507c478bd9Sstevel@tonic-gate 	 * a read-only error lock may only be upgraded to an
9517c478bd9Sstevel@tonic-gate 	 * error lock or hard lock
9527c478bd9Sstevel@tonic-gate 	 */
9537c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_ROELOCK(ulp)) {
9547c478bd9Sstevel@tonic-gate 		if (!LOCKFS_IS_HLOCK(lockfsp) && !LOCKFS_IS_ELOCK(lockfsp)) {
9557c478bd9Sstevel@tonic-gate 			error = EBUSY;
9567c478bd9Sstevel@tonic-gate 			goto errexit;
9577c478bd9Sstevel@tonic-gate 		}
9587c478bd9Sstevel@tonic-gate 	}
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	/*
9617c478bd9Sstevel@tonic-gate 	 * until read-only error locks are fully implemented
9627c478bd9Sstevel@tonic-gate 	 * just return EINVAL
9637c478bd9Sstevel@tonic-gate 	 */
9647c478bd9Sstevel@tonic-gate 	if (LOCKFS_IS_ROELOCK(lockfsp)) {
9657c478bd9Sstevel@tonic-gate 		error = EINVAL;
9667c478bd9Sstevel@tonic-gate 		goto errexit;
9677c478bd9Sstevel@tonic-gate 	}
9687c478bd9Sstevel@tonic-gate 
9697c478bd9Sstevel@tonic-gate 	/*
9707c478bd9Sstevel@tonic-gate 	 * an error lock may only be applied if the file system is
9717c478bd9Sstevel@tonic-gate 	 * unlocked or already error locked.
9727c478bd9Sstevel@tonic-gate 	 * (this is to prevent the case where a fs gets changed out from
9737c478bd9Sstevel@tonic-gate 	 * underneath a fs that is locked for backup,
9747c478bd9Sstevel@tonic-gate 	 * that is, name/delete/write-locked.)
9757c478bd9Sstevel@tonic-gate 	 */
9767c478bd9Sstevel@tonic-gate 	if ((!ULOCKFS_IS_ULOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp) &&
9777c478bd9Sstevel@tonic-gate 	    !ULOCKFS_IS_ROELOCK(ulp)) &&
9787c478bd9Sstevel@tonic-gate 	    (LOCKFS_IS_ELOCK(lockfsp) || LOCKFS_IS_ROELOCK(lockfsp))) {
9797c478bd9Sstevel@tonic-gate 		error = EBUSY;
9807c478bd9Sstevel@tonic-gate 		goto errexit;
9817c478bd9Sstevel@tonic-gate 	}
9827c478bd9Sstevel@tonic-gate 
9837c478bd9Sstevel@tonic-gate 	/* get and validate the input lockfs request */
9847c478bd9Sstevel@tonic-gate 	if (error = ufs_getlfd(lockfsp, &ulp->ul_lockfs))
9857c478bd9Sstevel@tonic-gate 		goto errexit;
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate 	/*
9887c478bd9Sstevel@tonic-gate 	 * save current ulockfs struct
9897c478bd9Sstevel@tonic-gate 	 */
9907c478bd9Sstevel@tonic-gate 	bcopy(&ulp->ul_lockfs, &lfs, sizeof (struct lockfs));
9917c478bd9Sstevel@tonic-gate 
9927c478bd9Sstevel@tonic-gate 	/*
9937c478bd9Sstevel@tonic-gate 	 * Freeze the file system (pend future accesses)
9947c478bd9Sstevel@tonic-gate 	 */
9957c478bd9Sstevel@tonic-gate 	ufs_freeze(ulp, lockfsp);
9967c478bd9Sstevel@tonic-gate 
9977c478bd9Sstevel@tonic-gate 	/*
9987c478bd9Sstevel@tonic-gate 	 * Set locking in progress because ufs_quiesce may free the
9997c478bd9Sstevel@tonic-gate 	 * ul_lock mutex.
10007c478bd9Sstevel@tonic-gate 	 */
10017c478bd9Sstevel@tonic-gate 	ULOCKFS_SET_BUSY(ulp);
10027c478bd9Sstevel@tonic-gate 	/* update the ioctl copy */
10037c478bd9Sstevel@tonic-gate 	LOCKFS_SET_BUSY(&ulp->ul_lockfs);
10047c478bd9Sstevel@tonic-gate 
1005303bf60bSsdebnath 	/*
1006303bf60bSsdebnath 	 * We  need to unset FWLOCK status before we call ufs_quiesce
1007303bf60bSsdebnath 	 * so that the thread doesnt get suspended. We do this only if
1008303bf60bSsdebnath 	 * this (fallocate) thread requested an unlock operation.
1009303bf60bSsdebnath 	 */
1010303bf60bSsdebnath 	if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
1011303bf60bSsdebnath 		if (!ULOCKFS_IS_WLOCK(ulp))
1012303bf60bSsdebnath 			ULOCKFS_CLR_FWLOCK(ulp);
1013303bf60bSsdebnath 	}
1014303bf60bSsdebnath 
10157c478bd9Sstevel@tonic-gate 	/*
10167c478bd9Sstevel@tonic-gate 	 * Quiesce (wait for outstanding accesses to finish)
10177c478bd9Sstevel@tonic-gate 	 */
101846ac4468Smishra 	if (error = ufs_quiesce(ulp)) {
101946ac4468Smishra 		/*
102046ac4468Smishra 		 * Interrupted due to signal. There could still be
102146ac4468Smishra 		 * pending vnops.
102246ac4468Smishra 		 */
102346ac4468Smishra 		signal = 1;
102446ac4468Smishra 
102546ac4468Smishra 		/*
102646ac4468Smishra 		 * We do broadcast because lock-status
102746ac4468Smishra 		 * could be reverted to old status.
102846ac4468Smishra 		 */
102946ac4468Smishra 		cv_broadcast(&ulp->ul_cv);
10307c478bd9Sstevel@tonic-gate 		goto errout;
103146ac4468Smishra 	}
10327c478bd9Sstevel@tonic-gate 
1033303bf60bSsdebnath 	/*
1034303bf60bSsdebnath 	 * If the fallocate thread requested a write fs lock operation
1035303bf60bSsdebnath 	 * then we set fwlock status in the ulp.
1036303bf60bSsdebnath 	 */
1037303bf60bSsdebnath 	if (info && (info->flags & ULOCK_INFO_FALLOCATE)) {
1038303bf60bSsdebnath 		if (ULOCKFS_IS_WLOCK(ulp))
1039303bf60bSsdebnath 			ULOCKFS_SET_FWLOCK(ulp);
1040303bf60bSsdebnath 	}
1041303bf60bSsdebnath 
10427c478bd9Sstevel@tonic-gate 	/*
10437c478bd9Sstevel@tonic-gate 	 * save error lock status to pass down to reconcilation
10447c478bd9Sstevel@tonic-gate 	 * routines and for later cleanup
10457c478bd9Sstevel@tonic-gate 	 */
10467c478bd9Sstevel@tonic-gate 	if (LOCKFS_IS_ELOCK(&lfs) && ULOCKFS_IS_ULOCK(ulp))
10477c478bd9Sstevel@tonic-gate 		errlck = UN_ERRLCK;
10487c478bd9Sstevel@tonic-gate 
10497c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_ELOCK(ulp) || ULOCKFS_IS_ROELOCK(ulp)) {
10507c478bd9Sstevel@tonic-gate 		int needs_unlock;
10517c478bd9Sstevel@tonic-gate 		int needs_sbwrite;
10527c478bd9Sstevel@tonic-gate 
10537c478bd9Sstevel@tonic-gate 		poll_events |= POLLERR;
105480d34432Sfrankho 		errlck = LOCKFS_IS_ELOCK(&lfs) || LOCKFS_IS_ROELOCK(&lfs) ?
105580d34432Sfrankho 		    RE_ERRLCK : SET_ERRLCK;
10567c478bd9Sstevel@tonic-gate 
10577c478bd9Sstevel@tonic-gate 		needs_unlock = !MUTEX_HELD(&ufsvfsp->vfs_lock);
10587c478bd9Sstevel@tonic-gate 		if (needs_unlock)
10597c478bd9Sstevel@tonic-gate 			mutex_enter(&ufsvfsp->vfs_lock);
10607c478bd9Sstevel@tonic-gate 
10617c478bd9Sstevel@tonic-gate 		/* disable delayed i/o */
10627c478bd9Sstevel@tonic-gate 		needs_sbwrite = 0;
10637c478bd9Sstevel@tonic-gate 
10647c478bd9Sstevel@tonic-gate 		if (errlck == SET_ERRLCK) {
10657c478bd9Sstevel@tonic-gate 			ufsvfsp->vfs_fs->fs_clean = FSBAD;
10667c478bd9Sstevel@tonic-gate 			needs_sbwrite = 1;
10677c478bd9Sstevel@tonic-gate 		}
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 		needs_sbwrite |= ufsvfsp->vfs_dio;
10707c478bd9Sstevel@tonic-gate 		ufsvfsp->vfs_dio = 0;
10717c478bd9Sstevel@tonic-gate 
10727c478bd9Sstevel@tonic-gate 		if (needs_unlock)
10737c478bd9Sstevel@tonic-gate 			mutex_exit(&ufsvfsp->vfs_lock);
10747c478bd9Sstevel@tonic-gate 
10757c478bd9Sstevel@tonic-gate 		if (needs_sbwrite) {
10767c478bd9Sstevel@tonic-gate 			ulp->ul_sbowner = curthread;
10777c478bd9Sstevel@tonic-gate 			TRANS_SBWRITE(ufsvfsp, TOP_SBWRITE_STABLE);
10787c478bd9Sstevel@tonic-gate 
10797c478bd9Sstevel@tonic-gate 			if (needs_unlock)
10807c478bd9Sstevel@tonic-gate 				mutex_enter(&ufsvfsp->vfs_lock);
10817c478bd9Sstevel@tonic-gate 
10827c478bd9Sstevel@tonic-gate 			ufsvfsp->vfs_fs->fs_fmod = 0;
10837c478bd9Sstevel@tonic-gate 
10847c478bd9Sstevel@tonic-gate 			if (needs_unlock)
10857c478bd9Sstevel@tonic-gate 				mutex_exit(&ufsvfsp->vfs_lock);
10867c478bd9Sstevel@tonic-gate 		}
10877c478bd9Sstevel@tonic-gate 	}
10887c478bd9Sstevel@tonic-gate 
10897c478bd9Sstevel@tonic-gate 	/*
10907c478bd9Sstevel@tonic-gate 	 * reconcile superblock and inodes if was wlocked
10917c478bd9Sstevel@tonic-gate 	 */
10927c478bd9Sstevel@tonic-gate 	if (LOCKFS_IS_WLOCK(&lfs) || LOCKFS_IS_ELOCK(&lfs)) {
10937c478bd9Sstevel@tonic-gate 		if (error = ufs_reconcile(vfsp, ufsvfsp, errlck))
10947c478bd9Sstevel@tonic-gate 			goto errout;
10957c478bd9Sstevel@tonic-gate 		/*
10967c478bd9Sstevel@tonic-gate 		 * in case the fs grew; reset the metadata map for logging tests
10977c478bd9Sstevel@tonic-gate 		 */
10987c478bd9Sstevel@tonic-gate 		TRANS_MATA_UMOUNT(ufsvfsp);
10997c478bd9Sstevel@tonic-gate 		TRANS_MATA_MOUNT(ufsvfsp);
11007c478bd9Sstevel@tonic-gate 		TRANS_MATA_SI(ufsvfsp, ufsvfsp->vfs_fs);
11017c478bd9Sstevel@tonic-gate 	}
11027c478bd9Sstevel@tonic-gate 
11037c478bd9Sstevel@tonic-gate 	/*
11047c478bd9Sstevel@tonic-gate 	 * At least everything *currently* dirty goes out.
11057c478bd9Sstevel@tonic-gate 	 */
11067c478bd9Sstevel@tonic-gate 
11077c478bd9Sstevel@tonic-gate 	if ((error = ufs_flush(vfsp)) != 0 && !ULOCKFS_IS_HLOCK(ulp) &&
11087c478bd9Sstevel@tonic-gate 	    !ULOCKFS_IS_ELOCK(ulp))
11097c478bd9Sstevel@tonic-gate 		goto errout;
11107c478bd9Sstevel@tonic-gate 
11117c478bd9Sstevel@tonic-gate 	/*
11127c478bd9Sstevel@tonic-gate 	 * thaw file system and wakeup pended processes
11137c478bd9Sstevel@tonic-gate 	 */
11147c478bd9Sstevel@tonic-gate 	if (error = ufs_thaw(vfsp, ufsvfsp, ulp))
11157c478bd9Sstevel@tonic-gate 		if (!ULOCKFS_IS_HLOCK(ulp) && !ULOCKFS_IS_ELOCK(ulp))
11167c478bd9Sstevel@tonic-gate 			goto errout;
11177c478bd9Sstevel@tonic-gate 
11187c478bd9Sstevel@tonic-gate 	/*
11197c478bd9Sstevel@tonic-gate 	 * reset modified flag if not already write locked
11207c478bd9Sstevel@tonic-gate 	 */
11217c478bd9Sstevel@tonic-gate 	if (!LOCKFS_IS_WLOCK(&lfs))
11227c478bd9Sstevel@tonic-gate 		ULOCKFS_CLR_MOD(ulp);
11237c478bd9Sstevel@tonic-gate 
11247c478bd9Sstevel@tonic-gate 	/*
11257c478bd9Sstevel@tonic-gate 	 * idle the lock struct
11267c478bd9Sstevel@tonic-gate 	 */
11277c478bd9Sstevel@tonic-gate 	ULOCKFS_CLR_BUSY(ulp);
11287c478bd9Sstevel@tonic-gate 	/* update the ioctl copy */
11297c478bd9Sstevel@tonic-gate 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
11307c478bd9Sstevel@tonic-gate 
11317c478bd9Sstevel@tonic-gate 	/*
11327c478bd9Sstevel@tonic-gate 	 * free current comment
11337c478bd9Sstevel@tonic-gate 	 */
11347c478bd9Sstevel@tonic-gate 	if (lfs.lf_comment && lfs.lf_comlen != 0) {
11357c478bd9Sstevel@tonic-gate 		kmem_free(lfs.lf_comment, lfs.lf_comlen);
11367c478bd9Sstevel@tonic-gate 		lfs.lf_comment = NULL;
11377c478bd9Sstevel@tonic-gate 		lfs.lf_comlen = 0;
11387c478bd9Sstevel@tonic-gate 	}
11397c478bd9Sstevel@tonic-gate 
11407c478bd9Sstevel@tonic-gate 	/* do error lock cleanup */
11417c478bd9Sstevel@tonic-gate 	if (errlck == UN_ERRLCK)
11427c478bd9Sstevel@tonic-gate 		ufsfx_unlockfs(ufsvfsp);
11437c478bd9Sstevel@tonic-gate 
11447c478bd9Sstevel@tonic-gate 	else if (errlck == RE_ERRLCK)
11457c478bd9Sstevel@tonic-gate 		ufsfx_lockfs(ufsvfsp);
11467c478bd9Sstevel@tonic-gate 
11477c478bd9Sstevel@tonic-gate 	/* don't allow error lock from user to invoke panic */
11487c478bd9Sstevel@tonic-gate 	else if (from_user && errlck == SET_ERRLCK &&
114980d34432Sfrankho 	    !(ufsvfsp->vfs_fsfx.fx_flags & (UFSMNT_ONERROR_PANIC >> 4)))
11507c478bd9Sstevel@tonic-gate 		(void) ufs_fault(ufsvfsp->vfs_root,
11517c478bd9Sstevel@tonic-gate 		    ulp->ul_lockfs.lf_comment && ulp->ul_lockfs.lf_comlen > 0 ?
11527c478bd9Sstevel@tonic-gate 		    ulp->ul_lockfs.lf_comment: "user-applied error lock");
11537c478bd9Sstevel@tonic-gate 
1154bc69f433Saguzovsk 	atomic_add_long(&ufs_quiesce_pend, -1);
11557c478bd9Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
11567c478bd9Sstevel@tonic-gate 	vfs_unlock(vfsp);
11577c478bd9Sstevel@tonic-gate 
11587c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_HLOCK(&ufsvfsp->vfs_ulockfs))
11597c478bd9Sstevel@tonic-gate 		poll_events |= POLLERR;
11607c478bd9Sstevel@tonic-gate 
11617c478bd9Sstevel@tonic-gate 	pollwakeup(&ufs_pollhd, poll_events);
11627c478bd9Sstevel@tonic-gate 
11637c478bd9Sstevel@tonic-gate 	/*
11647c478bd9Sstevel@tonic-gate 	 * Allow both the delete thread and the reclaim thread to
11657c478bd9Sstevel@tonic-gate 	 * continue.
11667c478bd9Sstevel@tonic-gate 	 */
11677c478bd9Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_delete);
11687c478bd9Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
11697c478bd9Sstevel@tonic-gate 
11707c478bd9Sstevel@tonic-gate 	return (0);
11717c478bd9Sstevel@tonic-gate 
11727c478bd9Sstevel@tonic-gate errout:
11737c478bd9Sstevel@tonic-gate 	/*
11747c478bd9Sstevel@tonic-gate 	 * Lock failed. Reset the old lock in ufsvfs if not hard locked.
11757c478bd9Sstevel@tonic-gate 	 */
11767c478bd9Sstevel@tonic-gate 	if (!LOCKFS_IS_HLOCK(&ulp->ul_lockfs)) {
11777c478bd9Sstevel@tonic-gate 		bcopy(&lfs, &ulp->ul_lockfs, sizeof (struct lockfs));
11787c478bd9Sstevel@tonic-gate 		ulp->ul_fs_lock = (1 << lfs.lf_lock);
11797c478bd9Sstevel@tonic-gate 	}
118046ac4468Smishra 
118146ac4468Smishra 	/*
118246ac4468Smishra 	 * Don't call ufs_thaw() when there's a signal during
118346ac4468Smishra 	 * ufs quiesce operation as it can lead to deadlock
118446ac4468Smishra 	 * with getpage.
118546ac4468Smishra 	 */
118646ac4468Smishra 	if (signal == 0)
118746ac4468Smishra 		(void) ufs_thaw(vfsp, ufsvfsp, ulp);
118846ac4468Smishra 
11897c478bd9Sstevel@tonic-gate 	ULOCKFS_CLR_BUSY(ulp);
11907c478bd9Sstevel@tonic-gate 	LOCKFS_CLR_BUSY(&ulp->ul_lockfs);
11917c478bd9Sstevel@tonic-gate 
11927c478bd9Sstevel@tonic-gate errexit:
1193bc69f433Saguzovsk 	atomic_add_long(&ufs_quiesce_pend, -1);
11947c478bd9Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
11957c478bd9Sstevel@tonic-gate 	vfs_unlock(vfsp);
11967c478bd9Sstevel@tonic-gate 
11977c478bd9Sstevel@tonic-gate 	/*
11987c478bd9Sstevel@tonic-gate 	 * Allow both the delete thread and the reclaim thread to
11997c478bd9Sstevel@tonic-gate 	 * continue.
12007c478bd9Sstevel@tonic-gate 	 */
12017c478bd9Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_delete);
12027c478bd9Sstevel@tonic-gate 	ufs_thread_continue(&ufsvfsp->vfs_reclaim);
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate 	return (error);
12057c478bd9Sstevel@tonic-gate }
12067c478bd9Sstevel@tonic-gate 
12077c478bd9Sstevel@tonic-gate /*
12087c478bd9Sstevel@tonic-gate  * fiolfss
12097c478bd9Sstevel@tonic-gate  * 	return the current file system locking state info
12107c478bd9Sstevel@tonic-gate  */
12117c478bd9Sstevel@tonic-gate int
12127c478bd9Sstevel@tonic-gate ufs_fiolfss(struct vnode *vp, struct lockfs *lockfsp)
12137c478bd9Sstevel@tonic-gate {
12147c478bd9Sstevel@tonic-gate 	struct ulockfs	*ulp;
12157c478bd9Sstevel@tonic-gate 
12167c478bd9Sstevel@tonic-gate 	if (!vp || !vp->v_vfsp || !VTOI(vp))
12177c478bd9Sstevel@tonic-gate 		return (EINVAL);
12187c478bd9Sstevel@tonic-gate 
12197c478bd9Sstevel@tonic-gate 	/* file system has been forcibly unmounted */
12207c478bd9Sstevel@tonic-gate 	if (VTOI(vp)->i_ufsvfs == NULL)
12217c478bd9Sstevel@tonic-gate 		return (EIO);
12227c478bd9Sstevel@tonic-gate 
12237c478bd9Sstevel@tonic-gate 	ulp = VTOUL(vp);
12247c478bd9Sstevel@tonic-gate 
12257c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_HLOCK(ulp)) {
12267c478bd9Sstevel@tonic-gate 		*lockfsp = ulp->ul_lockfs;	/* structure assignment */
12277c478bd9Sstevel@tonic-gate 		return (0);
12287c478bd9Sstevel@tonic-gate 	}
12297c478bd9Sstevel@tonic-gate 
12307c478bd9Sstevel@tonic-gate 	mutex_enter(&ulp->ul_lock);
12317c478bd9Sstevel@tonic-gate 
12327c478bd9Sstevel@tonic-gate 	*lockfsp = ulp->ul_lockfs;	/* structure assignment */
12337c478bd9Sstevel@tonic-gate 
12347c478bd9Sstevel@tonic-gate 	if (ULOCKFS_IS_MOD(ulp))
12357c478bd9Sstevel@tonic-gate 		lockfsp->lf_flags |= LOCKFS_MOD;
12367c478bd9Sstevel@tonic-gate 
12377c478bd9Sstevel@tonic-gate 	mutex_exit(&ulp->ul_lock);
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 	return (0);
12407c478bd9Sstevel@tonic-gate }
12417c478bd9Sstevel@tonic-gate 
12427c478bd9Sstevel@tonic-gate /*
12437c478bd9Sstevel@tonic-gate  * ufs_check_lockfs
12447c478bd9Sstevel@tonic-gate  *	check whether a ufs_vnops conflicts with the file system lock
12457c478bd9Sstevel@tonic-gate  */
12467c478bd9Sstevel@tonic-gate int
12477c478bd9Sstevel@tonic-gate ufs_check_lockfs(struct ufsvfs *ufsvfsp, struct ulockfs *ulp, ulong_t mask)
12487c478bd9Sstevel@tonic-gate {
12497c478bd9Sstevel@tonic-gate 	k_sigset_t	smask;
12507c478bd9Sstevel@tonic-gate 	int		sig, slock;
12517c478bd9Sstevel@tonic-gate 
12527c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ulp->ul_lock));
12537c478bd9Sstevel@tonic-gate 
12547c478bd9Sstevel@tonic-gate 	while (ulp->ul_fs_lock & mask) {
12557c478bd9Sstevel@tonic-gate 		slock = (int)ULOCKFS_IS_SLOCK(ulp);
12567c478bd9Sstevel@tonic-gate 		if ((curthread->t_flag & T_DONTPEND) && !slock) {
12577c478bd9Sstevel@tonic-gate 			curthread->t_flag |= T_WOULDBLOCK;
12587c478bd9Sstevel@tonic-gate 			return (EAGAIN);
12597c478bd9Sstevel@tonic-gate 		}
12607c478bd9Sstevel@tonic-gate 		curthread->t_flag &= ~T_WOULDBLOCK;
12617c478bd9Sstevel@tonic-gate 
126266c9f83dSowenr 		/*
126366c9f83dSowenr 		 * In the case of an onerr umount of the fs, threads could
126466c9f83dSowenr 		 * have blocked before coming into ufs_check_lockfs and
126566c9f83dSowenr 		 * need to check for the special case of ELOCK and
126666c9f83dSowenr 		 * vfs_dontblock being set which would indicate that the fs
126766c9f83dSowenr 		 * is on its way out and will not return therefore making
126866c9f83dSowenr 		 * EIO the appropriate response.
126966c9f83dSowenr 		 */
127066c9f83dSowenr 		if (ULOCKFS_IS_HLOCK(ulp) ||
127166c9f83dSowenr 		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
12727c478bd9Sstevel@tonic-gate 			return (EIO);
12737c478bd9Sstevel@tonic-gate 
12747c478bd9Sstevel@tonic-gate 		/*
12757c478bd9Sstevel@tonic-gate 		 * wait for lock status to change
12767c478bd9Sstevel@tonic-gate 		 */
12777c478bd9Sstevel@tonic-gate 		if (slock || ufsvfsp->vfs_nointr) {
12787c478bd9Sstevel@tonic-gate 			cv_wait(&ulp->ul_cv, &ulp->ul_lock);
12797c478bd9Sstevel@tonic-gate 		} else {
12807c478bd9Sstevel@tonic-gate 			sigintr(&smask, 1);
12817c478bd9Sstevel@tonic-gate 			sig = cv_wait_sig(&ulp->ul_cv, &ulp->ul_lock);
12827c478bd9Sstevel@tonic-gate 			sigunintr(&smask);
12837c478bd9Sstevel@tonic-gate 			if ((!sig && (ulp->ul_fs_lock & mask)) ||
128480d34432Sfrankho 			    ufsvfsp->vfs_dontblock)
12857c478bd9Sstevel@tonic-gate 				return (EINTR);
12867c478bd9Sstevel@tonic-gate 		}
12877c478bd9Sstevel@tonic-gate 	}
1288303bf60bSsdebnath 
1289303bf60bSsdebnath 	if (mask & ULOCKFS_FWLOCK) {
1290303bf60bSsdebnath 		atomic_add_long(&ulp->ul_falloc_cnt, 1);
1291303bf60bSsdebnath 		ULOCKFS_SET_FALLOC(ulp);
1292303bf60bSsdebnath 	} else {
1293303bf60bSsdebnath 		atomic_add_long(&ulp->ul_vnops_cnt, 1);
1294303bf60bSsdebnath 	}
1295303bf60bSsdebnath 
12967c478bd9Sstevel@tonic-gate 	return (0);
12977c478bd9Sstevel@tonic-gate }
12987c478bd9Sstevel@tonic-gate 
12997c478bd9Sstevel@tonic-gate /*
13007c478bd9Sstevel@tonic-gate  * Check whether we came across the handcrafted lockfs protocol path. We can't
13017c478bd9Sstevel@tonic-gate  * simply check for T_DONTBLOCK here as one would assume since this can also
13027c478bd9Sstevel@tonic-gate  * falsely catch recursive VOP's going to a different filesystem, instead we
13037c478bd9Sstevel@tonic-gate  * check if we already hold the ulockfs->ul_lock mutex.
13047c478bd9Sstevel@tonic-gate  */
13057c478bd9Sstevel@tonic-gate static int
13067c478bd9Sstevel@tonic-gate ufs_lockfs_is_under_rawlockfs(struct ulockfs *ulp)
13077c478bd9Sstevel@tonic-gate {
13087c478bd9Sstevel@tonic-gate 	return ((mutex_owner(&ulp->ul_lock) != curthread) ? 0 : 1);
13097c478bd9Sstevel@tonic-gate }
13107c478bd9Sstevel@tonic-gate 
13117c478bd9Sstevel@tonic-gate /*
13127c478bd9Sstevel@tonic-gate  * ufs_lockfs_begin - start the lockfs locking protocol
13137c478bd9Sstevel@tonic-gate  */
13147c478bd9Sstevel@tonic-gate int
13157c478bd9Sstevel@tonic-gate ufs_lockfs_begin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
13167c478bd9Sstevel@tonic-gate {
13177c478bd9Sstevel@tonic-gate 	int 		error;
13187c478bd9Sstevel@tonic-gate 	int		rec_vop;
13196ea97f2eSvsakar 	ushort_t	op_cnt_incremented = 0;
13206ea97f2eSvsakar 	ulong_t		*ctr;
13217c478bd9Sstevel@tonic-gate 	struct ulockfs *ulp;
13227c478bd9Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info;
13237c478bd9Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info_free;
13247c478bd9Sstevel@tonic-gate 	ulockfs_info_t	*ulockfs_info_temp;
13257c478bd9Sstevel@tonic-gate 
13267c478bd9Sstevel@tonic-gate 	/*
13277c478bd9Sstevel@tonic-gate 	 * file system has been forcibly unmounted
13287c478bd9Sstevel@tonic-gate 	 */
13297c478bd9Sstevel@tonic-gate 	if (ufsvfsp == NULL)
13307c478bd9Sstevel@tonic-gate 		return (EIO);
13317c478bd9Sstevel@tonic-gate 
13327c478bd9Sstevel@tonic-gate 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
13337c478bd9Sstevel@tonic-gate 
13347c478bd9Sstevel@tonic-gate 	/*
13357c478bd9Sstevel@tonic-gate 	 * Do lockfs protocol
13367c478bd9Sstevel@tonic-gate 	 */
13377c478bd9Sstevel@tonic-gate 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
13387c478bd9Sstevel@tonic-gate 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
13397c478bd9Sstevel@tonic-gate 
13407c478bd9Sstevel@tonic-gate 	/*
13417c478bd9Sstevel@tonic-gate 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
13427c478bd9Sstevel@tonic-gate 	 * path and bail out in that case.
13437c478bd9Sstevel@tonic-gate 	 */
13447c478bd9Sstevel@tonic-gate 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
13457c478bd9Sstevel@tonic-gate 		*ulpp = NULL;
13467c478bd9Sstevel@tonic-gate 		return (0);
13477c478bd9Sstevel@tonic-gate 	} else {
13487c478bd9Sstevel@tonic-gate 		if (ulockfs_info_free == NULL) {
13497c478bd9Sstevel@tonic-gate 			if ((ulockfs_info_temp = (ulockfs_info_t *)
13507c478bd9Sstevel@tonic-gate 			    kmem_zalloc(sizeof (ulockfs_info_t),
13517c478bd9Sstevel@tonic-gate 			    KM_NOSLEEP)) == NULL) {
13527c478bd9Sstevel@tonic-gate 				*ulpp = NULL;
13537c478bd9Sstevel@tonic-gate 				return (ENOMEM);
13547c478bd9Sstevel@tonic-gate 			}
13557c478bd9Sstevel@tonic-gate 		}
13567c478bd9Sstevel@tonic-gate 	}
13577c478bd9Sstevel@tonic-gate 
13587c478bd9Sstevel@tonic-gate 	/*
13597c478bd9Sstevel@tonic-gate 	 * First time VOP call
13606ea97f2eSvsakar 	 *
13616ea97f2eSvsakar 	 * Increment the ctr irrespective of the lockfs state. If the lockfs
13626ea97f2eSvsakar 	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
13636ea97f2eSvsakar 	 * before incrementing we need to check if there is a pending quiesce
13646ea97f2eSvsakar 	 * request because if we have a continuous stream of ufs_lockfs_begin
13656ea97f2eSvsakar 	 * requests pounding on a few cpu's then the ufs_quiesce thread might
13666ea97f2eSvsakar 	 * never see the value of zero for ctr - a livelock kind of scenario.
13676ea97f2eSvsakar 	 */
13686ea97f2eSvsakar 	ctr = (mask & ULOCKFS_FWLOCK) ?
13696ea97f2eSvsakar 	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
13706ea97f2eSvsakar 	if (!ULOCKFS_IS_SLOCK(ulp)) {
13716ea97f2eSvsakar 		atomic_add_long(ctr, 1);
13726ea97f2eSvsakar 		op_cnt_incremented++;
13736ea97f2eSvsakar 	}
13746ea97f2eSvsakar 
13756ea97f2eSvsakar 	/*
13766ea97f2eSvsakar 	 * If the lockfs state (indicated by ul_fs_lock) is not just
13776ea97f2eSvsakar 	 * ULOCKFS_ULOCK, then we will be routed through ufs_check_lockfs
13786ea97f2eSvsakar 	 * where there is a check with an appropriate mask to selectively allow
13796ea97f2eSvsakar 	 * operations permitted for that kind of lockfs state.
13806ea97f2eSvsakar 	 *
13816ea97f2eSvsakar 	 * Even these selective operations should not be allowed to go through
13826ea97f2eSvsakar 	 * if a lockfs request is in progress because that could result in inode
13836ea97f2eSvsakar 	 * modifications during a quiesce and could hence result in inode
13846ea97f2eSvsakar 	 * reconciliation failures. ULOCKFS_SLOCK alone would not be sufficient,
13856ea97f2eSvsakar 	 * so make use of ufs_quiesce_pend to disallow vnode operations when a
13866ea97f2eSvsakar 	 * quiesce is in progress.
13876ea97f2eSvsakar 	 */
13886ea97f2eSvsakar 	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
13896ea97f2eSvsakar 		if (op_cnt_incremented)
13906ea97f2eSvsakar 			if (!atomic_add_long_nv(ctr, -1))
13916ea97f2eSvsakar 				cv_broadcast(&ulp->ul_cv);
13926ea97f2eSvsakar 		mutex_enter(&ulp->ul_lock);
13936ea97f2eSvsakar 		error = ufs_check_lockfs(ufsvfsp, ulp, mask);
13946ea97f2eSvsakar 		mutex_exit(&ulp->ul_lock);
13956ea97f2eSvsakar 		if (error) {
13967c478bd9Sstevel@tonic-gate 			if (ulockfs_info_free == NULL)
13977c478bd9Sstevel@tonic-gate 				kmem_free(ulockfs_info_temp,
13987c478bd9Sstevel@tonic-gate 				    sizeof (ulockfs_info_t));
13997c478bd9Sstevel@tonic-gate 			return (error);
14007c478bd9Sstevel@tonic-gate 		}
14016ea97f2eSvsakar 	} else {
14026ea97f2eSvsakar 		/*
14036ea97f2eSvsakar 		 * This is the common case of file system in a unlocked state.
14046ea97f2eSvsakar 		 *
14056ea97f2eSvsakar 		 * If a file system is unlocked, we would expect the ctr to have
14066ea97f2eSvsakar 		 * been incremented by now. But this will not be true when a
14076ea97f2eSvsakar 		 * quiesce is winding up - SLOCK was set when we checked before
14086ea97f2eSvsakar 		 * incrementing the ctr, but by the time we checked for
14096ea97f2eSvsakar 		 * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. It is okay
14106ea97f2eSvsakar 		 * to take ul_lock and go through the slow path in this uncommon
14116ea97f2eSvsakar 		 * case.
14126ea97f2eSvsakar 		 */
14136ea97f2eSvsakar 		if (op_cnt_incremented == 0) {
14146ea97f2eSvsakar 			mutex_enter(&ulp->ul_lock);
14156ea97f2eSvsakar 			error = ufs_check_lockfs(ufsvfsp, ulp, mask);
14166ea97f2eSvsakar 			if (error) {
14176ea97f2eSvsakar 				mutex_exit(&ulp->ul_lock);
14186ea97f2eSvsakar 				if (ulockfs_info_free == NULL)
14196ea97f2eSvsakar 					kmem_free(ulockfs_info_temp,
14206ea97f2eSvsakar 					    sizeof (ulockfs_info_t));
14216ea97f2eSvsakar 				return (error);
14226ea97f2eSvsakar 			}
14236ea97f2eSvsakar 			if (mask & ULOCKFS_FWLOCK)
14246ea97f2eSvsakar 				ULOCKFS_SET_FALLOC(ulp);
14256ea97f2eSvsakar 			mutex_exit(&ulp->ul_lock);
14266ea97f2eSvsakar 		} else if (mask & ULOCKFS_FWLOCK) {
14276ea97f2eSvsakar 			mutex_enter(&ulp->ul_lock);
14286ea97f2eSvsakar 			ULOCKFS_SET_FALLOC(ulp);
14296ea97f2eSvsakar 			mutex_exit(&ulp->ul_lock);
14306ea97f2eSvsakar 		}
14317c478bd9Sstevel@tonic-gate 	}
14327c478bd9Sstevel@tonic-gate 
14337c478bd9Sstevel@tonic-gate 	if (ulockfs_info_free != NULL) {
14347c478bd9Sstevel@tonic-gate 		ulockfs_info_free->ulp = ulp;
1435303bf60bSsdebnath 		if (mask & ULOCKFS_FWLOCK)
1436303bf60bSsdebnath 			ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
14377c478bd9Sstevel@tonic-gate 	} else {
14387c478bd9Sstevel@tonic-gate 		ulockfs_info_temp->ulp = ulp;
14397c478bd9Sstevel@tonic-gate 		ulockfs_info_temp->next = ulockfs_info;
1440303bf60bSsdebnath 		if (mask & ULOCKFS_FWLOCK)
1441303bf60bSsdebnath 			ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
14427c478bd9Sstevel@tonic-gate 		ASSERT(ufs_lockfs_key != 0);
14437c478bd9Sstevel@tonic-gate 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
14447c478bd9Sstevel@tonic-gate 	}
14457c478bd9Sstevel@tonic-gate 
14467c478bd9Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
14477c478bd9Sstevel@tonic-gate 	return (0);
14487c478bd9Sstevel@tonic-gate }
14497c478bd9Sstevel@tonic-gate 
14507c478bd9Sstevel@tonic-gate /*
14517c478bd9Sstevel@tonic-gate  * Check whether we are returning from the top level VOP.
14527c478bd9Sstevel@tonic-gate  */
14537c478bd9Sstevel@tonic-gate static int
14547c478bd9Sstevel@tonic-gate ufs_lockfs_top_vop_return(ulockfs_info_t *head)
14557c478bd9Sstevel@tonic-gate {
14567c478bd9Sstevel@tonic-gate 	ulockfs_info_t *info;
14577c478bd9Sstevel@tonic-gate 	int result = 1;
14587c478bd9Sstevel@tonic-gate 
14597c478bd9Sstevel@tonic-gate 	for (info = head; info != NULL; info = info->next) {
14607c478bd9Sstevel@tonic-gate 		if (info->ulp != NULL) {
14617c478bd9Sstevel@tonic-gate 			result = 0;
14627c478bd9Sstevel@tonic-gate 			break;
14637c478bd9Sstevel@tonic-gate 		}
14647c478bd9Sstevel@tonic-gate 	}
14657c478bd9Sstevel@tonic-gate 
14667c478bd9Sstevel@tonic-gate 	return (result);
14677c478bd9Sstevel@tonic-gate }
14687c478bd9Sstevel@tonic-gate 
14697c478bd9Sstevel@tonic-gate /*
14707c478bd9Sstevel@tonic-gate  * ufs_lockfs_end - terminate the lockfs locking protocol
14717c478bd9Sstevel@tonic-gate  */
14727c478bd9Sstevel@tonic-gate void
14737c478bd9Sstevel@tonic-gate ufs_lockfs_end(struct ulockfs *ulp)
14747c478bd9Sstevel@tonic-gate {
14757c478bd9Sstevel@tonic-gate 	ulockfs_info_t *info;
14767c478bd9Sstevel@tonic-gate 	ulockfs_info_t *head;
14777c478bd9Sstevel@tonic-gate 
14787c478bd9Sstevel@tonic-gate 	/*
14797c478bd9Sstevel@tonic-gate 	 * end-of-VOP protocol
14807c478bd9Sstevel@tonic-gate 	 */
14817c478bd9Sstevel@tonic-gate 	if (ulp == NULL)
14827c478bd9Sstevel@tonic-gate 		return;
14837c478bd9Sstevel@tonic-gate 
14847c478bd9Sstevel@tonic-gate 	head = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
14857c478bd9Sstevel@tonic-gate 	SEARCH_ULOCKFSP(head, ulp, info);
14867c478bd9Sstevel@tonic-gate 
14877c478bd9Sstevel@tonic-gate 	/*
14887c478bd9Sstevel@tonic-gate 	 * If we're called from a first level VOP, we have to have a
14897c478bd9Sstevel@tonic-gate 	 * valid ulockfs record in the TSD.
14907c478bd9Sstevel@tonic-gate 	 */
14917c478bd9Sstevel@tonic-gate 	ASSERT(info != NULL);
14927c478bd9Sstevel@tonic-gate 
14937c478bd9Sstevel@tonic-gate 	/*
14947c478bd9Sstevel@tonic-gate 	 * Invalidate the ulockfs record.
14957c478bd9Sstevel@tonic-gate 	 */
14967c478bd9Sstevel@tonic-gate 	info->ulp = NULL;
14977c478bd9Sstevel@tonic-gate 
14987c478bd9Sstevel@tonic-gate 	if (ufs_lockfs_top_vop_return(head))
14997c478bd9Sstevel@tonic-gate 		curthread->t_flag &= ~T_DONTBLOCK;
15007c478bd9Sstevel@tonic-gate 
1501303bf60bSsdebnath 	/* fallocate thread */
1502303bf60bSsdebnath 	if (ULOCKFS_IS_FALLOC(ulp) && info->flags & ULOCK_INFO_FALLOCATE) {
15036ea97f2eSvsakar 		/* Clear the thread's fallocate state */
15046ea97f2eSvsakar 		info->flags &= ~ULOCK_INFO_FALLOCATE;
15056ea97f2eSvsakar 		if (!atomic_add_long_nv(&ulp->ul_falloc_cnt, -1)) {
15066ea97f2eSvsakar 			mutex_enter(&ulp->ul_lock);
1507303bf60bSsdebnath 			ULOCKFS_CLR_FALLOC(ulp);
15086ea97f2eSvsakar 			cv_broadcast(&ulp->ul_cv);
15096ea97f2eSvsakar 			mutex_exit(&ulp->ul_lock);
15106ea97f2eSvsakar 		}
1511303bf60bSsdebnath 	} else  { /* normal thread */
1512303bf60bSsdebnath 		if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
1513303bf60bSsdebnath 			cv_broadcast(&ulp->ul_cv);
1514303bf60bSsdebnath 	}
15157c478bd9Sstevel@tonic-gate }
15167c478bd9Sstevel@tonic-gate 
15176ac3b8a8Svsakar /*
15186ac3b8a8Svsakar  * ufs_lockfs_trybegin - try to start the lockfs locking protocol without
15196ac3b8a8Svsakar  * blocking.
15206ac3b8a8Svsakar  */
15216ac3b8a8Svsakar int
15226ac3b8a8Svsakar ufs_lockfs_trybegin(struct ufsvfs *ufsvfsp, struct ulockfs **ulpp, ulong_t mask)
15236ac3b8a8Svsakar {
15246ac3b8a8Svsakar 	int 		error = 0;
15256ac3b8a8Svsakar 	int		rec_vop;
15266ea97f2eSvsakar 	ushort_t	op_cnt_incremented = 0;
15276ea97f2eSvsakar 	ulong_t		*ctr;
15286ac3b8a8Svsakar 	struct ulockfs *ulp;
15296ac3b8a8Svsakar 	ulockfs_info_t	*ulockfs_info;
15306ac3b8a8Svsakar 	ulockfs_info_t	*ulockfs_info_free;
15316ac3b8a8Svsakar 	ulockfs_info_t	*ulockfs_info_temp;
15326ac3b8a8Svsakar 
15336ac3b8a8Svsakar 	/*
15346ac3b8a8Svsakar 	 * file system has been forcibly unmounted
15356ac3b8a8Svsakar 	 */
15366ac3b8a8Svsakar 	if (ufsvfsp == NULL)
15376ac3b8a8Svsakar 		return (EIO);
15386ac3b8a8Svsakar 
15396ac3b8a8Svsakar 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
15406ac3b8a8Svsakar 
15416ac3b8a8Svsakar 	/*
15426ac3b8a8Svsakar 	 * Do lockfs protocol
15436ac3b8a8Svsakar 	 */
15446ac3b8a8Svsakar 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
15456ac3b8a8Svsakar 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
15466ac3b8a8Svsakar 
15476ac3b8a8Svsakar 	/*
15486ac3b8a8Svsakar 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
15496ac3b8a8Svsakar 	 * path and bail out in that case.
15506ac3b8a8Svsakar 	 */
15516ac3b8a8Svsakar 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
15526ac3b8a8Svsakar 		*ulpp = NULL;
15536ac3b8a8Svsakar 		return (0);
15546ac3b8a8Svsakar 	} else {
15556ac3b8a8Svsakar 		if (ulockfs_info_free == NULL) {
15566ac3b8a8Svsakar 			if ((ulockfs_info_temp = (ulockfs_info_t *)
15576ac3b8a8Svsakar 			    kmem_zalloc(sizeof (ulockfs_info_t),
15586ac3b8a8Svsakar 			    KM_NOSLEEP)) == NULL) {
15596ac3b8a8Svsakar 				*ulpp = NULL;
15606ac3b8a8Svsakar 				return (ENOMEM);
15616ac3b8a8Svsakar 			}
15626ac3b8a8Svsakar 		}
15636ac3b8a8Svsakar 	}
15646ac3b8a8Svsakar 
15656ac3b8a8Svsakar 	/*
15666ac3b8a8Svsakar 	 * First time VOP call
15676ea97f2eSvsakar 	 *
15686ea97f2eSvsakar 	 * Increment the ctr irrespective of the lockfs state. If the lockfs
15696ea97f2eSvsakar 	 * state is not ULOCKFS_ULOCK, we can decrement it later. However,
15706ea97f2eSvsakar 	 * before incrementing we need to check if there is a pending quiesce
15716ea97f2eSvsakar 	 * request because if we have a continuous stream of ufs_lockfs_begin
15726ea97f2eSvsakar 	 * requests pounding on a few cpu's then the ufs_quiesce thread might
15736ea97f2eSvsakar 	 * never see the value of zero for ctr - a livelock kind of scenario.
15746ea97f2eSvsakar 	 */
15756ea97f2eSvsakar 	ctr = (mask & ULOCKFS_FWLOCK) ?
15766ea97f2eSvsakar 	    &ulp->ul_falloc_cnt : &ulp->ul_vnops_cnt;
15776ea97f2eSvsakar 	if (!ULOCKFS_IS_SLOCK(ulp)) {
15786ea97f2eSvsakar 		atomic_add_long(ctr, 1);
15796ea97f2eSvsakar 		op_cnt_incremented++;
15806ea97f2eSvsakar 	}
15816ea97f2eSvsakar 
15826ea97f2eSvsakar 	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
15836ac3b8a8Svsakar 		/*
15846ac3b8a8Svsakar 		 * Non-blocking version of ufs_check_lockfs() code.
15856ac3b8a8Svsakar 		 *
15866ac3b8a8Svsakar 		 * If the file system is not hard locked or error locked
15876ac3b8a8Svsakar 		 * and if ulp->ul_fs_lock allows this operation, increment
15886ac3b8a8Svsakar 		 * the appropriate counter and proceed (For eg., In case the
15896ac3b8a8Svsakar 		 * file system is delete locked, a mmap can still go through).
15906ac3b8a8Svsakar 		 */
15916ea97f2eSvsakar 		if (op_cnt_incremented)
15926ea97f2eSvsakar 			if (!atomic_add_long_nv(ctr, -1))
15936ea97f2eSvsakar 				cv_broadcast(&ulp->ul_cv);
15946ea97f2eSvsakar 		mutex_enter(&ulp->ul_lock);
15956ac3b8a8Svsakar 		if (ULOCKFS_IS_HLOCK(ulp) ||
15966ac3b8a8Svsakar 		    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
15976ac3b8a8Svsakar 			error = EIO;
15986ac3b8a8Svsakar 		else if (ulp->ul_fs_lock & mask)
15996ac3b8a8Svsakar 			error = EAGAIN;
16006ac3b8a8Svsakar 
16016ac3b8a8Svsakar 		if (error) {
16026ac3b8a8Svsakar 			mutex_exit(&ulp->ul_lock);
16036ac3b8a8Svsakar 			if (ulockfs_info_free == NULL)
16046ac3b8a8Svsakar 				kmem_free(ulockfs_info_temp,
16056ac3b8a8Svsakar 				    sizeof (ulockfs_info_t));
16066ac3b8a8Svsakar 			return (error);
16076ea97f2eSvsakar 		}
16086ea97f2eSvsakar 		atomic_add_long(ctr, 1);
16096ea97f2eSvsakar 		if (mask & ULOCKFS_FWLOCK)
16106ea97f2eSvsakar 			ULOCKFS_SET_FALLOC(ulp);
16116ea97f2eSvsakar 		mutex_exit(&ulp->ul_lock);
16126ea97f2eSvsakar 	} else {
16136ea97f2eSvsakar 		/*
16146ea97f2eSvsakar 		 * This is the common case of file system in a unlocked state.
16156ea97f2eSvsakar 		 *
16166ea97f2eSvsakar 		 * If a file system is unlocked, we would expect the ctr to have
16176ea97f2eSvsakar 		 * been incremented by now. But this will not be true when a
16186ea97f2eSvsakar 		 * quiesce is winding up - SLOCK was set when we checked before
16196ea97f2eSvsakar 		 * incrementing the ctr, but by the time we checked for
16206ea97f2eSvsakar 		 * ULOCKFS_IS_JUSTULOCK, the quiesce thread was gone. Take
16216ea97f2eSvsakar 		 * ul_lock and go through the non-blocking version of
16226ea97f2eSvsakar 		 * ufs_check_lockfs() code.
16236ea97f2eSvsakar 		 */
16246ea97f2eSvsakar 		if (op_cnt_incremented == 0) {
16256ea97f2eSvsakar 			mutex_enter(&ulp->ul_lock);
16266ea97f2eSvsakar 			if (ULOCKFS_IS_HLOCK(ulp) ||
16276ea97f2eSvsakar 			    (ULOCKFS_IS_ELOCK(ulp) && ufsvfsp->vfs_dontblock))
16286ea97f2eSvsakar 				error = EIO;
16296ea97f2eSvsakar 			else if (ulp->ul_fs_lock & mask)
16306ea97f2eSvsakar 				error = EAGAIN;
16316ea97f2eSvsakar 
16326ea97f2eSvsakar 			if (error) {
16336ea97f2eSvsakar 				mutex_exit(&ulp->ul_lock);
16346ea97f2eSvsakar 				if (ulockfs_info_free == NULL)
16356ea97f2eSvsakar 					kmem_free(ulockfs_info_temp,
16366ea97f2eSvsakar 					    sizeof (ulockfs_info_t));
16376ea97f2eSvsakar 				return (error);
16386ac3b8a8Svsakar 			}
16396ea97f2eSvsakar 			atomic_add_long(ctr, 1);
16406ea97f2eSvsakar 			if (mask & ULOCKFS_FWLOCK)
16416ea97f2eSvsakar 				ULOCKFS_SET_FALLOC(ulp);
16426ea97f2eSvsakar 			mutex_exit(&ulp->ul_lock);
16436ea97f2eSvsakar 		} else if (mask & ULOCKFS_FWLOCK) {
16446ea97f2eSvsakar 			mutex_enter(&ulp->ul_lock);
16456ea97f2eSvsakar 			ULOCKFS_SET_FALLOC(ulp);
16466ea97f2eSvsakar 			mutex_exit(&ulp->ul_lock);
16476ac3b8a8Svsakar 		}
16486ac3b8a8Svsakar 	}
16496ac3b8a8Svsakar 
16506ac3b8a8Svsakar 	if (ulockfs_info_free != NULL) {
16516ac3b8a8Svsakar 		ulockfs_info_free->ulp = ulp;
16526ac3b8a8Svsakar 		if (mask & ULOCKFS_FWLOCK)
16536ac3b8a8Svsakar 			ulockfs_info_free->flags |= ULOCK_INFO_FALLOCATE;
16546ac3b8a8Svsakar 	} else {
16556ac3b8a8Svsakar 		ulockfs_info_temp->ulp = ulp;
16566ac3b8a8Svsakar 		ulockfs_info_temp->next = ulockfs_info;
16576ac3b8a8Svsakar 		if (mask & ULOCKFS_FWLOCK)
16586ac3b8a8Svsakar 			ulockfs_info_temp->flags |= ULOCK_INFO_FALLOCATE;
16596ac3b8a8Svsakar 		ASSERT(ufs_lockfs_key != 0);
16606ac3b8a8Svsakar 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
16616ac3b8a8Svsakar 	}
16626ac3b8a8Svsakar 
16636ac3b8a8Svsakar 	curthread->t_flag |= T_DONTBLOCK;
16646ac3b8a8Svsakar 	return (0);
16656ac3b8a8Svsakar }
16666ac3b8a8Svsakar 
16677c478bd9Sstevel@tonic-gate /*
16687c478bd9Sstevel@tonic-gate  * specialized version of ufs_lockfs_begin() called by ufs_getpage().
16697c478bd9Sstevel@tonic-gate  */
16707c478bd9Sstevel@tonic-gate int
16717c478bd9Sstevel@tonic-gate ufs_lockfs_begin_getpage(
16727c478bd9Sstevel@tonic-gate 	struct ufsvfs	*ufsvfsp,
16737c478bd9Sstevel@tonic-gate 	struct ulockfs	**ulpp,
16747c478bd9Sstevel@tonic-gate 	struct seg	*seg,
16757c478bd9Sstevel@tonic-gate 	int		read_access,
16767c478bd9Sstevel@tonic-gate 	uint_t		*protp)
16777c478bd9Sstevel@tonic-gate {
16787c478bd9Sstevel@tonic-gate 	ulong_t			mask;
16797c478bd9Sstevel@tonic-gate 	int 			error;
16807c478bd9Sstevel@tonic-gate 	int			rec_vop;
16817c478bd9Sstevel@tonic-gate 	struct ulockfs		*ulp;
16827c478bd9Sstevel@tonic-gate 	ulockfs_info_t		*ulockfs_info;
16837c478bd9Sstevel@tonic-gate 	ulockfs_info_t		*ulockfs_info_free;
16847c478bd9Sstevel@tonic-gate 	ulockfs_info_t		*ulockfs_info_temp;
16857c478bd9Sstevel@tonic-gate 
16867c478bd9Sstevel@tonic-gate 	/*
16877c478bd9Sstevel@tonic-gate 	 * file system has been forcibly unmounted
16887c478bd9Sstevel@tonic-gate 	 */
16897c478bd9Sstevel@tonic-gate 	if (ufsvfsp == NULL)
16907c478bd9Sstevel@tonic-gate 		return (EIO);
16917c478bd9Sstevel@tonic-gate 
16927c478bd9Sstevel@tonic-gate 	*ulpp = ulp = &ufsvfsp->vfs_ulockfs;
16937c478bd9Sstevel@tonic-gate 
16947c478bd9Sstevel@tonic-gate 	/*
16957c478bd9Sstevel@tonic-gate 	 * Do lockfs protocol
16967c478bd9Sstevel@tonic-gate 	 */
16977c478bd9Sstevel@tonic-gate 	ulockfs_info = (ulockfs_info_t *)tsd_get(ufs_lockfs_key);
16987c478bd9Sstevel@tonic-gate 	IS_REC_VOP(rec_vop, ulockfs_info, ulp, ulockfs_info_free);
16997c478bd9Sstevel@tonic-gate 
17007c478bd9Sstevel@tonic-gate 	/*
17017c478bd9Sstevel@tonic-gate 	 * Detect recursive VOP call or handcrafted internal lockfs protocol
17027c478bd9Sstevel@tonic-gate 	 * path and bail out in that case.
17037c478bd9Sstevel@tonic-gate 	 */
17047c478bd9Sstevel@tonic-gate 	if (rec_vop || ufs_lockfs_is_under_rawlockfs(ulp)) {
17057c478bd9Sstevel@tonic-gate 		*ulpp = NULL;
17067c478bd9Sstevel@tonic-gate 		return (0);
17077c478bd9Sstevel@tonic-gate 	} else {
17087c478bd9Sstevel@tonic-gate 		if (ulockfs_info_free == NULL) {
17097c478bd9Sstevel@tonic-gate 			if ((ulockfs_info_temp = (ulockfs_info_t *)
17107c478bd9Sstevel@tonic-gate 			    kmem_zalloc(sizeof (ulockfs_info_t),
17117c478bd9Sstevel@tonic-gate 			    KM_NOSLEEP)) == NULL) {
17127c478bd9Sstevel@tonic-gate 				*ulpp = NULL;
17137c478bd9Sstevel@tonic-gate 				return (ENOMEM);
17147c478bd9Sstevel@tonic-gate 			}
17157c478bd9Sstevel@tonic-gate 		}
17167c478bd9Sstevel@tonic-gate 	}
17177c478bd9Sstevel@tonic-gate 
17187c478bd9Sstevel@tonic-gate 	/*
17197c478bd9Sstevel@tonic-gate 	 * First time VOP call
17207c478bd9Sstevel@tonic-gate 	 */
17216ea97f2eSvsakar 	atomic_add_long(&ulp->ul_vnops_cnt, 1);
17226ea97f2eSvsakar 	if (!ULOCKFS_IS_JUSTULOCK(ulp) || ufs_quiesce_pend) {
17236ea97f2eSvsakar 		if (!atomic_add_long_nv(&ulp->ul_vnops_cnt, -1))
17246ea97f2eSvsakar 			cv_broadcast(&ulp->ul_cv);
17256ea97f2eSvsakar 		mutex_enter(&ulp->ul_lock);
17267c478bd9Sstevel@tonic-gate 		if (seg->s_ops == &segvn_ops &&
17277c478bd9Sstevel@tonic-gate 		    ((struct segvn_data *)seg->s_data)->type != MAP_SHARED) {
17287c478bd9Sstevel@tonic-gate 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
17297c478bd9Sstevel@tonic-gate 		} else if (protp && read_access) {
17307c478bd9Sstevel@tonic-gate 			/*
17317c478bd9Sstevel@tonic-gate 			 * Restrict the mapping to readonly.
17327c478bd9Sstevel@tonic-gate 			 * Writes to this mapping will cause
17337c478bd9Sstevel@tonic-gate 			 * another fault which will then
17347c478bd9Sstevel@tonic-gate 			 * be suspended if fs is write locked
17357c478bd9Sstevel@tonic-gate 			 */
17367c478bd9Sstevel@tonic-gate 			*protp &= ~PROT_WRITE;
17377c478bd9Sstevel@tonic-gate 			mask = (ulong_t)ULOCKFS_GETREAD_MASK;
17387c478bd9Sstevel@tonic-gate 		} else
17397c478bd9Sstevel@tonic-gate 			mask = (ulong_t)ULOCKFS_GETWRITE_MASK;
17407c478bd9Sstevel@tonic-gate 
17417c478bd9Sstevel@tonic-gate 		/*
17427c478bd9Sstevel@tonic-gate 		 * will sleep if this fs is locked against this VOP
17437c478bd9Sstevel@tonic-gate 		 */
17446ea97f2eSvsakar 		error = ufs_check_lockfs(ufsvfsp, ulp, mask);
17456ea97f2eSvsakar 		mutex_exit(&ulp->ul_lock);
17466ea97f2eSvsakar 		if (error) {
17477c478bd9Sstevel@tonic-gate 			if (ulockfs_info_free == NULL)
17487c478bd9Sstevel@tonic-gate 				kmem_free(ulockfs_info_temp,
17497c478bd9Sstevel@tonic-gate 				    sizeof (ulockfs_info_t));
17507c478bd9Sstevel@tonic-gate 			return (error);
17517c478bd9Sstevel@tonic-gate 		}
17527c478bd9Sstevel@tonic-gate 	}
17537c478bd9Sstevel@tonic-gate 
17547c478bd9Sstevel@tonic-gate 	if (ulockfs_info_free != NULL) {
17557c478bd9Sstevel@tonic-gate 		ulockfs_info_free->ulp = ulp;
17567c478bd9Sstevel@tonic-gate 	} else {
17577c478bd9Sstevel@tonic-gate 		ulockfs_info_temp->ulp = ulp;
17587c478bd9Sstevel@tonic-gate 		ulockfs_info_temp->next = ulockfs_info;
17597c478bd9Sstevel@tonic-gate 		ASSERT(ufs_lockfs_key != 0);
17607c478bd9Sstevel@tonic-gate 		(void) tsd_set(ufs_lockfs_key, (void *)ulockfs_info_temp);
17617c478bd9Sstevel@tonic-gate 	}
17627c478bd9Sstevel@tonic-gate 
17637c478bd9Sstevel@tonic-gate 	curthread->t_flag |= T_DONTBLOCK;
17647c478bd9Sstevel@tonic-gate 	return (0);
17657c478bd9Sstevel@tonic-gate }
17667c478bd9Sstevel@tonic-gate 
17677c478bd9Sstevel@tonic-gate void
17687c478bd9Sstevel@tonic-gate ufs_lockfs_tsd_destructor(void *head)
17697c478bd9Sstevel@tonic-gate {
17707c478bd9Sstevel@tonic-gate 	ulockfs_info_t *curr = (ulockfs_info_t *)head;
17717c478bd9Sstevel@tonic-gate 	ulockfs_info_t *temp;
17727c478bd9Sstevel@tonic-gate 
17737c478bd9Sstevel@tonic-gate 	for (; curr != NULL; ) {
17747c478bd9Sstevel@tonic-gate 		/*
17757c478bd9Sstevel@tonic-gate 		 * The TSD destructor is being called when the thread exits
17767c478bd9Sstevel@tonic-gate 		 * (via thread_exit()). At that time it must have cleaned up
17777c478bd9Sstevel@tonic-gate 		 * all VOPs via ufs_lockfs_end() and there must not be a
17787c478bd9Sstevel@tonic-gate 		 * valid ulockfs record exist while a thread is exiting.
17797c478bd9Sstevel@tonic-gate 		 */
17807c478bd9Sstevel@tonic-gate 		temp = curr;
17817c478bd9Sstevel@tonic-gate 		curr = curr->next;
17827c478bd9Sstevel@tonic-gate 		ASSERT(temp->ulp == NULL);
17837c478bd9Sstevel@tonic-gate 		kmem_free(temp, sizeof (ulockfs_info_t));
17847c478bd9Sstevel@tonic-gate 	}
17857c478bd9Sstevel@tonic-gate }
1786