17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
550a83466Sjwahlig  * Common Development and Distribution License (the "License").
650a83466Sjwahlig  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2250a83466Sjwahlig  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  *  	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
287c478bd9Sstevel@tonic-gate  *	All Rights Reserved
297c478bd9Sstevel@tonic-gate  */
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #include <sys/param.h>
347c478bd9Sstevel@tonic-gate #include <sys/types.h>
357c478bd9Sstevel@tonic-gate #include <sys/systm.h>
367c478bd9Sstevel@tonic-gate #include <sys/thread.h>
377c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
387c478bd9Sstevel@tonic-gate #include <sys/time.h>
397c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
407c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
417c478bd9Sstevel@tonic-gate #include <sys/errno.h>
427c478bd9Sstevel@tonic-gate #include <sys/buf.h>
437c478bd9Sstevel@tonic-gate #include <sys/stat.h>
447c478bd9Sstevel@tonic-gate #include <sys/cred.h>
457c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
467c478bd9Sstevel@tonic-gate #include <sys/debug.h>
477c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
487c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
497c478bd9Sstevel@tonic-gate #include <sys/flock.h>
507c478bd9Sstevel@tonic-gate #include <sys/share.h>
517c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
527c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
537c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
547c478bd9Sstevel@tonic-gate #include <sys/callb.h>
557c478bd9Sstevel@tonic-gate #include <sys/acl.h>
567c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
577c478bd9Sstevel@tonic-gate #include <sys/signal.h>
587c478bd9Sstevel@tonic-gate #include <sys/disp.h>
597c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
607c478bd9Sstevel@tonic-gate #include <sys/list.h>
617c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
627c478bd9Sstevel@tonic-gate 
637c478bd9Sstevel@tonic-gate #include <rpc/types.h>
647c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
657c478bd9Sstevel@tonic-gate #include <rpc/auth.h>
667c478bd9Sstevel@tonic-gate #include <rpc/clnt.h>
677c478bd9Sstevel@tonic-gate 
687c478bd9Sstevel@tonic-gate #include <nfs/nfs.h>
697c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h>
707c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h>
717c478bd9Sstevel@tonic-gate 
727c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
737c478bd9Sstevel@tonic-gate #include <nfs/rnode4.h>
747c478bd9Sstevel@tonic-gate #include <nfs/nfs4_clnt.h>
757c478bd9Sstevel@tonic-gate 
767c478bd9Sstevel@tonic-gate #include <vm/hat.h>
777c478bd9Sstevel@tonic-gate #include <vm/as.h>
787c478bd9Sstevel@tonic-gate #include <vm/page.h>
797c478bd9Sstevel@tonic-gate #include <vm/pvn.h>
807c478bd9Sstevel@tonic-gate #include <vm/seg.h>
817c478bd9Sstevel@tonic-gate #include <vm/seg_map.h>
827c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
837c478bd9Sstevel@tonic-gate 
847c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
857c478bd9Sstevel@tonic-gate 
867c478bd9Sstevel@tonic-gate /*
877c478bd9Sstevel@tonic-gate  * Arguments to page-flush thread.
887c478bd9Sstevel@tonic-gate  */
897c478bd9Sstevel@tonic-gate typedef struct {
907c478bd9Sstevel@tonic-gate 	vnode_t *vp;
917c478bd9Sstevel@tonic-gate 	cred_t *cr;
927c478bd9Sstevel@tonic-gate } pgflush_t;
937c478bd9Sstevel@tonic-gate 
#ifdef DEBUG
/*
 * Globals that exist only in DEBUG builds.
 */
int nfs4_client_lease_debug;
int nfs4_sharedfh_debug;
int nfs4_fname_debug;

/* temporary: panic if v_type is inconsistent with r_attr va_type */
int nfs4_vtype_debug;

/* thread-specific-data key checked by the ASSERT in nfs4_purge_stale_fh() */
uint_t nfs4_tsd_key;
#endif
1047c478bd9Sstevel@tonic-gate 
1057c478bd9Sstevel@tonic-gate static time_t	nfs4_client_resumed = 0;
1067c478bd9Sstevel@tonic-gate static	callb_id_t cid = 0;
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate static int	nfs4renew(nfs4_server_t *);
1097c478bd9Sstevel@tonic-gate static void	nfs4_attrcache_va(vnode_t *, nfs4_ga_res_t *, int);
1107c478bd9Sstevel@tonic-gate static void	nfs4_pgflush_thread(pgflush_t *);
1117c478bd9Sstevel@tonic-gate static void	flush_pages(vnode_t *, cred_t *);
1127c478bd9Sstevel@tonic-gate 
1137c478bd9Sstevel@tonic-gate static boolean_t nfs4_client_cpr_callb(void *, int);
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate struct mi4_globals {
1167c478bd9Sstevel@tonic-gate 	kmutex_t	mig_lock;  /* lock protecting mig_list */
1177c478bd9Sstevel@tonic-gate 	list_t		mig_list;  /* list of NFS v4 mounts in zone */
1187c478bd9Sstevel@tonic-gate 	boolean_t	mig_destructor_called;
1197c478bd9Sstevel@tonic-gate };
1207c478bd9Sstevel@tonic-gate 
1217c478bd9Sstevel@tonic-gate static zone_key_t mi4_list_key;
1227c478bd9Sstevel@tonic-gate 
1237c478bd9Sstevel@tonic-gate /*
1247c478bd9Sstevel@tonic-gate  * Attributes caching:
1257c478bd9Sstevel@tonic-gate  *
1267c478bd9Sstevel@tonic-gate  * Attributes are cached in the rnode in struct vattr form.
1277c478bd9Sstevel@tonic-gate  * There is a time associated with the cached attributes (r_time_attr_inval)
1287c478bd9Sstevel@tonic-gate  * which tells whether the attributes are valid. The time is initialized
1297c478bd9Sstevel@tonic-gate  * to the difference between current time and the modify time of the vnode
1307c478bd9Sstevel@tonic-gate  * when new attributes are cached. This allows the attributes for
1317c478bd9Sstevel@tonic-gate  * files that have changed recently to be timed out sooner than for files
1327c478bd9Sstevel@tonic-gate  * that have not changed for a long time. There are minimum and maximum
1337c478bd9Sstevel@tonic-gate  * timeout values that can be set per mount point.
1347c478bd9Sstevel@tonic-gate  */
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate /*
1377c478bd9Sstevel@tonic-gate  * If a cache purge is in progress, wait for it to finish.
1387c478bd9Sstevel@tonic-gate  *
1397c478bd9Sstevel@tonic-gate  * The current thread must not be in the middle of an
1407c478bd9Sstevel@tonic-gate  * nfs4_start_op/nfs4_end_op region.  Otherwise, there could be a deadlock
1417c478bd9Sstevel@tonic-gate  * between this thread, a recovery thread, and the page flush thread.
1427c478bd9Sstevel@tonic-gate  */
1437c478bd9Sstevel@tonic-gate int
1447c478bd9Sstevel@tonic-gate nfs4_waitfor_purge_complete(vnode_t *vp)
1457c478bd9Sstevel@tonic-gate {
1467c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
1477c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
1487c478bd9Sstevel@tonic-gate 
1497c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
1507c478bd9Sstevel@tonic-gate 	if ((rp->r_serial != NULL && rp->r_serial != curthread) ||
1517c478bd9Sstevel@tonic-gate 	    ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread)) {
1527c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
1537c478bd9Sstevel@tonic-gate 		sigintr(&smask, VTOMI4(vp)->mi_flags & MI4_INT);
1547c478bd9Sstevel@tonic-gate 		while ((rp->r_serial != NULL && rp->r_serial != curthread) ||
1557c478bd9Sstevel@tonic-gate 		    ((rp->r_flags & R4PGFLUSH) &&
1567c478bd9Sstevel@tonic-gate 		    rp->r_pgflush != curthread)) {
1577c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) {
1587c478bd9Sstevel@tonic-gate 				sigunintr(&smask);
1597c478bd9Sstevel@tonic-gate 				mutex_exit(&rp->r_statelock);
1607c478bd9Sstevel@tonic-gate 				return (EINTR);
1617c478bd9Sstevel@tonic-gate 			}
1627c478bd9Sstevel@tonic-gate 		}
1637c478bd9Sstevel@tonic-gate 		sigunintr(&smask);
1647c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
1657c478bd9Sstevel@tonic-gate 	}
1667c478bd9Sstevel@tonic-gate 	return (0);
1677c478bd9Sstevel@tonic-gate }
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate /*
1707c478bd9Sstevel@tonic-gate  * Validate caches by checking cached attributes. If they have timed out,
1717c478bd9Sstevel@tonic-gate  * then get new attributes from the server.  As a side effect, cache
1727c478bd9Sstevel@tonic-gate  * invalidation is done if the attributes have changed.
1737c478bd9Sstevel@tonic-gate  *
1747c478bd9Sstevel@tonic-gate  * If the attributes have not timed out and if there is a cache
1757c478bd9Sstevel@tonic-gate  * invalidation being done by some other thread, then wait until that
1767c478bd9Sstevel@tonic-gate  * thread has completed the cache invalidation.
1777c478bd9Sstevel@tonic-gate  */
1787c478bd9Sstevel@tonic-gate int
1797c478bd9Sstevel@tonic-gate nfs4_validate_caches(vnode_t *vp, cred_t *cr)
1807c478bd9Sstevel@tonic-gate {
1817c478bd9Sstevel@tonic-gate 	int error;
1827c478bd9Sstevel@tonic-gate 	nfs4_ga_res_t gar;
1837c478bd9Sstevel@tonic-gate 
1847c478bd9Sstevel@tonic-gate 	if (ATTRCACHE4_VALID(vp)) {
1857c478bd9Sstevel@tonic-gate 		error = nfs4_waitfor_purge_complete(vp);
1867c478bd9Sstevel@tonic-gate 		if (error)
1877c478bd9Sstevel@tonic-gate 			return (error);
1887c478bd9Sstevel@tonic-gate 		return (0);
1897c478bd9Sstevel@tonic-gate 	}
1907c478bd9Sstevel@tonic-gate 
1917c478bd9Sstevel@tonic-gate 	gar.n4g_va.va_mask = AT_ALL;
1927c478bd9Sstevel@tonic-gate 	return (nfs4_getattr_otw(vp, &gar, cr, 0));
1937c478bd9Sstevel@tonic-gate }
1947c478bd9Sstevel@tonic-gate 
1957c478bd9Sstevel@tonic-gate /*
1967c478bd9Sstevel@tonic-gate  * Fill in attribute from the cache.
1977c478bd9Sstevel@tonic-gate  * If valid, then return 0 to indicate that no error occurred,
1987c478bd9Sstevel@tonic-gate  * otherwise return 1 to indicate that an error occurred.
1997c478bd9Sstevel@tonic-gate  */
2007c478bd9Sstevel@tonic-gate static int
2017c478bd9Sstevel@tonic-gate nfs4_getattr_cache(vnode_t *vp, struct vattr *vap)
2027c478bd9Sstevel@tonic-gate {
2037c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
2047c478bd9Sstevel@tonic-gate 
2057c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
2067c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2077c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statev4_lock);
2087c478bd9Sstevel@tonic-gate 	if (ATTRCACHE4_VALID(vp)) {
2097c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statev4_lock);
2107c478bd9Sstevel@tonic-gate 		/*
2117c478bd9Sstevel@tonic-gate 		 * Cached attributes are valid
2127c478bd9Sstevel@tonic-gate 		 */
2137c478bd9Sstevel@tonic-gate 		*vap = rp->r_attr;
2147c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
2157c478bd9Sstevel@tonic-gate 		return (0);
2167c478bd9Sstevel@tonic-gate 	}
2177c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statev4_lock);
2187c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2197c478bd9Sstevel@tonic-gate 	return (1);
2207c478bd9Sstevel@tonic-gate }
2217c478bd9Sstevel@tonic-gate 
2227c478bd9Sstevel@tonic-gate 
2237c478bd9Sstevel@tonic-gate /*
2247c478bd9Sstevel@tonic-gate  * If returned error is ESTALE flush all caches.  The nfs4_purge_caches()
2257c478bd9Sstevel@tonic-gate  * call is synchronous because all the pages were invalidated by the
2267c478bd9Sstevel@tonic-gate  * nfs4_invalidate_pages() call.
2277c478bd9Sstevel@tonic-gate  */
2287c478bd9Sstevel@tonic-gate void
2297c478bd9Sstevel@tonic-gate nfs4_purge_stale_fh(int errno, vnode_t *vp, cred_t *cr)
2307c478bd9Sstevel@tonic-gate {
2317c478bd9Sstevel@tonic-gate 	struct rnode4 *rp = VTOR4(vp);
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate 	/* Ensure that the ..._end_op() call has been done */
2347c478bd9Sstevel@tonic-gate 	ASSERT(tsd_get(nfs4_tsd_key) == NULL);
2357c478bd9Sstevel@tonic-gate 
2367c478bd9Sstevel@tonic-gate 	if (errno != ESTALE)
2377c478bd9Sstevel@tonic-gate 		return;
2387c478bd9Sstevel@tonic-gate 
2397c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2407c478bd9Sstevel@tonic-gate 	rp->r_flags |= R4STALE;
2417c478bd9Sstevel@tonic-gate 	if (!rp->r_error)
2427c478bd9Sstevel@tonic-gate 		rp->r_error = errno;
2437c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2447c478bd9Sstevel@tonic-gate 	if (nfs4_has_pages(vp))
2457c478bd9Sstevel@tonic-gate 		nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
2467c478bd9Sstevel@tonic-gate 	nfs4_purge_caches(vp, NFS4_PURGE_DNLC, cr, FALSE);
2477c478bd9Sstevel@tonic-gate }
2487c478bd9Sstevel@tonic-gate 
2497c478bd9Sstevel@tonic-gate /*
2507c478bd9Sstevel@tonic-gate  * Purge all of the various NFS `data' caches.  If "asyncpg" is TRUE, the
2517c478bd9Sstevel@tonic-gate  * page purge is done asynchronously.
2527c478bd9Sstevel@tonic-gate  */
2537c478bd9Sstevel@tonic-gate void
2547c478bd9Sstevel@tonic-gate nfs4_purge_caches(vnode_t *vp, int purge_dnlc, cred_t *cr, int asyncpg)
2557c478bd9Sstevel@tonic-gate {
2567c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
2577c478bd9Sstevel@tonic-gate 	char *contents;
2587c478bd9Sstevel@tonic-gate 	vnode_t *xattr;
2597c478bd9Sstevel@tonic-gate 	int size;
2607c478bd9Sstevel@tonic-gate 	int pgflush;			/* are we the page flush thread? */
2617c478bd9Sstevel@tonic-gate 
2627c478bd9Sstevel@tonic-gate 	/*
2637c478bd9Sstevel@tonic-gate 	 * Purge the DNLC for any entries which refer to this file.
2647c478bd9Sstevel@tonic-gate 	 */
2657c478bd9Sstevel@tonic-gate 	if (vp->v_count > 1 &&
2667c478bd9Sstevel@tonic-gate 	    (vp->v_type == VDIR || purge_dnlc == NFS4_PURGE_DNLC))
2677c478bd9Sstevel@tonic-gate 		dnlc_purge_vp(vp);
2687c478bd9Sstevel@tonic-gate 
2697c478bd9Sstevel@tonic-gate 	/*
2707c478bd9Sstevel@tonic-gate 	 * Clear any readdir state bits and purge the readlink response cache.
2717c478bd9Sstevel@tonic-gate 	 */
2727c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
2737c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2747c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~R4LOOKUP;
2757c478bd9Sstevel@tonic-gate 	contents = rp->r_symlink.contents;
2767c478bd9Sstevel@tonic-gate 	size = rp->r_symlink.size;
2777c478bd9Sstevel@tonic-gate 	rp->r_symlink.contents = NULL;
2787c478bd9Sstevel@tonic-gate 
2797c478bd9Sstevel@tonic-gate 	xattr = rp->r_xattr_dir;
2807c478bd9Sstevel@tonic-gate 	rp->r_xattr_dir = NULL;
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate 	/*
2837c478bd9Sstevel@tonic-gate 	 * Purge pathconf cache too.
2847c478bd9Sstevel@tonic-gate 	 */
2857c478bd9Sstevel@tonic-gate 	rp->r_pathconf.pc4_xattr_valid = 0;
2867c478bd9Sstevel@tonic-gate 	rp->r_pathconf.pc4_cache_valid = 0;
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate 	pgflush = (curthread == rp->r_pgflush);
2897c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2907c478bd9Sstevel@tonic-gate 
2917c478bd9Sstevel@tonic-gate 	if (contents != NULL) {
2927c478bd9Sstevel@tonic-gate 
2937c478bd9Sstevel@tonic-gate 		kmem_free((void *)contents, size);
2947c478bd9Sstevel@tonic-gate 	}
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate 	if (xattr != NULL)
2977c478bd9Sstevel@tonic-gate 		VN_RELE(xattr);
2987c478bd9Sstevel@tonic-gate 
2997c478bd9Sstevel@tonic-gate 	/*
3007c478bd9Sstevel@tonic-gate 	 * Flush the page cache.  If the current thread is the page flush
3017c478bd9Sstevel@tonic-gate 	 * thread, don't initiate a new page flush.  There's no need for
3027c478bd9Sstevel@tonic-gate 	 * it, and doing it correctly is hard.
3037c478bd9Sstevel@tonic-gate 	 */
3047c478bd9Sstevel@tonic-gate 	if (nfs4_has_pages(vp) && !pgflush) {
3057c478bd9Sstevel@tonic-gate 		if (!asyncpg) {
3067c478bd9Sstevel@tonic-gate 			(void) nfs4_waitfor_purge_complete(vp);
3077c478bd9Sstevel@tonic-gate 			flush_pages(vp, cr);
3087c478bd9Sstevel@tonic-gate 		} else {
3097c478bd9Sstevel@tonic-gate 			pgflush_t *args;
3107c478bd9Sstevel@tonic-gate 
3117c478bd9Sstevel@tonic-gate 			/*
3127c478bd9Sstevel@tonic-gate 			 * We don't hold r_statelock while creating the
3137c478bd9Sstevel@tonic-gate 			 * thread, in case the call blocks.  So we use a
3147c478bd9Sstevel@tonic-gate 			 * flag to indicate that a page flush thread is
3157c478bd9Sstevel@tonic-gate 			 * active.
3167c478bd9Sstevel@tonic-gate 			 */
3177c478bd9Sstevel@tonic-gate 			mutex_enter(&rp->r_statelock);
3187c478bd9Sstevel@tonic-gate 			if (rp->r_flags & R4PGFLUSH) {
3197c478bd9Sstevel@tonic-gate 				mutex_exit(&rp->r_statelock);
3207c478bd9Sstevel@tonic-gate 			} else {
3217c478bd9Sstevel@tonic-gate 				rp->r_flags |= R4PGFLUSH;
3227c478bd9Sstevel@tonic-gate 				mutex_exit(&rp->r_statelock);
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 				args = kmem_alloc(sizeof (pgflush_t),
3257c478bd9Sstevel@tonic-gate 						KM_SLEEP);
3267c478bd9Sstevel@tonic-gate 				args->vp = vp;
3277c478bd9Sstevel@tonic-gate 				VN_HOLD(args->vp);
3287c478bd9Sstevel@tonic-gate 				args->cr = cr;
3297c478bd9Sstevel@tonic-gate 				crhold(args->cr);
3307c478bd9Sstevel@tonic-gate 				(void) zthread_create(NULL, 0,
3317c478bd9Sstevel@tonic-gate 						nfs4_pgflush_thread, args, 0,
3327c478bd9Sstevel@tonic-gate 						minclsyspri);
3337c478bd9Sstevel@tonic-gate 			}
3347c478bd9Sstevel@tonic-gate 		}
3357c478bd9Sstevel@tonic-gate 	}
3367c478bd9Sstevel@tonic-gate 
3377c478bd9Sstevel@tonic-gate 	/*
3387c478bd9Sstevel@tonic-gate 	 * Flush the readdir response cache.
3397c478bd9Sstevel@tonic-gate 	 */
3407c478bd9Sstevel@tonic-gate 	nfs4_purge_rddir_cache(vp);
3417c478bd9Sstevel@tonic-gate }
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate /*
3447c478bd9Sstevel@tonic-gate  * Invalidate all pages for the given file, after writing back the dirty
3457c478bd9Sstevel@tonic-gate  * ones.
3467c478bd9Sstevel@tonic-gate  */
3477c478bd9Sstevel@tonic-gate 
3487c478bd9Sstevel@tonic-gate static void
3497c478bd9Sstevel@tonic-gate flush_pages(vnode_t *vp, cred_t *cr)
3507c478bd9Sstevel@tonic-gate {
3517c478bd9Sstevel@tonic-gate 	int error;
3527c478bd9Sstevel@tonic-gate 	rnode4_t *rp = VTOR4(vp);
3537c478bd9Sstevel@tonic-gate 
3547c478bd9Sstevel@tonic-gate 	error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_INVAL, cr);
3557c478bd9Sstevel@tonic-gate 	if (error == ENOSPC || error == EDQUOT) {
3567c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
3577c478bd9Sstevel@tonic-gate 		if (!rp->r_error)
3587c478bd9Sstevel@tonic-gate 			rp->r_error = error;
3597c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
3607c478bd9Sstevel@tonic-gate 	}
3617c478bd9Sstevel@tonic-gate }
3627c478bd9Sstevel@tonic-gate 
3637c478bd9Sstevel@tonic-gate /*
3647c478bd9Sstevel@tonic-gate  * Page flush thread.
3657c478bd9Sstevel@tonic-gate  */
3667c478bd9Sstevel@tonic-gate 
3677c478bd9Sstevel@tonic-gate static void
3687c478bd9Sstevel@tonic-gate nfs4_pgflush_thread(pgflush_t *args)
3697c478bd9Sstevel@tonic-gate {
3707c478bd9Sstevel@tonic-gate 	rnode4_t *rp = VTOR4(args->vp);
3717c478bd9Sstevel@tonic-gate 
3727c478bd9Sstevel@tonic-gate 	/* remember which thread we are, so we don't deadlock ourselves */
3737c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
3747c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_pgflush == NULL);
3757c478bd9Sstevel@tonic-gate 	rp->r_pgflush = curthread;
3767c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
3777c478bd9Sstevel@tonic-gate 
3787c478bd9Sstevel@tonic-gate 	flush_pages(args->vp, args->cr);
3797c478bd9Sstevel@tonic-gate 
3807c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
3817c478bd9Sstevel@tonic-gate 	rp->r_pgflush = NULL;
3827c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~R4PGFLUSH;
3837c478bd9Sstevel@tonic-gate 	cv_broadcast(&rp->r_cv);
3847c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
3857c478bd9Sstevel@tonic-gate 
3867c478bd9Sstevel@tonic-gate 	VN_RELE(args->vp);
3877c478bd9Sstevel@tonic-gate 	crfree(args->cr);
3887c478bd9Sstevel@tonic-gate 	kmem_free(args, sizeof (pgflush_t));
3897c478bd9Sstevel@tonic-gate 	zthread_exit();
3907c478bd9Sstevel@tonic-gate }
3917c478bd9Sstevel@tonic-gate 
3927c478bd9Sstevel@tonic-gate /*
3937c478bd9Sstevel@tonic-gate  * Purge the readdir cache of all entries which are not currently
3947c478bd9Sstevel@tonic-gate  * being filled.
3957c478bd9Sstevel@tonic-gate  */
3967c478bd9Sstevel@tonic-gate void
3977c478bd9Sstevel@tonic-gate nfs4_purge_rddir_cache(vnode_t *vp)
3987c478bd9Sstevel@tonic-gate {
3997c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
4007c478bd9Sstevel@tonic-gate 
4017c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
4027c478bd9Sstevel@tonic-gate 
4037c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
4047c478bd9Sstevel@tonic-gate 	rp->r_direof = NULL;
4057c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~R4LOOKUP;
4067c478bd9Sstevel@tonic-gate 	rp->r_flags |= R4READDIRWATTR;
4077c478bd9Sstevel@tonic-gate 	rddir4_cache_purge(rp);
4087c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
4097c478bd9Sstevel@tonic-gate }
4107c478bd9Sstevel@tonic-gate 
4117c478bd9Sstevel@tonic-gate /*
4127c478bd9Sstevel@tonic-gate  * Set attributes cache for given vnode using virtual attributes.  There is
4137c478bd9Sstevel@tonic-gate  * no cache validation, but if the attributes are deemed to be stale, they
4147c478bd9Sstevel@tonic-gate  * are ignored.  This corresponds to nfs3_attrcache().
4157c478bd9Sstevel@tonic-gate  *
4167c478bd9Sstevel@tonic-gate  * Set the timeout value on the attribute cache and fill it
4177c478bd9Sstevel@tonic-gate  * with the passed in attributes.
4187c478bd9Sstevel@tonic-gate  */
4197c478bd9Sstevel@tonic-gate void
4207c478bd9Sstevel@tonic-gate nfs4_attrcache_noinval(vnode_t *vp, nfs4_ga_res_t *garp, hrtime_t t)
4217c478bd9Sstevel@tonic-gate {
4227c478bd9Sstevel@tonic-gate 	rnode4_t *rp = VTOR4(vp);
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
4257c478bd9Sstevel@tonic-gate 	if (rp->r_time_attr_saved <= t)
4267c478bd9Sstevel@tonic-gate 		nfs4_attrcache_va(vp, garp, FALSE);
4277c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
4287c478bd9Sstevel@tonic-gate }
4297c478bd9Sstevel@tonic-gate 
4307c478bd9Sstevel@tonic-gate /*
4317c478bd9Sstevel@tonic-gate  * Use the passed in virtual attributes to check to see whether the
4327c478bd9Sstevel@tonic-gate  * data and metadata caches are valid, cache the new attributes, and
4337c478bd9Sstevel@tonic-gate  * then do the cache invalidation if required.
4347c478bd9Sstevel@tonic-gate  *
4357c478bd9Sstevel@tonic-gate  * The cache validation and caching of the new attributes is done
4367c478bd9Sstevel@tonic-gate  * atomically via the use of the mutex, r_statelock.  If required,
4377c478bd9Sstevel@tonic-gate  * the cache invalidation is done atomically w.r.t. the cache
4387c478bd9Sstevel@tonic-gate  * validation and caching of the attributes via the pseudo lock,
4397c478bd9Sstevel@tonic-gate  * r_serial.
4407c478bd9Sstevel@tonic-gate  *
4417c478bd9Sstevel@tonic-gate  * This routine is used to do cache validation and attributes caching
4427c478bd9Sstevel@tonic-gate  * for operations with a single set of post operation attributes.
4437c478bd9Sstevel@tonic-gate  */
4447c478bd9Sstevel@tonic-gate 
4457c478bd9Sstevel@tonic-gate void
4467c478bd9Sstevel@tonic-gate nfs4_attr_cache(vnode_t *vp, nfs4_ga_res_t *garp,
4477c478bd9Sstevel@tonic-gate 		hrtime_t t, cred_t *cr, int async,
4487c478bd9Sstevel@tonic-gate 		change_info4 *cinfo)
4497c478bd9Sstevel@tonic-gate {
4507c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
4517c478bd9Sstevel@tonic-gate 	int mtime_changed;
4527c478bd9Sstevel@tonic-gate 	int ctime_changed;
4537c478bd9Sstevel@tonic-gate 	vsecattr_t *vsp;
4547c478bd9Sstevel@tonic-gate 	int was_serial, set_time_cache_inval, recov;
4557c478bd9Sstevel@tonic-gate 	vattr_t *vap = &garp->n4g_va;
4567c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VTOMI4(vp);
4577c478bd9Sstevel@tonic-gate 
4587c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_vfsp->vfs_dev == garp->n4g_va.va_fsid);
4597c478bd9Sstevel@tonic-gate 
4607c478bd9Sstevel@tonic-gate 	/* Is curthread the recovery thread? */
4617c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
4627c478bd9Sstevel@tonic-gate 	recov = (VTOMI4(vp)->mi_recovthread == curthread);
4637c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
4647c478bd9Sstevel@tonic-gate 
4657c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
4667c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
4677c478bd9Sstevel@tonic-gate 	was_serial = (rp->r_serial == curthread);
4687c478bd9Sstevel@tonic-gate 	if (rp->r_serial && !was_serial) {
4697c478bd9Sstevel@tonic-gate 		klwp_t *lwp = ttolwp(curthread);
4707c478bd9Sstevel@tonic-gate 
4717c478bd9Sstevel@tonic-gate 		/*
4727c478bd9Sstevel@tonic-gate 		 * If we're the recovery thread, then purge current attrs
4737c478bd9Sstevel@tonic-gate 		 * and bail out to avoid potential deadlock between another
4747c478bd9Sstevel@tonic-gate 		 * thread caching attrs (r_serial thread), recov thread,
4757c478bd9Sstevel@tonic-gate 		 * and an async writer thread.
4767c478bd9Sstevel@tonic-gate 		 */
4777c478bd9Sstevel@tonic-gate 		if (recov) {
4787c478bd9Sstevel@tonic-gate 			PURGE_ATTRCACHE4_LOCKED(rp);
4797c478bd9Sstevel@tonic-gate 			mutex_exit(&rp->r_statelock);
4807c478bd9Sstevel@tonic-gate 			return;
4817c478bd9Sstevel@tonic-gate 		}
4827c478bd9Sstevel@tonic-gate 
4837c478bd9Sstevel@tonic-gate 		if (lwp != NULL)
4847c478bd9Sstevel@tonic-gate 			lwp->lwp_nostop++;
4857c478bd9Sstevel@tonic-gate 		while (rp->r_serial != NULL) {
4867c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) {
4877c478bd9Sstevel@tonic-gate 				mutex_exit(&rp->r_statelock);
4887c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
4897c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop--;
4907c478bd9Sstevel@tonic-gate 				return;
4917c478bd9Sstevel@tonic-gate 			}
4927c478bd9Sstevel@tonic-gate 		}
4937c478bd9Sstevel@tonic-gate 		if (lwp != NULL)
4947c478bd9Sstevel@tonic-gate 			lwp->lwp_nostop--;
4957c478bd9Sstevel@tonic-gate 	}
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	/*
4987c478bd9Sstevel@tonic-gate 	 * If there is a page flush thread, the current thread needs to
4997c478bd9Sstevel@tonic-gate 	 * bail out, to prevent a possible deadlock between the current
5007c478bd9Sstevel@tonic-gate 	 * thread (which might be in a start_op/end_op region), the
5017c478bd9Sstevel@tonic-gate 	 * recovery thread, and the page flush thread.  Expire the
5027c478bd9Sstevel@tonic-gate 	 * attribute cache, so that any attributes the current thread was
5037c478bd9Sstevel@tonic-gate 	 * going to set are not lost.
5047c478bd9Sstevel@tonic-gate 	 */
5057c478bd9Sstevel@tonic-gate 	if ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread) {
5067c478bd9Sstevel@tonic-gate 		PURGE_ATTRCACHE4_LOCKED(rp);
5077c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
5087c478bd9Sstevel@tonic-gate 		return;
5097c478bd9Sstevel@tonic-gate 	}
5107c478bd9Sstevel@tonic-gate 
5117c478bd9Sstevel@tonic-gate 	if (rp->r_time_attr_saved > t) {
5127c478bd9Sstevel@tonic-gate 		/*
5137c478bd9Sstevel@tonic-gate 		 * Attributes have been cached since these attributes were
514*00fdf600Smaheshvs 		 * probably made. If there is an inconsistency in what is
515*00fdf600Smaheshvs 		 * cached, mark them invalid. If not, don't act on them.
5167c478bd9Sstevel@tonic-gate 		 */
517*00fdf600Smaheshvs 		if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size))
518*00fdf600Smaheshvs 			PURGE_ATTRCACHE4_LOCKED(rp);
5197c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
5207c478bd9Sstevel@tonic-gate 		return;
5217c478bd9Sstevel@tonic-gate 	}
5227c478bd9Sstevel@tonic-gate 	set_time_cache_inval = 0;
5237c478bd9Sstevel@tonic-gate 	if (cinfo) {
5247c478bd9Sstevel@tonic-gate 		/*
5257c478bd9Sstevel@tonic-gate 		 * Only directory modifying callers pass non-NULL cinfo.
5267c478bd9Sstevel@tonic-gate 		 */
5277c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type == VDIR);
5287c478bd9Sstevel@tonic-gate 		/*
5297c478bd9Sstevel@tonic-gate 		 * If the cache timeout either doesn't exist or hasn't expired,
5307c478bd9Sstevel@tonic-gate 		 * and dir didn't changed on server before dirmod op
5317c478bd9Sstevel@tonic-gate 		 * and dir didn't change after dirmod op but before getattr
5327c478bd9Sstevel@tonic-gate 		 * then there's a chance that the client's cached data for
5337c478bd9Sstevel@tonic-gate 		 * this object is current (not stale).  No immediate cache
5347c478bd9Sstevel@tonic-gate 		 * flush is required.
5357c478bd9Sstevel@tonic-gate 		 *
5367c478bd9Sstevel@tonic-gate 		 */
5377c478bd9Sstevel@tonic-gate 		if ((! rp->r_time_cache_inval || t < rp->r_time_cache_inval) &&
5387c478bd9Sstevel@tonic-gate 		    cinfo->before == rp->r_change &&
5397c478bd9Sstevel@tonic-gate 		    (garp->n4g_change_valid &&
5407c478bd9Sstevel@tonic-gate 		    cinfo->after == garp->n4g_change)) {
5417c478bd9Sstevel@tonic-gate 
5427c478bd9Sstevel@tonic-gate 			/*
5437c478bd9Sstevel@tonic-gate 			 * If atomic isn't set, then the before/after info
5447c478bd9Sstevel@tonic-gate 			 * cannot be blindly trusted.  For this case, we tell
5457c478bd9Sstevel@tonic-gate 			 * nfs4_attrcache_va to cache the attrs but also
5467c478bd9Sstevel@tonic-gate 			 * establish an absolute maximum cache timeout.  When
5477c478bd9Sstevel@tonic-gate 			 * the timeout is reached, caches will be flushed.
5487c478bd9Sstevel@tonic-gate 			 */
5497c478bd9Sstevel@tonic-gate 			if (! cinfo->atomic)
5507c478bd9Sstevel@tonic-gate 				set_time_cache_inval = 1;
5517c478bd9Sstevel@tonic-gate 
5527c478bd9Sstevel@tonic-gate 			mtime_changed = 0;
5537c478bd9Sstevel@tonic-gate 			ctime_changed = 0;
5547c478bd9Sstevel@tonic-gate 		} else {
5557c478bd9Sstevel@tonic-gate 
5567c478bd9Sstevel@tonic-gate 			/*
5577c478bd9Sstevel@tonic-gate 			 * We're not sure exactly what changed, but we know
5587c478bd9Sstevel@tonic-gate 			 * what to do.  flush all caches for dir.  remove the
5597c478bd9Sstevel@tonic-gate 			 * attr timeout.
5607c478bd9Sstevel@tonic-gate 			 *
5617c478bd9Sstevel@tonic-gate 			 * a) timeout expired.  flush all caches.
5627c478bd9Sstevel@tonic-gate 			 * b) r_change != cinfo.before.  flush all caches.
5637c478bd9Sstevel@tonic-gate 			 * c) r_change == cinfo.before, but cinfo.after !=
5647c478bd9Sstevel@tonic-gate 			 *    post-op getattr(change).  flush all caches.
5657c478bd9Sstevel@tonic-gate 			 * d) post-op getattr(change) not provided by server.
5667c478bd9Sstevel@tonic-gate 			 *    flush all caches.
5677c478bd9Sstevel@tonic-gate 			 */
5687c478bd9Sstevel@tonic-gate 			mtime_changed = 1;
5697c478bd9Sstevel@tonic-gate 			ctime_changed = 1;
5707c478bd9Sstevel@tonic-gate 			rp->r_time_cache_inval = 0;
5717c478bd9Sstevel@tonic-gate 		}
5727c478bd9Sstevel@tonic-gate 	} else {
5737c478bd9Sstevel@tonic-gate 		if (!(rp->r_flags & R4WRITEMODIFIED)) {
5747c478bd9Sstevel@tonic-gate 			if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size))
5757c478bd9Sstevel@tonic-gate 				mtime_changed = 1;
5767c478bd9Sstevel@tonic-gate 			else
5777c478bd9Sstevel@tonic-gate 				mtime_changed = 0;
5787c478bd9Sstevel@tonic-gate 			if (rp->r_attr.va_ctime.tv_sec !=
5797c478bd9Sstevel@tonic-gate 			    vap->va_ctime.tv_sec ||
5807c478bd9Sstevel@tonic-gate 			    rp->r_attr.va_ctime.tv_nsec !=
5817c478bd9Sstevel@tonic-gate 			    vap->va_ctime.tv_nsec)
5827c478bd9Sstevel@tonic-gate 				ctime_changed = 1;
5837c478bd9Sstevel@tonic-gate 			else
5847c478bd9Sstevel@tonic-gate 				ctime_changed = 0;
5857c478bd9Sstevel@tonic-gate 		} else {
5867c478bd9Sstevel@tonic-gate 			mtime_changed = 0;
5877c478bd9Sstevel@tonic-gate 			ctime_changed = 0;
5887c478bd9Sstevel@tonic-gate 		}
5897c478bd9Sstevel@tonic-gate 	}
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate 	nfs4_attrcache_va(vp, garp, set_time_cache_inval);
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate 	if (!mtime_changed && !ctime_changed) {
5947c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
5957c478bd9Sstevel@tonic-gate 		return;
5967c478bd9Sstevel@tonic-gate 	}
5977c478bd9Sstevel@tonic-gate 
5987c478bd9Sstevel@tonic-gate 	rp->r_serial = curthread;
5997c478bd9Sstevel@tonic-gate 
6007c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate 	/*
6037c478bd9Sstevel@tonic-gate 	 * If we're the recov thread, then force async nfs4_purge_caches
6047c478bd9Sstevel@tonic-gate 	 * to avoid potential deadlock.
6057c478bd9Sstevel@tonic-gate 	 */
6067c478bd9Sstevel@tonic-gate 	if (mtime_changed)
6077c478bd9Sstevel@tonic-gate 		nfs4_purge_caches(vp, NFS4_NOPURGE_DNLC, cr, recov ? 1 : async);
6087c478bd9Sstevel@tonic-gate 
6097c478bd9Sstevel@tonic-gate 	if (ctime_changed) {
6107c478bd9Sstevel@tonic-gate 		(void) nfs4_access_purge_rp(rp);
6117c478bd9Sstevel@tonic-gate 		if (rp->r_secattr != NULL) {
6127c478bd9Sstevel@tonic-gate 			mutex_enter(&rp->r_statelock);
6137c478bd9Sstevel@tonic-gate 			vsp = rp->r_secattr;
6147c478bd9Sstevel@tonic-gate 			rp->r_secattr = NULL;
6157c478bd9Sstevel@tonic-gate 			mutex_exit(&rp->r_statelock);
6167c478bd9Sstevel@tonic-gate 			if (vsp != NULL)
6177c478bd9Sstevel@tonic-gate 				nfs4_acl_free_cache(vsp);
6187c478bd9Sstevel@tonic-gate 		}
6197c478bd9Sstevel@tonic-gate 	}
6207c478bd9Sstevel@tonic-gate 
6217c478bd9Sstevel@tonic-gate 	if (!was_serial) {
6227c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
6237c478bd9Sstevel@tonic-gate 		rp->r_serial = NULL;
6247c478bd9Sstevel@tonic-gate 		cv_broadcast(&rp->r_cv);
6257c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
6267c478bd9Sstevel@tonic-gate 	}
6277c478bd9Sstevel@tonic-gate }
6287c478bd9Sstevel@tonic-gate 
6297c478bd9Sstevel@tonic-gate /*
6307c478bd9Sstevel@tonic-gate  * Set attributes cache for given vnode using virtual attributes.
6317c478bd9Sstevel@tonic-gate  *
6327c478bd9Sstevel@tonic-gate  * Set the timeout value on the attribute cache and fill it
6337c478bd9Sstevel@tonic-gate  * with the passed in attributes.
6347c478bd9Sstevel@tonic-gate  *
6357c478bd9Sstevel@tonic-gate  * The caller must be holding r_statelock.
6367c478bd9Sstevel@tonic-gate  */
6377c478bd9Sstevel@tonic-gate static void
6387c478bd9Sstevel@tonic-gate nfs4_attrcache_va(vnode_t *vp, nfs4_ga_res_t *garp, int set_cache_timeout)
6397c478bd9Sstevel@tonic-gate {
6407c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
6417c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
6427c478bd9Sstevel@tonic-gate 	hrtime_t delta;
6437c478bd9Sstevel@tonic-gate 	hrtime_t now;
6447c478bd9Sstevel@tonic-gate 	vattr_t *vap = &garp->n4g_va;
6457c478bd9Sstevel@tonic-gate 
6467c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
6477c478bd9Sstevel@tonic-gate 
6487c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&rp->r_statelock));
6497c478bd9Sstevel@tonic-gate 	ASSERT(vap->va_mask == AT_ALL);
6507c478bd9Sstevel@tonic-gate 
6517c478bd9Sstevel@tonic-gate 	/* Switch to master before checking v_flag */
6527c478bd9Sstevel@tonic-gate 	if (IS_SHADOW(vp, rp))
6537c478bd9Sstevel@tonic-gate 		vp = RTOV4(rp);
6547c478bd9Sstevel@tonic-gate 
6557c478bd9Sstevel@tonic-gate 	now = gethrtime();
6567c478bd9Sstevel@tonic-gate 
6577c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
6587c478bd9Sstevel@tonic-gate 
6597c478bd9Sstevel@tonic-gate 	/*
6607c478bd9Sstevel@tonic-gate 	 * Only establish a new cache timeout (if requested).  Never
6617c478bd9Sstevel@tonic-gate 	 * extend a timeout.  Never clear a timeout.  Clearing a timeout
6627c478bd9Sstevel@tonic-gate 	 * is done by nfs4_update_dircaches (ancestor in our call chain)
6637c478bd9Sstevel@tonic-gate 	 */
6647c478bd9Sstevel@tonic-gate 	if (set_cache_timeout && ! rp->r_time_cache_inval)
6657c478bd9Sstevel@tonic-gate 		rp->r_time_cache_inval = now + mi->mi_acdirmax;
6667c478bd9Sstevel@tonic-gate 
6677c478bd9Sstevel@tonic-gate 	/*
6687c478bd9Sstevel@tonic-gate 	 * Delta is the number of nanoseconds that we will
6697c478bd9Sstevel@tonic-gate 	 * cache the attributes of the file.  It is based on
6707c478bd9Sstevel@tonic-gate 	 * the number of nanoseconds since the last time that
6717c478bd9Sstevel@tonic-gate 	 * we detected a change.  The assumption is that files
6727c478bd9Sstevel@tonic-gate 	 * that changed recently are likely to change again.
6737c478bd9Sstevel@tonic-gate 	 * There is a minimum and a maximum for regular files
6747c478bd9Sstevel@tonic-gate 	 * and for directories which is enforced though.
6757c478bd9Sstevel@tonic-gate 	 *
6767c478bd9Sstevel@tonic-gate 	 * Using the time since last change was detected
6777c478bd9Sstevel@tonic-gate 	 * eliminates direct comparison or calculation
6787c478bd9Sstevel@tonic-gate 	 * using mixed client and server times.  NFS does
6797c478bd9Sstevel@tonic-gate 	 * not make any assumptions regarding the client
6807c478bd9Sstevel@tonic-gate 	 * and server clocks being synchronized.
6817c478bd9Sstevel@tonic-gate 	 */
6827c478bd9Sstevel@tonic-gate 	if (vap->va_mtime.tv_sec != rp->r_attr.va_mtime.tv_sec ||
6837c478bd9Sstevel@tonic-gate 	    vap->va_mtime.tv_nsec != rp->r_attr.va_mtime.tv_nsec ||
6847c478bd9Sstevel@tonic-gate 	    vap->va_size != rp->r_attr.va_size) {
6857c478bd9Sstevel@tonic-gate 		rp->r_time_attr_saved = now;
6867c478bd9Sstevel@tonic-gate 	}
6877c478bd9Sstevel@tonic-gate 
6887c478bd9Sstevel@tonic-gate 	if ((mi->mi_flags & MI4_NOAC) || (vp->v_flag & VNOCACHE))
6897c478bd9Sstevel@tonic-gate 		delta = 0;
6907c478bd9Sstevel@tonic-gate 	else {
6917c478bd9Sstevel@tonic-gate 		delta = now - rp->r_time_attr_saved;
6927c478bd9Sstevel@tonic-gate 		if (vp->v_type == VDIR) {
6937c478bd9Sstevel@tonic-gate 			if (delta < mi->mi_acdirmin)
6947c478bd9Sstevel@tonic-gate 				delta = mi->mi_acdirmin;
6957c478bd9Sstevel@tonic-gate 			else if (delta > mi->mi_acdirmax)
6967c478bd9Sstevel@tonic-gate 				delta = mi->mi_acdirmax;
6977c478bd9Sstevel@tonic-gate 		} else {
6987c478bd9Sstevel@tonic-gate 			if (delta < mi->mi_acregmin)
6997c478bd9Sstevel@tonic-gate 				delta = mi->mi_acregmin;
7007c478bd9Sstevel@tonic-gate 			else if (delta > mi->mi_acregmax)
7017c478bd9Sstevel@tonic-gate 				delta = mi->mi_acregmax;
7027c478bd9Sstevel@tonic-gate 		}
7037c478bd9Sstevel@tonic-gate 	}
7047c478bd9Sstevel@tonic-gate 	rp->r_time_attr_inval = now + delta;
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	rp->r_attr = *vap;
7077c478bd9Sstevel@tonic-gate 	if (garp->n4g_change_valid)
7087c478bd9Sstevel@tonic-gate 		rp->r_change = garp->n4g_change;
7097c478bd9Sstevel@tonic-gate 
7107c478bd9Sstevel@tonic-gate 	/*
7117c478bd9Sstevel@tonic-gate 	 * The attributes that were returned may be valid and can
7127c478bd9Sstevel@tonic-gate 	 * be used, but they may not be allowed to be cached.
7137c478bd9Sstevel@tonic-gate 	 * Reset the timers to cause immediate invalidation and
7147c478bd9Sstevel@tonic-gate 	 * clear r_change so no VERIFY operations will suceed
7157c478bd9Sstevel@tonic-gate 	 */
7167c478bd9Sstevel@tonic-gate 	if (garp->n4g_attrwhy == NFS4_GETATTR_NOCACHE_OK) {
7177c478bd9Sstevel@tonic-gate 		rp->r_time_attr_inval = now;
7187c478bd9Sstevel@tonic-gate 		rp->r_time_attr_saved = now;
7197c478bd9Sstevel@tonic-gate 		rp->r_change = 0;
7207c478bd9Sstevel@tonic-gate 	}
7217c478bd9Sstevel@tonic-gate 
7227c478bd9Sstevel@tonic-gate 	/*
7237c478bd9Sstevel@tonic-gate 	 * If mounted_on_fileid returned AND the object is a stub,
7247c478bd9Sstevel@tonic-gate 	 * then set object's va_nodeid to the mounted over fid
7257c478bd9Sstevel@tonic-gate 	 * returned by server.
7267c478bd9Sstevel@tonic-gate 	 *
7277c478bd9Sstevel@tonic-gate 	 * If mounted_on_fileid not provided/supported, then
7287c478bd9Sstevel@tonic-gate 	 * just set it to 0 for now.  Eventually it would be
7297c478bd9Sstevel@tonic-gate 	 * better to set it to a hashed version of FH.  This
7307c478bd9Sstevel@tonic-gate 	 * would probably be good enough to provide a unique
7317c478bd9Sstevel@tonic-gate 	 * fid/d_ino within a dir.
7327c478bd9Sstevel@tonic-gate 	 *
7337c478bd9Sstevel@tonic-gate 	 * We don't need to carry mounted_on_fileid in the
7347c478bd9Sstevel@tonic-gate 	 * rnode as long as the client never requests fileid
7357c478bd9Sstevel@tonic-gate 	 * without also requesting mounted_on_fileid.  For
7367c478bd9Sstevel@tonic-gate 	 * now, it stays.
7377c478bd9Sstevel@tonic-gate 	 */
7387c478bd9Sstevel@tonic-gate 	if (garp->n4g_mon_fid_valid) {
7397c478bd9Sstevel@tonic-gate 		rp->r_mntd_fid = garp->n4g_mon_fid;
7407c478bd9Sstevel@tonic-gate 
7417c478bd9Sstevel@tonic-gate 		if (rp->r_flags & R4SRVSTUB)
7427c478bd9Sstevel@tonic-gate 			rp->r_attr.va_nodeid = rp->r_mntd_fid;
7437c478bd9Sstevel@tonic-gate 	}
7447c478bd9Sstevel@tonic-gate 
7457c478bd9Sstevel@tonic-gate 	/*
7467c478bd9Sstevel@tonic-gate 	 * Check to see if there are valid pathconf bits to
7477c478bd9Sstevel@tonic-gate 	 * cache in the rnode.
7487c478bd9Sstevel@tonic-gate 	 */
7497c478bd9Sstevel@tonic-gate 	if (garp->n4g_ext_res) {
7507c478bd9Sstevel@tonic-gate 		if (garp->n4g_ext_res->n4g_pc4.pc4_cache_valid) {
7517c478bd9Sstevel@tonic-gate 			rp->r_pathconf = garp->n4g_ext_res->n4g_pc4;
7527c478bd9Sstevel@tonic-gate 		} else {
7537c478bd9Sstevel@tonic-gate 			if (garp->n4g_ext_res->n4g_pc4.pc4_xattr_valid) {
7547c478bd9Sstevel@tonic-gate 				rp->r_pathconf.pc4_xattr_valid = TRUE;
7557c478bd9Sstevel@tonic-gate 				rp->r_pathconf.pc4_xattr_exists =
7567c478bd9Sstevel@tonic-gate 				    garp->n4g_ext_res->n4g_pc4.pc4_xattr_exists;
7577c478bd9Sstevel@tonic-gate 			}
7587c478bd9Sstevel@tonic-gate 		}
7597c478bd9Sstevel@tonic-gate 	}
7607c478bd9Sstevel@tonic-gate 	/*
7617c478bd9Sstevel@tonic-gate 	 * Update the size of the file if there is no cached data or if
7627c478bd9Sstevel@tonic-gate 	 * the cached data is clean and there is no data being written
7637c478bd9Sstevel@tonic-gate 	 * out.
7647c478bd9Sstevel@tonic-gate 	 */
7657c478bd9Sstevel@tonic-gate 	if (rp->r_size != vap->va_size &&
7667c478bd9Sstevel@tonic-gate 	    (!vn_has_cached_data(vp) ||
7677c478bd9Sstevel@tonic-gate 	    (!(rp->r_flags & R4DIRTY) && rp->r_count == 0))) {
7687c478bd9Sstevel@tonic-gate 		rp->r_size = vap->va_size;
7697c478bd9Sstevel@tonic-gate 	}
7707c478bd9Sstevel@tonic-gate 	nfs_setswaplike(vp, vap);
7717c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~R4WRITEMODIFIED;
7727c478bd9Sstevel@tonic-gate }
7737c478bd9Sstevel@tonic-gate 
7747c478bd9Sstevel@tonic-gate /*
7757c478bd9Sstevel@tonic-gate  * Get attributes over-the-wire and update attributes cache
7767c478bd9Sstevel@tonic-gate  * if no error occurred in the over-the-wire operation.
7777c478bd9Sstevel@tonic-gate  * Return 0 if successful, otherwise error.
7787c478bd9Sstevel@tonic-gate  */
7797c478bd9Sstevel@tonic-gate int
7807c478bd9Sstevel@tonic-gate nfs4_getattr_otw(vnode_t *vp, nfs4_ga_res_t *garp, cred_t *cr, int get_acl)
7817c478bd9Sstevel@tonic-gate {
7827c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VTOMI4(vp);
7837c478bd9Sstevel@tonic-gate 	hrtime_t t;
7847c478bd9Sstevel@tonic-gate 	nfs4_recov_state_t recov_state;
7857c478bd9Sstevel@tonic-gate 	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
7867c478bd9Sstevel@tonic-gate 
7877c478bd9Sstevel@tonic-gate 	recov_state.rs_flags = 0;
7887c478bd9Sstevel@tonic-gate 	recov_state.rs_num_retry_despite_err = 0;
7897c478bd9Sstevel@tonic-gate 
7907c478bd9Sstevel@tonic-gate 	/* Save the original mount point security flavor */
7917c478bd9Sstevel@tonic-gate 	(void) save_mnt_secinfo(mi->mi_curr_serv);
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate recov_retry:
7947c478bd9Sstevel@tonic-gate 	if ((e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR,
7957c478bd9Sstevel@tonic-gate 						&recov_state, NULL))) {
7967c478bd9Sstevel@tonic-gate 		(void) check_mnt_secinfo(mi->mi_curr_serv, vp);
7977c478bd9Sstevel@tonic-gate 		return (e.error);
7987c478bd9Sstevel@tonic-gate 	}
7997c478bd9Sstevel@tonic-gate 
8007c478bd9Sstevel@tonic-gate 	t = gethrtime();
8017c478bd9Sstevel@tonic-gate 
8027c478bd9Sstevel@tonic-gate 	nfs4_getattr_otw_norecovery(vp, garp, &e, cr, get_acl);
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate 	if (nfs4_needs_recovery(&e, FALSE, vp->v_vfsp)) {
8057c478bd9Sstevel@tonic-gate 		if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL,
8067c478bd9Sstevel@tonic-gate 		    NULL, OP_GETATTR, NULL) == FALSE)  {
8077c478bd9Sstevel@tonic-gate 			nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR,
8087c478bd9Sstevel@tonic-gate 					&recov_state, 1);
8097c478bd9Sstevel@tonic-gate 			goto recov_retry;
8107c478bd9Sstevel@tonic-gate 		}
8117c478bd9Sstevel@tonic-gate 	}
8127c478bd9Sstevel@tonic-gate 
8137c478bd9Sstevel@tonic-gate 	nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 0);
8147c478bd9Sstevel@tonic-gate 
8157c478bd9Sstevel@tonic-gate 	if (!e.error) {
8167c478bd9Sstevel@tonic-gate 		if (e.stat == NFS4_OK) {
8177c478bd9Sstevel@tonic-gate 			nfs4_attr_cache(vp, garp, t, cr, FALSE, NULL);
8187c478bd9Sstevel@tonic-gate 		} else {
8197c478bd9Sstevel@tonic-gate 			e.error = geterrno4(e.stat);
8207c478bd9Sstevel@tonic-gate 
8217c478bd9Sstevel@tonic-gate 			nfs4_purge_stale_fh(e.error, vp, cr);
8227c478bd9Sstevel@tonic-gate 		}
8237c478bd9Sstevel@tonic-gate 	}
8247c478bd9Sstevel@tonic-gate 
8257c478bd9Sstevel@tonic-gate 	/*
8267c478bd9Sstevel@tonic-gate 	 * If getattr a node that is a stub for a crossed
8277c478bd9Sstevel@tonic-gate 	 * mount point, keep the original secinfo flavor for
8287c478bd9Sstevel@tonic-gate 	 * the current file system, not the crossed one.
8297c478bd9Sstevel@tonic-gate 	 */
8307c478bd9Sstevel@tonic-gate 	(void) check_mnt_secinfo(mi->mi_curr_serv, vp);
8317c478bd9Sstevel@tonic-gate 
8327c478bd9Sstevel@tonic-gate 	return (e.error);
8337c478bd9Sstevel@tonic-gate }
8347c478bd9Sstevel@tonic-gate 
8357c478bd9Sstevel@tonic-gate /*
8367c478bd9Sstevel@tonic-gate  * Generate a compound to get attributes over-the-wire.
8377c478bd9Sstevel@tonic-gate  */
8387c478bd9Sstevel@tonic-gate void
8397c478bd9Sstevel@tonic-gate nfs4_getattr_otw_norecovery(vnode_t *vp, nfs4_ga_res_t *garp,
8407c478bd9Sstevel@tonic-gate 		nfs4_error_t *ep, cred_t *cr, int get_acl)
8417c478bd9Sstevel@tonic-gate {
8427c478bd9Sstevel@tonic-gate 	COMPOUND4args_clnt args;
8437c478bd9Sstevel@tonic-gate 	COMPOUND4res_clnt res;
8447c478bd9Sstevel@tonic-gate 	int doqueue;
8457c478bd9Sstevel@tonic-gate 	rnode4_t *rp = VTOR4(vp);
8467c478bd9Sstevel@tonic-gate 	nfs_argop4 argop[2];
8477c478bd9Sstevel@tonic-gate 
8487c478bd9Sstevel@tonic-gate 	args.ctag = TAG_GETATTR;
8497c478bd9Sstevel@tonic-gate 
8507c478bd9Sstevel@tonic-gate 	args.array_len = 2;
8517c478bd9Sstevel@tonic-gate 	args.array = argop;
8527c478bd9Sstevel@tonic-gate 
8537c478bd9Sstevel@tonic-gate 	/* putfh */
8547c478bd9Sstevel@tonic-gate 	argop[0].argop = OP_CPUTFH;
8557c478bd9Sstevel@tonic-gate 	argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;
8567c478bd9Sstevel@tonic-gate 
8577c478bd9Sstevel@tonic-gate 	/* getattr */
8587c478bd9Sstevel@tonic-gate 	/*
8597c478bd9Sstevel@tonic-gate 	 * Unlike nfs version 2 and 3, where getattr returns all the
8607c478bd9Sstevel@tonic-gate 	 * attributes, nfs version 4 returns only the ones explicitely
8617c478bd9Sstevel@tonic-gate 	 * asked for. This creates problems, as some system functions
8627c478bd9Sstevel@tonic-gate 	 * (e.g. cache check) require certain attributes and if the
8637c478bd9Sstevel@tonic-gate 	 * cached node lacks some attributes such as uid/gid, it can
8647c478bd9Sstevel@tonic-gate 	 * affect system utilities (e.g. "ls") that rely on the information
8657c478bd9Sstevel@tonic-gate 	 * to be there. This can lead to anything from system crashes to
8667c478bd9Sstevel@tonic-gate 	 * corrupted information processed by user apps.
8677c478bd9Sstevel@tonic-gate 	 * So to ensure that all bases are covered, request at least
8687c478bd9Sstevel@tonic-gate 	 * the AT_ALL attribute mask.
8697c478bd9Sstevel@tonic-gate 	 */
8707c478bd9Sstevel@tonic-gate 	argop[1].argop = OP_GETATTR;
8717c478bd9Sstevel@tonic-gate 	argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
8727c478bd9Sstevel@tonic-gate 	if (get_acl)
8737c478bd9Sstevel@tonic-gate 		argop[1].nfs_argop4_u.opgetattr.attr_request |= FATTR4_ACL_MASK;
8747c478bd9Sstevel@tonic-gate 	argop[1].nfs_argop4_u.opgetattr.mi = VTOMI4(vp);
8757c478bd9Sstevel@tonic-gate 
8767c478bd9Sstevel@tonic-gate 	doqueue = 1;
8777c478bd9Sstevel@tonic-gate 
8787c478bd9Sstevel@tonic-gate 	rfs4call(VTOMI4(vp), &args, &res, cr, &doqueue, 0, ep);
8797c478bd9Sstevel@tonic-gate 
8807c478bd9Sstevel@tonic-gate 	if (ep->error)
8817c478bd9Sstevel@tonic-gate 		return;
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate 	if (res.status != NFS4_OK) {
8847c478bd9Sstevel@tonic-gate 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
8857c478bd9Sstevel@tonic-gate 		return;
8867c478bd9Sstevel@tonic-gate 	}
8877c478bd9Sstevel@tonic-gate 
8887c478bd9Sstevel@tonic-gate 	*garp = res.array[1].nfs_resop4_u.opgetattr.ga_res;
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate 	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
8917c478bd9Sstevel@tonic-gate }
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate /*
8947c478bd9Sstevel@tonic-gate  * Return either cached or remote attributes. If get remote attr
8957c478bd9Sstevel@tonic-gate  * use them to check and invalidate caches, then cache the new attributes.
8967c478bd9Sstevel@tonic-gate  */
8977c478bd9Sstevel@tonic-gate int
8987c478bd9Sstevel@tonic-gate nfs4getattr(vnode_t *vp, vattr_t *vap, cred_t *cr)
8997c478bd9Sstevel@tonic-gate {
9007c478bd9Sstevel@tonic-gate 	int error;
9017c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
9027c478bd9Sstevel@tonic-gate 	nfs4_ga_res_t gar;
9037c478bd9Sstevel@tonic-gate 
9047c478bd9Sstevel@tonic-gate 	ASSERT(nfs4_consistent_type(vp));
9057c478bd9Sstevel@tonic-gate 
9067c478bd9Sstevel@tonic-gate 	/*
9077c478bd9Sstevel@tonic-gate 	 * If we've got cached attributes, we're done, otherwise go
9087c478bd9Sstevel@tonic-gate 	 * to the server to get attributes, which will update the cache
9097c478bd9Sstevel@tonic-gate 	 * in the process.
9107c478bd9Sstevel@tonic-gate 	 */
9117c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
9127c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
9137c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statev4_lock);
9147c478bd9Sstevel@tonic-gate 	if (ATTRCACHE4_VALID(vp)) {
9157c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statev4_lock);
9167c478bd9Sstevel@tonic-gate 		/*
9177c478bd9Sstevel@tonic-gate 		 * Cached attributes are valid
9187c478bd9Sstevel@tonic-gate 		 * Return the client's view of file size
9197c478bd9Sstevel@tonic-gate 		 */
9207c478bd9Sstevel@tonic-gate 		*vap = rp->r_attr;
9217c478bd9Sstevel@tonic-gate 		vap->va_size = rp->r_size;
9227c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
9237c478bd9Sstevel@tonic-gate 
9247c478bd9Sstevel@tonic-gate 		ASSERT(nfs4_consistent_type(vp));
9257c478bd9Sstevel@tonic-gate 
9267c478bd9Sstevel@tonic-gate 		return (0);
9277c478bd9Sstevel@tonic-gate 	}
9287c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statev4_lock);
9297c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate 	error = nfs4_getattr_otw(vp, &gar, cr, 0);
9327c478bd9Sstevel@tonic-gate 	if (!error)
9337c478bd9Sstevel@tonic-gate 		*vap = gar.n4g_va;
9347c478bd9Sstevel@tonic-gate 
9357c478bd9Sstevel@tonic-gate 	/* Return the client's view of file size */
9367c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
9377c478bd9Sstevel@tonic-gate 	vap->va_size = rp->r_size;
9387c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate 	ASSERT(nfs4_consistent_type(vp));
9417c478bd9Sstevel@tonic-gate 
9427c478bd9Sstevel@tonic-gate 	return (error);
9437c478bd9Sstevel@tonic-gate }
9447c478bd9Sstevel@tonic-gate 
9457c478bd9Sstevel@tonic-gate int
9467c478bd9Sstevel@tonic-gate nfs4_attr_otw(vnode_t *vp, nfs4_tag_type_t tag_type,
9477c478bd9Sstevel@tonic-gate 		nfs4_ga_res_t *garp, bitmap4 reqbitmap, cred_t *cr)
9487c478bd9Sstevel@tonic-gate {
9497c478bd9Sstevel@tonic-gate 	COMPOUND4args_clnt args;
9507c478bd9Sstevel@tonic-gate 	COMPOUND4res_clnt res;
9517c478bd9Sstevel@tonic-gate 	int doqueue;
9527c478bd9Sstevel@tonic-gate 	nfs_argop4 argop[2];
9537c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VTOMI4(vp);
9547c478bd9Sstevel@tonic-gate 	bool_t needrecov = FALSE;
9557c478bd9Sstevel@tonic-gate 	nfs4_recov_state_t recov_state;
9567c478bd9Sstevel@tonic-gate 	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
9577c478bd9Sstevel@tonic-gate 	nfs4_ga_ext_res_t *gerp;
9587c478bd9Sstevel@tonic-gate 
9597c478bd9Sstevel@tonic-gate 	recov_state.rs_flags = 0;
9607c478bd9Sstevel@tonic-gate 	recov_state.rs_num_retry_despite_err = 0;
9617c478bd9Sstevel@tonic-gate 
9627c478bd9Sstevel@tonic-gate recov_retry:
9637c478bd9Sstevel@tonic-gate 	args.ctag = tag_type;
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 	args.array_len = 2;
9667c478bd9Sstevel@tonic-gate 	args.array = argop;
9677c478bd9Sstevel@tonic-gate 
9687c478bd9Sstevel@tonic-gate 	e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR, &recov_state, NULL);
9697c478bd9Sstevel@tonic-gate 	if (e.error)
9707c478bd9Sstevel@tonic-gate 		return (e.error);
9717c478bd9Sstevel@tonic-gate 
9727c478bd9Sstevel@tonic-gate 	/* putfh */
9737c478bd9Sstevel@tonic-gate 	argop[0].argop = OP_CPUTFH;
9747c478bd9Sstevel@tonic-gate 	argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(vp)->r_fh;
9757c478bd9Sstevel@tonic-gate 
9767c478bd9Sstevel@tonic-gate 	/* getattr */
9777c478bd9Sstevel@tonic-gate 	argop[1].argop = OP_GETATTR;
9787c478bd9Sstevel@tonic-gate 	argop[1].nfs_argop4_u.opgetattr.attr_request = reqbitmap;
9797c478bd9Sstevel@tonic-gate 	argop[1].nfs_argop4_u.opgetattr.mi = mi;
9807c478bd9Sstevel@tonic-gate 
9817c478bd9Sstevel@tonic-gate 	doqueue = 1;
9827c478bd9Sstevel@tonic-gate 
9837c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
9847c478bd9Sstevel@tonic-gate 	    "nfs4_attr_otw: %s call, rp %s", needrecov ? "recov" : "first",
9857c478bd9Sstevel@tonic-gate 	    rnode4info(VTOR4(vp))));
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate 	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
9887c478bd9Sstevel@tonic-gate 
9897c478bd9Sstevel@tonic-gate 	needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp);
9907c478bd9Sstevel@tonic-gate 	if (!needrecov && e.error) {
9917c478bd9Sstevel@tonic-gate 		nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
9927c478bd9Sstevel@tonic-gate 			    needrecov);
9937c478bd9Sstevel@tonic-gate 		return (e.error);
9947c478bd9Sstevel@tonic-gate 	}
9957c478bd9Sstevel@tonic-gate 
9967c478bd9Sstevel@tonic-gate 	if (needrecov) {
9977c478bd9Sstevel@tonic-gate 		bool_t abort;
9987c478bd9Sstevel@tonic-gate 
9997c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
10007c478bd9Sstevel@tonic-gate 		    "nfs4_attr_otw: initiating recovery\n"));
10017c478bd9Sstevel@tonic-gate 
10027c478bd9Sstevel@tonic-gate 		abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL,
10037c478bd9Sstevel@tonic-gate 			    NULL, OP_GETATTR, NULL);
10047c478bd9Sstevel@tonic-gate 		nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
10057c478bd9Sstevel@tonic-gate 				needrecov);
10067c478bd9Sstevel@tonic-gate 		if (!e.error) {
10077c478bd9Sstevel@tonic-gate 			(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
10087c478bd9Sstevel@tonic-gate 			e.error = geterrno4(res.status);
10097c478bd9Sstevel@tonic-gate 		}
10107c478bd9Sstevel@tonic-gate 		if (abort == FALSE)
10117c478bd9Sstevel@tonic-gate 			goto recov_retry;
10127c478bd9Sstevel@tonic-gate 		return (e.error);
10137c478bd9Sstevel@tonic-gate 	}
10147c478bd9Sstevel@tonic-gate 
10157c478bd9Sstevel@tonic-gate 	if (res.status) {
10167c478bd9Sstevel@tonic-gate 		e.error = geterrno4(res.status);
10177c478bd9Sstevel@tonic-gate 	} else {
10187c478bd9Sstevel@tonic-gate 		gerp = garp->n4g_ext_res;
10197c478bd9Sstevel@tonic-gate 		bcopy(&res.array[1].nfs_resop4_u.opgetattr.ga_res,
10207c478bd9Sstevel@tonic-gate 			garp, sizeof (nfs4_ga_res_t));
10217c478bd9Sstevel@tonic-gate 		garp->n4g_ext_res = gerp;
10227c478bd9Sstevel@tonic-gate 		if (garp->n4g_ext_res &&
10237c478bd9Sstevel@tonic-gate 		    res.array[1].nfs_resop4_u.opgetattr.ga_res.n4g_ext_res)
10247c478bd9Sstevel@tonic-gate 			bcopy(res.array[1].nfs_resop4_u.opgetattr.
10257c478bd9Sstevel@tonic-gate 				ga_res.n4g_ext_res,
10267c478bd9Sstevel@tonic-gate 				garp->n4g_ext_res, sizeof (nfs4_ga_ext_res_t));
10277c478bd9Sstevel@tonic-gate 	}
10287c478bd9Sstevel@tonic-gate 	(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
10297c478bd9Sstevel@tonic-gate 	nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
10307c478bd9Sstevel@tonic-gate 		    needrecov);
10317c478bd9Sstevel@tonic-gate 	return (e.error);
10327c478bd9Sstevel@tonic-gate }
10337c478bd9Sstevel@tonic-gate 
10347c478bd9Sstevel@tonic-gate /*
10357c478bd9Sstevel@tonic-gate  * Asynchronous I/O parameters.  nfs_async_threads is the high-water mark
10367c478bd9Sstevel@tonic-gate  * for the demand-based allocation of async threads per-mount.  The
10377c478bd9Sstevel@tonic-gate  * nfs_async_timeout is the amount of time a thread will live after it
10387c478bd9Sstevel@tonic-gate  * becomes idle, unless new I/O requests are received before the thread
10397c478bd9Sstevel@tonic-gate  * dies.  See nfs4_async_putpage and nfs4_async_start.
10407c478bd9Sstevel@tonic-gate  */
10417c478bd9Sstevel@tonic-gate 
/*
 * Forward declaration.  nfs4_async_start is the function that
 * nfs4_async_manager hands to zthread_create() for each worker thread
 * it creates.
 */
static void	nfs4_async_start(struct vfs *);
10437c478bd9Sstevel@tonic-gate 
10447c478bd9Sstevel@tonic-gate static void
10457c478bd9Sstevel@tonic-gate free_async_args4(struct nfs4_async_reqs *args)
10467c478bd9Sstevel@tonic-gate {
10477c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
10487c478bd9Sstevel@tonic-gate 
10497c478bd9Sstevel@tonic-gate 	if (args->a_io != NFS4_INACTIVE) {
10507c478bd9Sstevel@tonic-gate 		rp = VTOR4(args->a_vp);
10517c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
10527c478bd9Sstevel@tonic-gate 		rp->r_count--;
10537c478bd9Sstevel@tonic-gate 		if (args->a_io == NFS4_PUTAPAGE ||
10547c478bd9Sstevel@tonic-gate 		    args->a_io == NFS4_PAGEIO)
10557c478bd9Sstevel@tonic-gate 			rp->r_awcount--;
10567c478bd9Sstevel@tonic-gate 		cv_broadcast(&rp->r_cv);
10577c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
10587c478bd9Sstevel@tonic-gate 		VN_RELE(args->a_vp);
10597c478bd9Sstevel@tonic-gate 	}
10607c478bd9Sstevel@tonic-gate 	crfree(args->a_cred);
10617c478bd9Sstevel@tonic-gate 	kmem_free(args, sizeof (*args));
10627c478bd9Sstevel@tonic-gate }
10637c478bd9Sstevel@tonic-gate 
10647c478bd9Sstevel@tonic-gate /*
10657c478bd9Sstevel@tonic-gate  * Cross-zone thread creation and NFS access is disallowed, yet fsflush() and
10667c478bd9Sstevel@tonic-gate  * pageout(), running in the global zone, have legitimate reasons to do
10677c478bd9Sstevel@tonic-gate  * VOP_PUTPAGE(B_ASYNC) on other zones' NFS mounts.  We avoid the problem by
 * use of a per-mount "asynchronous requests manager thread" which is
10697c478bd9Sstevel@tonic-gate  * signaled by the various asynchronous work routines when there is
10707c478bd9Sstevel@tonic-gate  * asynchronous work to be done.  It is responsible for creating new
10717c478bd9Sstevel@tonic-gate  * worker threads if necessary, and notifying existing worker threads
10727c478bd9Sstevel@tonic-gate  * that there is work to be done.
10737c478bd9Sstevel@tonic-gate  *
10747c478bd9Sstevel@tonic-gate  * In other words, it will "take the specifications from the customers and
10757c478bd9Sstevel@tonic-gate  * give them to the engineers."
10767c478bd9Sstevel@tonic-gate  *
10777c478bd9Sstevel@tonic-gate  * Worker threads die off of their own accord if they are no longer
10787c478bd9Sstevel@tonic-gate  * needed.
10797c478bd9Sstevel@tonic-gate  *
10807c478bd9Sstevel@tonic-gate  * This thread is killed when the zone is going away or the filesystem
10817c478bd9Sstevel@tonic-gate  * is being unmounted.
10827c478bd9Sstevel@tonic-gate  */
10837c478bd9Sstevel@tonic-gate void
10847c478bd9Sstevel@tonic-gate nfs4_async_manager(vfs_t *vfsp)
10857c478bd9Sstevel@tonic-gate {
10867c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
10877c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
10887c478bd9Sstevel@tonic-gate 	uint_t max_threads;
10897c478bd9Sstevel@tonic-gate 
10907c478bd9Sstevel@tonic-gate 	mi = VFTOMI4(vfsp);
10917c478bd9Sstevel@tonic-gate 
10927c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr,
10937c478bd9Sstevel@tonic-gate 		    "nfs4_async_manager");
10947c478bd9Sstevel@tonic-gate 
10957c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
10967c478bd9Sstevel@tonic-gate 	/*
10977c478bd9Sstevel@tonic-gate 	 * We want to stash the max number of threads that this mount was
10987c478bd9Sstevel@tonic-gate 	 * allowed so we can use it later when the variable is set to zero as
10997c478bd9Sstevel@tonic-gate 	 * part of the zone/mount going away.
11007c478bd9Sstevel@tonic-gate 	 *
11017c478bd9Sstevel@tonic-gate 	 * We want to be able to create at least one thread to handle
11027c478bd9Sstevel@tonic-gate 	 * asyncrhonous inactive calls.
11037c478bd9Sstevel@tonic-gate 	 */
11047c478bd9Sstevel@tonic-gate 	max_threads = MAX(mi->mi_max_threads, 1);
11057c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
11067c478bd9Sstevel@tonic-gate 	/*
11077c478bd9Sstevel@tonic-gate 	 * We don't want to wait for mi_max_threads to go to zero, since that
11087c478bd9Sstevel@tonic-gate 	 * happens as part of a failed unmount, but this thread should only
11097c478bd9Sstevel@tonic-gate 	 * exit when the mount is really going away.
11107c478bd9Sstevel@tonic-gate 	 *
11117c478bd9Sstevel@tonic-gate 	 * Once MI4_ASYNC_MGR_STOP is set, no more async operations will be
11127c478bd9Sstevel@tonic-gate 	 * attempted: the various _async_*() functions know to do things
11137c478bd9Sstevel@tonic-gate 	 * inline if mi_max_threads == 0.  Henceforth we just drain out the
11147c478bd9Sstevel@tonic-gate 	 * outstanding requests.
11157c478bd9Sstevel@tonic-gate 	 *
11167c478bd9Sstevel@tonic-gate 	 * Note that we still create zthreads even if we notice the zone is
11177c478bd9Sstevel@tonic-gate 	 * shutting down (MI4_ASYNC_MGR_STOP is set); this may cause the zone
11187c478bd9Sstevel@tonic-gate 	 * shutdown sequence to take slightly longer in some cases, but
11197c478bd9Sstevel@tonic-gate 	 * doesn't violate the protocol, as all threads will exit as soon as
11207c478bd9Sstevel@tonic-gate 	 * they're done processing the remaining requests.
11217c478bd9Sstevel@tonic-gate 	 */
11227c478bd9Sstevel@tonic-gate 	while (!(mi->mi_flags & MI4_ASYNC_MGR_STOP) ||
11237c478bd9Sstevel@tonic-gate 	    mi->mi_async_req_count > 0) {
11247c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
11257c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
11267c478bd9Sstevel@tonic-gate 		cv_wait(&mi->mi_async_reqs_cv, &mi->mi_async_lock);
11277c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock);
11287c478bd9Sstevel@tonic-gate 		while (mi->mi_async_req_count > 0) {
11297c478bd9Sstevel@tonic-gate 			/*
11307c478bd9Sstevel@tonic-gate 			 * Paranoia: If the mount started out having
11317c478bd9Sstevel@tonic-gate 			 * (mi->mi_max_threads == 0), and the value was
11327c478bd9Sstevel@tonic-gate 			 * later changed (via a debugger or somesuch),
11337c478bd9Sstevel@tonic-gate 			 * we could be confused since we will think we
11347c478bd9Sstevel@tonic-gate 			 * can't create any threads, and the calling
11357c478bd9Sstevel@tonic-gate 			 * code (which looks at the current value of
11367c478bd9Sstevel@tonic-gate 			 * mi->mi_max_threads, now non-zero) thinks we
11377c478bd9Sstevel@tonic-gate 			 * can.
11387c478bd9Sstevel@tonic-gate 			 *
11397c478bd9Sstevel@tonic-gate 			 * So, because we're paranoid, we create threads
11407c478bd9Sstevel@tonic-gate 			 * up to the maximum of the original and the
11417c478bd9Sstevel@tonic-gate 			 * current value. This means that future
11427c478bd9Sstevel@tonic-gate 			 * (debugger-induced) alterations of
11437c478bd9Sstevel@tonic-gate 			 * mi->mi_max_threads are ignored for our
11447c478bd9Sstevel@tonic-gate 			 * purposes, but who told them they could change
11457c478bd9Sstevel@tonic-gate 			 * random values on a live kernel anyhow?
11467c478bd9Sstevel@tonic-gate 			 */
11477c478bd9Sstevel@tonic-gate 			if (mi->mi_threads <
11487c478bd9Sstevel@tonic-gate 			    MAX(mi->mi_max_threads, max_threads)) {
11497c478bd9Sstevel@tonic-gate 				mi->mi_threads++;
11507c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_async_lock);
115150a83466Sjwahlig 				MI4_HOLD(mi);
11527c478bd9Sstevel@tonic-gate 				VFS_HOLD(vfsp);	/* hold for new thread */
11537c478bd9Sstevel@tonic-gate 				(void) zthread_create(NULL, 0, nfs4_async_start,
11547c478bd9Sstevel@tonic-gate 				    vfsp, 0, minclsyspri);
11557c478bd9Sstevel@tonic-gate 				mutex_enter(&mi->mi_async_lock);
11567c478bd9Sstevel@tonic-gate 			}
11577c478bd9Sstevel@tonic-gate 			cv_signal(&mi->mi_async_work_cv);
11587c478bd9Sstevel@tonic-gate 			ASSERT(mi->mi_async_req_count != 0);
11597c478bd9Sstevel@tonic-gate 			mi->mi_async_req_count--;
11607c478bd9Sstevel@tonic-gate 		}
11617c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
11627c478bd9Sstevel@tonic-gate 	}
11637c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
11647c478bd9Sstevel@tonic-gate 
11657c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
11667c478bd9Sstevel@tonic-gate 	    "nfs4_async_manager exiting for vfs %p\n", (void *)mi->mi_vfsp));
11677c478bd9Sstevel@tonic-gate 	/*
11687c478bd9Sstevel@tonic-gate 	 * Let everyone know we're done.
11697c478bd9Sstevel@tonic-gate 	 */
11707c478bd9Sstevel@tonic-gate 	mi->mi_manager_thread = NULL;
11717c478bd9Sstevel@tonic-gate 	/*
11727c478bd9Sstevel@tonic-gate 	 * Wake up the inactive thread.
11737c478bd9Sstevel@tonic-gate 	 */
11747c478bd9Sstevel@tonic-gate 	cv_broadcast(&mi->mi_inact_req_cv);
11757c478bd9Sstevel@tonic-gate 	/*
11767c478bd9Sstevel@tonic-gate 	 * Wake up anyone sitting in nfs4_async_manager_stop()
11777c478bd9Sstevel@tonic-gate 	 */
11787c478bd9Sstevel@tonic-gate 	cv_broadcast(&mi->mi_async_cv);
11797c478bd9Sstevel@tonic-gate 	/*
11807c478bd9Sstevel@tonic-gate 	 * There is no explicit call to mutex_exit(&mi->mi_async_lock)
11817c478bd9Sstevel@tonic-gate 	 * since CALLB_CPR_EXIT is actually responsible for releasing
11827c478bd9Sstevel@tonic-gate 	 * 'mi_async_lock'.
11837c478bd9Sstevel@tonic-gate 	 */
11847c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
11857c478bd9Sstevel@tonic-gate 	VFS_RELE(vfsp);	/* release thread's hold */
118650a83466Sjwahlig 	MI4_RELE(mi);
11877c478bd9Sstevel@tonic-gate 	zthread_exit();
11887c478bd9Sstevel@tonic-gate }
11897c478bd9Sstevel@tonic-gate 
11907c478bd9Sstevel@tonic-gate /*
11917c478bd9Sstevel@tonic-gate  * Signal (and wait for) the async manager thread to clean up and go away.
11927c478bd9Sstevel@tonic-gate  */
11937c478bd9Sstevel@tonic-gate void
11947c478bd9Sstevel@tonic-gate nfs4_async_manager_stop(vfs_t *vfsp)
11957c478bd9Sstevel@tonic-gate {
11967c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VFTOMI4(vfsp);
11977c478bd9Sstevel@tonic-gate 
11987c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
11997c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
12007c478bd9Sstevel@tonic-gate 	mi->mi_flags |= MI4_ASYNC_MGR_STOP;
12017c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
12027c478bd9Sstevel@tonic-gate 	cv_broadcast(&mi->mi_async_reqs_cv);
12037c478bd9Sstevel@tonic-gate 	/*
12047c478bd9Sstevel@tonic-gate 	 * Wait for the async manager thread to die.
12057c478bd9Sstevel@tonic-gate 	 */
12067c478bd9Sstevel@tonic-gate 	while (mi->mi_manager_thread != NULL)
12077c478bd9Sstevel@tonic-gate 		cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);
12087c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
12097c478bd9Sstevel@tonic-gate }
12107c478bd9Sstevel@tonic-gate 
12117c478bd9Sstevel@tonic-gate int
12127c478bd9Sstevel@tonic-gate nfs4_async_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr,
12137c478bd9Sstevel@tonic-gate 	struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *,
12147c478bd9Sstevel@tonic-gate 	u_offset_t, caddr_t, struct seg *, cred_t *))
12157c478bd9Sstevel@tonic-gate {
12167c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
12177c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
12187c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
12197c478bd9Sstevel@tonic-gate 
12207c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
12217c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef == NULL);
12227c478bd9Sstevel@tonic-gate 
12237c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
12247c478bd9Sstevel@tonic-gate 
12257c478bd9Sstevel@tonic-gate 	/*
12267c478bd9Sstevel@tonic-gate 	 * If addr falls in a different segment, don't bother doing readahead.
12277c478bd9Sstevel@tonic-gate 	 */
12287c478bd9Sstevel@tonic-gate 	if (addr >= seg->s_base + seg->s_size)
12297c478bd9Sstevel@tonic-gate 		return (-1);
12307c478bd9Sstevel@tonic-gate 
12317c478bd9Sstevel@tonic-gate 	/*
12327c478bd9Sstevel@tonic-gate 	 * If we can't allocate a request structure, punt on the readahead.
12337c478bd9Sstevel@tonic-gate 	 */
12347c478bd9Sstevel@tonic-gate 	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
12357c478bd9Sstevel@tonic-gate 		return (-1);
12367c478bd9Sstevel@tonic-gate 
12377c478bd9Sstevel@tonic-gate 	/*
12387c478bd9Sstevel@tonic-gate 	 * If a lock operation is pending, don't initiate any new
12397c478bd9Sstevel@tonic-gate 	 * readaheads.  Otherwise, bump r_count to indicate the new
12407c478bd9Sstevel@tonic-gate 	 * asynchronous I/O.
12417c478bd9Sstevel@tonic-gate 	 */
12427c478bd9Sstevel@tonic-gate 	if (!nfs_rw_tryenter(&rp->r_lkserlock, RW_READER)) {
12437c478bd9Sstevel@tonic-gate 		kmem_free(args, sizeof (*args));
12447c478bd9Sstevel@tonic-gate 		return (-1);
12457c478bd9Sstevel@tonic-gate 	}
12467c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
12477c478bd9Sstevel@tonic-gate 	rp->r_count++;
12487c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
12497c478bd9Sstevel@tonic-gate 	nfs_rw_exit(&rp->r_lkserlock);
12507c478bd9Sstevel@tonic-gate 
12517c478bd9Sstevel@tonic-gate 	args->a_next = NULL;
12527c478bd9Sstevel@tonic-gate #ifdef DEBUG
12537c478bd9Sstevel@tonic-gate 	args->a_queuer = curthread;
12547c478bd9Sstevel@tonic-gate #endif
12557c478bd9Sstevel@tonic-gate 	VN_HOLD(vp);
12567c478bd9Sstevel@tonic-gate 	args->a_vp = vp;
12577c478bd9Sstevel@tonic-gate 	ASSERT(cr != NULL);
12587c478bd9Sstevel@tonic-gate 	crhold(cr);
12597c478bd9Sstevel@tonic-gate 	args->a_cred = cr;
12607c478bd9Sstevel@tonic-gate 	args->a_io = NFS4_READ_AHEAD;
12617c478bd9Sstevel@tonic-gate 	args->a_nfs4_readahead = readahead;
12627c478bd9Sstevel@tonic-gate 	args->a_nfs4_blkoff = blkoff;
12637c478bd9Sstevel@tonic-gate 	args->a_nfs4_seg = seg;
12647c478bd9Sstevel@tonic-gate 	args->a_nfs4_addr = addr;
12657c478bd9Sstevel@tonic-gate 
12667c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
12677c478bd9Sstevel@tonic-gate 
12687c478bd9Sstevel@tonic-gate 	/*
12697c478bd9Sstevel@tonic-gate 	 * If asyncio has been disabled, don't bother readahead.
12707c478bd9Sstevel@tonic-gate 	 */
12717c478bd9Sstevel@tonic-gate 	if (mi->mi_max_threads == 0) {
12727c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_async_lock);
12737c478bd9Sstevel@tonic-gate 		goto noasync;
12747c478bd9Sstevel@tonic-gate 	}
12757c478bd9Sstevel@tonic-gate 
12767c478bd9Sstevel@tonic-gate 	/*
12777c478bd9Sstevel@tonic-gate 	 * Link request structure into the async list and
12787c478bd9Sstevel@tonic-gate 	 * wakeup async thread to do the i/o.
12797c478bd9Sstevel@tonic-gate 	 */
12807c478bd9Sstevel@tonic-gate 	if (mi->mi_async_reqs[NFS4_READ_AHEAD] == NULL) {
12817c478bd9Sstevel@tonic-gate 		mi->mi_async_reqs[NFS4_READ_AHEAD] = args;
12827c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_READ_AHEAD] = args;
12837c478bd9Sstevel@tonic-gate 	} else {
12847c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_READ_AHEAD]->a_next = args;
12857c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_READ_AHEAD] = args;
12867c478bd9Sstevel@tonic-gate 	}
12877c478bd9Sstevel@tonic-gate 
12887c478bd9Sstevel@tonic-gate 	if (mi->mi_io_kstats) {
12897c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
12907c478bd9Sstevel@tonic-gate 		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
12917c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
12927c478bd9Sstevel@tonic-gate 	}
12937c478bd9Sstevel@tonic-gate 
12947c478bd9Sstevel@tonic-gate 	mi->mi_async_req_count++;
12957c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_async_req_count != 0);
12967c478bd9Sstevel@tonic-gate 	cv_signal(&mi->mi_async_reqs_cv);
12977c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
12987c478bd9Sstevel@tonic-gate 	return (0);
12997c478bd9Sstevel@tonic-gate 
13007c478bd9Sstevel@tonic-gate noasync:
13017c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
13027c478bd9Sstevel@tonic-gate 	rp->r_count--;
13037c478bd9Sstevel@tonic-gate 	cv_broadcast(&rp->r_cv);
13047c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
13057c478bd9Sstevel@tonic-gate 	VN_RELE(vp);
13067c478bd9Sstevel@tonic-gate 	crfree(cr);
13077c478bd9Sstevel@tonic-gate 	kmem_free(args, sizeof (*args));
13087c478bd9Sstevel@tonic-gate 	return (-1);
13097c478bd9Sstevel@tonic-gate }
13107c478bd9Sstevel@tonic-gate 
13117c478bd9Sstevel@tonic-gate /*
13127c478bd9Sstevel@tonic-gate  * The async queues for each mounted file system are arranged as a
13137c478bd9Sstevel@tonic-gate  * set of queues, one for each async i/o type.  Requests are taken
13147c478bd9Sstevel@tonic-gate  * from the queues in a round-robin fashion.  A number of consecutive
13157c478bd9Sstevel@tonic-gate  * requests are taken from each queue before moving on to the next
13167c478bd9Sstevel@tonic-gate  * queue.  This functionality may allow the NFS Version 2 server to do
13177c478bd9Sstevel@tonic-gate  * write clustering, even if the client is mixing writes and reads
13187c478bd9Sstevel@tonic-gate  * because it will take multiple write requests from the queue
13197c478bd9Sstevel@tonic-gate  * before processing any of the other async i/o types.
13207c478bd9Sstevel@tonic-gate  *
13217c478bd9Sstevel@tonic-gate  * XXX The nfs4_async_start thread is unsafe in the light of the present
13227c478bd9Sstevel@tonic-gate  * model defined by cpr to suspend the system. Specifically over the
13237c478bd9Sstevel@tonic-gate  * wire calls are cpr-unsafe. The thread should be reevaluated in
13247c478bd9Sstevel@tonic-gate  * case of future updates to the cpr model.
13257c478bd9Sstevel@tonic-gate  */
13267c478bd9Sstevel@tonic-gate static void
13277c478bd9Sstevel@tonic-gate nfs4_async_start(struct vfs *vfsp)
13287c478bd9Sstevel@tonic-gate {
13297c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
13307c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VFTOMI4(vfsp);
13317c478bd9Sstevel@tonic-gate 	clock_t time_left = 1;
13327c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
13337c478bd9Sstevel@tonic-gate 	int i;
13347c478bd9Sstevel@tonic-gate 	extern int nfs_async_timeout;
13357c478bd9Sstevel@tonic-gate 
13367c478bd9Sstevel@tonic-gate 	/*
13377c478bd9Sstevel@tonic-gate 	 * Dynamic initialization of nfs_async_timeout to allow nfs to be
13387c478bd9Sstevel@tonic-gate 	 * built in an implementation independent manner.
13397c478bd9Sstevel@tonic-gate 	 */
13407c478bd9Sstevel@tonic-gate 	if (nfs_async_timeout == -1)
13417c478bd9Sstevel@tonic-gate 		nfs_async_timeout = NFS_ASYNC_TIMEOUT;
13427c478bd9Sstevel@tonic-gate 
13437c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr, "nas");
13447c478bd9Sstevel@tonic-gate 
13457c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
13467c478bd9Sstevel@tonic-gate 	for (;;) {
13477c478bd9Sstevel@tonic-gate 		/*
13487c478bd9Sstevel@tonic-gate 		 * Find the next queue containing an entry.  We start
13497c478bd9Sstevel@tonic-gate 		 * at the current queue pointer and then round robin
13507c478bd9Sstevel@tonic-gate 		 * through all of them until we either find a non-empty
13517c478bd9Sstevel@tonic-gate 		 * queue or have looked through all of them.
13527c478bd9Sstevel@tonic-gate 		 */
13537c478bd9Sstevel@tonic-gate 		for (i = 0; i < NFS4_ASYNC_TYPES; i++) {
13547c478bd9Sstevel@tonic-gate 			args = *mi->mi_async_curr;
13557c478bd9Sstevel@tonic-gate 			if (args != NULL)
13567c478bd9Sstevel@tonic-gate 				break;
13577c478bd9Sstevel@tonic-gate 			mi->mi_async_curr++;
13587c478bd9Sstevel@tonic-gate 			if (mi->mi_async_curr ==
13597c478bd9Sstevel@tonic-gate 			    &mi->mi_async_reqs[NFS4_ASYNC_TYPES])
13607c478bd9Sstevel@tonic-gate 				mi->mi_async_curr = &mi->mi_async_reqs[0];
13617c478bd9Sstevel@tonic-gate 		}
13627c478bd9Sstevel@tonic-gate 		/*
13637c478bd9Sstevel@tonic-gate 		 * If we didn't find a entry, then block until woken up
13647c478bd9Sstevel@tonic-gate 		 * again and then look through the queues again.
13657c478bd9Sstevel@tonic-gate 		 */
13667c478bd9Sstevel@tonic-gate 		if (args == NULL) {
13677c478bd9Sstevel@tonic-gate 			/*
13687c478bd9Sstevel@tonic-gate 			 * Exiting is considered to be safe for CPR as well
13697c478bd9Sstevel@tonic-gate 			 */
13707c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
13717c478bd9Sstevel@tonic-gate 
13727c478bd9Sstevel@tonic-gate 			/*
13737c478bd9Sstevel@tonic-gate 			 * Wakeup thread waiting to unmount the file
13747c478bd9Sstevel@tonic-gate 			 * system only if all async threads are inactive.
13757c478bd9Sstevel@tonic-gate 			 *
13767c478bd9Sstevel@tonic-gate 			 * If we've timed-out and there's nothing to do,
13777c478bd9Sstevel@tonic-gate 			 * then get rid of this thread.
13787c478bd9Sstevel@tonic-gate 			 */
13797c478bd9Sstevel@tonic-gate 			if (mi->mi_max_threads == 0 || time_left <= 0) {
13807c478bd9Sstevel@tonic-gate 				if (--mi->mi_threads == 0)
13817c478bd9Sstevel@tonic-gate 					cv_signal(&mi->mi_async_cv);
13827c478bd9Sstevel@tonic-gate 				CALLB_CPR_EXIT(&cprinfo);
13837c478bd9Sstevel@tonic-gate 				VFS_RELE(vfsp);	/* release thread's hold */
138450a83466Sjwahlig 				MI4_RELE(mi);
13857c478bd9Sstevel@tonic-gate 				zthread_exit();
13867c478bd9Sstevel@tonic-gate 				/* NOTREACHED */
13877c478bd9Sstevel@tonic-gate 			}
13887c478bd9Sstevel@tonic-gate 			time_left = cv_timedwait(&mi->mi_async_work_cv,
13897c478bd9Sstevel@tonic-gate 			    &mi->mi_async_lock, nfs_async_timeout + lbolt);
13907c478bd9Sstevel@tonic-gate 
13917c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock);
13927c478bd9Sstevel@tonic-gate 
13937c478bd9Sstevel@tonic-gate 			continue;
13947c478bd9Sstevel@tonic-gate 		} else {
13957c478bd9Sstevel@tonic-gate 			time_left = 1;
13967c478bd9Sstevel@tonic-gate 		}
13977c478bd9Sstevel@tonic-gate 
13987c478bd9Sstevel@tonic-gate 		/*
13997c478bd9Sstevel@tonic-gate 		 * Remove the request from the async queue and then
14007c478bd9Sstevel@tonic-gate 		 * update the current async request queue pointer.  If
14017c478bd9Sstevel@tonic-gate 		 * the current queue is empty or we have removed enough
14027c478bd9Sstevel@tonic-gate 		 * consecutive entries from it, then reset the counter
14037c478bd9Sstevel@tonic-gate 		 * for this queue and then move the current pointer to
14047c478bd9Sstevel@tonic-gate 		 * the next queue.
14057c478bd9Sstevel@tonic-gate 		 */
14067c478bd9Sstevel@tonic-gate 		*mi->mi_async_curr = args->a_next;
14077c478bd9Sstevel@tonic-gate 		if (*mi->mi_async_curr == NULL ||
14087c478bd9Sstevel@tonic-gate 		    --mi->mi_async_clusters[args->a_io] == 0) {
14097c478bd9Sstevel@tonic-gate 			mi->mi_async_clusters[args->a_io] =
14107c478bd9Sstevel@tonic-gate 						mi->mi_async_init_clusters;
14117c478bd9Sstevel@tonic-gate 			mi->mi_async_curr++;
14127c478bd9Sstevel@tonic-gate 			if (mi->mi_async_curr ==
14137c478bd9Sstevel@tonic-gate 			    &mi->mi_async_reqs[NFS4_ASYNC_TYPES])
14147c478bd9Sstevel@tonic-gate 				mi->mi_async_curr = &mi->mi_async_reqs[0];
14157c478bd9Sstevel@tonic-gate 		}
14167c478bd9Sstevel@tonic-gate 
14177c478bd9Sstevel@tonic-gate 		if (args->a_io != NFS4_INACTIVE && mi->mi_io_kstats) {
14187c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
14197c478bd9Sstevel@tonic-gate 			kstat_waitq_exit(KSTAT_IO_PTR(mi->mi_io_kstats));
14207c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
14217c478bd9Sstevel@tonic-gate 		}
14227c478bd9Sstevel@tonic-gate 
14237c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_async_lock);
14247c478bd9Sstevel@tonic-gate 
14257c478bd9Sstevel@tonic-gate 		/*
14267c478bd9Sstevel@tonic-gate 		 * Obtain arguments from the async request structure.
14277c478bd9Sstevel@tonic-gate 		 */
14287c478bd9Sstevel@tonic-gate 		if (args->a_io == NFS4_READ_AHEAD && mi->mi_max_threads > 0) {
14297c478bd9Sstevel@tonic-gate 			(*args->a_nfs4_readahead)(args->a_vp,
14307c478bd9Sstevel@tonic-gate 					args->a_nfs4_blkoff,
14317c478bd9Sstevel@tonic-gate 					args->a_nfs4_addr, args->a_nfs4_seg,
14327c478bd9Sstevel@tonic-gate 					args->a_cred);
14337c478bd9Sstevel@tonic-gate 		} else if (args->a_io == NFS4_PUTAPAGE) {
14347c478bd9Sstevel@tonic-gate 			(void) (*args->a_nfs4_putapage)(args->a_vp,
14357c478bd9Sstevel@tonic-gate 					args->a_nfs4_pp, args->a_nfs4_off,
14367c478bd9Sstevel@tonic-gate 					args->a_nfs4_len, args->a_nfs4_flags,
14377c478bd9Sstevel@tonic-gate 					args->a_cred);
14387c478bd9Sstevel@tonic-gate 		} else if (args->a_io == NFS4_PAGEIO) {
14397c478bd9Sstevel@tonic-gate 			(void) (*args->a_nfs4_pageio)(args->a_vp,
14407c478bd9Sstevel@tonic-gate 					args->a_nfs4_pp, args->a_nfs4_off,
14417c478bd9Sstevel@tonic-gate 					args->a_nfs4_len, args->a_nfs4_flags,
14427c478bd9Sstevel@tonic-gate 					args->a_cred);
14437c478bd9Sstevel@tonic-gate 		} else if (args->a_io == NFS4_READDIR) {
14447c478bd9Sstevel@tonic-gate 			(void) ((*args->a_nfs4_readdir)(args->a_vp,
14457c478bd9Sstevel@tonic-gate 					args->a_nfs4_rdc, args->a_cred));
14467c478bd9Sstevel@tonic-gate 		} else if (args->a_io == NFS4_COMMIT) {
14477c478bd9Sstevel@tonic-gate 			(*args->a_nfs4_commit)(args->a_vp, args->a_nfs4_plist,
14487c478bd9Sstevel@tonic-gate 					args->a_nfs4_offset, args->a_nfs4_count,
14497c478bd9Sstevel@tonic-gate 					args->a_cred);
14507c478bd9Sstevel@tonic-gate 		} else if (args->a_io == NFS4_INACTIVE) {
14517c478bd9Sstevel@tonic-gate 			nfs4_inactive_otw(args->a_vp, args->a_cred);
14527c478bd9Sstevel@tonic-gate 		}
14537c478bd9Sstevel@tonic-gate 
14547c478bd9Sstevel@tonic-gate 		/*
14557c478bd9Sstevel@tonic-gate 		 * Now, release the vnode and free the credentials
14567c478bd9Sstevel@tonic-gate 		 * structure.
14577c478bd9Sstevel@tonic-gate 		 */
14587c478bd9Sstevel@tonic-gate 		free_async_args4(args);
14597c478bd9Sstevel@tonic-gate 		/*
14607c478bd9Sstevel@tonic-gate 		 * Reacquire the mutex because it will be needed above.
14617c478bd9Sstevel@tonic-gate 		 */
14627c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_async_lock);
14637c478bd9Sstevel@tonic-gate 	}
14647c478bd9Sstevel@tonic-gate }
14657c478bd9Sstevel@tonic-gate 
14667c478bd9Sstevel@tonic-gate /*
14677c478bd9Sstevel@tonic-gate  * nfs4_inactive_thread - look for vnodes that need over-the-wire calls as
14687c478bd9Sstevel@tonic-gate  * part of VOP_INACTIVE.
14697c478bd9Sstevel@tonic-gate  */
14707c478bd9Sstevel@tonic-gate 
14717c478bd9Sstevel@tonic-gate void
14727c478bd9Sstevel@tonic-gate nfs4_inactive_thread(mntinfo4_t *mi)
14737c478bd9Sstevel@tonic-gate {
14747c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
14757c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
14767c478bd9Sstevel@tonic-gate 	vfs_t *vfsp = mi->mi_vfsp;
14777c478bd9Sstevel@tonic-gate 
14787c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr,
14797c478bd9Sstevel@tonic-gate 		    "nfs4_inactive_thread");
14807c478bd9Sstevel@tonic-gate 
14817c478bd9Sstevel@tonic-gate 	for (;;) {
14827c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_async_lock);
14837c478bd9Sstevel@tonic-gate 		args = mi->mi_async_reqs[NFS4_INACTIVE];
14847c478bd9Sstevel@tonic-gate 		if (args == NULL) {
14857c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
14867c478bd9Sstevel@tonic-gate 			/*
148750a83466Sjwahlig 			 * We don't want to exit until the async manager is done
14887c478bd9Sstevel@tonic-gate 			 * with its work; hence the check for mi_manager_thread
14897c478bd9Sstevel@tonic-gate 			 * being NULL.
14907c478bd9Sstevel@tonic-gate 			 *
14917c478bd9Sstevel@tonic-gate 			 * The async manager thread will cv_broadcast() on
14927c478bd9Sstevel@tonic-gate 			 * mi_inact_req_cv when it's done, at which point we'll
14937c478bd9Sstevel@tonic-gate 			 * wake up and exit.
14947c478bd9Sstevel@tonic-gate 			 */
149550a83466Sjwahlig 			if (mi->mi_manager_thread == NULL)
14967c478bd9Sstevel@tonic-gate 				goto die;
14977c478bd9Sstevel@tonic-gate 			mi->mi_flags |= MI4_INACTIVE_IDLE;
14987c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
14997c478bd9Sstevel@tonic-gate 			cv_signal(&mi->mi_async_cv);
15007c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
15017c478bd9Sstevel@tonic-gate 			cv_wait(&mi->mi_inact_req_cv, &mi->mi_async_lock);
15027c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock);
15037c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_async_lock);
15047c478bd9Sstevel@tonic-gate 		} else {
15057c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
15067c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~MI4_INACTIVE_IDLE;
15077c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
15087c478bd9Sstevel@tonic-gate 			mi->mi_async_reqs[NFS4_INACTIVE] = args->a_next;
15097c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_async_lock);
15107c478bd9Sstevel@tonic-gate 			nfs4_inactive_otw(args->a_vp, args->a_cred);
15117c478bd9Sstevel@tonic-gate 			crfree(args->a_cred);
15127c478bd9Sstevel@tonic-gate 			kmem_free(args, sizeof (*args));
15137c478bd9Sstevel@tonic-gate 		}
15147c478bd9Sstevel@tonic-gate 	}
15157c478bd9Sstevel@tonic-gate die:
15167c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
15177c478bd9Sstevel@tonic-gate 	mi->mi_inactive_thread = NULL;
15187c478bd9Sstevel@tonic-gate 	cv_signal(&mi->mi_async_cv);
151950a83466Sjwahlig 
15207c478bd9Sstevel@tonic-gate 	/*
15217c478bd9Sstevel@tonic-gate 	 * There is no explicit call to mutex_exit(&mi->mi_async_lock) since
15227c478bd9Sstevel@tonic-gate 	 * CALLB_CPR_EXIT is actually responsible for releasing 'mi_async_lock'.
15237c478bd9Sstevel@tonic-gate 	 */
15247c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
152550a83466Sjwahlig 
15267c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
15277c478bd9Sstevel@tonic-gate 	    "nfs4_inactive_thread exiting for vfs %p\n", (void *)vfsp));
152850a83466Sjwahlig 
152950a83466Sjwahlig 	MI4_RELE(mi);
15307c478bd9Sstevel@tonic-gate 	zthread_exit();
15317c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
15327c478bd9Sstevel@tonic-gate }
15337c478bd9Sstevel@tonic-gate 
15347c478bd9Sstevel@tonic-gate /*
15357c478bd9Sstevel@tonic-gate  * nfs_async_stop:
15367c478bd9Sstevel@tonic-gate  * Wait for all outstanding putpage operations and the inactive thread to
15377c478bd9Sstevel@tonic-gate  * complete; nfs4_async_stop_sig() without interruptibility.
15387c478bd9Sstevel@tonic-gate  */
15397c478bd9Sstevel@tonic-gate void
15407c478bd9Sstevel@tonic-gate nfs4_async_stop(struct vfs *vfsp)
15417c478bd9Sstevel@tonic-gate {
15427c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VFTOMI4(vfsp);
15437c478bd9Sstevel@tonic-gate 
15447c478bd9Sstevel@tonic-gate 	/*
15457c478bd9Sstevel@tonic-gate 	 * Wait for all outstanding async operations to complete and for
15467c478bd9Sstevel@tonic-gate 	 * worker threads to exit.
15477c478bd9Sstevel@tonic-gate 	 */
15487c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
15497c478bd9Sstevel@tonic-gate 	mi->mi_max_threads = 0;
15507c478bd9Sstevel@tonic-gate 	cv_broadcast(&mi->mi_async_work_cv);
15517c478bd9Sstevel@tonic-gate 	while (mi->mi_threads != 0)
15527c478bd9Sstevel@tonic-gate 		cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);
15537c478bd9Sstevel@tonic-gate 
15547c478bd9Sstevel@tonic-gate 	/*
15557c478bd9Sstevel@tonic-gate 	 * Wait for the inactive thread to finish doing what it's doing.  It
15567c478bd9Sstevel@tonic-gate 	 * won't exit until the last reference to the vfs_t goes away.
15577c478bd9Sstevel@tonic-gate 	 */
15587c478bd9Sstevel@tonic-gate 	if (mi->mi_inactive_thread != NULL) {
15597c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
15607c478bd9Sstevel@tonic-gate 		while (!(mi->mi_flags & MI4_INACTIVE_IDLE) ||
15617c478bd9Sstevel@tonic-gate 		    (mi->mi_async_reqs[NFS4_INACTIVE] != NULL)) {
15627c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
15637c478bd9Sstevel@tonic-gate 			cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);
15647c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
15657c478bd9Sstevel@tonic-gate 		}
15667c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
15677c478bd9Sstevel@tonic-gate 	}
15687c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
15697c478bd9Sstevel@tonic-gate }
15707c478bd9Sstevel@tonic-gate 
15717c478bd9Sstevel@tonic-gate /*
15727c478bd9Sstevel@tonic-gate  * nfs_async_stop_sig:
15737c478bd9Sstevel@tonic-gate  * Wait for all outstanding putpage operations and the inactive thread to
15747c478bd9Sstevel@tonic-gate  * complete. If a signal is delivered we will abort and return non-zero;
15757c478bd9Sstevel@tonic-gate  * otherwise return 0. Since this routine is called from nfs4_unmount, we
15767c478bd9Sstevel@tonic-gate  * need to make it interruptable.
15777c478bd9Sstevel@tonic-gate  */
15787c478bd9Sstevel@tonic-gate int
15797c478bd9Sstevel@tonic-gate nfs4_async_stop_sig(struct vfs *vfsp)
15807c478bd9Sstevel@tonic-gate {
15817c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VFTOMI4(vfsp);
15827c478bd9Sstevel@tonic-gate 	ushort_t omax;
15837c478bd9Sstevel@tonic-gate 	bool_t intr = FALSE;
15847c478bd9Sstevel@tonic-gate 
15857c478bd9Sstevel@tonic-gate 	/*
15867c478bd9Sstevel@tonic-gate 	 * Wait for all outstanding putpage operations to complete and for
15877c478bd9Sstevel@tonic-gate 	 * worker threads to exit.
15887c478bd9Sstevel@tonic-gate 	 */
15897c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
15907c478bd9Sstevel@tonic-gate 	omax = mi->mi_max_threads;
15917c478bd9Sstevel@tonic-gate 	mi->mi_max_threads = 0;
15927c478bd9Sstevel@tonic-gate 	cv_broadcast(&mi->mi_async_work_cv);
15937c478bd9Sstevel@tonic-gate 	while (mi->mi_threads != 0) {
15947c478bd9Sstevel@tonic-gate 		if (!cv_wait_sig(&mi->mi_async_cv, &mi->mi_async_lock)) {
15957c478bd9Sstevel@tonic-gate 			intr = TRUE;
15967c478bd9Sstevel@tonic-gate 			goto interrupted;
15977c478bd9Sstevel@tonic-gate 		}
15987c478bd9Sstevel@tonic-gate 	}
15997c478bd9Sstevel@tonic-gate 
16007c478bd9Sstevel@tonic-gate 	/*
16017c478bd9Sstevel@tonic-gate 	 * Wait for the inactive thread to finish doing what it's doing.  It
16027c478bd9Sstevel@tonic-gate 	 * won't exit until the a last reference to the vfs_t goes away.
16037c478bd9Sstevel@tonic-gate 	 */
16047c478bd9Sstevel@tonic-gate 	if (mi->mi_inactive_thread != NULL) {
16057c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
16067c478bd9Sstevel@tonic-gate 		while (!(mi->mi_flags & MI4_INACTIVE_IDLE) ||
16077c478bd9Sstevel@tonic-gate 		    (mi->mi_async_reqs[NFS4_INACTIVE] != NULL)) {
16087c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
16097c478bd9Sstevel@tonic-gate 			if (!cv_wait_sig(&mi->mi_async_cv,
16107c478bd9Sstevel@tonic-gate 			    &mi->mi_async_lock)) {
16117c478bd9Sstevel@tonic-gate 				intr = TRUE;
16127c478bd9Sstevel@tonic-gate 				goto interrupted;
16137c478bd9Sstevel@tonic-gate 			}
16147c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
16157c478bd9Sstevel@tonic-gate 		}
16167c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
16177c478bd9Sstevel@tonic-gate 	}
16187c478bd9Sstevel@tonic-gate interrupted:
16197c478bd9Sstevel@tonic-gate 	if (intr)
16207c478bd9Sstevel@tonic-gate 		mi->mi_max_threads = omax;
16217c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
16227c478bd9Sstevel@tonic-gate 
16237c478bd9Sstevel@tonic-gate 	return (intr);
16247c478bd9Sstevel@tonic-gate }
16257c478bd9Sstevel@tonic-gate 
16267c478bd9Sstevel@tonic-gate int
16277c478bd9Sstevel@tonic-gate nfs4_async_putapage(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
16287c478bd9Sstevel@tonic-gate 	int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *,
16297c478bd9Sstevel@tonic-gate 	u_offset_t, size_t, int, cred_t *))
16307c478bd9Sstevel@tonic-gate {
16317c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
16327c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
16337c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
16347c478bd9Sstevel@tonic-gate 
16357c478bd9Sstevel@tonic-gate 	ASSERT(flags & B_ASYNC);
16367c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_vfsp != NULL);
16377c478bd9Sstevel@tonic-gate 
16387c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
16397c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_count > 0);
16407c478bd9Sstevel@tonic-gate 
16417c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
16427c478bd9Sstevel@tonic-gate 
16437c478bd9Sstevel@tonic-gate 	/*
16447c478bd9Sstevel@tonic-gate 	 * If we can't allocate a request structure, do the putpage
16457c478bd9Sstevel@tonic-gate 	 * operation synchronously in this thread's context.
16467c478bd9Sstevel@tonic-gate 	 */
16477c478bd9Sstevel@tonic-gate 	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
16487c478bd9Sstevel@tonic-gate 		goto noasync;
16497c478bd9Sstevel@tonic-gate 
16507c478bd9Sstevel@tonic-gate 	args->a_next = NULL;
16517c478bd9Sstevel@tonic-gate #ifdef DEBUG
16527c478bd9Sstevel@tonic-gate 	args->a_queuer = curthread;
16537c478bd9Sstevel@tonic-gate #endif
16547c478bd9Sstevel@tonic-gate 	VN_HOLD(vp);
16557c478bd9Sstevel@tonic-gate 	args->a_vp = vp;
16567c478bd9Sstevel@tonic-gate 	ASSERT(cr != NULL);
16577c478bd9Sstevel@tonic-gate 	crhold(cr);
16587c478bd9Sstevel@tonic-gate 	args->a_cred = cr;
16597c478bd9Sstevel@tonic-gate 	args->a_io = NFS4_PUTAPAGE;
16607c478bd9Sstevel@tonic-gate 	args->a_nfs4_putapage = putapage;
16617c478bd9Sstevel@tonic-gate 	args->a_nfs4_pp = pp;
16627c478bd9Sstevel@tonic-gate 	args->a_nfs4_off = off;
16637c478bd9Sstevel@tonic-gate 	args->a_nfs4_len = (uint_t)len;
16647c478bd9Sstevel@tonic-gate 	args->a_nfs4_flags = flags;
16657c478bd9Sstevel@tonic-gate 
16667c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
16677c478bd9Sstevel@tonic-gate 
16687c478bd9Sstevel@tonic-gate 	/*
16697c478bd9Sstevel@tonic-gate 	 * If asyncio has been disabled, then make a synchronous request.
16707c478bd9Sstevel@tonic-gate 	 * This check is done a second time in case async io was disabled
16717c478bd9Sstevel@tonic-gate 	 * while this thread was blocked waiting for memory pressure to
16727c478bd9Sstevel@tonic-gate 	 * reduce or for the queue to drain.
16737c478bd9Sstevel@tonic-gate 	 */
16747c478bd9Sstevel@tonic-gate 	if (mi->mi_max_threads == 0) {
16757c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_async_lock);
16767c478bd9Sstevel@tonic-gate 
16777c478bd9Sstevel@tonic-gate 		VN_RELE(vp);
16787c478bd9Sstevel@tonic-gate 		crfree(cr);
16797c478bd9Sstevel@tonic-gate 		kmem_free(args, sizeof (*args));
16807c478bd9Sstevel@tonic-gate 		goto noasync;
16817c478bd9Sstevel@tonic-gate 	}
16827c478bd9Sstevel@tonic-gate 
16837c478bd9Sstevel@tonic-gate 	/*
16847c478bd9Sstevel@tonic-gate 	 * Link request structure into the async list and
16857c478bd9Sstevel@tonic-gate 	 * wakeup async thread to do the i/o.
16867c478bd9Sstevel@tonic-gate 	 */
16877c478bd9Sstevel@tonic-gate 	if (mi->mi_async_reqs[NFS4_PUTAPAGE] == NULL) {
16887c478bd9Sstevel@tonic-gate 		mi->mi_async_reqs[NFS4_PUTAPAGE] = args;
16897c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_PUTAPAGE] = args;
16907c478bd9Sstevel@tonic-gate 	} else {
16917c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_PUTAPAGE]->a_next = args;
16927c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_PUTAPAGE] = args;
16937c478bd9Sstevel@tonic-gate 	}
16947c478bd9Sstevel@tonic-gate 
16957c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
16967c478bd9Sstevel@tonic-gate 	rp->r_count++;
16977c478bd9Sstevel@tonic-gate 	rp->r_awcount++;
16987c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
16997c478bd9Sstevel@tonic-gate 
17007c478bd9Sstevel@tonic-gate 	if (mi->mi_io_kstats) {
17017c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
17027c478bd9Sstevel@tonic-gate 		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
17037c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
17047c478bd9Sstevel@tonic-gate 	}
17057c478bd9Sstevel@tonic-gate 
17067c478bd9Sstevel@tonic-gate 	mi->mi_async_req_count++;
17077c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_async_req_count != 0);
17087c478bd9Sstevel@tonic-gate 	cv_signal(&mi->mi_async_reqs_cv);
17097c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
17107c478bd9Sstevel@tonic-gate 	return (0);
17117c478bd9Sstevel@tonic-gate 
17127c478bd9Sstevel@tonic-gate noasync:
17137c478bd9Sstevel@tonic-gate 
17147c478bd9Sstevel@tonic-gate 	if (curproc == proc_pageout || curproc == proc_fsflush ||
1715108322fbScarlsonj 	    nfs_zone() == mi->mi_zone) {
17167c478bd9Sstevel@tonic-gate 		/*
17177c478bd9Sstevel@tonic-gate 		 * If we get here in the context of the pageout/fsflush,
17187c478bd9Sstevel@tonic-gate 		 * or we have run out of memory or we're attempting to
17197c478bd9Sstevel@tonic-gate 		 * unmount we refuse to do a sync write, because this may
17207c478bd9Sstevel@tonic-gate 		 * hang pageout/fsflush and the machine. In this case,
17217c478bd9Sstevel@tonic-gate 		 * we just re-mark the page as dirty and punt on the page.
17227c478bd9Sstevel@tonic-gate 		 *
17237c478bd9Sstevel@tonic-gate 		 * Make sure B_FORCE isn't set.  We can re-mark the
17247c478bd9Sstevel@tonic-gate 		 * pages as dirty and unlock the pages in one swoop by
17257c478bd9Sstevel@tonic-gate 		 * passing in B_ERROR to pvn_write_done().  However,
17267c478bd9Sstevel@tonic-gate 		 * we should make sure B_FORCE isn't set - we don't
17277c478bd9Sstevel@tonic-gate 		 * want the page tossed before it gets written out.
17287c478bd9Sstevel@tonic-gate 		 */
17297c478bd9Sstevel@tonic-gate 		if (flags & B_FORCE)
17307c478bd9Sstevel@tonic-gate 			flags &= ~(B_INVAL | B_FORCE);
17317c478bd9Sstevel@tonic-gate 		pvn_write_done(pp, flags | B_ERROR);
17327c478bd9Sstevel@tonic-gate 		return (0);
17337c478bd9Sstevel@tonic-gate 	}
17347c478bd9Sstevel@tonic-gate 
17357c478bd9Sstevel@tonic-gate 	/*
1736108322fbScarlsonj 	 * We'll get here only if (nfs_zone() != mi->mi_zone)
17377c478bd9Sstevel@tonic-gate 	 * which means that this was a cross-zone sync putpage.
17387c478bd9Sstevel@tonic-gate 	 *
17397c478bd9Sstevel@tonic-gate 	 * We pass in B_ERROR to pvn_write_done() to re-mark the pages
17407c478bd9Sstevel@tonic-gate 	 * as dirty and unlock them.
17417c478bd9Sstevel@tonic-gate 	 *
17427c478bd9Sstevel@tonic-gate 	 * We don't want to clear B_FORCE here as the caller presumably
17437c478bd9Sstevel@tonic-gate 	 * knows what they're doing if they set it.
17447c478bd9Sstevel@tonic-gate 	 */
17457c478bd9Sstevel@tonic-gate 	pvn_write_done(pp, flags | B_ERROR);
17467c478bd9Sstevel@tonic-gate 	return (EPERM);
17477c478bd9Sstevel@tonic-gate }
17487c478bd9Sstevel@tonic-gate 
17497c478bd9Sstevel@tonic-gate int
17507c478bd9Sstevel@tonic-gate nfs4_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
17517c478bd9Sstevel@tonic-gate 	int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t,
17527c478bd9Sstevel@tonic-gate 	size_t, int, cred_t *))
17537c478bd9Sstevel@tonic-gate {
17547c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
17557c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
17567c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
17577c478bd9Sstevel@tonic-gate 
17587c478bd9Sstevel@tonic-gate 	ASSERT(flags & B_ASYNC);
17597c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_vfsp != NULL);
17607c478bd9Sstevel@tonic-gate 
17617c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
17627c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_count > 0);
17637c478bd9Sstevel@tonic-gate 
17647c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
17657c478bd9Sstevel@tonic-gate 
17667c478bd9Sstevel@tonic-gate 	/*
17677c478bd9Sstevel@tonic-gate 	 * If we can't allocate a request structure, do the pageio
17687c478bd9Sstevel@tonic-gate 	 * request synchronously in this thread's context.
17697c478bd9Sstevel@tonic-gate 	 */
17707c478bd9Sstevel@tonic-gate 	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
17717c478bd9Sstevel@tonic-gate 		goto noasync;
17727c478bd9Sstevel@tonic-gate 
17737c478bd9Sstevel@tonic-gate 	args->a_next = NULL;
17747c478bd9Sstevel@tonic-gate #ifdef DEBUG
17757c478bd9Sstevel@tonic-gate 	args->a_queuer = curthread;
17767c478bd9Sstevel@tonic-gate #endif
17777c478bd9Sstevel@tonic-gate 	VN_HOLD(vp);
17787c478bd9Sstevel@tonic-gate 	args->a_vp = vp;
17797c478bd9Sstevel@tonic-gate 	ASSERT(cr != NULL);
17807c478bd9Sstevel@tonic-gate 	crhold(cr);
17817c478bd9Sstevel@tonic-gate 	args->a_cred = cr;
17827c478bd9Sstevel@tonic-gate 	args->a_io = NFS4_PAGEIO;
17837c478bd9Sstevel@tonic-gate 	args->a_nfs4_pageio = pageio;
17847c478bd9Sstevel@tonic-gate 	args->a_nfs4_pp = pp;
17857c478bd9Sstevel@tonic-gate 	args->a_nfs4_off = io_off;
17867c478bd9Sstevel@tonic-gate 	args->a_nfs4_len = (uint_t)io_len;
17877c478bd9Sstevel@tonic-gate 	args->a_nfs4_flags = flags;
17887c478bd9Sstevel@tonic-gate 
17897c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
17907c478bd9Sstevel@tonic-gate 
17917c478bd9Sstevel@tonic-gate 	/*
17927c478bd9Sstevel@tonic-gate 	 * If asyncio has been disabled, then make a synchronous request.
17937c478bd9Sstevel@tonic-gate 	 * This check is done a second time in case async io was diabled
17947c478bd9Sstevel@tonic-gate 	 * while this thread was blocked waiting for memory pressure to
17957c478bd9Sstevel@tonic-gate 	 * reduce or for the queue to drain.
17967c478bd9Sstevel@tonic-gate 	 */
17977c478bd9Sstevel@tonic-gate 	if (mi->mi_max_threads == 0) {
17987c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_async_lock);
17997c478bd9Sstevel@tonic-gate 
18007c478bd9Sstevel@tonic-gate 		VN_RELE(vp);
18017c478bd9Sstevel@tonic-gate 		crfree(cr);
18027c478bd9Sstevel@tonic-gate 		kmem_free(args, sizeof (*args));
18037c478bd9Sstevel@tonic-gate 		goto noasync;
18047c478bd9Sstevel@tonic-gate 	}
18057c478bd9Sstevel@tonic-gate 
18067c478bd9Sstevel@tonic-gate 	/*
18077c478bd9Sstevel@tonic-gate 	 * Link request structure into the async list and
18087c478bd9Sstevel@tonic-gate 	 * wakeup async thread to do the i/o.
18097c478bd9Sstevel@tonic-gate 	 */
18107c478bd9Sstevel@tonic-gate 	if (mi->mi_async_reqs[NFS4_PAGEIO] == NULL) {
18117c478bd9Sstevel@tonic-gate 		mi->mi_async_reqs[NFS4_PAGEIO] = args;
18127c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_PAGEIO] = args;
18137c478bd9Sstevel@tonic-gate 	} else {
18147c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_PAGEIO]->a_next = args;
18157c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_PAGEIO] = args;
18167c478bd9Sstevel@tonic-gate 	}
18177c478bd9Sstevel@tonic-gate 
18187c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
18197c478bd9Sstevel@tonic-gate 	rp->r_count++;
18207c478bd9Sstevel@tonic-gate 	rp->r_awcount++;
18217c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
18227c478bd9Sstevel@tonic-gate 
18237c478bd9Sstevel@tonic-gate 	if (mi->mi_io_kstats) {
18247c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
18257c478bd9Sstevel@tonic-gate 		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
18267c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
18277c478bd9Sstevel@tonic-gate 	}
18287c478bd9Sstevel@tonic-gate 
18297c478bd9Sstevel@tonic-gate 	mi->mi_async_req_count++;
18307c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_async_req_count != 0);
18317c478bd9Sstevel@tonic-gate 	cv_signal(&mi->mi_async_reqs_cv);
18327c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
18337c478bd9Sstevel@tonic-gate 	return (0);
18347c478bd9Sstevel@tonic-gate 
18357c478bd9Sstevel@tonic-gate noasync:
18367c478bd9Sstevel@tonic-gate 	/*
18377c478bd9Sstevel@tonic-gate 	 * If we can't do it ASYNC, for reads we do nothing (but cleanup
18387c478bd9Sstevel@tonic-gate 	 * the page list), for writes we do it synchronously, except for
18397c478bd9Sstevel@tonic-gate 	 * proc_pageout/proc_fsflush as described below.
18407c478bd9Sstevel@tonic-gate 	 */
18417c478bd9Sstevel@tonic-gate 	if (flags & B_READ) {
18427c478bd9Sstevel@tonic-gate 		pvn_read_done(pp, flags | B_ERROR);
18437c478bd9Sstevel@tonic-gate 		return (0);
18447c478bd9Sstevel@tonic-gate 	}
18457c478bd9Sstevel@tonic-gate 
18467c478bd9Sstevel@tonic-gate 	if (curproc == proc_pageout || curproc == proc_fsflush) {
18477c478bd9Sstevel@tonic-gate 		/*
18487c478bd9Sstevel@tonic-gate 		 * If we get here in the context of the pageout/fsflush,
18497c478bd9Sstevel@tonic-gate 		 * we refuse to do a sync write, because this may hang
18507c478bd9Sstevel@tonic-gate 		 * pageout/fsflush (and the machine). In this case, we just
18517c478bd9Sstevel@tonic-gate 		 * re-mark the page as dirty and punt on the page.
18527c478bd9Sstevel@tonic-gate 		 *
18537c478bd9Sstevel@tonic-gate 		 * Make sure B_FORCE isn't set.  We can re-mark the
18547c478bd9Sstevel@tonic-gate 		 * pages as dirty and unlock the pages in one swoop by
18557c478bd9Sstevel@tonic-gate 		 * passing in B_ERROR to pvn_write_done().  However,
18567c478bd9Sstevel@tonic-gate 		 * we should make sure B_FORCE isn't set - we don't
18577c478bd9Sstevel@tonic-gate 		 * want the page tossed before it gets written out.
18587c478bd9Sstevel@tonic-gate 		 */
18597c478bd9Sstevel@tonic-gate 		if (flags & B_FORCE)
18607c478bd9Sstevel@tonic-gate 			flags &= ~(B_INVAL | B_FORCE);
18617c478bd9Sstevel@tonic-gate 		pvn_write_done(pp, flags | B_ERROR);
18627c478bd9Sstevel@tonic-gate 		return (0);
18637c478bd9Sstevel@tonic-gate 	}
18647c478bd9Sstevel@tonic-gate 
1865108322fbScarlsonj 	if (nfs_zone() != mi->mi_zone) {
18667c478bd9Sstevel@tonic-gate 		/*
18677c478bd9Sstevel@tonic-gate 		 * So this was a cross-zone sync pageio.  We pass in B_ERROR
18687c478bd9Sstevel@tonic-gate 		 * to pvn_write_done() to re-mark the pages as dirty and unlock
18697c478bd9Sstevel@tonic-gate 		 * them.
18707c478bd9Sstevel@tonic-gate 		 *
18717c478bd9Sstevel@tonic-gate 		 * We don't want to clear B_FORCE here as the caller presumably
18727c478bd9Sstevel@tonic-gate 		 * knows what they're doing if they set it.
18737c478bd9Sstevel@tonic-gate 		 */
18747c478bd9Sstevel@tonic-gate 		pvn_write_done(pp, flags | B_ERROR);
18757c478bd9Sstevel@tonic-gate 		return (EPERM);
18767c478bd9Sstevel@tonic-gate 	}
18777c478bd9Sstevel@tonic-gate 	return ((*pageio)(vp, pp, io_off, io_len, flags, cr));
18787c478bd9Sstevel@tonic-gate }
18797c478bd9Sstevel@tonic-gate 
18807c478bd9Sstevel@tonic-gate void
18817c478bd9Sstevel@tonic-gate nfs4_async_readdir(vnode_t *vp, rddir4_cache *rdc, cred_t *cr,
18827c478bd9Sstevel@tonic-gate 	int (*readdir)(vnode_t *, rddir4_cache *, cred_t *))
18837c478bd9Sstevel@tonic-gate {
18847c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
18857c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
18867c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
18877c478bd9Sstevel@tonic-gate 
18887c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
18897c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef == NULL);
18907c478bd9Sstevel@tonic-gate 
18917c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
18927c478bd9Sstevel@tonic-gate 
18937c478bd9Sstevel@tonic-gate 	/*
18947c478bd9Sstevel@tonic-gate 	 * If we can't allocate a request structure, skip the readdir.
18957c478bd9Sstevel@tonic-gate 	 */
18967c478bd9Sstevel@tonic-gate 	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
18977c478bd9Sstevel@tonic-gate 		goto noasync;
18987c478bd9Sstevel@tonic-gate 
18997c478bd9Sstevel@tonic-gate 	args->a_next = NULL;
19007c478bd9Sstevel@tonic-gate #ifdef DEBUG
19017c478bd9Sstevel@tonic-gate 	args->a_queuer = curthread;
19027c478bd9Sstevel@tonic-gate #endif
19037c478bd9Sstevel@tonic-gate 	VN_HOLD(vp);
19047c478bd9Sstevel@tonic-gate 	args->a_vp = vp;
19057c478bd9Sstevel@tonic-gate 	ASSERT(cr != NULL);
19067c478bd9Sstevel@tonic-gate 	crhold(cr);
19077c478bd9Sstevel@tonic-gate 	args->a_cred = cr;
19087c478bd9Sstevel@tonic-gate 	args->a_io = NFS4_READDIR;
19097c478bd9Sstevel@tonic-gate 	args->a_nfs4_readdir = readdir;
19107c478bd9Sstevel@tonic-gate 	args->a_nfs4_rdc = rdc;
19117c478bd9Sstevel@tonic-gate 
19127c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
19137c478bd9Sstevel@tonic-gate 
19147c478bd9Sstevel@tonic-gate 	/*
19157c478bd9Sstevel@tonic-gate 	 * If asyncio has been disabled, then skip this request
19167c478bd9Sstevel@tonic-gate 	 */
19177c478bd9Sstevel@tonic-gate 	if (mi->mi_max_threads == 0) {
19187c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_async_lock);
19197c478bd9Sstevel@tonic-gate 
19207c478bd9Sstevel@tonic-gate 		VN_RELE(vp);
19217c478bd9Sstevel@tonic-gate 		crfree(cr);
19227c478bd9Sstevel@tonic-gate 		kmem_free(args, sizeof (*args));
19237c478bd9Sstevel@tonic-gate 		goto noasync;
19247c478bd9Sstevel@tonic-gate 	}
19257c478bd9Sstevel@tonic-gate 
19267c478bd9Sstevel@tonic-gate 	/*
19277c478bd9Sstevel@tonic-gate 	 * Link request structure into the async list and
19287c478bd9Sstevel@tonic-gate 	 * wakeup async thread to do the i/o.
19297c478bd9Sstevel@tonic-gate 	 */
19307c478bd9Sstevel@tonic-gate 	if (mi->mi_async_reqs[NFS4_READDIR] == NULL) {
19317c478bd9Sstevel@tonic-gate 		mi->mi_async_reqs[NFS4_READDIR] = args;
19327c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_READDIR] = args;
19337c478bd9Sstevel@tonic-gate 	} else {
19347c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_READDIR]->a_next = args;
19357c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_READDIR] = args;
19367c478bd9Sstevel@tonic-gate 	}
19377c478bd9Sstevel@tonic-gate 
19387c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
19397c478bd9Sstevel@tonic-gate 	rp->r_count++;
19407c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
19417c478bd9Sstevel@tonic-gate 
19427c478bd9Sstevel@tonic-gate 	if (mi->mi_io_kstats) {
19437c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
19447c478bd9Sstevel@tonic-gate 		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
19457c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
19467c478bd9Sstevel@tonic-gate 	}
19477c478bd9Sstevel@tonic-gate 
19487c478bd9Sstevel@tonic-gate 	mi->mi_async_req_count++;
19497c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_async_req_count != 0);
19507c478bd9Sstevel@tonic-gate 	cv_signal(&mi->mi_async_reqs_cv);
19517c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
19527c478bd9Sstevel@tonic-gate 	return;
19537c478bd9Sstevel@tonic-gate 
19547c478bd9Sstevel@tonic-gate noasync:
19557c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
19567c478bd9Sstevel@tonic-gate 	rdc->entries = NULL;
19577c478bd9Sstevel@tonic-gate 	/*
19587c478bd9Sstevel@tonic-gate 	 * Indicate that no one is trying to fill this entry and
19597c478bd9Sstevel@tonic-gate 	 * it still needs to be filled.
19607c478bd9Sstevel@tonic-gate 	 */
19617c478bd9Sstevel@tonic-gate 	rdc->flags &= ~RDDIR;
19627c478bd9Sstevel@tonic-gate 	rdc->flags |= RDDIRREQ;
19637c478bd9Sstevel@tonic-gate 	rddir4_cache_rele(rp, rdc);
19647c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
19657c478bd9Sstevel@tonic-gate }
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate void
19687c478bd9Sstevel@tonic-gate nfs4_async_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count,
19697c478bd9Sstevel@tonic-gate 	cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3,
19707c478bd9Sstevel@tonic-gate 	cred_t *))
19717c478bd9Sstevel@tonic-gate {
19727c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
19737c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
19747c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
19757c478bd9Sstevel@tonic-gate 	page_t *pp;
19767c478bd9Sstevel@tonic-gate 
19777c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
19787c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
19797c478bd9Sstevel@tonic-gate 
19807c478bd9Sstevel@tonic-gate 	/*
19817c478bd9Sstevel@tonic-gate 	 * If we can't allocate a request structure, do the commit
19827c478bd9Sstevel@tonic-gate 	 * operation synchronously in this thread's context.
19837c478bd9Sstevel@tonic-gate 	 */
19847c478bd9Sstevel@tonic-gate 	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
19857c478bd9Sstevel@tonic-gate 		goto noasync;
19867c478bd9Sstevel@tonic-gate 
19877c478bd9Sstevel@tonic-gate 	args->a_next = NULL;
19887c478bd9Sstevel@tonic-gate #ifdef DEBUG
19897c478bd9Sstevel@tonic-gate 	args->a_queuer = curthread;
19907c478bd9Sstevel@tonic-gate #endif
19917c478bd9Sstevel@tonic-gate 	VN_HOLD(vp);
19927c478bd9Sstevel@tonic-gate 	args->a_vp = vp;
19937c478bd9Sstevel@tonic-gate 	ASSERT(cr != NULL);
19947c478bd9Sstevel@tonic-gate 	crhold(cr);
19957c478bd9Sstevel@tonic-gate 	args->a_cred = cr;
19967c478bd9Sstevel@tonic-gate 	args->a_io = NFS4_COMMIT;
19977c478bd9Sstevel@tonic-gate 	args->a_nfs4_commit = commit;
19987c478bd9Sstevel@tonic-gate 	args->a_nfs4_plist = plist;
19997c478bd9Sstevel@tonic-gate 	args->a_nfs4_offset = offset;
20007c478bd9Sstevel@tonic-gate 	args->a_nfs4_count = count;
20017c478bd9Sstevel@tonic-gate 
20027c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
20037c478bd9Sstevel@tonic-gate 
20047c478bd9Sstevel@tonic-gate 	/*
20057c478bd9Sstevel@tonic-gate 	 * If asyncio has been disabled, then make a synchronous request.
20067c478bd9Sstevel@tonic-gate 	 * This check is done a second time in case async io was diabled
20077c478bd9Sstevel@tonic-gate 	 * while this thread was blocked waiting for memory pressure to
20087c478bd9Sstevel@tonic-gate 	 * reduce or for the queue to drain.
20097c478bd9Sstevel@tonic-gate 	 */
20107c478bd9Sstevel@tonic-gate 	if (mi->mi_max_threads == 0) {
20117c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_async_lock);
20127c478bd9Sstevel@tonic-gate 
20137c478bd9Sstevel@tonic-gate 		VN_RELE(vp);
20147c478bd9Sstevel@tonic-gate 		crfree(cr);
20157c478bd9Sstevel@tonic-gate 		kmem_free(args, sizeof (*args));
20167c478bd9Sstevel@tonic-gate 		goto noasync;
20177c478bd9Sstevel@tonic-gate 	}
20187c478bd9Sstevel@tonic-gate 
20197c478bd9Sstevel@tonic-gate 	/*
20207c478bd9Sstevel@tonic-gate 	 * Link request structure into the async list and
20217c478bd9Sstevel@tonic-gate 	 * wakeup async thread to do the i/o.
20227c478bd9Sstevel@tonic-gate 	 */
20237c478bd9Sstevel@tonic-gate 	if (mi->mi_async_reqs[NFS4_COMMIT] == NULL) {
20247c478bd9Sstevel@tonic-gate 		mi->mi_async_reqs[NFS4_COMMIT] = args;
20257c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_COMMIT] = args;
20267c478bd9Sstevel@tonic-gate 	} else {
20277c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_COMMIT]->a_next = args;
20287c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_COMMIT] = args;
20297c478bd9Sstevel@tonic-gate 	}
20307c478bd9Sstevel@tonic-gate 
20317c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
20327c478bd9Sstevel@tonic-gate 	rp->r_count++;
20337c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
20347c478bd9Sstevel@tonic-gate 
20357c478bd9Sstevel@tonic-gate 	if (mi->mi_io_kstats) {
20367c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
20377c478bd9Sstevel@tonic-gate 		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
20387c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
20397c478bd9Sstevel@tonic-gate 	}
20407c478bd9Sstevel@tonic-gate 
20417c478bd9Sstevel@tonic-gate 	mi->mi_async_req_count++;
20427c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_async_req_count != 0);
20437c478bd9Sstevel@tonic-gate 	cv_signal(&mi->mi_async_reqs_cv);
20447c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
20457c478bd9Sstevel@tonic-gate 	return;
20467c478bd9Sstevel@tonic-gate 
20477c478bd9Sstevel@tonic-gate noasync:
20487c478bd9Sstevel@tonic-gate 	if (curproc == proc_pageout || curproc == proc_fsflush ||
2049108322fbScarlsonj 	    nfs_zone() != mi->mi_zone) {
20507c478bd9Sstevel@tonic-gate 		while (plist != NULL) {
20517c478bd9Sstevel@tonic-gate 			pp = plist;
20527c478bd9Sstevel@tonic-gate 			page_sub(&plist, pp);
20537c478bd9Sstevel@tonic-gate 			pp->p_fsdata = C_COMMIT;
20547c478bd9Sstevel@tonic-gate 			page_unlock(pp);
20557c478bd9Sstevel@tonic-gate 		}
20567c478bd9Sstevel@tonic-gate 		return;
20577c478bd9Sstevel@tonic-gate 	}
20587c478bd9Sstevel@tonic-gate 	(*commit)(vp, plist, offset, count, cr);
20597c478bd9Sstevel@tonic-gate }
20607c478bd9Sstevel@tonic-gate 
20617c478bd9Sstevel@tonic-gate /*
20627c478bd9Sstevel@tonic-gate  * nfs4_async_inactive - hand off a VOP_INACTIVE call to a thread.  The
20637c478bd9Sstevel@tonic-gate  * reference to the vnode is handed over to the thread; the caller should
20647c478bd9Sstevel@tonic-gate  * no longer refer to the vnode.
20657c478bd9Sstevel@tonic-gate  *
20667c478bd9Sstevel@tonic-gate  * Unlike most of the async routines, this handoff is needed for
20677c478bd9Sstevel@tonic-gate  * correctness reasons, not just performance.  So doing operations in the
20687c478bd9Sstevel@tonic-gate  * context of the current thread is not an option.
20697c478bd9Sstevel@tonic-gate  */
20707c478bd9Sstevel@tonic-gate void
20717c478bd9Sstevel@tonic-gate nfs4_async_inactive(vnode_t *vp, cred_t *cr)
20727c478bd9Sstevel@tonic-gate {
20737c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
20747c478bd9Sstevel@tonic-gate 	struct nfs4_async_reqs *args;
20757c478bd9Sstevel@tonic-gate 	boolean_t signal_inactive_thread = B_FALSE;
20767c478bd9Sstevel@tonic-gate 
20777c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
20787c478bd9Sstevel@tonic-gate 
20797c478bd9Sstevel@tonic-gate 	args = kmem_alloc(sizeof (*args), KM_SLEEP);
20807c478bd9Sstevel@tonic-gate 	args->a_next = NULL;
20817c478bd9Sstevel@tonic-gate #ifdef DEBUG
20827c478bd9Sstevel@tonic-gate 	args->a_queuer = curthread;
20837c478bd9Sstevel@tonic-gate #endif
20847c478bd9Sstevel@tonic-gate 	args->a_vp = vp;
20857c478bd9Sstevel@tonic-gate 	ASSERT(cr != NULL);
20867c478bd9Sstevel@tonic-gate 	crhold(cr);
20877c478bd9Sstevel@tonic-gate 	args->a_cred = cr;
20887c478bd9Sstevel@tonic-gate 	args->a_io = NFS4_INACTIVE;
20897c478bd9Sstevel@tonic-gate 
20907c478bd9Sstevel@tonic-gate 	/*
20917c478bd9Sstevel@tonic-gate 	 * Note that we don't check mi->mi_max_threads here, since we
20927c478bd9Sstevel@tonic-gate 	 * *need* to get rid of this vnode regardless of whether someone
20937c478bd9Sstevel@tonic-gate 	 * set nfs4_max_threads to zero in /etc/system.
20947c478bd9Sstevel@tonic-gate 	 *
20957c478bd9Sstevel@tonic-gate 	 * The manager thread knows about this and is willing to create
20967c478bd9Sstevel@tonic-gate 	 * at least one thread to accomodate us.
20977c478bd9Sstevel@tonic-gate 	 */
20987c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
20997c478bd9Sstevel@tonic-gate 	if (mi->mi_inactive_thread == NULL) {
21007c478bd9Sstevel@tonic-gate 		rnode4_t *rp;
21017c478bd9Sstevel@tonic-gate 		vnode_t *unldvp = NULL;
21027c478bd9Sstevel@tonic-gate 		char *unlname;
21037c478bd9Sstevel@tonic-gate 		cred_t *unlcred;
21047c478bd9Sstevel@tonic-gate 
21057c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_async_lock);
21067c478bd9Sstevel@tonic-gate 		/*
21077c478bd9Sstevel@tonic-gate 		 * We just need to free up the memory associated with the
21087c478bd9Sstevel@tonic-gate 		 * vnode, which can be safely done from within the current
21097c478bd9Sstevel@tonic-gate 		 * context.
21107c478bd9Sstevel@tonic-gate 		 */
21117c478bd9Sstevel@tonic-gate 		crfree(cr);	/* drop our reference */
21127c478bd9Sstevel@tonic-gate 		kmem_free(args, sizeof (*args));
21137c478bd9Sstevel@tonic-gate 		rp = VTOR4(vp);
21147c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
21157c478bd9Sstevel@tonic-gate 		if (rp->r_unldvp != NULL) {
21167c478bd9Sstevel@tonic-gate 			unldvp = rp->r_unldvp;
21177c478bd9Sstevel@tonic-gate 			rp->r_unldvp = NULL;
21187c478bd9Sstevel@tonic-gate 			unlname = rp->r_unlname;
21197c478bd9Sstevel@tonic-gate 			rp->r_unlname = NULL;
21207c478bd9Sstevel@tonic-gate 			unlcred = rp->r_unlcred;
21217c478bd9Sstevel@tonic-gate 			rp->r_unlcred = NULL;
21227c478bd9Sstevel@tonic-gate 		}
21237c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
21247c478bd9Sstevel@tonic-gate 		/*
21257c478bd9Sstevel@tonic-gate 		 * No need to explicitly throw away any cached pages.  The
21267c478bd9Sstevel@tonic-gate 		 * eventual r4inactive() will attempt a synchronous
21277c478bd9Sstevel@tonic-gate 		 * VOP_PUTPAGE() which will immediately fail since the request
21287c478bd9Sstevel@tonic-gate 		 * is coming from the wrong zone, and then will proceed to call
21297c478bd9Sstevel@tonic-gate 		 * nfs4_invalidate_pages() which will clean things up for us.
21307c478bd9Sstevel@tonic-gate 		 *
21317c478bd9Sstevel@tonic-gate 		 * Throw away the delegation here so rp4_addfree()'s attempt to
21327c478bd9Sstevel@tonic-gate 		 * return any existing delegations becomes a no-op.
21337c478bd9Sstevel@tonic-gate 		 */
213450a83466Sjwahlig 		if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
213550a83466Sjwahlig 			(void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
213650a83466Sjwahlig 				FALSE);
21377c478bd9Sstevel@tonic-gate 			(void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
213850a83466Sjwahlig 			nfs_rw_exit(&mi->mi_recovlock);
213950a83466Sjwahlig 		}
21407c478bd9Sstevel@tonic-gate 		nfs4_clear_open_streams(rp);
21417c478bd9Sstevel@tonic-gate 
21427c478bd9Sstevel@tonic-gate 		rp4_addfree(rp, cr);
21437c478bd9Sstevel@tonic-gate 		if (unldvp != NULL) {
21447c478bd9Sstevel@tonic-gate 			kmem_free(unlname, MAXNAMELEN);
21457c478bd9Sstevel@tonic-gate 			VN_RELE(unldvp);
21467c478bd9Sstevel@tonic-gate 			crfree(unlcred);
21477c478bd9Sstevel@tonic-gate 		}
21487c478bd9Sstevel@tonic-gate 		return;
21497c478bd9Sstevel@tonic-gate 	}
21507c478bd9Sstevel@tonic-gate 
21517c478bd9Sstevel@tonic-gate 	if (mi->mi_manager_thread == NULL) {
21527c478bd9Sstevel@tonic-gate 		/*
21537c478bd9Sstevel@tonic-gate 		 * We want to talk to the inactive thread.
21547c478bd9Sstevel@tonic-gate 		 */
21557c478bd9Sstevel@tonic-gate 		signal_inactive_thread = B_TRUE;
21567c478bd9Sstevel@tonic-gate 	}
21577c478bd9Sstevel@tonic-gate 
21587c478bd9Sstevel@tonic-gate 	/*
21597c478bd9Sstevel@tonic-gate 	 * Enqueue the vnode and wake up either the special thread (empty
21607c478bd9Sstevel@tonic-gate 	 * list) or an async thread.
21617c478bd9Sstevel@tonic-gate 	 */
21627c478bd9Sstevel@tonic-gate 	if (mi->mi_async_reqs[NFS4_INACTIVE] == NULL) {
21637c478bd9Sstevel@tonic-gate 		mi->mi_async_reqs[NFS4_INACTIVE] = args;
21647c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_INACTIVE] = args;
21657c478bd9Sstevel@tonic-gate 		signal_inactive_thread = B_TRUE;
21667c478bd9Sstevel@tonic-gate 	} else {
21677c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_INACTIVE]->a_next = args;
21687c478bd9Sstevel@tonic-gate 		mi->mi_async_tail[NFS4_INACTIVE] = args;
21697c478bd9Sstevel@tonic-gate 	}
21707c478bd9Sstevel@tonic-gate 	if (signal_inactive_thread) {
21717c478bd9Sstevel@tonic-gate 		cv_signal(&mi->mi_inact_req_cv);
21727c478bd9Sstevel@tonic-gate 	} else  {
21737c478bd9Sstevel@tonic-gate 		mi->mi_async_req_count++;
21747c478bd9Sstevel@tonic-gate 		ASSERT(mi->mi_async_req_count != 0);
21757c478bd9Sstevel@tonic-gate 		cv_signal(&mi->mi_async_reqs_cv);
21767c478bd9Sstevel@tonic-gate 	}
21777c478bd9Sstevel@tonic-gate 
21787c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_async_lock);
21797c478bd9Sstevel@tonic-gate }
21807c478bd9Sstevel@tonic-gate 
21817c478bd9Sstevel@tonic-gate int
21827c478bd9Sstevel@tonic-gate writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
21837c478bd9Sstevel@tonic-gate {
21847c478bd9Sstevel@tonic-gate 	int pagecreate;
21857c478bd9Sstevel@tonic-gate 	int n;
21867c478bd9Sstevel@tonic-gate 	int saved_n;
21877c478bd9Sstevel@tonic-gate 	caddr_t saved_base;
21887c478bd9Sstevel@tonic-gate 	u_offset_t offset;
21897c478bd9Sstevel@tonic-gate 	int error;
21907c478bd9Sstevel@tonic-gate 	int sm_error;
2191a5652762Spraks 	vnode_t *vp = RTOV(rp);
21927c478bd9Sstevel@tonic-gate 
21937c478bd9Sstevel@tonic-gate 	ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
21947c478bd9Sstevel@tonic-gate 	ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER));
2195a5652762Spraks 	if (!vpm_enable) {
2196a5652762Spraks 		ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
2197a5652762Spraks 	}
21987c478bd9Sstevel@tonic-gate 
21997c478bd9Sstevel@tonic-gate 	/*
22007c478bd9Sstevel@tonic-gate 	 * Move bytes in at most PAGESIZE chunks. We must avoid
22017c478bd9Sstevel@tonic-gate 	 * spanning pages in uiomove() because page faults may cause
22027c478bd9Sstevel@tonic-gate 	 * the cache to be invalidated out from under us. The r_size is not
22037c478bd9Sstevel@tonic-gate 	 * updated until after the uiomove. If we push the last page of a
22047c478bd9Sstevel@tonic-gate 	 * file before r_size is correct, we will lose the data written past
22057c478bd9Sstevel@tonic-gate 	 * the current (and invalid) r_size.
22067c478bd9Sstevel@tonic-gate 	 */
22077c478bd9Sstevel@tonic-gate 	do {
22087c478bd9Sstevel@tonic-gate 		offset = uio->uio_loffset;
22097c478bd9Sstevel@tonic-gate 		pagecreate = 0;
22107c478bd9Sstevel@tonic-gate 
22117c478bd9Sstevel@tonic-gate 		/*
22127c478bd9Sstevel@tonic-gate 		 * n is the number of bytes required to satisfy the request
22137c478bd9Sstevel@tonic-gate 		 *   or the number of bytes to fill out the page.
22147c478bd9Sstevel@tonic-gate 		 */
2215a5652762Spraks 		n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
22167c478bd9Sstevel@tonic-gate 
22177c478bd9Sstevel@tonic-gate 		/*
22187c478bd9Sstevel@tonic-gate 		 * Check to see if we can skip reading in the page
22197c478bd9Sstevel@tonic-gate 		 * and just allocate the memory.  We can do this
22207c478bd9Sstevel@tonic-gate 		 * if we are going to rewrite the entire mapping
22217c478bd9Sstevel@tonic-gate 		 * or if we are going to write to or beyond the current
22227c478bd9Sstevel@tonic-gate 		 * end of file from the beginning of the mapping.
22237c478bd9Sstevel@tonic-gate 		 *
22247c478bd9Sstevel@tonic-gate 		 * The read of r_size is now protected by r_statelock.
22257c478bd9Sstevel@tonic-gate 		 */
22267c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
22277c478bd9Sstevel@tonic-gate 		/*
22287c478bd9Sstevel@tonic-gate 		 * When pgcreated is nonzero the caller has already done
22297c478bd9Sstevel@tonic-gate 		 * a segmap_getmapflt with forcefault 0 and S_WRITE. With
22307c478bd9Sstevel@tonic-gate 		 * segkpm this means we already have at least one page
22317c478bd9Sstevel@tonic-gate 		 * created and mapped at base.
22327c478bd9Sstevel@tonic-gate 		 */
22337c478bd9Sstevel@tonic-gate 		pagecreate = pgcreated ||
2234a5652762Spraks 			((offset & PAGEOFFSET) == 0 &&
22357c478bd9Sstevel@tonic-gate 			(n == PAGESIZE || ((offset + n) >= rp->r_size)));
22367c478bd9Sstevel@tonic-gate 
22377c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
22387c478bd9Sstevel@tonic-gate 
2239a5652762Spraks 		if (!vpm_enable && pagecreate) {
22407c478bd9Sstevel@tonic-gate 			/*
22417c478bd9Sstevel@tonic-gate 			 * The last argument tells segmap_pagecreate() to
22427c478bd9Sstevel@tonic-gate 			 * always lock the page, as opposed to sometimes
22437c478bd9Sstevel@tonic-gate 			 * returning with the page locked. This way we avoid a
22447c478bd9Sstevel@tonic-gate 			 * fault on the ensuing uiomove(), but also
22457c478bd9Sstevel@tonic-gate 			 * more importantly (to fix bug 1094402) we can
22467c478bd9Sstevel@tonic-gate 			 * call segmap_fault() to unlock the page in all
22477c478bd9Sstevel@tonic-gate 			 * cases. An alternative would be to modify
22487c478bd9Sstevel@tonic-gate 			 * segmap_pagecreate() to tell us when it is
22497c478bd9Sstevel@tonic-gate 			 * locking a page, but that's a fairly major
22507c478bd9Sstevel@tonic-gate 			 * interface change.
22517c478bd9Sstevel@tonic-gate 			 */
22527c478bd9Sstevel@tonic-gate 			if (pgcreated == 0)
22537c478bd9Sstevel@tonic-gate 				(void) segmap_pagecreate(segkmap, base,
22547c478bd9Sstevel@tonic-gate 							(uint_t)n, 1);
22557c478bd9Sstevel@tonic-gate 			saved_base = base;
22567c478bd9Sstevel@tonic-gate 			saved_n = n;
22577c478bd9Sstevel@tonic-gate 		}
22587c478bd9Sstevel@tonic-gate 
22597c478bd9Sstevel@tonic-gate 		/*
		 * The number of bytes of data in the last page cannot
		 * be accurately determined while the page is being
		 * uiomove'd to and the size of the file is being updated.
22637c478bd9Sstevel@tonic-gate 		 * Thus, inform threads which need to know accurately
22647c478bd9Sstevel@tonic-gate 		 * how much data is in the last page of the file.  They
22657c478bd9Sstevel@tonic-gate 		 * will not do the i/o immediately, but will arrange for
22667c478bd9Sstevel@tonic-gate 		 * the i/o to happen later when this modify operation
22677c478bd9Sstevel@tonic-gate 		 * will have finished.
22687c478bd9Sstevel@tonic-gate 		 */
22697c478bd9Sstevel@tonic-gate 		ASSERT(!(rp->r_flags & R4MODINPROGRESS));
22707c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
22717c478bd9Sstevel@tonic-gate 		rp->r_flags |= R4MODINPROGRESS;
22727c478bd9Sstevel@tonic-gate 		rp->r_modaddr = (offset & MAXBMASK);
22737c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
22747c478bd9Sstevel@tonic-gate 
2275a5652762Spraks 		if (vpm_enable) {
2276a5652762Spraks 			/*
			 * Copy data. If new pages are created, part of
			 * the page that is not written will be initialized
			 * with zeros.
2280a5652762Spraks 			 */
2281a5652762Spraks 			error = vpm_data_copy(vp, offset, n, uio,
2282a5652762Spraks 				!pagecreate, NULL, 0, S_WRITE);
2283a5652762Spraks 		} else {
2284a5652762Spraks 			error = uiomove(base, n, UIO_WRITE, uio);
2285a5652762Spraks 		}
22867c478bd9Sstevel@tonic-gate 
22877c478bd9Sstevel@tonic-gate 		/*
22887c478bd9Sstevel@tonic-gate 		 * r_size is the maximum number of
22897c478bd9Sstevel@tonic-gate 		 * bytes known to be in the file.
22907c478bd9Sstevel@tonic-gate 		 * Make sure it is at least as high as the
22917c478bd9Sstevel@tonic-gate 		 * first unwritten byte pointed to by uio_loffset.
22927c478bd9Sstevel@tonic-gate 		 */
22937c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
22947c478bd9Sstevel@tonic-gate 		if (rp->r_size < uio->uio_loffset)
22957c478bd9Sstevel@tonic-gate 			rp->r_size = uio->uio_loffset;
22967c478bd9Sstevel@tonic-gate 		rp->r_flags &= ~R4MODINPROGRESS;
22977c478bd9Sstevel@tonic-gate 		rp->r_flags |= R4DIRTY;
22987c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
22997c478bd9Sstevel@tonic-gate 
23007c478bd9Sstevel@tonic-gate 		/* n = # of bytes written */
23017c478bd9Sstevel@tonic-gate 		n = (int)(uio->uio_loffset - offset);
2302a5652762Spraks 
2303a5652762Spraks 		if (!vpm_enable) {
2304a5652762Spraks 			base += n;
2305a5652762Spraks 		}
2306a5652762Spraks 
23077c478bd9Sstevel@tonic-gate 		tcount -= n;
23087c478bd9Sstevel@tonic-gate 		/*
23097c478bd9Sstevel@tonic-gate 		 * If we created pages w/o initializing them completely,
23107c478bd9Sstevel@tonic-gate 		 * we need to zero the part that wasn't set up.
		 * This happens in most EOF write cases and if
23127c478bd9Sstevel@tonic-gate 		 * we had some sort of error during the uiomove.
23137c478bd9Sstevel@tonic-gate 		 */
2314a5652762Spraks 		if (!vpm_enable && pagecreate) {
23157c478bd9Sstevel@tonic-gate 			if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
23167c478bd9Sstevel@tonic-gate 				(void) kzero(base, PAGESIZE - n);
23177c478bd9Sstevel@tonic-gate 
23187c478bd9Sstevel@tonic-gate 			if (pgcreated) {
23197c478bd9Sstevel@tonic-gate 				/*
23207c478bd9Sstevel@tonic-gate 				 * Caller is responsible for this page,
23217c478bd9Sstevel@tonic-gate 				 * it was not created in this loop.
23227c478bd9Sstevel@tonic-gate 				 */
23237c478bd9Sstevel@tonic-gate 				pgcreated = 0;
23247c478bd9Sstevel@tonic-gate 			} else {
23257c478bd9Sstevel@tonic-gate 				/*
23267c478bd9Sstevel@tonic-gate 				 * For bug 1094402: segmap_pagecreate locks
23277c478bd9Sstevel@tonic-gate 				 * page. Unlock it. This also unlocks the
23287c478bd9Sstevel@tonic-gate 				 * pages allocated by page_create_va() in
23297c478bd9Sstevel@tonic-gate 				 * segmap_pagecreate().
23307c478bd9Sstevel@tonic-gate 				 */
23317c478bd9Sstevel@tonic-gate 				sm_error = segmap_fault(kas.a_hat, segkmap,
2332a5652762Spraks 					saved_base, saved_n,
2333a5652762Spraks 					F_SOFTUNLOCK, S_WRITE);
23347c478bd9Sstevel@tonic-gate 				if (error == 0)
23357c478bd9Sstevel@tonic-gate 					error = sm_error;
23367c478bd9Sstevel@tonic-gate 			}
23377c478bd9Sstevel@tonic-gate 		}
23387c478bd9Sstevel@tonic-gate 	} while (tcount > 0 && error == 0);
23397c478bd9Sstevel@tonic-gate 
23407c478bd9Sstevel@tonic-gate 	return (error);
23417c478bd9Sstevel@tonic-gate }
23427c478bd9Sstevel@tonic-gate 
23437c478bd9Sstevel@tonic-gate int
23447c478bd9Sstevel@tonic-gate nfs4_putpages(vnode_t *vp, u_offset_t off, size_t len, int flags, cred_t *cr)
23457c478bd9Sstevel@tonic-gate {
23467c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
23477c478bd9Sstevel@tonic-gate 	page_t *pp;
23487c478bd9Sstevel@tonic-gate 	u_offset_t eoff;
23497c478bd9Sstevel@tonic-gate 	u_offset_t io_off;
23507c478bd9Sstevel@tonic-gate 	size_t io_len;
23517c478bd9Sstevel@tonic-gate 	int error;
23527c478bd9Sstevel@tonic-gate 	int rdirty;
23537c478bd9Sstevel@tonic-gate 	int err;
23547c478bd9Sstevel@tonic-gate 
23557c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
23567c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_count > 0);
23577c478bd9Sstevel@tonic-gate 
23587c478bd9Sstevel@tonic-gate 	if (!nfs4_has_pages(vp))
23597c478bd9Sstevel@tonic-gate 		return (0);
23607c478bd9Sstevel@tonic-gate 
23617c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_type != VCHR);
23627c478bd9Sstevel@tonic-gate 
23637c478bd9Sstevel@tonic-gate 	/*
23647c478bd9Sstevel@tonic-gate 	 * If R4OUTOFSPACE is set, then all writes turn into B_INVAL
23657c478bd9Sstevel@tonic-gate 	 * writes.  B_FORCE is set to force the VM system to actually
23667c478bd9Sstevel@tonic-gate 	 * invalidate the pages, even if the i/o failed.  The pages
23677c478bd9Sstevel@tonic-gate 	 * need to get invalidated because they can't be written out
23687c478bd9Sstevel@tonic-gate 	 * because there isn't any space left on either the server's
23697c478bd9Sstevel@tonic-gate 	 * file system or in the user's disk quota.  The B_FREE bit
23707c478bd9Sstevel@tonic-gate 	 * is cleared to avoid confusion as to whether this is a
23717c478bd9Sstevel@tonic-gate 	 * request to place the page on the freelist or to destroy
23727c478bd9Sstevel@tonic-gate 	 * it.
23737c478bd9Sstevel@tonic-gate 	 */
23747c478bd9Sstevel@tonic-gate 	if ((rp->r_flags & R4OUTOFSPACE) ||
23757c478bd9Sstevel@tonic-gate 	    (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
23767c478bd9Sstevel@tonic-gate 		flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;
23777c478bd9Sstevel@tonic-gate 
23787c478bd9Sstevel@tonic-gate 	if (len == 0) {
23797c478bd9Sstevel@tonic-gate 		/*
23807c478bd9Sstevel@tonic-gate 		 * If doing a full file synchronous operation, then clear
23817c478bd9Sstevel@tonic-gate 		 * the R4DIRTY bit.  If a page gets dirtied while the flush
23827c478bd9Sstevel@tonic-gate 		 * is happening, then R4DIRTY will get set again.  The
23837c478bd9Sstevel@tonic-gate 		 * R4DIRTY bit must get cleared before the flush so that
23847c478bd9Sstevel@tonic-gate 		 * we don't lose this information.
238584d68d8eSthurlow 		 *
238684d68d8eSthurlow 		 * If there are no full file async write operations
238784d68d8eSthurlow 		 * pending and RDIRTY bit is set, clear it.
23887c478bd9Sstevel@tonic-gate 		 */
23897c478bd9Sstevel@tonic-gate 		if (off == (u_offset_t)0 &&
23907c478bd9Sstevel@tonic-gate 		    !(flags & B_ASYNC) &&
23917c478bd9Sstevel@tonic-gate 		    (rp->r_flags & R4DIRTY)) {
23927c478bd9Sstevel@tonic-gate 			mutex_enter(&rp->r_statelock);
23937c478bd9Sstevel@tonic-gate 			rdirty = (rp->r_flags & R4DIRTY);
23947c478bd9Sstevel@tonic-gate 			rp->r_flags &= ~R4DIRTY;
23957c478bd9Sstevel@tonic-gate 			mutex_exit(&rp->r_statelock);
239684d68d8eSthurlow 		} else if (flags & B_ASYNC && off == (u_offset_t)0) {
239784d68d8eSthurlow 			mutex_enter(&rp->r_statelock);
239884d68d8eSthurlow 			if (rp->r_flags & R4DIRTY && rp->r_awcount == 0) {
239984d68d8eSthurlow 				rdirty = (rp->r_flags & R4DIRTY);
240084d68d8eSthurlow 				rp->r_flags &= ~R4DIRTY;
240184d68d8eSthurlow 			}
240284d68d8eSthurlow 			mutex_exit(&rp->r_statelock);
24037c478bd9Sstevel@tonic-gate 		} else
24047c478bd9Sstevel@tonic-gate 			rdirty = 0;
24057c478bd9Sstevel@tonic-gate 
24067c478bd9Sstevel@tonic-gate 		/*
24077c478bd9Sstevel@tonic-gate 		 * Search the entire vp list for pages >= off, and flush
24087c478bd9Sstevel@tonic-gate 		 * the dirty pages.
24097c478bd9Sstevel@tonic-gate 		 */
24107c478bd9Sstevel@tonic-gate 		error = pvn_vplist_dirty(vp, off, rp->r_putapage,
24117c478bd9Sstevel@tonic-gate 					flags, cr);
24127c478bd9Sstevel@tonic-gate 
24137c478bd9Sstevel@tonic-gate 		/*
24147c478bd9Sstevel@tonic-gate 		 * If an error occured and the file was marked as dirty
24157c478bd9Sstevel@tonic-gate 		 * before and we aren't forcibly invalidating pages, then
24167c478bd9Sstevel@tonic-gate 		 * reset the R4DIRTY flag.
24177c478bd9Sstevel@tonic-gate 		 */
24187c478bd9Sstevel@tonic-gate 		if (error && rdirty &&
24197c478bd9Sstevel@tonic-gate 		    (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
24207c478bd9Sstevel@tonic-gate 			mutex_enter(&rp->r_statelock);
24217c478bd9Sstevel@tonic-gate 			rp->r_flags |= R4DIRTY;
24227c478bd9Sstevel@tonic-gate 			mutex_exit(&rp->r_statelock);
24237c478bd9Sstevel@tonic-gate 		}
24247c478bd9Sstevel@tonic-gate 	} else {
24257c478bd9Sstevel@tonic-gate 		/*
24267c478bd9Sstevel@tonic-gate 		 * Do a range from [off...off + len) looking for pages
24277c478bd9Sstevel@tonic-gate 		 * to deal with.
24287c478bd9Sstevel@tonic-gate 		 */
24297c478bd9Sstevel@tonic-gate 		error = 0;
24307c478bd9Sstevel@tonic-gate 		io_len = 0;
24317c478bd9Sstevel@tonic-gate 		eoff = off + len;
24327c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
24337c478bd9Sstevel@tonic-gate 		for (io_off = off; io_off < eoff && io_off < rp->r_size;
24347c478bd9Sstevel@tonic-gate 		    io_off += io_len) {
24357c478bd9Sstevel@tonic-gate 			mutex_exit(&rp->r_statelock);
24367c478bd9Sstevel@tonic-gate 			/*
24377c478bd9Sstevel@tonic-gate 			 * If we are not invalidating, synchronously
24387c478bd9Sstevel@tonic-gate 			 * freeing or writing pages use the routine
24397c478bd9Sstevel@tonic-gate 			 * page_lookup_nowait() to prevent reclaiming
24407c478bd9Sstevel@tonic-gate 			 * them from the free list.
24417c478bd9Sstevel@tonic-gate 			 */
24427c478bd9Sstevel@tonic-gate 			if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
24437c478bd9Sstevel@tonic-gate 				pp = page_lookup(vp, io_off,
24447c478bd9Sstevel@tonic-gate 				    (flags & (B_INVAL | B_FREE)) ?
24457c478bd9Sstevel@tonic-gate 				    SE_EXCL : SE_SHARED);
24467c478bd9Sstevel@tonic-gate 			} else {
24477c478bd9Sstevel@tonic-gate 				pp = page_lookup_nowait(vp, io_off,
24487c478bd9Sstevel@tonic-gate 				    (flags & B_FREE) ? SE_EXCL : SE_SHARED);
24497c478bd9Sstevel@tonic-gate 			}
24507c478bd9Sstevel@tonic-gate 
24517c478bd9Sstevel@tonic-gate 			if (pp == NULL || !pvn_getdirty(pp, flags))
24527c478bd9Sstevel@tonic-gate 				io_len = PAGESIZE;
24537c478bd9Sstevel@tonic-gate 			else {
24547c478bd9Sstevel@tonic-gate 				err = (*rp->r_putapage)(vp, pp, &io_off,
24557c478bd9Sstevel@tonic-gate 				    &io_len, flags, cr);
24567c478bd9Sstevel@tonic-gate 				if (!error)
24577c478bd9Sstevel@tonic-gate 					error = err;
24587c478bd9Sstevel@tonic-gate 				/*
24597c478bd9Sstevel@tonic-gate 				 * "io_off" and "io_len" are returned as
24607c478bd9Sstevel@tonic-gate 				 * the range of pages we actually wrote.
24617c478bd9Sstevel@tonic-gate 				 * This allows us to skip ahead more quickly
24627c478bd9Sstevel@tonic-gate 				 * since several pages may've been dealt
24637c478bd9Sstevel@tonic-gate 				 * with by this iteration of the loop.
24647c478bd9Sstevel@tonic-gate 				 */
24657c478bd9Sstevel@tonic-gate 			}
24667c478bd9Sstevel@tonic-gate 			mutex_enter(&rp->r_statelock);
24677c478bd9Sstevel@tonic-gate 		}
24687c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
24697c478bd9Sstevel@tonic-gate 	}
24707c478bd9Sstevel@tonic-gate 
24717c478bd9Sstevel@tonic-gate 	return (error);
24727c478bd9Sstevel@tonic-gate }
24737c478bd9Sstevel@tonic-gate 
24747c478bd9Sstevel@tonic-gate void
24757c478bd9Sstevel@tonic-gate nfs4_invalidate_pages(vnode_t *vp, u_offset_t off, cred_t *cr)
24767c478bd9Sstevel@tonic-gate {
24777c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
24787c478bd9Sstevel@tonic-gate 
24797c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
24807c478bd9Sstevel@tonic-gate 	if (IS_SHADOW(vp, rp))
24817c478bd9Sstevel@tonic-gate 		vp = RTOV4(rp);
24827c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
24837c478bd9Sstevel@tonic-gate 	while (rp->r_flags & R4TRUNCATE)
24847c478bd9Sstevel@tonic-gate 		cv_wait(&rp->r_cv, &rp->r_statelock);
24857c478bd9Sstevel@tonic-gate 	rp->r_flags |= R4TRUNCATE;
24867c478bd9Sstevel@tonic-gate 	if (off == (u_offset_t)0) {
24877c478bd9Sstevel@tonic-gate 		rp->r_flags &= ~R4DIRTY;
24887c478bd9Sstevel@tonic-gate 		if (!(rp->r_flags & R4STALE))
24897c478bd9Sstevel@tonic-gate 			rp->r_error = 0;
24907c478bd9Sstevel@tonic-gate 	}
24917c478bd9Sstevel@tonic-gate 	rp->r_truncaddr = off;
24927c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
24937c478bd9Sstevel@tonic-gate 	(void) pvn_vplist_dirty(vp, off, rp->r_putapage,
24947c478bd9Sstevel@tonic-gate 		B_INVAL | B_TRUNC, cr);
24957c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
24967c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~R4TRUNCATE;
24977c478bd9Sstevel@tonic-gate 	cv_broadcast(&rp->r_cv);
24987c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
24997c478bd9Sstevel@tonic-gate }
25007c478bd9Sstevel@tonic-gate 
25017c478bd9Sstevel@tonic-gate static int
25027c478bd9Sstevel@tonic-gate nfs4_mnt_kstat_update(kstat_t *ksp, int rw)
25037c478bd9Sstevel@tonic-gate {
25047c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
25057c478bd9Sstevel@tonic-gate 	struct mntinfo_kstat *mik;
25067c478bd9Sstevel@tonic-gate 	vfs_t *vfsp;
25077c478bd9Sstevel@tonic-gate 
25087c478bd9Sstevel@tonic-gate 	/* this is a read-only kstat. Bail out on a write */
25097c478bd9Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
25107c478bd9Sstevel@tonic-gate 		return (EACCES);
25117c478bd9Sstevel@tonic-gate 
25127c478bd9Sstevel@tonic-gate 
25137c478bd9Sstevel@tonic-gate 	/*
25147c478bd9Sstevel@tonic-gate 	 * We don't want to wait here as kstat_chain_lock could be held by
25157c478bd9Sstevel@tonic-gate 	 * dounmount(). dounmount() takes vfs_reflock before the chain lock
25167c478bd9Sstevel@tonic-gate 	 * and thus could lead to a deadlock.
25177c478bd9Sstevel@tonic-gate 	 */
25187c478bd9Sstevel@tonic-gate 	vfsp = (struct vfs *)ksp->ks_private;
25197c478bd9Sstevel@tonic-gate 
25207c478bd9Sstevel@tonic-gate 	mi = VFTOMI4(vfsp);
25217c478bd9Sstevel@tonic-gate 	mik = (struct mntinfo_kstat *)ksp->ks_data;
25227c478bd9Sstevel@tonic-gate 
25237c478bd9Sstevel@tonic-gate 	(void) strcpy(mik->mik_proto, mi->mi_curr_serv->sv_knconf->knc_proto);
25247c478bd9Sstevel@tonic-gate 
25257c478bd9Sstevel@tonic-gate 	mik->mik_vers = (uint32_t)mi->mi_vers;
25267c478bd9Sstevel@tonic-gate 	mik->mik_flags = mi->mi_flags;
25277c478bd9Sstevel@tonic-gate 	/*
25287c478bd9Sstevel@tonic-gate 	 * The sv_secdata holds the flavor the client specifies.
25297c478bd9Sstevel@tonic-gate 	 * If the client uses default and a security negotiation
25307c478bd9Sstevel@tonic-gate 	 * occurs, sv_currsec will point to the current flavor
25317c478bd9Sstevel@tonic-gate 	 * selected from the server flavor list.
25327c478bd9Sstevel@tonic-gate 	 * sv_currsec is NULL if no security negotiation takes place.
25337c478bd9Sstevel@tonic-gate 	 */
25347c478bd9Sstevel@tonic-gate 	mik->mik_secmod = mi->mi_curr_serv->sv_currsec ?
25357c478bd9Sstevel@tonic-gate 			mi->mi_curr_serv->sv_currsec->secmod :
25367c478bd9Sstevel@tonic-gate 			mi->mi_curr_serv->sv_secdata->secmod;
25377c478bd9Sstevel@tonic-gate 	mik->mik_curread = (uint32_t)mi->mi_curread;
25387c478bd9Sstevel@tonic-gate 	mik->mik_curwrite = (uint32_t)mi->mi_curwrite;
25397c478bd9Sstevel@tonic-gate 	mik->mik_retrans = mi->mi_retrans;
25407c478bd9Sstevel@tonic-gate 	mik->mik_timeo = mi->mi_timeo;
25417c478bd9Sstevel@tonic-gate 	mik->mik_acregmin = HR2SEC(mi->mi_acregmin);
25427c478bd9Sstevel@tonic-gate 	mik->mik_acregmax = HR2SEC(mi->mi_acregmax);
25437c478bd9Sstevel@tonic-gate 	mik->mik_acdirmin = HR2SEC(mi->mi_acdirmin);
25447c478bd9Sstevel@tonic-gate 	mik->mik_acdirmax = HR2SEC(mi->mi_acdirmax);
25457c478bd9Sstevel@tonic-gate 	mik->mik_noresponse = (uint32_t)mi->mi_noresponse;
25467c478bd9Sstevel@tonic-gate 	mik->mik_failover = (uint32_t)mi->mi_failover;
25477c478bd9Sstevel@tonic-gate 	mik->mik_remap = (uint32_t)mi->mi_remap;
25487c478bd9Sstevel@tonic-gate 
25497c478bd9Sstevel@tonic-gate 	(void) strcpy(mik->mik_curserver, mi->mi_curr_serv->sv_hostname);
25507c478bd9Sstevel@tonic-gate 
25517c478bd9Sstevel@tonic-gate 	return (0);
25527c478bd9Sstevel@tonic-gate }
25537c478bd9Sstevel@tonic-gate 
25547c478bd9Sstevel@tonic-gate void
25557c478bd9Sstevel@tonic-gate nfs4_mnt_kstat_init(struct vfs *vfsp)
25567c478bd9Sstevel@tonic-gate {
25577c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VFTOMI4(vfsp);
25587c478bd9Sstevel@tonic-gate 
25597c478bd9Sstevel@tonic-gate 	/*
25607c478bd9Sstevel@tonic-gate 	 * PSARC 2001/697 Contract Private Interface
25617c478bd9Sstevel@tonic-gate 	 * All nfs kstats are under SunMC contract
25627c478bd9Sstevel@tonic-gate 	 * Please refer to the PSARC listed above and contact
25637c478bd9Sstevel@tonic-gate 	 * SunMC before making any changes!
25647c478bd9Sstevel@tonic-gate 	 *
25657c478bd9Sstevel@tonic-gate 	 * Changes must be reviewed by Solaris File Sharing
25667c478bd9Sstevel@tonic-gate 	 * Changes must be communicated to contract-2001-697@sun.com
25677c478bd9Sstevel@tonic-gate 	 *
25687c478bd9Sstevel@tonic-gate 	 */
25697c478bd9Sstevel@tonic-gate 
25707c478bd9Sstevel@tonic-gate 	mi->mi_io_kstats = kstat_create_zone("nfs", getminor(vfsp->vfs_dev),
25717c478bd9Sstevel@tonic-gate 	    NULL, "nfs", KSTAT_TYPE_IO, 1, 0, mi->mi_zone->zone_id);
25727c478bd9Sstevel@tonic-gate 	if (mi->mi_io_kstats) {
25737c478bd9Sstevel@tonic-gate 		if (mi->mi_zone->zone_id != GLOBAL_ZONEID)
25747c478bd9Sstevel@tonic-gate 			kstat_zone_add(mi->mi_io_kstats, GLOBAL_ZONEID);
25757c478bd9Sstevel@tonic-gate 		mi->mi_io_kstats->ks_lock = &mi->mi_lock;
25767c478bd9Sstevel@tonic-gate 		kstat_install(mi->mi_io_kstats);
25777c478bd9Sstevel@tonic-gate 	}
25787c478bd9Sstevel@tonic-gate 
25797c478bd9Sstevel@tonic-gate 	if ((mi->mi_ro_kstats = kstat_create_zone("nfs",
25807c478bd9Sstevel@tonic-gate 	    getminor(vfsp->vfs_dev), "mntinfo", "misc", KSTAT_TYPE_RAW,
25817c478bd9Sstevel@tonic-gate 	    sizeof (struct mntinfo_kstat), 0, mi->mi_zone->zone_id)) != NULL) {
25827c478bd9Sstevel@tonic-gate 		if (mi->mi_zone->zone_id != GLOBAL_ZONEID)
25837c478bd9Sstevel@tonic-gate 			kstat_zone_add(mi->mi_ro_kstats, GLOBAL_ZONEID);
25847c478bd9Sstevel@tonic-gate 		mi->mi_ro_kstats->ks_update = nfs4_mnt_kstat_update;
25857c478bd9Sstevel@tonic-gate 		mi->mi_ro_kstats->ks_private = (void *)vfsp;
25867c478bd9Sstevel@tonic-gate 		kstat_install(mi->mi_ro_kstats);
25877c478bd9Sstevel@tonic-gate 	}
25887c478bd9Sstevel@tonic-gate 
25897c478bd9Sstevel@tonic-gate 	nfs4_mnt_recov_kstat_init(vfsp);
25907c478bd9Sstevel@tonic-gate }
25917c478bd9Sstevel@tonic-gate 
25927c478bd9Sstevel@tonic-gate void
25937c478bd9Sstevel@tonic-gate nfs4_write_error(vnode_t *vp, int error, cred_t *cr)
25947c478bd9Sstevel@tonic-gate {
25957c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
25967c478bd9Sstevel@tonic-gate 
25977c478bd9Sstevel@tonic-gate 	mi = VTOMI4(vp);
25987c478bd9Sstevel@tonic-gate 	/*
25997c478bd9Sstevel@tonic-gate 	 * In case of forced unmount, do not print any messages
26007c478bd9Sstevel@tonic-gate 	 * since it can flood the console with error messages.
26017c478bd9Sstevel@tonic-gate 	 */
26027c478bd9Sstevel@tonic-gate 	if (mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)
26037c478bd9Sstevel@tonic-gate 		return;
26047c478bd9Sstevel@tonic-gate 
26057c478bd9Sstevel@tonic-gate 	/*
26067c478bd9Sstevel@tonic-gate 	 * If the mount point is dead, not recoverable, do not
26077c478bd9Sstevel@tonic-gate 	 * print error messages that can flood the console.
26087c478bd9Sstevel@tonic-gate 	 */
26097c478bd9Sstevel@tonic-gate 	if (mi->mi_flags & MI4_RECOV_FAIL)
26107c478bd9Sstevel@tonic-gate 		return;
26117c478bd9Sstevel@tonic-gate 
26127c478bd9Sstevel@tonic-gate 	/*
26137c478bd9Sstevel@tonic-gate 	 * No use in flooding the console with ENOSPC
26147c478bd9Sstevel@tonic-gate 	 * messages from the same file system.
26157c478bd9Sstevel@tonic-gate 	 */
26167c478bd9Sstevel@tonic-gate 	if ((error != ENOSPC && error != EDQUOT) ||
26177c478bd9Sstevel@tonic-gate 	    lbolt - mi->mi_printftime > 0) {
26187c478bd9Sstevel@tonic-gate 		zoneid_t zoneid = mi->mi_zone->zone_id;
26197c478bd9Sstevel@tonic-gate 
26207c478bd9Sstevel@tonic-gate #ifdef DEBUG
26217c478bd9Sstevel@tonic-gate 		nfs_perror(error, "NFS%ld write error on host %s: %m.\n",
26227c478bd9Sstevel@tonic-gate 		    mi->mi_vers, VTOR4(vp)->r_server->sv_hostname, NULL);
26237c478bd9Sstevel@tonic-gate #else
26247c478bd9Sstevel@tonic-gate 		nfs_perror(error, "NFS write error on host %s: %m.\n",
26257c478bd9Sstevel@tonic-gate 		    VTOR4(vp)->r_server->sv_hostname, NULL);
26267c478bd9Sstevel@tonic-gate #endif
26277c478bd9Sstevel@tonic-gate 		if (error == ENOSPC || error == EDQUOT) {
26287c478bd9Sstevel@tonic-gate 			zcmn_err(zoneid, CE_CONT,
26297c478bd9Sstevel@tonic-gate 			    "^File: userid=%d, groupid=%d\n",
26307c478bd9Sstevel@tonic-gate 			    crgetuid(cr), crgetgid(cr));
26317c478bd9Sstevel@tonic-gate 			if (crgetuid(curthread->t_cred) != crgetuid(cr) ||
26327c478bd9Sstevel@tonic-gate 			    crgetgid(curthread->t_cred) != crgetgid(cr)) {
26337c478bd9Sstevel@tonic-gate 				zcmn_err(zoneid, CE_CONT,
26347c478bd9Sstevel@tonic-gate 				    "^User: userid=%d, groupid=%d\n",
26357c478bd9Sstevel@tonic-gate 				    crgetuid(curthread->t_cred),
26367c478bd9Sstevel@tonic-gate 				    crgetgid(curthread->t_cred));
26377c478bd9Sstevel@tonic-gate 			}
26387c478bd9Sstevel@tonic-gate 			mi->mi_printftime = lbolt +
26397c478bd9Sstevel@tonic-gate 			    nfs_write_error_interval * hz;
26407c478bd9Sstevel@tonic-gate 		}
26417c478bd9Sstevel@tonic-gate 		sfh4_printfhandle(VTOR4(vp)->r_fh);
26427c478bd9Sstevel@tonic-gate #ifdef DEBUG
26437c478bd9Sstevel@tonic-gate 		if (error == EACCES) {
26447c478bd9Sstevel@tonic-gate 			zcmn_err(zoneid, CE_CONT,
26457c478bd9Sstevel@tonic-gate 			    "nfs_bio: cred is%s kcred\n",
26467c478bd9Sstevel@tonic-gate 			    cr == kcred ? "" : " not");
26477c478bd9Sstevel@tonic-gate 		}
26487c478bd9Sstevel@tonic-gate #endif
26497c478bd9Sstevel@tonic-gate 	}
26507c478bd9Sstevel@tonic-gate }
26517c478bd9Sstevel@tonic-gate 
26527c478bd9Sstevel@tonic-gate /*
26537c478bd9Sstevel@tonic-gate  * Return non-zero if the given file can be safely memory mapped.  Locks
26547c478bd9Sstevel@tonic-gate  * are safe if whole-file (length and offset are both zero).
26557c478bd9Sstevel@tonic-gate  */
26567c478bd9Sstevel@tonic-gate 
26577c478bd9Sstevel@tonic-gate #define	SAFE_LOCK(flk)	((flk).l_start == 0 && (flk).l_len == 0)
26587c478bd9Sstevel@tonic-gate 
26597c478bd9Sstevel@tonic-gate static int
26607c478bd9Sstevel@tonic-gate nfs4_safemap(const vnode_t *vp)
26617c478bd9Sstevel@tonic-gate {
26627c478bd9Sstevel@tonic-gate 	locklist_t	*llp, *next_llp;
26637c478bd9Sstevel@tonic-gate 	int		safe = 1;
26647c478bd9Sstevel@tonic-gate 	rnode4_t	*rp = VTOR4(vp);
26657c478bd9Sstevel@tonic-gate 
26667c478bd9Sstevel@tonic-gate 	ASSERT(nfs_rw_lock_held(&rp->r_lkserlock, RW_WRITER));
26677c478bd9Sstevel@tonic-gate 
26687c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, "nfs4_safemap: "
26697c478bd9Sstevel@tonic-gate 		"vp = %p", (void *)vp));
26707c478bd9Sstevel@tonic-gate 
26717c478bd9Sstevel@tonic-gate 	/*
26727c478bd9Sstevel@tonic-gate 	 * Review all the locks for the vnode, both ones that have been
26737c478bd9Sstevel@tonic-gate 	 * acquired and ones that are pending.  We assume that
26747c478bd9Sstevel@tonic-gate 	 * flk_active_locks_for_vp() has merged any locks that can be
26757c478bd9Sstevel@tonic-gate 	 * merged (so that if a process has the entire file locked, it is
26767c478bd9Sstevel@tonic-gate 	 * represented as a single lock).
26777c478bd9Sstevel@tonic-gate 	 *
26787c478bd9Sstevel@tonic-gate 	 * Note that we can't bail out of the loop if we find a non-safe
26797c478bd9Sstevel@tonic-gate 	 * lock, because we have to free all the elements in the llp list.
26807c478bd9Sstevel@tonic-gate 	 * We might be able to speed up this code slightly by not looking
26817c478bd9Sstevel@tonic-gate 	 * at each lock's l_start and l_len fields once we've found a
26827c478bd9Sstevel@tonic-gate 	 * non-safe lock.
26837c478bd9Sstevel@tonic-gate 	 */
26847c478bd9Sstevel@tonic-gate 
26857c478bd9Sstevel@tonic-gate 	llp = flk_active_locks_for_vp(vp);
26867c478bd9Sstevel@tonic-gate 	while (llp) {
26877c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE,
26887c478bd9Sstevel@tonic-gate 		    "nfs4_safemap: active lock (%" PRId64 ", %" PRId64 ")",
26897c478bd9Sstevel@tonic-gate 		    llp->ll_flock.l_start, llp->ll_flock.l_len));
26907c478bd9Sstevel@tonic-gate 		if (!SAFE_LOCK(llp->ll_flock)) {
26917c478bd9Sstevel@tonic-gate 			safe = 0;
26927c478bd9Sstevel@tonic-gate 			NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE,
26937c478bd9Sstevel@tonic-gate 			    "nfs4_safemap: unsafe active lock (%" PRId64
26947c478bd9Sstevel@tonic-gate 			    ", %" PRId64 ")", llp->ll_flock.l_start,
26957c478bd9Sstevel@tonic-gate 			    llp->ll_flock.l_len));
26967c478bd9Sstevel@tonic-gate 		}
26977c478bd9Sstevel@tonic-gate 		next_llp = llp->ll_next;
26987c478bd9Sstevel@tonic-gate 		VN_RELE(llp->ll_vp);
26997c478bd9Sstevel@tonic-gate 		kmem_free(llp, sizeof (*llp));
27007c478bd9Sstevel@tonic-gate 		llp = next_llp;
27017c478bd9Sstevel@tonic-gate 	}
27027c478bd9Sstevel@tonic-gate 
27037c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, "nfs4_safemap: %s",
27047c478bd9Sstevel@tonic-gate 		safe ? "safe" : "unsafe"));
27057c478bd9Sstevel@tonic-gate 	return (safe);
27067c478bd9Sstevel@tonic-gate }
27077c478bd9Sstevel@tonic-gate 
27087c478bd9Sstevel@tonic-gate /*
27097c478bd9Sstevel@tonic-gate  * Return whether there is a lost LOCK or LOCKU queued up for the given
27107c478bd9Sstevel@tonic-gate  * file that would make an mmap request unsafe.  cf. nfs4_safemap().
27117c478bd9Sstevel@tonic-gate  */
27127c478bd9Sstevel@tonic-gate 
27137c478bd9Sstevel@tonic-gate bool_t
27147c478bd9Sstevel@tonic-gate nfs4_map_lost_lock_conflict(vnode_t *vp)
27157c478bd9Sstevel@tonic-gate {
27167c478bd9Sstevel@tonic-gate 	bool_t conflict = FALSE;
27177c478bd9Sstevel@tonic-gate 	nfs4_lost_rqst_t *lrp;
27187c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VTOMI4(vp);
27197c478bd9Sstevel@tonic-gate 
27207c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
27217c478bd9Sstevel@tonic-gate 	for (lrp = list_head(&mi->mi_lost_state); lrp != NULL;
27227c478bd9Sstevel@tonic-gate 	    lrp = list_next(&mi->mi_lost_state, lrp)) {
27237c478bd9Sstevel@tonic-gate 		if (lrp->lr_op != OP_LOCK && lrp->lr_op != OP_LOCKU)
27247c478bd9Sstevel@tonic-gate 			continue;
27257c478bd9Sstevel@tonic-gate 		ASSERT(lrp->lr_vp != NULL);
27267c478bd9Sstevel@tonic-gate 		if (!VOP_CMP(lrp->lr_vp, vp))
27277c478bd9Sstevel@tonic-gate 			continue;	/* different file */
27287c478bd9Sstevel@tonic-gate 		if (!SAFE_LOCK(*lrp->lr_flk)) {
27297c478bd9Sstevel@tonic-gate 			conflict = TRUE;
27307c478bd9Sstevel@tonic-gate 			break;
27317c478bd9Sstevel@tonic-gate 		}
27327c478bd9Sstevel@tonic-gate 	}
27337c478bd9Sstevel@tonic-gate 
27347c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
27357c478bd9Sstevel@tonic-gate 	return (conflict);
27367c478bd9Sstevel@tonic-gate }
27377c478bd9Sstevel@tonic-gate 
27387c478bd9Sstevel@tonic-gate /*
27397c478bd9Sstevel@tonic-gate  * nfs_lockcompletion:
27407c478bd9Sstevel@tonic-gate  *
27417c478bd9Sstevel@tonic-gate  * If the vnode has a lock that makes it unsafe to cache the file, mark it
27427c478bd9Sstevel@tonic-gate  * as non cachable (set VNOCACHE bit).
27437c478bd9Sstevel@tonic-gate  */
27447c478bd9Sstevel@tonic-gate 
27457c478bd9Sstevel@tonic-gate void
27467c478bd9Sstevel@tonic-gate nfs4_lockcompletion(vnode_t *vp, int cmd)
27477c478bd9Sstevel@tonic-gate {
27487c478bd9Sstevel@tonic-gate 	rnode4_t *rp = VTOR4(vp);
27497c478bd9Sstevel@tonic-gate 
27507c478bd9Sstevel@tonic-gate 	ASSERT(nfs_rw_lock_held(&rp->r_lkserlock, RW_WRITER));
27517c478bd9Sstevel@tonic-gate 	ASSERT(!IS_SHADOW(vp, rp));
27527c478bd9Sstevel@tonic-gate 
27537c478bd9Sstevel@tonic-gate 	if (cmd == F_SETLK || cmd == F_SETLKW) {
27547c478bd9Sstevel@tonic-gate 
27557c478bd9Sstevel@tonic-gate 		if (!nfs4_safemap(vp)) {
27567c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
27577c478bd9Sstevel@tonic-gate 			vp->v_flag |= VNOCACHE;
27587c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
27597c478bd9Sstevel@tonic-gate 		} else {
27607c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
27617c478bd9Sstevel@tonic-gate 			vp->v_flag &= ~VNOCACHE;
27627c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
27637c478bd9Sstevel@tonic-gate 		}
27647c478bd9Sstevel@tonic-gate 	}
27657c478bd9Sstevel@tonic-gate 	/*
27667c478bd9Sstevel@tonic-gate 	 * The cached attributes of the file are stale after acquiring
27677c478bd9Sstevel@tonic-gate 	 * the lock on the file. They were updated when the file was
27687c478bd9Sstevel@tonic-gate 	 * opened, but not updated when the lock was acquired. Therefore the
27697c478bd9Sstevel@tonic-gate 	 * cached attributes are invalidated after the lock is obtained.
27707c478bd9Sstevel@tonic-gate 	 */
27717c478bd9Sstevel@tonic-gate 	PURGE_ATTRCACHE4(vp);
27727c478bd9Sstevel@tonic-gate }
27737c478bd9Sstevel@tonic-gate 
27747c478bd9Sstevel@tonic-gate /* ARGSUSED */
27757c478bd9Sstevel@tonic-gate static void *
27767c478bd9Sstevel@tonic-gate nfs4_mi_init(zoneid_t zoneid)
27777c478bd9Sstevel@tonic-gate {
27787c478bd9Sstevel@tonic-gate 	struct mi4_globals *mig;
27797c478bd9Sstevel@tonic-gate 
27807c478bd9Sstevel@tonic-gate 	mig = kmem_alloc(sizeof (*mig), KM_SLEEP);
27817c478bd9Sstevel@tonic-gate 	mutex_init(&mig->mig_lock, NULL, MUTEX_DEFAULT, NULL);
27827c478bd9Sstevel@tonic-gate 	list_create(&mig->mig_list, sizeof (mntinfo4_t),
27837c478bd9Sstevel@tonic-gate 	    offsetof(mntinfo4_t, mi_zone_node));
27847c478bd9Sstevel@tonic-gate 	mig->mig_destructor_called = B_FALSE;
27857c478bd9Sstevel@tonic-gate 	return (mig);
27867c478bd9Sstevel@tonic-gate }
27877c478bd9Sstevel@tonic-gate 
27887c478bd9Sstevel@tonic-gate /*
27897c478bd9Sstevel@tonic-gate  * Callback routine to tell all NFSv4 mounts in the zone to start tearing down
27907c478bd9Sstevel@tonic-gate  * state and killing off threads.
27917c478bd9Sstevel@tonic-gate  */
27927c478bd9Sstevel@tonic-gate /* ARGSUSED */
27937c478bd9Sstevel@tonic-gate static void
27947c478bd9Sstevel@tonic-gate nfs4_mi_shutdown(zoneid_t zoneid, void *data)
27957c478bd9Sstevel@tonic-gate {
27967c478bd9Sstevel@tonic-gate 	struct mi4_globals *mig = data;
27977c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
27987c478bd9Sstevel@tonic-gate 	nfs4_server_t *np;
27997c478bd9Sstevel@tonic-gate 
28007c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
28017c478bd9Sstevel@tonic-gate 	    "nfs4_mi_shutdown zone %d\n", zoneid));
28027c478bd9Sstevel@tonic-gate 	ASSERT(mig != NULL);
280350a83466Sjwahlig 	for (;;) {
280450a83466Sjwahlig 		mutex_enter(&mig->mig_lock);
280550a83466Sjwahlig 		mi = list_head(&mig->mig_list);
280650a83466Sjwahlig 		if (mi == NULL) {
280750a83466Sjwahlig 			mutex_exit(&mig->mig_lock);
280850a83466Sjwahlig 			break;
280950a83466Sjwahlig 		}
28103fd6cc29Sthurlow 
28117c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
28127c478bd9Sstevel@tonic-gate 		    "nfs4_mi_shutdown stopping vfs %p\n", (void *)mi->mi_vfsp));
28137c478bd9Sstevel@tonic-gate 		/*
28147c478bd9Sstevel@tonic-gate 		 * purge the DNLC for this filesystem
28157c478bd9Sstevel@tonic-gate 		 */
28167c478bd9Sstevel@tonic-gate 		(void) dnlc_purge_vfsp(mi->mi_vfsp, 0);
28177c478bd9Sstevel@tonic-gate 		/*
28187c478bd9Sstevel@tonic-gate 		 * Tell existing async worker threads to exit.
28197c478bd9Sstevel@tonic-gate 		 */
282050a83466Sjwahlig 		mutex_enter(&mi->mi_async_lock);
28217c478bd9Sstevel@tonic-gate 		mi->mi_max_threads = 0;
28227c478bd9Sstevel@tonic-gate 		cv_broadcast(&mi->mi_async_work_cv);
28237c478bd9Sstevel@tonic-gate 		/*
282450a83466Sjwahlig 		 * Set the appropriate flags, signal and wait for both the
282550a83466Sjwahlig 		 * async manager and the inactive thread to exit when they're
282650a83466Sjwahlig 		 * done with their current work.
28277c478bd9Sstevel@tonic-gate 		 */
28287c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
28297c478bd9Sstevel@tonic-gate 		mi->mi_flags |= (MI4_ASYNC_MGR_STOP|MI4_DEAD);
28307c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
283150a83466Sjwahlig 		mutex_exit(&mi->mi_async_lock);
283250a83466Sjwahlig 		if (mi->mi_manager_thread) {
283350a83466Sjwahlig 			nfs4_async_manager_stop(mi->mi_vfsp);
283450a83466Sjwahlig 		}
283550a83466Sjwahlig 		if (mi->mi_inactive_thread) {
283650a83466Sjwahlig 			mutex_enter(&mi->mi_async_lock);
283750a83466Sjwahlig 			cv_signal(&mi->mi_inact_req_cv);
283850a83466Sjwahlig 			/*
283950a83466Sjwahlig 			 * Wait for the inactive thread to exit.
284050a83466Sjwahlig 			 */
284150a83466Sjwahlig 			while (mi->mi_inactive_thread != NULL) {
284250a83466Sjwahlig 				cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);
284350a83466Sjwahlig 			}
284450a83466Sjwahlig 			mutex_exit(&mi->mi_async_lock);
284550a83466Sjwahlig 		}
28467c478bd9Sstevel@tonic-gate 		/*
284750a83466Sjwahlig 		 * Wait for the recovery thread to complete, that is, it will
284850a83466Sjwahlig 		 * signal when it is done using the "mi" structure and about
284950a83466Sjwahlig 		 * to exit
28507c478bd9Sstevel@tonic-gate 		 */
285150a83466Sjwahlig 		mutex_enter(&mi->mi_lock);
285250a83466Sjwahlig 		while (mi->mi_in_recovery > 0)
285350a83466Sjwahlig 			cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
285450a83466Sjwahlig 		mutex_exit(&mi->mi_lock);
28553fd6cc29Sthurlow 		/*
28563fd6cc29Sthurlow 		 * We're done when every mi has been done or the list is empty.
285750a83466Sjwahlig 		 * This one is done, remove it from the list.
28583fd6cc29Sthurlow 		 */
285950a83466Sjwahlig 		list_remove(&mig->mig_list, mi);
28603fd6cc29Sthurlow 		mutex_exit(&mig->mig_lock);
286150a83466Sjwahlig 		zone_rele(mi->mi_zone);
286250a83466Sjwahlig 		/*
286350a83466Sjwahlig 		 * Release hold on vfs and mi done to prevent race with zone
286450a83466Sjwahlig 		 * shutdown. This releases the hold in nfs4_mi_zonelist_add.
286550a83466Sjwahlig 		 */
28663fd6cc29Sthurlow 		VFS_RELE(mi->mi_vfsp);
286750a83466Sjwahlig 		MI4_RELE(mi);
28687c478bd9Sstevel@tonic-gate 	}
28697c478bd9Sstevel@tonic-gate 	/*
28707c478bd9Sstevel@tonic-gate 	 * Tell each renew thread in the zone to exit
28717c478bd9Sstevel@tonic-gate 	 */
28727c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs4_server_lst_lock);
28737c478bd9Sstevel@tonic-gate 	for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
28747c478bd9Sstevel@tonic-gate 		mutex_enter(&np->s_lock);
28757c478bd9Sstevel@tonic-gate 		if (np->zoneid == zoneid) {
28767c478bd9Sstevel@tonic-gate 			/*
28777c478bd9Sstevel@tonic-gate 			 * We add another hold onto the nfs4_server_t
28787c478bd9Sstevel@tonic-gate 			 * because this will make sure tha the nfs4_server_t
28797c478bd9Sstevel@tonic-gate 			 * stays around until nfs4_callback_fini_zone destroys
28807c478bd9Sstevel@tonic-gate 			 * the zone. This way, the renew thread can
28817c478bd9Sstevel@tonic-gate 			 * unconditionally release its holds on the
28827c478bd9Sstevel@tonic-gate 			 * nfs4_server_t.
28837c478bd9Sstevel@tonic-gate 			 */
28847c478bd9Sstevel@tonic-gate 			np->s_refcnt++;
28857c478bd9Sstevel@tonic-gate 			nfs4_mark_srv_dead(np);
28867c478bd9Sstevel@tonic-gate 		}
28877c478bd9Sstevel@tonic-gate 		mutex_exit(&np->s_lock);
28887c478bd9Sstevel@tonic-gate 	}
28897c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs4_server_lst_lock);
28907c478bd9Sstevel@tonic-gate }
28917c478bd9Sstevel@tonic-gate 
28927c478bd9Sstevel@tonic-gate static void
28937c478bd9Sstevel@tonic-gate nfs4_mi_free_globals(struct mi4_globals *mig)
28947c478bd9Sstevel@tonic-gate {
28957c478bd9Sstevel@tonic-gate 	list_destroy(&mig->mig_list);	/* makes sure the list is empty */
28967c478bd9Sstevel@tonic-gate 	mutex_destroy(&mig->mig_lock);
28977c478bd9Sstevel@tonic-gate 	kmem_free(mig, sizeof (*mig));
28987c478bd9Sstevel@tonic-gate }
28997c478bd9Sstevel@tonic-gate 
29007c478bd9Sstevel@tonic-gate /* ARGSUSED */
29017c478bd9Sstevel@tonic-gate static void
29027c478bd9Sstevel@tonic-gate nfs4_mi_destroy(zoneid_t zoneid, void *data)
29037c478bd9Sstevel@tonic-gate {
29047c478bd9Sstevel@tonic-gate 	struct mi4_globals *mig = data;
29057c478bd9Sstevel@tonic-gate 
29067c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
29077c478bd9Sstevel@tonic-gate 	    "nfs4_mi_destroy zone %d\n", zoneid));
29087c478bd9Sstevel@tonic-gate 	ASSERT(mig != NULL);
29097c478bd9Sstevel@tonic-gate 	mutex_enter(&mig->mig_lock);
29107c478bd9Sstevel@tonic-gate 	if (list_head(&mig->mig_list) != NULL) {
29117c478bd9Sstevel@tonic-gate 		/* Still waiting for VFS_FREEVFS() */
29127c478bd9Sstevel@tonic-gate 		mig->mig_destructor_called = B_TRUE;
29137c478bd9Sstevel@tonic-gate 		mutex_exit(&mig->mig_lock);
29147c478bd9Sstevel@tonic-gate 		return;
29157c478bd9Sstevel@tonic-gate 	}
29167c478bd9Sstevel@tonic-gate 	nfs4_mi_free_globals(mig);
29177c478bd9Sstevel@tonic-gate }
29187c478bd9Sstevel@tonic-gate 
29197c478bd9Sstevel@tonic-gate /*
29207c478bd9Sstevel@tonic-gate  * Add an NFS mount to the per-zone list of NFS mounts.
29217c478bd9Sstevel@tonic-gate  */
29227c478bd9Sstevel@tonic-gate void
29237c478bd9Sstevel@tonic-gate nfs4_mi_zonelist_add(mntinfo4_t *mi)
29247c478bd9Sstevel@tonic-gate {
29257c478bd9Sstevel@tonic-gate 	struct mi4_globals *mig;
29267c478bd9Sstevel@tonic-gate 
29277c478bd9Sstevel@tonic-gate 	mig = zone_getspecific(mi4_list_key, mi->mi_zone);
29287c478bd9Sstevel@tonic-gate 	mutex_enter(&mig->mig_lock);
29297c478bd9Sstevel@tonic-gate 	list_insert_head(&mig->mig_list, mi);
293050a83466Sjwahlig 	/*
293150a83466Sjwahlig 	 * hold added to eliminate race with zone shutdown -this will be
293250a83466Sjwahlig 	 * released in mi_shutdown
293350a83466Sjwahlig 	 */
293450a83466Sjwahlig 	MI4_HOLD(mi);
293550a83466Sjwahlig 	VFS_HOLD(mi->mi_vfsp);
29367c478bd9Sstevel@tonic-gate 	mutex_exit(&mig->mig_lock);
29377c478bd9Sstevel@tonic-gate }
29387c478bd9Sstevel@tonic-gate 
29397c478bd9Sstevel@tonic-gate /*
29407c478bd9Sstevel@tonic-gate  * Remove an NFS mount from the per-zone list of NFS mounts.
29417c478bd9Sstevel@tonic-gate  */
294250a83466Sjwahlig int
29437c478bd9Sstevel@tonic-gate nfs4_mi_zonelist_remove(mntinfo4_t *mi)
29447c478bd9Sstevel@tonic-gate {
29457c478bd9Sstevel@tonic-gate 	struct mi4_globals *mig;
294650a83466Sjwahlig 	int ret = 0;
29477c478bd9Sstevel@tonic-gate 
29487c478bd9Sstevel@tonic-gate 	mig = zone_getspecific(mi4_list_key, mi->mi_zone);
29497c478bd9Sstevel@tonic-gate 	mutex_enter(&mig->mig_lock);
295050a83466Sjwahlig 	mutex_enter(&mi->mi_lock);
295150a83466Sjwahlig 	/* if this mi is marked dead, then the zone already released it */
295250a83466Sjwahlig 	if (!(mi->mi_flags & MI4_DEAD)) {
295350a83466Sjwahlig 		list_remove(&mig->mig_list, mi);
295450a83466Sjwahlig 
295550a83466Sjwahlig 		/* release the holds put on in zonelist_add(). */
295650a83466Sjwahlig 		VFS_RELE(mi->mi_vfsp);
295750a83466Sjwahlig 		MI4_RELE(mi);
295850a83466Sjwahlig 		ret = 1;
295950a83466Sjwahlig 	}
296050a83466Sjwahlig 	mutex_exit(&mi->mi_lock);
296150a83466Sjwahlig 
29627c478bd9Sstevel@tonic-gate 	/*
29637c478bd9Sstevel@tonic-gate 	 * We can be called asynchronously by VFS_FREEVFS() after the zone
29647c478bd9Sstevel@tonic-gate 	 * shutdown/destroy callbacks have executed; if so, clean up the zone's
29657c478bd9Sstevel@tonic-gate 	 * mi globals.
29667c478bd9Sstevel@tonic-gate 	 */
29677c478bd9Sstevel@tonic-gate 	if (list_head(&mig->mig_list) == NULL &&
29687c478bd9Sstevel@tonic-gate 	    mig->mig_destructor_called == B_TRUE) {
29697c478bd9Sstevel@tonic-gate 		nfs4_mi_free_globals(mig);
297050a83466Sjwahlig 		return (ret);
29717c478bd9Sstevel@tonic-gate 	}
29727c478bd9Sstevel@tonic-gate 	mutex_exit(&mig->mig_lock);
297350a83466Sjwahlig 	return (ret);
29747c478bd9Sstevel@tonic-gate }
29757c478bd9Sstevel@tonic-gate 
29767c478bd9Sstevel@tonic-gate void
29777c478bd9Sstevel@tonic-gate nfs_free_mi4(mntinfo4_t *mi)
29787c478bd9Sstevel@tonic-gate {
29797c478bd9Sstevel@tonic-gate 	nfs4_open_owner_t	*foop;
298050a83466Sjwahlig 	nfs4_oo_hash_bucket_t   *bucketp;
29817c478bd9Sstevel@tonic-gate 	nfs4_debug_msg_t	*msgp;
29827c478bd9Sstevel@tonic-gate 	int i;
298350a83466Sjwahlig 	servinfo4_t 		*svp;
29847c478bd9Sstevel@tonic-gate 
29857c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
29867c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_recovthread == NULL);
29877c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_flags & MI4_ASYNC_MGR_STOP);
29887c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
29897c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
29907c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_threads == 0);
29917c478bd9Sstevel@tonic-gate 	ASSERT(mi->mi_manager_thread == NULL);
299250a83466Sjwahlig 	mutex_exit(&mi->mi_async_lock);
299350a83466Sjwahlig 	svp = mi->mi_servers;
299450a83466Sjwahlig 	sv4_free(svp);
299550a83466Sjwahlig 	if (mi->mi_io_kstats) {
299650a83466Sjwahlig 		kstat_delete(mi->mi_io_kstats);
299750a83466Sjwahlig 		mi->mi_io_kstats = NULL;
29987c478bd9Sstevel@tonic-gate 	}
299950a83466Sjwahlig 	if (mi->mi_ro_kstats) {
300050a83466Sjwahlig 		kstat_delete(mi->mi_ro_kstats);
300150a83466Sjwahlig 		mi->mi_ro_kstats = NULL;
300250a83466Sjwahlig 	}
300350a83466Sjwahlig 	if (mi->mi_recov_ksp) {
300450a83466Sjwahlig 		kstat_delete(mi->mi_recov_ksp);
300550a83466Sjwahlig 		mi->mi_recov_ksp = NULL;
30067c478bd9Sstevel@tonic-gate 	}
30077c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_msg_list_lock);
30087c478bd9Sstevel@tonic-gate 	while (msgp = list_head(&mi->mi_msg_list)) {
30097c478bd9Sstevel@tonic-gate 		list_remove(&mi->mi_msg_list, msgp);
30107c478bd9Sstevel@tonic-gate 		nfs4_free_msg(msgp);
30117c478bd9Sstevel@tonic-gate 	}
30127c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_msg_list_lock);
30137c478bd9Sstevel@tonic-gate 	list_destroy(&mi->mi_msg_list);
30147c478bd9Sstevel@tonic-gate 	if (mi->mi_rootfh != NULL)
30157c478bd9Sstevel@tonic-gate 		sfh4_rele(&mi->mi_rootfh);
30167c478bd9Sstevel@tonic-gate 	if (mi->mi_srvparentfh != NULL)
30177c478bd9Sstevel@tonic-gate 		sfh4_rele(&mi->mi_srvparentfh);
30187c478bd9Sstevel@tonic-gate 	mutex_destroy(&mi->mi_lock);
30197c478bd9Sstevel@tonic-gate 	mutex_destroy(&mi->mi_async_lock);
30207c478bd9Sstevel@tonic-gate 	mutex_destroy(&mi->mi_msg_list_lock);
30217c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&mi->mi_recovlock);
30227c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&mi->mi_rename_lock);
30237c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&mi->mi_fh_lock);
30247c478bd9Sstevel@tonic-gate 	cv_destroy(&mi->mi_failover_cv);
30257c478bd9Sstevel@tonic-gate 	cv_destroy(&mi->mi_async_reqs_cv);
30267c478bd9Sstevel@tonic-gate 	cv_destroy(&mi->mi_async_work_cv);
30277c478bd9Sstevel@tonic-gate 	cv_destroy(&mi->mi_async_cv);
30287c478bd9Sstevel@tonic-gate 	cv_destroy(&mi->mi_inact_req_cv);
30297c478bd9Sstevel@tonic-gate 	/*
30307c478bd9Sstevel@tonic-gate 	 * Destroy the oo hash lists and mutexes for the cred hash table.
30317c478bd9Sstevel@tonic-gate 	 */
30327c478bd9Sstevel@tonic-gate 	for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) {
30337c478bd9Sstevel@tonic-gate 		bucketp = &(mi->mi_oo_list[i]);
30347c478bd9Sstevel@tonic-gate 		/* Destroy any remaining open owners on the list */
30357c478bd9Sstevel@tonic-gate 		foop = list_head(&bucketp->b_oo_hash_list);
30367c478bd9Sstevel@tonic-gate 		while (foop != NULL) {
30377c478bd9Sstevel@tonic-gate 			list_remove(&bucketp->b_oo_hash_list, foop);
30387c478bd9Sstevel@tonic-gate 			nfs4_destroy_open_owner(foop);
30397c478bd9Sstevel@tonic-gate 			foop = list_head(&bucketp->b_oo_hash_list);
30407c478bd9Sstevel@tonic-gate 		}
30417c478bd9Sstevel@tonic-gate 		list_destroy(&bucketp->b_oo_hash_list);
30427c478bd9Sstevel@tonic-gate 		mutex_destroy(&bucketp->b_lock);
30437c478bd9Sstevel@tonic-gate 	}
30447c478bd9Sstevel@tonic-gate 	/*
30457c478bd9Sstevel@tonic-gate 	 * Empty and destroy the freed open owner list.
30467c478bd9Sstevel@tonic-gate 	 */
30477c478bd9Sstevel@tonic-gate 	foop = list_head(&mi->mi_foo_list);
30487c478bd9Sstevel@tonic-gate 	while (foop != NULL) {
30497c478bd9Sstevel@tonic-gate 		list_remove(&mi->mi_foo_list, foop);
30507c478bd9Sstevel@tonic-gate 		nfs4_destroy_open_owner(foop);
30517c478bd9Sstevel@tonic-gate 		foop = list_head(&mi->mi_foo_list);
30527c478bd9Sstevel@tonic-gate 	}
30537c478bd9Sstevel@tonic-gate 	list_destroy(&mi->mi_foo_list);
30547c478bd9Sstevel@tonic-gate 	list_destroy(&mi->mi_bseqid_list);
30557c478bd9Sstevel@tonic-gate 	list_destroy(&mi->mi_lost_state);
30567c478bd9Sstevel@tonic-gate 	avl_destroy(&mi->mi_filehandles);
30577c478bd9Sstevel@tonic-gate 	fn_rele(&mi->mi_fname);
30587c478bd9Sstevel@tonic-gate 	kmem_free(mi, sizeof (*mi));
30597c478bd9Sstevel@tonic-gate }
306050a83466Sjwahlig void
306150a83466Sjwahlig mi_hold(mntinfo4_t *mi)
306250a83466Sjwahlig {
306350a83466Sjwahlig 	atomic_add_32(&mi->mi_count, 1);
306450a83466Sjwahlig 	ASSERT(mi->mi_count != 0);
306550a83466Sjwahlig }
306650a83466Sjwahlig 
306750a83466Sjwahlig void
306850a83466Sjwahlig mi_rele(mntinfo4_t *mi)
306950a83466Sjwahlig {
307050a83466Sjwahlig 	ASSERT(mi->mi_count != 0);
307150a83466Sjwahlig 	if (atomic_add_32_nv(&mi->mi_count, -1) == 0) {
307250a83466Sjwahlig 		nfs_free_mi4(mi);
307350a83466Sjwahlig 	}
307450a83466Sjwahlig }
30757c478bd9Sstevel@tonic-gate 
30767c478bd9Sstevel@tonic-gate vnode_t    nfs4_xattr_notsupp_vnode;
30777c478bd9Sstevel@tonic-gate 
30787c478bd9Sstevel@tonic-gate void
30797c478bd9Sstevel@tonic-gate nfs4_clnt_init(void)
30807c478bd9Sstevel@tonic-gate {
30817c478bd9Sstevel@tonic-gate 	nfs4_vnops_init();
30827c478bd9Sstevel@tonic-gate 	(void) nfs4_rnode_init();
30837c478bd9Sstevel@tonic-gate 	(void) nfs4_shadow_init();
30847c478bd9Sstevel@tonic-gate 	(void) nfs4_acache_init();
30857c478bd9Sstevel@tonic-gate 	(void) nfs4_subr_init();
30867c478bd9Sstevel@tonic-gate 	nfs4_acl_init();
30877c478bd9Sstevel@tonic-gate 	nfs_idmap_init();
30887c478bd9Sstevel@tonic-gate 	nfs4_callback_init();
30897c478bd9Sstevel@tonic-gate 	nfs4_secinfo_init();
30907c478bd9Sstevel@tonic-gate #ifdef	DEBUG
30917c478bd9Sstevel@tonic-gate 	tsd_create(&nfs4_tsd_key, NULL);
30927c478bd9Sstevel@tonic-gate #endif
30937c478bd9Sstevel@tonic-gate 
30947c478bd9Sstevel@tonic-gate 	/*
30957c478bd9Sstevel@tonic-gate 	 * Add a CPR callback so that we can update client
30967c478bd9Sstevel@tonic-gate 	 * lease after a suspend and resume.
30977c478bd9Sstevel@tonic-gate 	 */
30987c478bd9Sstevel@tonic-gate 	cid = callb_add(nfs4_client_cpr_callb, 0, CB_CL_CPR_RPC, "nfs4");
30997c478bd9Sstevel@tonic-gate 
31007c478bd9Sstevel@tonic-gate 	zone_key_create(&mi4_list_key, nfs4_mi_init, nfs4_mi_shutdown,
31017c478bd9Sstevel@tonic-gate 	    nfs4_mi_destroy);
31027c478bd9Sstevel@tonic-gate 
31037c478bd9Sstevel@tonic-gate 	/*
31047c478bd9Sstevel@tonic-gate 	 * Initialise the reference count of the notsupp xattr cache vnode to 1
31057c478bd9Sstevel@tonic-gate 	 * so that it never goes away (VOP_INACTIVE isn't called on it).
31067c478bd9Sstevel@tonic-gate 	 */
31077c478bd9Sstevel@tonic-gate 	nfs4_xattr_notsupp_vnode.v_count = 1;
31087c478bd9Sstevel@tonic-gate }
31097c478bd9Sstevel@tonic-gate 
31107c478bd9Sstevel@tonic-gate void
31117c478bd9Sstevel@tonic-gate nfs4_clnt_fini(void)
31127c478bd9Sstevel@tonic-gate {
31137c478bd9Sstevel@tonic-gate 	(void) zone_key_delete(mi4_list_key);
31147c478bd9Sstevel@tonic-gate 	nfs4_vnops_fini();
31157c478bd9Sstevel@tonic-gate 	(void) nfs4_rnode_fini();
31167c478bd9Sstevel@tonic-gate 	(void) nfs4_shadow_fini();
31177c478bd9Sstevel@tonic-gate 	(void) nfs4_acache_fini();
31187c478bd9Sstevel@tonic-gate 	(void) nfs4_subr_fini();
31197c478bd9Sstevel@tonic-gate 	nfs_idmap_fini();
31207c478bd9Sstevel@tonic-gate 	nfs4_callback_fini();
31217c478bd9Sstevel@tonic-gate 	nfs4_secinfo_fini();
31227c478bd9Sstevel@tonic-gate #ifdef	DEBUG
31237c478bd9Sstevel@tonic-gate 	tsd_destroy(&nfs4_tsd_key);
31247c478bd9Sstevel@tonic-gate #endif
31257c478bd9Sstevel@tonic-gate 	if (cid)
31267c478bd9Sstevel@tonic-gate 		(void) callb_delete(cid);
31277c478bd9Sstevel@tonic-gate }
31287c478bd9Sstevel@tonic-gate 
31297c478bd9Sstevel@tonic-gate /*ARGSUSED*/
31307c478bd9Sstevel@tonic-gate static boolean_t
31317c478bd9Sstevel@tonic-gate nfs4_client_cpr_callb(void *arg, int code)
31327c478bd9Sstevel@tonic-gate {
31337c478bd9Sstevel@tonic-gate 	/*
31347c478bd9Sstevel@tonic-gate 	 * We get called for Suspend and Resume events.
31357c478bd9Sstevel@tonic-gate 	 * For the suspend case we simply don't care!
31367c478bd9Sstevel@tonic-gate 	 */
31377c478bd9Sstevel@tonic-gate 	if (code == CB_CODE_CPR_CHKPT) {
31387c478bd9Sstevel@tonic-gate 		return (B_TRUE);
31397c478bd9Sstevel@tonic-gate 	}
31407c478bd9Sstevel@tonic-gate 
31417c478bd9Sstevel@tonic-gate 	/*
31427c478bd9Sstevel@tonic-gate 	 * When we get to here we are in the process of
31437c478bd9Sstevel@tonic-gate 	 * resuming the system from a previous suspend.
31447c478bd9Sstevel@tonic-gate 	 */
31457c478bd9Sstevel@tonic-gate 	nfs4_client_resumed = gethrestime_sec();
31467c478bd9Sstevel@tonic-gate 	return (B_TRUE);
31477c478bd9Sstevel@tonic-gate }
31487c478bd9Sstevel@tonic-gate 
31497c478bd9Sstevel@tonic-gate void
31507c478bd9Sstevel@tonic-gate nfs4_renew_lease_thread(nfs4_server_t *sp)
31517c478bd9Sstevel@tonic-gate {
31527c478bd9Sstevel@tonic-gate 	int	error = 0;
31537c478bd9Sstevel@tonic-gate 	time_t	tmp_last_renewal_time, tmp_time, tmp_now_time, kip_secs;
31547c478bd9Sstevel@tonic-gate 	clock_t	tick_delay = 0;
31557c478bd9Sstevel@tonic-gate 	clock_t time_left = 0;
31567c478bd9Sstevel@tonic-gate 	callb_cpr_t cpr_info;
31577c478bd9Sstevel@tonic-gate 	kmutex_t cpr_lock;
31587c478bd9Sstevel@tonic-gate 
31597c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
31607c478bd9Sstevel@tonic-gate 		"nfs4_renew_lease_thread: acting on sp 0x%p", (void*)sp));
31617c478bd9Sstevel@tonic-gate 	mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
31627c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Lease");
31637c478bd9Sstevel@tonic-gate 
31647c478bd9Sstevel@tonic-gate 	mutex_enter(&sp->s_lock);
31657c478bd9Sstevel@tonic-gate 	/* sp->s_lease_time is set via a GETATTR */
31667c478bd9Sstevel@tonic-gate 	sp->last_renewal_time = gethrestime_sec();
31677c478bd9Sstevel@tonic-gate 	sp->lease_valid = NFS4_LEASE_UNINITIALIZED;
31687c478bd9Sstevel@tonic-gate 	ASSERT(sp->s_refcnt >= 1);
31697c478bd9Sstevel@tonic-gate 
31707c478bd9Sstevel@tonic-gate 	for (;;) {
31717c478bd9Sstevel@tonic-gate 		if (!sp->state_ref_count ||
31727c478bd9Sstevel@tonic-gate 			sp->lease_valid != NFS4_LEASE_VALID) {
31737c478bd9Sstevel@tonic-gate 
31747c478bd9Sstevel@tonic-gate 			kip_secs = MAX((sp->s_lease_time >> 1) -
31757c478bd9Sstevel@tonic-gate 				(3 * sp->propagation_delay.tv_sec), 1);
31767c478bd9Sstevel@tonic-gate 
31777c478bd9Sstevel@tonic-gate 			tick_delay = SEC_TO_TICK(kip_secs);
31787c478bd9Sstevel@tonic-gate 
31797c478bd9Sstevel@tonic-gate 			NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
31807c478bd9Sstevel@tonic-gate 				"nfs4_renew_lease_thread: no renew : thread "
31817c478bd9Sstevel@tonic-gate 				"wait %ld secs", kip_secs));
31827c478bd9Sstevel@tonic-gate 
31837c478bd9Sstevel@tonic-gate 			NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
31847c478bd9Sstevel@tonic-gate 				"nfs4_renew_lease_thread: no renew : "
31857c478bd9Sstevel@tonic-gate 				"state_ref_count %d, lease_valid %d",
31867c478bd9Sstevel@tonic-gate 				sp->state_ref_count, sp->lease_valid));
31877c478bd9Sstevel@tonic-gate 
31887c478bd9Sstevel@tonic-gate 			mutex_enter(&cpr_lock);
31897c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cpr_info);
31907c478bd9Sstevel@tonic-gate 			mutex_exit(&cpr_lock);
31917c478bd9Sstevel@tonic-gate 			time_left = cv_timedwait(&sp->cv_thread_exit,
31927c478bd9Sstevel@tonic-gate 				&sp->s_lock, tick_delay + lbolt);
31937c478bd9Sstevel@tonic-gate 			mutex_enter(&cpr_lock);
31947c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
31957c478bd9Sstevel@tonic-gate 			mutex_exit(&cpr_lock);
31967c478bd9Sstevel@tonic-gate 
31977c478bd9Sstevel@tonic-gate 			NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
31987c478bd9Sstevel@tonic-gate 				"nfs4_renew_lease_thread: no renew: "
31997c478bd9Sstevel@tonic-gate 				"time left %ld", time_left));
32007c478bd9Sstevel@tonic-gate 
32017c478bd9Sstevel@tonic-gate 			if (sp->s_thread_exit == NFS4_THREAD_EXIT)
32027c478bd9Sstevel@tonic-gate 				goto die;
32037c478bd9Sstevel@tonic-gate 			continue;
32047c478bd9Sstevel@tonic-gate 		}
32057c478bd9Sstevel@tonic-gate 
32067c478bd9Sstevel@tonic-gate 		tmp_last_renewal_time = sp->last_renewal_time;
32077c478bd9Sstevel@tonic-gate 
32087c478bd9Sstevel@tonic-gate 		tmp_time = gethrestime_sec() - sp->last_renewal_time +
32097c478bd9Sstevel@tonic-gate 			(3 * sp->propagation_delay.tv_sec);
32107c478bd9Sstevel@tonic-gate 
32117c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
32127c478bd9Sstevel@tonic-gate 			"nfs4_renew_lease_thread: tmp_time %ld, "
32137c478bd9Sstevel@tonic-gate 			"sp->last_renewal_time %ld", tmp_time,
32147c478bd9Sstevel@tonic-gate 			sp->last_renewal_time));
32157c478bd9Sstevel@tonic-gate 
32167c478bd9Sstevel@tonic-gate 		kip_secs = MAX((sp->s_lease_time >> 1) - tmp_time, 1);
32177c478bd9Sstevel@tonic-gate 
32187c478bd9Sstevel@tonic-gate 		tick_delay = SEC_TO_TICK(kip_secs);
32197c478bd9Sstevel@tonic-gate 
32207c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
32217c478bd9Sstevel@tonic-gate 			"nfs4_renew_lease_thread: valid lease: sleep for %ld "
32227c478bd9Sstevel@tonic-gate 			"secs", kip_secs));
32237c478bd9Sstevel@tonic-gate 
32247c478bd9Sstevel@tonic-gate 		mutex_enter(&cpr_lock);
32257c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_BEGIN(&cpr_info);
32267c478bd9Sstevel@tonic-gate 		mutex_exit(&cpr_lock);
32277c478bd9Sstevel@tonic-gate 		time_left = cv_timedwait(&sp->cv_thread_exit, &sp->s_lock,
32287c478bd9Sstevel@tonic-gate 			tick_delay + lbolt);
32297c478bd9Sstevel@tonic-gate 		mutex_enter(&cpr_lock);
32307c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
32317c478bd9Sstevel@tonic-gate 		mutex_exit(&cpr_lock);
32327c478bd9Sstevel@tonic-gate 
32337c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
32347c478bd9Sstevel@tonic-gate 			"nfs4_renew_lease_thread: valid lease: time left %ld :"
32357c478bd9Sstevel@tonic-gate 			"sp last_renewal_time %ld, nfs4_client_resumed %ld, "
32367c478bd9Sstevel@tonic-gate 			"tmp_last_renewal_time %ld", time_left,
32377c478bd9Sstevel@tonic-gate 			sp->last_renewal_time, nfs4_client_resumed,
32387c478bd9Sstevel@tonic-gate 			tmp_last_renewal_time));
32397c478bd9Sstevel@tonic-gate 
32407c478bd9Sstevel@tonic-gate 		if (sp->s_thread_exit == NFS4_THREAD_EXIT)
32417c478bd9Sstevel@tonic-gate 			goto die;
32427c478bd9Sstevel@tonic-gate 
32437c478bd9Sstevel@tonic-gate 		if (tmp_last_renewal_time == sp->last_renewal_time ||
32447c478bd9Sstevel@tonic-gate 			(nfs4_client_resumed != 0 &&
32457c478bd9Sstevel@tonic-gate 			nfs4_client_resumed > sp->last_renewal_time)) {
32467c478bd9Sstevel@tonic-gate 			/*
32477c478bd9Sstevel@tonic-gate 			 * Issue RENEW op since we haven't renewed the lease
32487c478bd9Sstevel@tonic-gate 			 * since we slept.
32497c478bd9Sstevel@tonic-gate 			 */
32507c478bd9Sstevel@tonic-gate 			tmp_now_time = gethrestime_sec();
32517c478bd9Sstevel@tonic-gate 			error = nfs4renew(sp);
32527c478bd9Sstevel@tonic-gate 			/*
32537c478bd9Sstevel@tonic-gate 			 * Need to re-acquire sp's lock, nfs4renew()
32547c478bd9Sstevel@tonic-gate 			 * relinqueshes it.
32557c478bd9Sstevel@tonic-gate 			 */
32567c478bd9Sstevel@tonic-gate 			mutex_enter(&sp->s_lock);
32577c478bd9Sstevel@tonic-gate 
32587c478bd9Sstevel@tonic-gate 			/*
32597c478bd9Sstevel@tonic-gate 			 * See if someone changed s_thread_exit while we gave
32607c478bd9Sstevel@tonic-gate 			 * up s_lock.
32617c478bd9Sstevel@tonic-gate 			 */
32627c478bd9Sstevel@tonic-gate 			if (sp->s_thread_exit == NFS4_THREAD_EXIT)
32637c478bd9Sstevel@tonic-gate 				goto die;
32647c478bd9Sstevel@tonic-gate 
32657c478bd9Sstevel@tonic-gate 			if (!error) {
32667c478bd9Sstevel@tonic-gate 				/*
32677c478bd9Sstevel@tonic-gate 				 * check to see if we implicitly renewed while
32687c478bd9Sstevel@tonic-gate 				 * we waited for a reply for our RENEW call.
32697c478bd9Sstevel@tonic-gate 				 */
32707c478bd9Sstevel@tonic-gate 				if (tmp_last_renewal_time ==
32717c478bd9Sstevel@tonic-gate 					sp->last_renewal_time) {
32727c478bd9Sstevel@tonic-gate 					/* no implicit renew came */
32737c478bd9Sstevel@tonic-gate 					sp->last_renewal_time = tmp_now_time;
32747c478bd9Sstevel@tonic-gate 				} else {
32757c478bd9Sstevel@tonic-gate 					NFS4_DEBUG(nfs4_client_lease_debug,
32767c478bd9Sstevel@tonic-gate 						(CE_NOTE, "renew_thread: did "
32777c478bd9Sstevel@tonic-gate 						"implicit renewal before reply "
32787c478bd9Sstevel@tonic-gate 						"from server for RENEW"));
32797c478bd9Sstevel@tonic-gate 				}
32807c478bd9Sstevel@tonic-gate 			} else {
32817c478bd9Sstevel@tonic-gate 				/* figure out error */
32827c478bd9Sstevel@tonic-gate 				NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
32837c478bd9Sstevel@tonic-gate 					"renew_thread: nfs4renew returned error"
32847c478bd9Sstevel@tonic-gate 					" %d", error));
32857c478bd9Sstevel@tonic-gate 			}
32867c478bd9Sstevel@tonic-gate 
32877c478bd9Sstevel@tonic-gate 		}
32887c478bd9Sstevel@tonic-gate 	}
32897c478bd9Sstevel@tonic-gate 
32907c478bd9Sstevel@tonic-gate die:
32917c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
32927c478bd9Sstevel@tonic-gate 		"nfs4_renew_lease_thread: thread exiting"));
32937c478bd9Sstevel@tonic-gate 
32947c478bd9Sstevel@tonic-gate 	while (sp->s_otw_call_count != 0) {
32957c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
32967c478bd9Sstevel@tonic-gate 			"nfs4_renew_lease_thread: waiting for outstanding "
32977c478bd9Sstevel@tonic-gate 			"otw calls to finish for sp 0x%p, current "
32987c478bd9Sstevel@tonic-gate 			"s_otw_call_count %d", (void *)sp,
32997c478bd9Sstevel@tonic-gate 			sp->s_otw_call_count));
33007c478bd9Sstevel@tonic-gate 		mutex_enter(&cpr_lock);
33017c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_BEGIN(&cpr_info);
33027c478bd9Sstevel@tonic-gate 		mutex_exit(&cpr_lock);
33037c478bd9Sstevel@tonic-gate 		cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
33047c478bd9Sstevel@tonic-gate 		mutex_enter(&cpr_lock);
33057c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
33067c478bd9Sstevel@tonic-gate 		mutex_exit(&cpr_lock);
33077c478bd9Sstevel@tonic-gate 	}
33087c478bd9Sstevel@tonic-gate 	mutex_exit(&sp->s_lock);
33097c478bd9Sstevel@tonic-gate 
33107c478bd9Sstevel@tonic-gate 	nfs4_server_rele(sp);		/* free the thread's reference */
33117c478bd9Sstevel@tonic-gate 	nfs4_server_rele(sp);		/* free the list's reference */
33127c478bd9Sstevel@tonic-gate 	sp = NULL;
33137c478bd9Sstevel@tonic-gate 
33147c478bd9Sstevel@tonic-gate done:
33157c478bd9Sstevel@tonic-gate 	mutex_enter(&cpr_lock);
33167c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cpr_info);	/* drops cpr_lock */
33177c478bd9Sstevel@tonic-gate 	mutex_destroy(&cpr_lock);
33187c478bd9Sstevel@tonic-gate 
33197c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
33207c478bd9Sstevel@tonic-gate 		"nfs4_renew_lease_thread: renew thread exit officially"));
33217c478bd9Sstevel@tonic-gate 
33227c478bd9Sstevel@tonic-gate 	zthread_exit();
33237c478bd9Sstevel@tonic-gate 	/* NOT REACHED */
33247c478bd9Sstevel@tonic-gate }
33257c478bd9Sstevel@tonic-gate 
33267c478bd9Sstevel@tonic-gate /*
33277c478bd9Sstevel@tonic-gate  * Send out a RENEW op to the server.
33287c478bd9Sstevel@tonic-gate  * Assumes sp is locked down.
33297c478bd9Sstevel@tonic-gate  */
33307c478bd9Sstevel@tonic-gate static int
33317c478bd9Sstevel@tonic-gate nfs4renew(nfs4_server_t *sp)
33327c478bd9Sstevel@tonic-gate {
33337c478bd9Sstevel@tonic-gate 	COMPOUND4args_clnt args;
33347c478bd9Sstevel@tonic-gate 	COMPOUND4res_clnt res;
33357c478bd9Sstevel@tonic-gate 	nfs_argop4 argop[1];
33367c478bd9Sstevel@tonic-gate 	int doqueue = 1;
33377c478bd9Sstevel@tonic-gate 	int rpc_error;
33387c478bd9Sstevel@tonic-gate 	cred_t *cr;
33397c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
33407c478bd9Sstevel@tonic-gate 	timespec_t prop_time, after_time;
33417c478bd9Sstevel@tonic-gate 	int needrecov = FALSE;
33427c478bd9Sstevel@tonic-gate 	nfs4_recov_state_t recov_state;
33437c478bd9Sstevel@tonic-gate 	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
33447c478bd9Sstevel@tonic-gate 
33457c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4renew"));
33467c478bd9Sstevel@tonic-gate 
33477c478bd9Sstevel@tonic-gate 	recov_state.rs_flags = 0;
33487c478bd9Sstevel@tonic-gate 	recov_state.rs_num_retry_despite_err = 0;
33497c478bd9Sstevel@tonic-gate 
33507c478bd9Sstevel@tonic-gate recov_retry:
33517c478bd9Sstevel@tonic-gate 	mi = sp->mntinfo4_list;
33527c478bd9Sstevel@tonic-gate 	VFS_HOLD(mi->mi_vfsp);
33537c478bd9Sstevel@tonic-gate 	mutex_exit(&sp->s_lock);
33547c478bd9Sstevel@tonic-gate 	ASSERT(mi != NULL);
33557c478bd9Sstevel@tonic-gate 
33567c478bd9Sstevel@tonic-gate 	e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
33577c478bd9Sstevel@tonic-gate 	if (e.error) {
33587c478bd9Sstevel@tonic-gate 		VFS_RELE(mi->mi_vfsp);
33597c478bd9Sstevel@tonic-gate 		return (e.error);
33607c478bd9Sstevel@tonic-gate 	}
33617c478bd9Sstevel@tonic-gate 
33627c478bd9Sstevel@tonic-gate 	/* Check to see if we're dealing with a marked-dead sp */
33637c478bd9Sstevel@tonic-gate 	mutex_enter(&sp->s_lock);
33647c478bd9Sstevel@tonic-gate 	if (sp->s_thread_exit == NFS4_THREAD_EXIT) {
33657c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
33667c478bd9Sstevel@tonic-gate 		nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
33677c478bd9Sstevel@tonic-gate 		VFS_RELE(mi->mi_vfsp);
33687c478bd9Sstevel@tonic-gate 		return (0);
33697c478bd9Sstevel@tonic-gate 	}
33707c478bd9Sstevel@tonic-gate 
33717c478bd9Sstevel@tonic-gate 	/* Make sure mi hasn't changed on us */
33727c478bd9Sstevel@tonic-gate 	if (mi != sp->mntinfo4_list) {
33737c478bd9Sstevel@tonic-gate 		/* Must drop sp's lock to avoid a recursive mutex enter */
33747c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
33757c478bd9Sstevel@tonic-gate 		nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
33767c478bd9Sstevel@tonic-gate 		VFS_RELE(mi->mi_vfsp);
33777c478bd9Sstevel@tonic-gate 		mutex_enter(&sp->s_lock);
33787c478bd9Sstevel@tonic-gate 		goto recov_retry;
33797c478bd9Sstevel@tonic-gate 	}
33807c478bd9Sstevel@tonic-gate 	mutex_exit(&sp->s_lock);
33817c478bd9Sstevel@tonic-gate 
33827c478bd9Sstevel@tonic-gate 	args.ctag = TAG_RENEW;
33837c478bd9Sstevel@tonic-gate 
33847c478bd9Sstevel@tonic-gate 	args.array_len = 1;
33857c478bd9Sstevel@tonic-gate 	args.array = argop;
33867c478bd9Sstevel@tonic-gate 
33877c478bd9Sstevel@tonic-gate 	argop[0].argop = OP_RENEW;
33887c478bd9Sstevel@tonic-gate 
33897c478bd9Sstevel@tonic-gate 	mutex_enter(&sp->s_lock);
33907c478bd9Sstevel@tonic-gate 	argop[0].nfs_argop4_u.oprenew.clientid = sp->clientid;
33917c478bd9Sstevel@tonic-gate 	cr = sp->s_cred;
33927c478bd9Sstevel@tonic-gate 	crhold(cr);
33937c478bd9Sstevel@tonic-gate 	mutex_exit(&sp->s_lock);
33947c478bd9Sstevel@tonic-gate 
33957c478bd9Sstevel@tonic-gate 	ASSERT(cr != NULL);
33967c478bd9Sstevel@tonic-gate 
33977c478bd9Sstevel@tonic-gate 	/* used to figure out RTT for sp */
33987c478bd9Sstevel@tonic-gate 	gethrestime(&prop_time);
33997c478bd9Sstevel@tonic-gate 
34007c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
34017c478bd9Sstevel@tonic-gate 	    "nfs4renew: %s call, sp 0x%p", needrecov ? "recov" : "first",
34027c478bd9Sstevel@tonic-gate 	    (void*)sp));
34037c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "before: %ld s %ld ns ",
34047c478bd9Sstevel@tonic-gate 		prop_time.tv_sec, prop_time.tv_nsec));
34057c478bd9Sstevel@tonic-gate 
34067c478bd9Sstevel@tonic-gate 	DTRACE_PROBE2(nfs4__renew__start, nfs4_server_t *, sp,
34077c478bd9Sstevel@tonic-gate 			mntinfo4_t *, mi);
34087c478bd9Sstevel@tonic-gate 
34097c478bd9Sstevel@tonic-gate 	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
34107c478bd9Sstevel@tonic-gate 	crfree(cr);
34117c478bd9Sstevel@tonic-gate 
34127c478bd9Sstevel@tonic-gate 	DTRACE_PROBE2(nfs4__renew__end, nfs4_server_t *, sp,
34137c478bd9Sstevel@tonic-gate 			mntinfo4_t *, mi);
34147c478bd9Sstevel@tonic-gate 
34157c478bd9Sstevel@tonic-gate 	gethrestime(&after_time);
34167c478bd9Sstevel@tonic-gate 
34177c478bd9Sstevel@tonic-gate 	mutex_enter(&sp->s_lock);
34187c478bd9Sstevel@tonic-gate 	sp->propagation_delay.tv_sec =
34197c478bd9Sstevel@tonic-gate 		MAX(1, after_time.tv_sec - prop_time.tv_sec);
34207c478bd9Sstevel@tonic-gate 	mutex_exit(&sp->s_lock);
34217c478bd9Sstevel@tonic-gate 
34227c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "after : %ld s %ld ns ",
34237c478bd9Sstevel@tonic-gate 		after_time.tv_sec, after_time.tv_nsec));
34247c478bd9Sstevel@tonic-gate 
34257c478bd9Sstevel@tonic-gate 	if (e.error == 0 && res.status == NFS4ERR_CB_PATH_DOWN) {
3426d1ea0e82Swebaker 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
34277c478bd9Sstevel@tonic-gate 		nfs4_delegreturn_all(sp);
34287c478bd9Sstevel@tonic-gate 		nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
34297c478bd9Sstevel@tonic-gate 		VFS_RELE(mi->mi_vfsp);
34307c478bd9Sstevel@tonic-gate 		/*
34317c478bd9Sstevel@tonic-gate 		 * If the server returns CB_PATH_DOWN, it has renewed
34327c478bd9Sstevel@tonic-gate 		 * the lease and informed us that the callback path is
34337c478bd9Sstevel@tonic-gate 		 * down.  Since the lease is renewed, just return 0 and
34347c478bd9Sstevel@tonic-gate 		 * let the renew thread proceed as normal.
34357c478bd9Sstevel@tonic-gate 		 */
34367c478bd9Sstevel@tonic-gate 		return (0);
34377c478bd9Sstevel@tonic-gate 	}
34387c478bd9Sstevel@tonic-gate 
34397c478bd9Sstevel@tonic-gate 	needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);
34407c478bd9Sstevel@tonic-gate 	if (!needrecov && e.error) {
34417c478bd9Sstevel@tonic-gate 		nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
34427c478bd9Sstevel@tonic-gate 		VFS_RELE(mi->mi_vfsp);
34437c478bd9Sstevel@tonic-gate 		return (e.error);
34447c478bd9Sstevel@tonic-gate 	}
34457c478bd9Sstevel@tonic-gate 
34467c478bd9Sstevel@tonic-gate 	rpc_error = e.error;
34477c478bd9Sstevel@tonic-gate 
34487c478bd9Sstevel@tonic-gate 	if (needrecov) {
34497c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
34507c478bd9Sstevel@tonic-gate 		    "nfs4renew: initiating recovery\n"));
34517c478bd9Sstevel@tonic-gate 
34527c478bd9Sstevel@tonic-gate 		if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
34537c478bd9Sstevel@tonic-gate 		    OP_RENEW, NULL) == FALSE) {
34547c478bd9Sstevel@tonic-gate 			nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
34557c478bd9Sstevel@tonic-gate 			VFS_RELE(mi->mi_vfsp);
34567c478bd9Sstevel@tonic-gate 			if (!e.error)
34577c478bd9Sstevel@tonic-gate 				(void) xdr_free(xdr_COMPOUND4res_clnt,
34587c478bd9Sstevel@tonic-gate 								(caddr_t)&res);
34597c478bd9Sstevel@tonic-gate 			mutex_enter(&sp->s_lock);
34607c478bd9Sstevel@tonic-gate 			goto recov_retry;
34617c478bd9Sstevel@tonic-gate 		}
34627c478bd9Sstevel@tonic-gate 		/* fall through for res.status case */
34637c478bd9Sstevel@tonic-gate 	}
34647c478bd9Sstevel@tonic-gate 
34657c478bd9Sstevel@tonic-gate 	if (res.status) {
34667c478bd9Sstevel@tonic-gate 		if (res.status == NFS4ERR_LEASE_MOVED) {
34677c478bd9Sstevel@tonic-gate 			/*EMPTY*/
34687c478bd9Sstevel@tonic-gate 			/*
34697c478bd9Sstevel@tonic-gate 			 * XXX need to try every mntinfo4 in sp->mntinfo4_list
34707c478bd9Sstevel@tonic-gate 			 * to renew the lease on that server
34717c478bd9Sstevel@tonic-gate 			 */
34727c478bd9Sstevel@tonic-gate 		}
34737c478bd9Sstevel@tonic-gate 		e.error = geterrno4(res.status);
34747c478bd9Sstevel@tonic-gate 	}
34757c478bd9Sstevel@tonic-gate 
34767c478bd9Sstevel@tonic-gate 	if (!rpc_error)
34777c478bd9Sstevel@tonic-gate 		(void) xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
34787c478bd9Sstevel@tonic-gate 
34797c478bd9Sstevel@tonic-gate 	nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
34807c478bd9Sstevel@tonic-gate 
34817c478bd9Sstevel@tonic-gate 	VFS_RELE(mi->mi_vfsp);
34827c478bd9Sstevel@tonic-gate 
34837c478bd9Sstevel@tonic-gate 	return (e.error);
34847c478bd9Sstevel@tonic-gate }
34857c478bd9Sstevel@tonic-gate 
34867c478bd9Sstevel@tonic-gate void
34877c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count(mntinfo4_t *mi)
34887c478bd9Sstevel@tonic-gate {
34897c478bd9Sstevel@tonic-gate 	nfs4_server_t	*sp;
34907c478bd9Sstevel@tonic-gate 
34917c478bd9Sstevel@tonic-gate 	/* this locks down sp if it is found */
34927c478bd9Sstevel@tonic-gate 	sp = find_nfs4_server(mi);
34937c478bd9Sstevel@tonic-gate 
34947c478bd9Sstevel@tonic-gate 	if (sp != NULL) {
34957c478bd9Sstevel@tonic-gate 		nfs4_inc_state_ref_count_nolock(sp, mi);
34967c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
34977c478bd9Sstevel@tonic-gate 		nfs4_server_rele(sp);
34987c478bd9Sstevel@tonic-gate 	}
34997c478bd9Sstevel@tonic-gate }
35007c478bd9Sstevel@tonic-gate 
35017c478bd9Sstevel@tonic-gate /*
35027c478bd9Sstevel@tonic-gate  * Bump the number of OPEN files (ie: those with state) so we know if this
35037c478bd9Sstevel@tonic-gate  * nfs4_server has any state to maintain a lease for or not.
35047c478bd9Sstevel@tonic-gate  *
35057c478bd9Sstevel@tonic-gate  * Also, marks the nfs4_server's lease valid if it hasn't been done so already.
35067c478bd9Sstevel@tonic-gate  */
35077c478bd9Sstevel@tonic-gate void
35087c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count_nolock(nfs4_server_t *sp, mntinfo4_t *mi)
35097c478bd9Sstevel@tonic-gate {
35107c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&sp->s_lock));
35117c478bd9Sstevel@tonic-gate 
35127c478bd9Sstevel@tonic-gate 	sp->state_ref_count++;
35137c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
35147c478bd9Sstevel@tonic-gate 		"nfs4_inc_state_ref_count: state_ref_count now %d",
35157c478bd9Sstevel@tonic-gate 		sp->state_ref_count));
35167c478bd9Sstevel@tonic-gate 
35177c478bd9Sstevel@tonic-gate 	if (sp->lease_valid == NFS4_LEASE_UNINITIALIZED)
35187c478bd9Sstevel@tonic-gate 		sp->lease_valid = NFS4_LEASE_VALID;
35197c478bd9Sstevel@tonic-gate 
35207c478bd9Sstevel@tonic-gate 	/*
35217c478bd9Sstevel@tonic-gate 	 * If this call caused the lease to be marked valid and/or
35227c478bd9Sstevel@tonic-gate 	 * took the state_ref_count from 0 to 1, then start the time
35237c478bd9Sstevel@tonic-gate 	 * on lease renewal.
35247c478bd9Sstevel@tonic-gate 	 */
35257c478bd9Sstevel@tonic-gate 	if (sp->lease_valid == NFS4_LEASE_VALID && sp->state_ref_count == 1)
35267c478bd9Sstevel@tonic-gate 		sp->last_renewal_time = gethrestime_sec();
35277c478bd9Sstevel@tonic-gate 
35287c478bd9Sstevel@tonic-gate 	/* update the number of open files for mi */
35297c478bd9Sstevel@tonic-gate 	mi->mi_open_files++;
35307c478bd9Sstevel@tonic-gate }
35317c478bd9Sstevel@tonic-gate 
35327c478bd9Sstevel@tonic-gate void
35337c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count(mntinfo4_t *mi)
35347c478bd9Sstevel@tonic-gate {
35357c478bd9Sstevel@tonic-gate 	nfs4_server_t	*sp;
35367c478bd9Sstevel@tonic-gate 
35377c478bd9Sstevel@tonic-gate 	/* this locks down sp if it is found */
35387c478bd9Sstevel@tonic-gate 	sp = find_nfs4_server_all(mi, 1);
35397c478bd9Sstevel@tonic-gate 
35407c478bd9Sstevel@tonic-gate 	if (sp != NULL) {
35417c478bd9Sstevel@tonic-gate 		nfs4_dec_state_ref_count_nolock(sp, mi);
35427c478bd9Sstevel@tonic-gate 		mutex_exit(&sp->s_lock);
35437c478bd9Sstevel@tonic-gate 		nfs4_server_rele(sp);
35447c478bd9Sstevel@tonic-gate 	}
35457c478bd9Sstevel@tonic-gate }
35467c478bd9Sstevel@tonic-gate 
35477c478bd9Sstevel@tonic-gate /*
35487c478bd9Sstevel@tonic-gate  * Decrement the number of OPEN files (ie: those with state) so we know if
35497c478bd9Sstevel@tonic-gate  * this nfs4_server has any state to maintain a lease for or not.
35507c478bd9Sstevel@tonic-gate  */
35517c478bd9Sstevel@tonic-gate void
35527c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count_nolock(nfs4_server_t *sp, mntinfo4_t *mi)
35537c478bd9Sstevel@tonic-gate {
35547c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&sp->s_lock));
35557c478bd9Sstevel@tonic-gate 	ASSERT(sp->state_ref_count != 0);
35567c478bd9Sstevel@tonic-gate 	sp->state_ref_count--;
35577c478bd9Sstevel@tonic-gate 
35587c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
35597c478bd9Sstevel@tonic-gate 		"nfs4_dec_state_ref_count: state ref count now %d",
35607c478bd9Sstevel@tonic-gate 		sp->state_ref_count));
35617c478bd9Sstevel@tonic-gate 
35627c478bd9Sstevel@tonic-gate 	mi->mi_open_files--;
35637c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
35647c478bd9Sstevel@tonic-gate 		"nfs4_dec_state_ref_count: mi open files %d, v4 flags 0x%x",
35657c478bd9Sstevel@tonic-gate 		mi->mi_open_files, mi->mi_flags));
35667c478bd9Sstevel@tonic-gate 
35677c478bd9Sstevel@tonic-gate 	/* We don't have to hold the mi_lock to test mi_flags */
35687c478bd9Sstevel@tonic-gate 	if (mi->mi_open_files == 0 &&
35697c478bd9Sstevel@tonic-gate 	    (mi->mi_flags & MI4_REMOVE_ON_LAST_CLOSE)) {
35707c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
35717c478bd9Sstevel@tonic-gate 			"nfs4_dec_state_ref_count: remove mntinfo4 %p since "
35727c478bd9Sstevel@tonic-gate 			"we have closed the last open file", (void*)mi));
35737c478bd9Sstevel@tonic-gate 		nfs4_remove_mi_from_server(mi, sp);
35747c478bd9Sstevel@tonic-gate 	}
35757c478bd9Sstevel@tonic-gate }
35767c478bd9Sstevel@tonic-gate 
35777c478bd9Sstevel@tonic-gate bool_t
35787c478bd9Sstevel@tonic-gate inlease(nfs4_server_t *sp)
35797c478bd9Sstevel@tonic-gate {
35807c478bd9Sstevel@tonic-gate 	bool_t result;
35817c478bd9Sstevel@tonic-gate 
35827c478bd9Sstevel@tonic-gate 	ASSERT(mutex_owned(&sp->s_lock));
35837c478bd9Sstevel@tonic-gate 
35847c478bd9Sstevel@tonic-gate 	if (sp->lease_valid == NFS4_LEASE_VALID &&
35857c478bd9Sstevel@tonic-gate 	    gethrestime_sec() < sp->last_renewal_time + sp->s_lease_time)
35867c478bd9Sstevel@tonic-gate 		result = TRUE;
35877c478bd9Sstevel@tonic-gate 	else
35887c478bd9Sstevel@tonic-gate 		result = FALSE;
35897c478bd9Sstevel@tonic-gate 
35907c478bd9Sstevel@tonic-gate 	return (result);
35917c478bd9Sstevel@tonic-gate }
35927c478bd9Sstevel@tonic-gate 
35937c478bd9Sstevel@tonic-gate 
35947c478bd9Sstevel@tonic-gate /*
35957c478bd9Sstevel@tonic-gate  * Return non-zero if the given nfs4_server_t is going through recovery.
35967c478bd9Sstevel@tonic-gate  */
35977c478bd9Sstevel@tonic-gate 
35987c478bd9Sstevel@tonic-gate int
35997c478bd9Sstevel@tonic-gate nfs4_server_in_recovery(nfs4_server_t *sp)
36007c478bd9Sstevel@tonic-gate {
36017c478bd9Sstevel@tonic-gate 	return (nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
36027c478bd9Sstevel@tonic-gate }
36037c478bd9Sstevel@tonic-gate 
36047c478bd9Sstevel@tonic-gate /*
36057c478bd9Sstevel@tonic-gate  * Compare two shared filehandle objects.  Returns -1, 0, or +1, if the
36067c478bd9Sstevel@tonic-gate  * first is less than, equal to, or greater than the second.
36077c478bd9Sstevel@tonic-gate  */
36087c478bd9Sstevel@tonic-gate 
36097c478bd9Sstevel@tonic-gate int
36107c478bd9Sstevel@tonic-gate sfh4cmp(const void *p1, const void *p2)
36117c478bd9Sstevel@tonic-gate {
36127c478bd9Sstevel@tonic-gate 	const nfs4_sharedfh_t *sfh1 = (const nfs4_sharedfh_t *)p1;
36137c478bd9Sstevel@tonic-gate 	const nfs4_sharedfh_t *sfh2 = (const nfs4_sharedfh_t *)p2;
36147c478bd9Sstevel@tonic-gate 
36157c478bd9Sstevel@tonic-gate 	return (nfs4cmpfh(&sfh1->sfh_fh, &sfh2->sfh_fh));
36167c478bd9Sstevel@tonic-gate }
36177c478bd9Sstevel@tonic-gate 
36187c478bd9Sstevel@tonic-gate /*
36197c478bd9Sstevel@tonic-gate  * Create a table for shared filehandle objects.
36207c478bd9Sstevel@tonic-gate  */
36217c478bd9Sstevel@tonic-gate 
36227c478bd9Sstevel@tonic-gate void
36237c478bd9Sstevel@tonic-gate sfh4_createtab(avl_tree_t *tab)
36247c478bd9Sstevel@tonic-gate {
36257c478bd9Sstevel@tonic-gate 	avl_create(tab, sfh4cmp, sizeof (nfs4_sharedfh_t),
36267c478bd9Sstevel@tonic-gate 		offsetof(nfs4_sharedfh_t, sfh_tree));
36277c478bd9Sstevel@tonic-gate }
36287c478bd9Sstevel@tonic-gate 
36297c478bd9Sstevel@tonic-gate /*
36307c478bd9Sstevel@tonic-gate  * Return a shared filehandle object for the given filehandle.  The caller
36317c478bd9Sstevel@tonic-gate  * is responsible for eventually calling sfh4_rele().
36327c478bd9Sstevel@tonic-gate  */
36337c478bd9Sstevel@tonic-gate 
36347c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *
36357c478bd9Sstevel@tonic-gate sfh4_put(const nfs_fh4 *fh, mntinfo4_t *mi, nfs4_sharedfh_t *key)
36367c478bd9Sstevel@tonic-gate {
36377c478bd9Sstevel@tonic-gate 	nfs4_sharedfh_t *sfh, *nsfh;
36387c478bd9Sstevel@tonic-gate 	avl_index_t where;
36397c478bd9Sstevel@tonic-gate 	nfs4_sharedfh_t skey;
36407c478bd9Sstevel@tonic-gate 
36417c478bd9Sstevel@tonic-gate 	if (!key) {
36427c478bd9Sstevel@tonic-gate 		skey.sfh_fh = *fh;
36437c478bd9Sstevel@tonic-gate 		key = &skey;
36447c478bd9Sstevel@tonic-gate 	}
36457c478bd9Sstevel@tonic-gate 
36467c478bd9Sstevel@tonic-gate 	nsfh = kmem_alloc(sizeof (nfs4_sharedfh_t), KM_SLEEP);
36477c478bd9Sstevel@tonic-gate 	nsfh->sfh_fh.nfs_fh4_len = fh->nfs_fh4_len;
36487c478bd9Sstevel@tonic-gate 	/*
36497c478bd9Sstevel@tonic-gate 	 * We allocate the largest possible filehandle size because it's
36507c478bd9Sstevel@tonic-gate 	 * not that big, and it saves us from possibly having to resize the
36517c478bd9Sstevel@tonic-gate 	 * buffer later.
36527c478bd9Sstevel@tonic-gate 	 */
36537c478bd9Sstevel@tonic-gate 	nsfh->sfh_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
36547c478bd9Sstevel@tonic-gate 	bcopy(fh->nfs_fh4_val, nsfh->sfh_fh.nfs_fh4_val, fh->nfs_fh4_len);
36557c478bd9Sstevel@tonic-gate 	mutex_init(&nsfh->sfh_lock, NULL, MUTEX_DEFAULT, NULL);
36567c478bd9Sstevel@tonic-gate 	nsfh->sfh_refcnt = 1;
36577c478bd9Sstevel@tonic-gate 	nsfh->sfh_flags = SFH4_IN_TREE;
36587c478bd9Sstevel@tonic-gate 	nsfh->sfh_mi = mi;
36597c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE, "sfh4_get: new object (%p)",
36607c478bd9Sstevel@tonic-gate 			(void *)nsfh));
36617c478bd9Sstevel@tonic-gate 
36627c478bd9Sstevel@tonic-gate 	(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0);
36637c478bd9Sstevel@tonic-gate 	sfh = avl_find(&mi->mi_filehandles, key, &where);
36647c478bd9Sstevel@tonic-gate 	if (sfh != NULL) {
36657c478bd9Sstevel@tonic-gate 		mutex_enter(&sfh->sfh_lock);
36667c478bd9Sstevel@tonic-gate 		sfh->sfh_refcnt++;
36677c478bd9Sstevel@tonic-gate 		mutex_exit(&sfh->sfh_lock);
36687c478bd9Sstevel@tonic-gate 		nfs_rw_exit(&mi->mi_fh_lock);
36697c478bd9Sstevel@tonic-gate 		/* free our speculative allocs */
36707c478bd9Sstevel@tonic-gate 		kmem_free(nsfh->sfh_fh.nfs_fh4_val, NFS4_FHSIZE);
36717c478bd9Sstevel@tonic-gate 		kmem_free(nsfh, sizeof (nfs4_sharedfh_t));
36727c478bd9Sstevel@tonic-gate 		return (sfh);
36737c478bd9Sstevel@tonic-gate 	}
36747c478bd9Sstevel@tonic-gate 
36757c478bd9Sstevel@tonic-gate 	avl_insert(&mi->mi_filehandles, nsfh, where);
36767c478bd9Sstevel@tonic-gate 	nfs_rw_exit(&mi->mi_fh_lock);
36777c478bd9Sstevel@tonic-gate 
36787c478bd9Sstevel@tonic-gate 	return (nsfh);
36797c478bd9Sstevel@tonic-gate }
36807c478bd9Sstevel@tonic-gate 
36817c478bd9Sstevel@tonic-gate /*
36827c478bd9Sstevel@tonic-gate  * Return a shared filehandle object for the given filehandle.  The caller
36837c478bd9Sstevel@tonic-gate  * is responsible for eventually calling sfh4_rele().
36847c478bd9Sstevel@tonic-gate  */
36857c478bd9Sstevel@tonic-gate 
36867c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *
36877c478bd9Sstevel@tonic-gate sfh4_get(const nfs_fh4 *fh, mntinfo4_t *mi)
36887c478bd9Sstevel@tonic-gate {
36897c478bd9Sstevel@tonic-gate 	nfs4_sharedfh_t *sfh;
36907c478bd9Sstevel@tonic-gate 	nfs4_sharedfh_t key;
36917c478bd9Sstevel@tonic-gate 
36927c478bd9Sstevel@tonic-gate 	ASSERT(fh->nfs_fh4_len <= NFS4_FHSIZE);
36937c478bd9Sstevel@tonic-gate 
36947c478bd9Sstevel@tonic-gate #ifdef DEBUG
36957c478bd9Sstevel@tonic-gate 	if (nfs4_sharedfh_debug) {
36967c478bd9Sstevel@tonic-gate 		nfs4_fhandle_t fhandle;
36977c478bd9Sstevel@tonic-gate 
36987c478bd9Sstevel@tonic-gate 		fhandle.fh_len = fh->nfs_fh4_len;
36997c478bd9Sstevel@tonic-gate 		bcopy(fh->nfs_fh4_val, fhandle.fh_buf, fhandle.fh_len);
37007c478bd9Sstevel@tonic-gate 		zcmn_err(mi->mi_zone->zone_id, CE_NOTE, "sfh4_get:");
37017c478bd9Sstevel@tonic-gate 		nfs4_printfhandle(&fhandle);
37027c478bd9Sstevel@tonic-gate 	}
37037c478bd9Sstevel@tonic-gate #endif
37047c478bd9Sstevel@tonic-gate 
37057c478bd9Sstevel@tonic-gate 	/*
37067c478bd9Sstevel@tonic-gate 	 * If there's already an object for the given filehandle, bump the
37077c478bd9Sstevel@tonic-gate 	 * reference count and return it.  Otherwise, create a new object
37087c478bd9Sstevel@tonic-gate 	 * and add it to the AVL tree.
37097c478bd9Sstevel@tonic-gate 	 */
37107c478bd9Sstevel@tonic-gate 
37117c478bd9Sstevel@tonic-gate 	key.sfh_fh = *fh;
37127c478bd9Sstevel@tonic-gate 
37137c478bd9Sstevel@tonic-gate 	(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
37147c478bd9Sstevel@tonic-gate 	sfh = avl_find(&mi->mi_filehandles, &key, NULL);
37157c478bd9Sstevel@tonic-gate 	if (sfh != NULL) {
37167c478bd9Sstevel@tonic-gate 		mutex_enter(&sfh->sfh_lock);
37177c478bd9Sstevel@tonic-gate 		sfh->sfh_refcnt++;
37187c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
37197c478bd9Sstevel@tonic-gate 			"sfh4_get: found existing %p, new refcnt=%d",
37207c478bd9Sstevel@tonic-gate 			(void *)sfh, sfh->sfh_refcnt));
37217c478bd9Sstevel@tonic-gate 		mutex_exit(&sfh->sfh_lock);
37227c478bd9Sstevel@tonic-gate 		nfs_rw_exit(&mi->mi_fh_lock);
37237c478bd9Sstevel@tonic-gate 		return (sfh);
37247c478bd9Sstevel@tonic-gate 	}
37257c478bd9Sstevel@tonic-gate 	nfs_rw_exit(&mi->mi_fh_lock);
37267c478bd9Sstevel@tonic-gate 
37277c478bd9Sstevel@tonic-gate 	return (sfh4_put(fh, mi, &key));
37287c478bd9Sstevel@tonic-gate }
37297c478bd9Sstevel@tonic-gate 
37307c478bd9Sstevel@tonic-gate /*
37317c478bd9Sstevel@tonic-gate  * Get a reference to the given shared filehandle object.
37327c478bd9Sstevel@tonic-gate  */
37337c478bd9Sstevel@tonic-gate 
37347c478bd9Sstevel@tonic-gate void
37357c478bd9Sstevel@tonic-gate sfh4_hold(nfs4_sharedfh_t *sfh)
37367c478bd9Sstevel@tonic-gate {
37377c478bd9Sstevel@tonic-gate 	ASSERT(sfh->sfh_refcnt > 0);
37387c478bd9Sstevel@tonic-gate 
37397c478bd9Sstevel@tonic-gate 	mutex_enter(&sfh->sfh_lock);
37407c478bd9Sstevel@tonic-gate 	sfh->sfh_refcnt++;
37417c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_sharedfh_debug,
37427c478bd9Sstevel@tonic-gate 		(CE_NOTE, "sfh4_hold %p, new refcnt=%d",
37437c478bd9Sstevel@tonic-gate 		(void *)sfh, sfh->sfh_refcnt));
37447c478bd9Sstevel@tonic-gate 	mutex_exit(&sfh->sfh_lock);
37457c478bd9Sstevel@tonic-gate }
37467c478bd9Sstevel@tonic-gate 
37477c478bd9Sstevel@tonic-gate /*
37487c478bd9Sstevel@tonic-gate  * Release a reference to the given shared filehandle object and null out
37497c478bd9Sstevel@tonic-gate  * the given pointer.
37507c478bd9Sstevel@tonic-gate  */
37517c478bd9Sstevel@tonic-gate 
37527c478bd9Sstevel@tonic-gate void
37537c478bd9Sstevel@tonic-gate sfh4_rele(nfs4_sharedfh_t **sfhpp)
37547c478bd9Sstevel@tonic-gate {
37557c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
37567c478bd9Sstevel@tonic-gate 	nfs4_sharedfh_t *sfh = *sfhpp;
37577c478bd9Sstevel@tonic-gate 
37587c478bd9Sstevel@tonic-gate 	ASSERT(sfh->sfh_refcnt > 0);
37597c478bd9Sstevel@tonic-gate 
37607c478bd9Sstevel@tonic-gate 	mutex_enter(&sfh->sfh_lock);
37617c478bd9Sstevel@tonic-gate 	if (sfh->sfh_refcnt > 1) {
37627c478bd9Sstevel@tonic-gate 		sfh->sfh_refcnt--;
37637c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
37647c478bd9Sstevel@tonic-gate 		    "sfh4_rele %p, new refcnt=%d",
37657c478bd9Sstevel@tonic-gate 		    (void *)sfh, sfh->sfh_refcnt));
37667c478bd9Sstevel@tonic-gate 		mutex_exit(&sfh->sfh_lock);
37677c478bd9Sstevel@tonic-gate 		goto finish;
37687c478bd9Sstevel@tonic-gate 	}
37697c478bd9Sstevel@tonic-gate 	mutex_exit(&sfh->sfh_lock);
37707c478bd9Sstevel@tonic-gate 
37717c478bd9Sstevel@tonic-gate 	/*
37727c478bd9Sstevel@tonic-gate 	 * Possibly the last reference, so get the lock for the table in
37737c478bd9Sstevel@tonic-gate 	 * case it's time to remove the object from the table.
37747c478bd9Sstevel@tonic-gate 	 */
37757c478bd9Sstevel@tonic-gate 	mi = sfh->sfh_mi;
37767c478bd9Sstevel@tonic-gate 	(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0);
37777c478bd9Sstevel@tonic-gate 	mutex_enter(&sfh->sfh_lock);
37787c478bd9Sstevel@tonic-gate 	sfh->sfh_refcnt--;
37797c478bd9Sstevel@tonic-gate 	if (sfh->sfh_refcnt > 0) {
37807c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
37817c478bd9Sstevel@tonic-gate 		    "sfh4_rele %p, new refcnt=%d",
37827c478bd9Sstevel@tonic-gate 		    (void *)sfh, sfh->sfh_refcnt));
37837c478bd9Sstevel@tonic-gate 		mutex_exit(&sfh->sfh_lock);
37847c478bd9Sstevel@tonic-gate 		nfs_rw_exit(&mi->mi_fh_lock);
37857c478bd9Sstevel@tonic-gate 		goto finish;
37867c478bd9Sstevel@tonic-gate 	}
37877c478bd9Sstevel@tonic-gate 
37887c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
37897c478bd9Sstevel@tonic-gate 		"sfh4_rele %p, last ref", (void *)sfh));
37907c478bd9Sstevel@tonic-gate 	if (sfh->sfh_flags & SFH4_IN_TREE) {
37917c478bd9Sstevel@tonic-gate 		avl_remove(&mi->mi_filehandles, sfh);
37927c478bd9Sstevel@tonic-gate 		sfh->sfh_flags &= ~SFH4_IN_TREE;
37937c478bd9Sstevel@tonic-gate 	}
37947c478bd9Sstevel@tonic-gate 	mutex_exit(&sfh->sfh_lock);
37957c478bd9Sstevel@tonic-gate 	nfs_rw_exit(&mi->mi_fh_lock);
37967c478bd9Sstevel@tonic-gate 	mutex_destroy(&sfh->sfh_lock);
37977c478bd9Sstevel@tonic-gate 	kmem_free(sfh->sfh_fh.nfs_fh4_val, NFS4_FHSIZE);
37987c478bd9Sstevel@tonic-gate 	kmem_free(sfh, sizeof (nfs4_sharedfh_t));
37997c478bd9Sstevel@tonic-gate 
38007c478bd9Sstevel@tonic-gate finish:
38017c478bd9Sstevel@tonic-gate 	*sfhpp = NULL;
38027c478bd9Sstevel@tonic-gate }
38037c478bd9Sstevel@tonic-gate 
38047c478bd9Sstevel@tonic-gate /*
38057c478bd9Sstevel@tonic-gate  * Update the filehandle for the given shared filehandle object.
38067c478bd9Sstevel@tonic-gate  */
38077c478bd9Sstevel@tonic-gate 
38087c478bd9Sstevel@tonic-gate int nfs4_warn_dupfh = 0;	/* if set, always warn about dup fhs below */
38097c478bd9Sstevel@tonic-gate 
38107c478bd9Sstevel@tonic-gate void
38117c478bd9Sstevel@tonic-gate sfh4_update(nfs4_sharedfh_t *sfh, const nfs_fh4 *newfh)
38127c478bd9Sstevel@tonic-gate {
38137c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = sfh->sfh_mi;
38147c478bd9Sstevel@tonic-gate 	nfs4_sharedfh_t *dupsfh;
38157c478bd9Sstevel@tonic-gate 	avl_index_t where;
38167c478bd9Sstevel@tonic-gate 	nfs4_sharedfh_t key;
38177c478bd9Sstevel@tonic-gate 
38187c478bd9Sstevel@tonic-gate #ifdef DEBUG
38197c478bd9Sstevel@tonic-gate 	mutex_enter(&sfh->sfh_lock);
38207c478bd9Sstevel@tonic-gate 	ASSERT(sfh->sfh_refcnt > 0);
38217c478bd9Sstevel@tonic-gate 	mutex_exit(&sfh->sfh_lock);
38227c478bd9Sstevel@tonic-gate #endif
38237c478bd9Sstevel@tonic-gate 	ASSERT(newfh->nfs_fh4_len <= NFS4_FHSIZE);
38247c478bd9Sstevel@tonic-gate 
38257c478bd9Sstevel@tonic-gate 	/*
38267c478bd9Sstevel@tonic-gate 	 * The basic plan is to remove the shared filehandle object from
38277c478bd9Sstevel@tonic-gate 	 * the table, update it to have the new filehandle, then reinsert
38287c478bd9Sstevel@tonic-gate 	 * it.
38297c478bd9Sstevel@tonic-gate 	 */
38307c478bd9Sstevel@tonic-gate 
38317c478bd9Sstevel@tonic-gate 	(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0);
38327c478bd9Sstevel@tonic-gate 	mutex_enter(&sfh->sfh_lock);
38337c478bd9Sstevel@tonic-gate 	if (sfh->sfh_flags & SFH4_IN_TREE) {
38347c478bd9Sstevel@tonic-gate 		avl_remove(&mi->mi_filehandles, sfh);
38357c478bd9Sstevel@tonic-gate 		sfh->sfh_flags &= ~SFH4_IN_TREE;
38367c478bd9Sstevel@tonic-gate 	}
38377c478bd9Sstevel@tonic-gate 	mutex_exit(&sfh->sfh_lock);
38387c478bd9Sstevel@tonic-gate 	sfh->sfh_fh.nfs_fh4_len = newfh->nfs_fh4_len;
38397c478bd9Sstevel@tonic-gate 	bcopy(newfh->nfs_fh4_val, sfh->sfh_fh.nfs_fh4_val,
38407c478bd9Sstevel@tonic-gate 	    sfh->sfh_fh.nfs_fh4_len);
38417c478bd9Sstevel@tonic-gate 
38427c478bd9Sstevel@tonic-gate 	/*
38437c478bd9Sstevel@tonic-gate 	 * XXX If there is already a shared filehandle object with the new
38447c478bd9Sstevel@tonic-gate 	 * filehandle, we're in trouble, because the rnode code assumes
38457c478bd9Sstevel@tonic-gate 	 * that there is only one shared filehandle object for a given
38467c478bd9Sstevel@tonic-gate 	 * filehandle.  So issue a warning (for read-write mounts only)
38477c478bd9Sstevel@tonic-gate 	 * and don't try to re-insert the given object into the table.
38487c478bd9Sstevel@tonic-gate 	 * Hopefully the given object will quickly go away and everyone
38497c478bd9Sstevel@tonic-gate 	 * will use the new object.
38507c478bd9Sstevel@tonic-gate 	 */
38517c478bd9Sstevel@tonic-gate 	key.sfh_fh = *newfh;
38527c478bd9Sstevel@tonic-gate 	dupsfh = avl_find(&mi->mi_filehandles, &key, &where);
38537c478bd9Sstevel@tonic-gate 	if (dupsfh != NULL) {
38547c478bd9Sstevel@tonic-gate 		if (!(mi->mi_vfsp->vfs_flag & VFS_RDONLY) || nfs4_warn_dupfh) {
38557c478bd9Sstevel@tonic-gate 			zcmn_err(mi->mi_zone->zone_id, CE_WARN, "sfh4_update: "
38567c478bd9Sstevel@tonic-gate 			    "duplicate filehandle detected");
38577c478bd9Sstevel@tonic-gate 			sfh4_printfhandle(dupsfh);
38587c478bd9Sstevel@tonic-gate 		}
38597c478bd9Sstevel@tonic-gate 	} else {
38607c478bd9Sstevel@tonic-gate 		avl_insert(&mi->mi_filehandles, sfh, where);
38617c478bd9Sstevel@tonic-gate 		mutex_enter(&sfh->sfh_lock);
38627c478bd9Sstevel@tonic-gate 		sfh->sfh_flags |= SFH4_IN_TREE;
38637c478bd9Sstevel@tonic-gate 		mutex_exit(&sfh->sfh_lock);
38647c478bd9Sstevel@tonic-gate 	}
38657c478bd9Sstevel@tonic-gate 	nfs_rw_exit(&mi->mi_fh_lock);
38667c478bd9Sstevel@tonic-gate }
38677c478bd9Sstevel@tonic-gate 
38687c478bd9Sstevel@tonic-gate /*
38697c478bd9Sstevel@tonic-gate  * Copy out the current filehandle for the given shared filehandle object.
38707c478bd9Sstevel@tonic-gate  */
38717c478bd9Sstevel@tonic-gate 
38727c478bd9Sstevel@tonic-gate void
38737c478bd9Sstevel@tonic-gate sfh4_copyval(const nfs4_sharedfh_t *sfh, nfs4_fhandle_t *fhp)
38747c478bd9Sstevel@tonic-gate {
38757c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = sfh->sfh_mi;
38767c478bd9Sstevel@tonic-gate 
38777c478bd9Sstevel@tonic-gate 	ASSERT(sfh->sfh_refcnt > 0);
38787c478bd9Sstevel@tonic-gate 
38797c478bd9Sstevel@tonic-gate 	(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
38807c478bd9Sstevel@tonic-gate 	fhp->fh_len = sfh->sfh_fh.nfs_fh4_len;
38817c478bd9Sstevel@tonic-gate 	ASSERT(fhp->fh_len <= NFS4_FHSIZE);
38827c478bd9Sstevel@tonic-gate 	bcopy(sfh->sfh_fh.nfs_fh4_val, fhp->fh_buf, fhp->fh_len);
38837c478bd9Sstevel@tonic-gate 	nfs_rw_exit(&mi->mi_fh_lock);
38847c478bd9Sstevel@tonic-gate }
38857c478bd9Sstevel@tonic-gate 
38867c478bd9Sstevel@tonic-gate /*
38877c478bd9Sstevel@tonic-gate  * Print out the filehandle for the given shared filehandle object.
38887c478bd9Sstevel@tonic-gate  */
38897c478bd9Sstevel@tonic-gate 
38907c478bd9Sstevel@tonic-gate void
38917c478bd9Sstevel@tonic-gate sfh4_printfhandle(const nfs4_sharedfh_t *sfh)
38927c478bd9Sstevel@tonic-gate {
38937c478bd9Sstevel@tonic-gate 	nfs4_fhandle_t fhandle;
38947c478bd9Sstevel@tonic-gate 
38957c478bd9Sstevel@tonic-gate 	sfh4_copyval(sfh, &fhandle);
38967c478bd9Sstevel@tonic-gate 	nfs4_printfhandle(&fhandle);
38977c478bd9Sstevel@tonic-gate }
38987c478bd9Sstevel@tonic-gate 
38997c478bd9Sstevel@tonic-gate /*
39007c478bd9Sstevel@tonic-gate  * Compare 2 fnames.  Returns -1 if the first is "less" than the second, 0
39017c478bd9Sstevel@tonic-gate  * if they're the same, +1 if the first is "greater" than the second.  The
39027c478bd9Sstevel@tonic-gate  * caller (or whoever's calling the AVL package) is responsible for
39037c478bd9Sstevel@tonic-gate  * handling locking issues.
39047c478bd9Sstevel@tonic-gate  */
39057c478bd9Sstevel@tonic-gate 
39067c478bd9Sstevel@tonic-gate static int
39077c478bd9Sstevel@tonic-gate fncmp(const void *p1, const void *p2)
39087c478bd9Sstevel@tonic-gate {
39097c478bd9Sstevel@tonic-gate 	const nfs4_fname_t *f1 = p1;
39107c478bd9Sstevel@tonic-gate 	const nfs4_fname_t *f2 = p2;
39117c478bd9Sstevel@tonic-gate 	int res;
39127c478bd9Sstevel@tonic-gate 
39137c478bd9Sstevel@tonic-gate 	res = strcmp(f1->fn_name, f2->fn_name);
39147c478bd9Sstevel@tonic-gate 	/*
39157c478bd9Sstevel@tonic-gate 	 * The AVL package wants +/-1, not arbitrary positive or negative
39167c478bd9Sstevel@tonic-gate 	 * integers.
39177c478bd9Sstevel@tonic-gate 	 */
39187c478bd9Sstevel@tonic-gate 	if (res > 0)
39197c478bd9Sstevel@tonic-gate 		res = 1;
39207c478bd9Sstevel@tonic-gate 	else if (res < 0)
39217c478bd9Sstevel@tonic-gate 		res = -1;
39227c478bd9Sstevel@tonic-gate 	return (res);
39237c478bd9Sstevel@tonic-gate }
39247c478bd9Sstevel@tonic-gate 
39257c478bd9Sstevel@tonic-gate /*
39267c478bd9Sstevel@tonic-gate  * Get or create an fname with the given name, as a child of the given
39277c478bd9Sstevel@tonic-gate  * fname.  The caller is responsible for eventually releasing the reference
39287c478bd9Sstevel@tonic-gate  * (fn_rele()).  parent may be NULL.
39297c478bd9Sstevel@tonic-gate  */
39307c478bd9Sstevel@tonic-gate 
39317c478bd9Sstevel@tonic-gate nfs4_fname_t *
39327c478bd9Sstevel@tonic-gate fn_get(nfs4_fname_t *parent, char *name)
39337c478bd9Sstevel@tonic-gate {
39347c478bd9Sstevel@tonic-gate 	nfs4_fname_t key;
39357c478bd9Sstevel@tonic-gate 	nfs4_fname_t *fnp;
39367c478bd9Sstevel@tonic-gate 	avl_index_t where;
39377c478bd9Sstevel@tonic-gate 
39387c478bd9Sstevel@tonic-gate 	key.fn_name = name;
39397c478bd9Sstevel@tonic-gate 
39407c478bd9Sstevel@tonic-gate 	/*
39417c478bd9Sstevel@tonic-gate 	 * If there's already an fname registered with the given name, bump
39427c478bd9Sstevel@tonic-gate 	 * its reference count and return it.  Otherwise, create a new one
39437c478bd9Sstevel@tonic-gate 	 * and add it to the parent's AVL tree.
39447c478bd9Sstevel@tonic-gate 	 */
39457c478bd9Sstevel@tonic-gate 
39467c478bd9Sstevel@tonic-gate 	if (parent != NULL) {
39477c478bd9Sstevel@tonic-gate 		mutex_enter(&parent->fn_lock);
39487c478bd9Sstevel@tonic-gate 		fnp = avl_find(&parent->fn_children, &key, &where);
39497c478bd9Sstevel@tonic-gate 		if (fnp != NULL) {
39507c478bd9Sstevel@tonic-gate 			fn_hold(fnp);
39517c478bd9Sstevel@tonic-gate 			mutex_exit(&parent->fn_lock);
39527c478bd9Sstevel@tonic-gate 			return (fnp);
39537c478bd9Sstevel@tonic-gate 		}
39547c478bd9Sstevel@tonic-gate 	}
39557c478bd9Sstevel@tonic-gate 
39567c478bd9Sstevel@tonic-gate 	fnp = kmem_alloc(sizeof (nfs4_fname_t), KM_SLEEP);
39577c478bd9Sstevel@tonic-gate 	mutex_init(&fnp->fn_lock, NULL, MUTEX_DEFAULT, NULL);
39587c478bd9Sstevel@tonic-gate 	fnp->fn_parent = parent;
39597c478bd9Sstevel@tonic-gate 	if (parent != NULL)
39607c478bd9Sstevel@tonic-gate 		fn_hold(parent);
39617c478bd9Sstevel@tonic-gate 	fnp->fn_len = strlen(name);
39627c478bd9Sstevel@tonic-gate 	ASSERT(fnp->fn_len < MAXNAMELEN);
39637c478bd9Sstevel@tonic-gate 	fnp->fn_name = kmem_alloc(fnp->fn_len + 1, KM_SLEEP);
39647c478bd9Sstevel@tonic-gate 	(void) strcpy(fnp->fn_name, name);
39657c478bd9Sstevel@tonic-gate 	fnp->fn_refcnt = 1;
39667c478bd9Sstevel@tonic-gate 	avl_create(&fnp->fn_children, fncmp, sizeof (nfs4_fname_t),
39677c478bd9Sstevel@tonic-gate 	    offsetof(nfs4_fname_t, fn_tree));
39687c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
39697c478bd9Sstevel@tonic-gate 		"fn_get %p:%s, a new nfs4_fname_t!",
39707c478bd9Sstevel@tonic-gate 		(void *)fnp, fnp->fn_name));
39717c478bd9Sstevel@tonic-gate 	if (parent != NULL) {
39727c478bd9Sstevel@tonic-gate 		avl_insert(&parent->fn_children, fnp, where);
39737c478bd9Sstevel@tonic-gate 		mutex_exit(&parent->fn_lock);
39747c478bd9Sstevel@tonic-gate 	}
39757c478bd9Sstevel@tonic-gate 
39767c478bd9Sstevel@tonic-gate 	return (fnp);
39777c478bd9Sstevel@tonic-gate }
39787c478bd9Sstevel@tonic-gate 
39797c478bd9Sstevel@tonic-gate void
39807c478bd9Sstevel@tonic-gate fn_hold(nfs4_fname_t *fnp)
39817c478bd9Sstevel@tonic-gate {
39827c478bd9Sstevel@tonic-gate 	atomic_add_32(&fnp->fn_refcnt, 1);
39837c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
39847c478bd9Sstevel@tonic-gate 		"fn_hold %p:%s, new refcnt=%d",
39857c478bd9Sstevel@tonic-gate 		(void *)fnp, fnp->fn_name, fnp->fn_refcnt));
39867c478bd9Sstevel@tonic-gate }
39877c478bd9Sstevel@tonic-gate 
39887c478bd9Sstevel@tonic-gate /*
39897c478bd9Sstevel@tonic-gate  * Decrement the reference count of the given fname, and destroy it if its
39907c478bd9Sstevel@tonic-gate  * reference count goes to zero.  Nulls out the given pointer.
39917c478bd9Sstevel@tonic-gate  */
39927c478bd9Sstevel@tonic-gate 
39937c478bd9Sstevel@tonic-gate void
39947c478bd9Sstevel@tonic-gate fn_rele(nfs4_fname_t **fnpp)
39957c478bd9Sstevel@tonic-gate {
39967c478bd9Sstevel@tonic-gate 	nfs4_fname_t *parent;
39977c478bd9Sstevel@tonic-gate 	uint32_t newref;
39987c478bd9Sstevel@tonic-gate 	nfs4_fname_t *fnp;
39997c478bd9Sstevel@tonic-gate 
40007c478bd9Sstevel@tonic-gate recur:
40017c478bd9Sstevel@tonic-gate 	fnp = *fnpp;
40027c478bd9Sstevel@tonic-gate 	*fnpp = NULL;
40037c478bd9Sstevel@tonic-gate 
40047c478bd9Sstevel@tonic-gate 	mutex_enter(&fnp->fn_lock);
40057c478bd9Sstevel@tonic-gate 	parent = fnp->fn_parent;
40067c478bd9Sstevel@tonic-gate 	if (parent != NULL)
40077c478bd9Sstevel@tonic-gate 		mutex_enter(&parent->fn_lock);	/* prevent new references */
40087c478bd9Sstevel@tonic-gate 	newref = atomic_add_32_nv(&fnp->fn_refcnt, -1);
40097c478bd9Sstevel@tonic-gate 	if (newref > 0) {
40107c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
40117c478bd9Sstevel@tonic-gate 			"fn_rele %p:%s, new refcnt=%d",
40127c478bd9Sstevel@tonic-gate 			(void *)fnp, fnp->fn_name, fnp->fn_refcnt));
40137c478bd9Sstevel@tonic-gate 		if (parent != NULL)
40147c478bd9Sstevel@tonic-gate 			mutex_exit(&parent->fn_lock);
40157c478bd9Sstevel@tonic-gate 		mutex_exit(&fnp->fn_lock);
40167c478bd9Sstevel@tonic-gate 		return;
40177c478bd9Sstevel@tonic-gate 	}
40187c478bd9Sstevel@tonic-gate 
40197c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
40207c478bd9Sstevel@tonic-gate 		"fn_rele %p:%s, last reference, deleting...",
40217c478bd9Sstevel@tonic-gate 		(void *)fnp, fnp->fn_name));
40227c478bd9Sstevel@tonic-gate 	if (parent != NULL) {
40237c478bd9Sstevel@tonic-gate 		avl_remove(&parent->fn_children, fnp);
40247c478bd9Sstevel@tonic-gate 		mutex_exit(&parent->fn_lock);
40257c478bd9Sstevel@tonic-gate 	}
40267c478bd9Sstevel@tonic-gate 	kmem_free(fnp->fn_name, fnp->fn_len + 1);
40277c478bd9Sstevel@tonic-gate 	mutex_destroy(&fnp->fn_lock);
40287c478bd9Sstevel@tonic-gate 	avl_destroy(&fnp->fn_children);
40297c478bd9Sstevel@tonic-gate 	kmem_free(fnp, sizeof (nfs4_fname_t));
40307c478bd9Sstevel@tonic-gate 	/*
40317c478bd9Sstevel@tonic-gate 	 * Recursivly fn_rele the parent.
40327c478bd9Sstevel@tonic-gate 	 * Use goto instead of a recursive call to avoid stack overflow.
40337c478bd9Sstevel@tonic-gate 	 */
40347c478bd9Sstevel@tonic-gate 	if (parent != NULL) {
40357c478bd9Sstevel@tonic-gate 		fnpp = &parent;
40367c478bd9Sstevel@tonic-gate 		goto recur;
40377c478bd9Sstevel@tonic-gate 	}
40387c478bd9Sstevel@tonic-gate }
40397c478bd9Sstevel@tonic-gate 
40407c478bd9Sstevel@tonic-gate /*
40417c478bd9Sstevel@tonic-gate  * Returns the single component name of the given fname, in a MAXNAMELEN
40427c478bd9Sstevel@tonic-gate  * string buffer, which the caller is responsible for freeing.  Note that
40437c478bd9Sstevel@tonic-gate  * the name may become invalid as a result of fn_move().
40447c478bd9Sstevel@tonic-gate  */
40457c478bd9Sstevel@tonic-gate 
40467c478bd9Sstevel@tonic-gate char *
40477c478bd9Sstevel@tonic-gate fn_name(nfs4_fname_t *fnp)
40487c478bd9Sstevel@tonic-gate {
40497c478bd9Sstevel@tonic-gate 	char *name;
40507c478bd9Sstevel@tonic-gate 
40517c478bd9Sstevel@tonic-gate 	ASSERT(fnp->fn_len < MAXNAMELEN);
40527c478bd9Sstevel@tonic-gate 	name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
40537c478bd9Sstevel@tonic-gate 	mutex_enter(&fnp->fn_lock);
40547c478bd9Sstevel@tonic-gate 	(void) strcpy(name, fnp->fn_name);
40557c478bd9Sstevel@tonic-gate 	mutex_exit(&fnp->fn_lock);
40567c478bd9Sstevel@tonic-gate 
40577c478bd9Sstevel@tonic-gate 	return (name);
40587c478bd9Sstevel@tonic-gate }
40597c478bd9Sstevel@tonic-gate 
40607c478bd9Sstevel@tonic-gate 
40617c478bd9Sstevel@tonic-gate /*
40627c478bd9Sstevel@tonic-gate  * fn_path_realloc
40637c478bd9Sstevel@tonic-gate  *
40647c478bd9Sstevel@tonic-gate  * This function, used only by fn_path, constructs
40657c478bd9Sstevel@tonic-gate  * a new string which looks like "prepend" + "/" + "current".
40667c478bd9Sstevel@tonic-gate  * by allocating a new string and freeing the old one.
40677c478bd9Sstevel@tonic-gate  */
40687c478bd9Sstevel@tonic-gate static void
40697c478bd9Sstevel@tonic-gate fn_path_realloc(char **curses, char *prepend)
40707c478bd9Sstevel@tonic-gate {
40717c478bd9Sstevel@tonic-gate 	int len, curlen = 0;
40727c478bd9Sstevel@tonic-gate 	char *news;
40737c478bd9Sstevel@tonic-gate 
40747c478bd9Sstevel@tonic-gate 	if (*curses == NULL) {
40757c478bd9Sstevel@tonic-gate 		/*
40767c478bd9Sstevel@tonic-gate 		 * Prime the pump, allocate just the
40777c478bd9Sstevel@tonic-gate 		 * space for prepend and return that.
40787c478bd9Sstevel@tonic-gate 		 */
40797c478bd9Sstevel@tonic-gate 		len = strlen(prepend) + 1;
40807c478bd9Sstevel@tonic-gate 		news = kmem_alloc(len, KM_SLEEP);
40817c478bd9Sstevel@tonic-gate 		(void) strncpy(news, prepend, len);
40827c478bd9Sstevel@tonic-gate 	} else {
40837c478bd9Sstevel@tonic-gate 		/*
40847c478bd9Sstevel@tonic-gate 		 * Allocate the space  for a new string
40857c478bd9Sstevel@tonic-gate 		 * +1 +1 is for the "/" and the NULL
40867c478bd9Sstevel@tonic-gate 		 * byte at the end of it all.
40877c478bd9Sstevel@tonic-gate 		 */
40887c478bd9Sstevel@tonic-gate 		curlen = strlen(*curses);
40897c478bd9Sstevel@tonic-gate 		len = curlen + strlen(prepend) + 1 + 1;
40907c478bd9Sstevel@tonic-gate 		news = kmem_alloc(len, KM_SLEEP);
40917c478bd9Sstevel@tonic-gate 		(void) strncpy(news, prepend, len);
40927c478bd9Sstevel@tonic-gate 		(void) strcat(news, "/");
40937c478bd9Sstevel@tonic-gate 		(void) strcat(news, *curses);
40947c478bd9Sstevel@tonic-gate 		kmem_free(*curses, curlen + 1);
40957c478bd9Sstevel@tonic-gate 	}
40967c478bd9Sstevel@tonic-gate 	*curses = news;
40977c478bd9Sstevel@tonic-gate }
40987c478bd9Sstevel@tonic-gate 
40997c478bd9Sstevel@tonic-gate /*
41007c478bd9Sstevel@tonic-gate  * Returns the path name (starting from the fs root) for the given fname.
41017c478bd9Sstevel@tonic-gate  * The caller is responsible for freeing.  Note that the path may be or
41027c478bd9Sstevel@tonic-gate  * become invalid as a result of fn_move().
41037c478bd9Sstevel@tonic-gate  */
41047c478bd9Sstevel@tonic-gate 
41057c478bd9Sstevel@tonic-gate char *
41067c478bd9Sstevel@tonic-gate fn_path(nfs4_fname_t *fnp)
41077c478bd9Sstevel@tonic-gate {
41087c478bd9Sstevel@tonic-gate 	char *path;
41097c478bd9Sstevel@tonic-gate 	nfs4_fname_t *nextfnp;
41107c478bd9Sstevel@tonic-gate 
41117c478bd9Sstevel@tonic-gate 	if (fnp == NULL)
41127c478bd9Sstevel@tonic-gate 		return (NULL);
41137c478bd9Sstevel@tonic-gate 
41147c478bd9Sstevel@tonic-gate 	path = NULL;
41157c478bd9Sstevel@tonic-gate 
41167c478bd9Sstevel@tonic-gate 	/* walk up the tree constructing the pathname.  */
41177c478bd9Sstevel@tonic-gate 
41187c478bd9Sstevel@tonic-gate 	fn_hold(fnp);			/* adjust for later rele */
41197c478bd9Sstevel@tonic-gate 	do {
41207c478bd9Sstevel@tonic-gate 		mutex_enter(&fnp->fn_lock);
41217c478bd9Sstevel@tonic-gate 		/*
41227c478bd9Sstevel@tonic-gate 		 * Add fn_name in front of the current path
41237c478bd9Sstevel@tonic-gate 		 */
41247c478bd9Sstevel@tonic-gate 		fn_path_realloc(&path, fnp->fn_name);
41257c478bd9Sstevel@tonic-gate 		nextfnp = fnp->fn_parent;
41267c478bd9Sstevel@tonic-gate 		if (nextfnp != NULL)
41277c478bd9Sstevel@tonic-gate 			fn_hold(nextfnp);
41287c478bd9Sstevel@tonic-gate 		mutex_exit(&fnp->fn_lock);
41297c478bd9Sstevel@tonic-gate 		fn_rele(&fnp);
41307c478bd9Sstevel@tonic-gate 		fnp = nextfnp;
41317c478bd9Sstevel@tonic-gate 	} while (fnp != NULL);
41327c478bd9Sstevel@tonic-gate 
41337c478bd9Sstevel@tonic-gate 	return (path);
41347c478bd9Sstevel@tonic-gate }
41357c478bd9Sstevel@tonic-gate 
41367c478bd9Sstevel@tonic-gate /*
41377c478bd9Sstevel@tonic-gate  * Return a reference to the parent of the given fname, which the caller is
41387c478bd9Sstevel@tonic-gate  * responsible for eventually releasing.
41397c478bd9Sstevel@tonic-gate  */
41407c478bd9Sstevel@tonic-gate 
41417c478bd9Sstevel@tonic-gate nfs4_fname_t *
41427c478bd9Sstevel@tonic-gate fn_parent(nfs4_fname_t *fnp)
41437c478bd9Sstevel@tonic-gate {
41447c478bd9Sstevel@tonic-gate 	nfs4_fname_t *parent;
41457c478bd9Sstevel@tonic-gate 
41467c478bd9Sstevel@tonic-gate 	mutex_enter(&fnp->fn_lock);
41477c478bd9Sstevel@tonic-gate 	parent = fnp->fn_parent;
41487c478bd9Sstevel@tonic-gate 	if (parent != NULL)
41497c478bd9Sstevel@tonic-gate 		fn_hold(parent);
41507c478bd9Sstevel@tonic-gate 	mutex_exit(&fnp->fn_lock);
41517c478bd9Sstevel@tonic-gate 
41527c478bd9Sstevel@tonic-gate 	return (parent);
41537c478bd9Sstevel@tonic-gate }
41547c478bd9Sstevel@tonic-gate 
41557c478bd9Sstevel@tonic-gate /*
41567c478bd9Sstevel@tonic-gate  * Update fnp so that its parent is newparent and its name is newname.
41577c478bd9Sstevel@tonic-gate  */
41587c478bd9Sstevel@tonic-gate 
41597c478bd9Sstevel@tonic-gate void
41607c478bd9Sstevel@tonic-gate fn_move(nfs4_fname_t *fnp, nfs4_fname_t *newparent, char *newname)
41617c478bd9Sstevel@tonic-gate {
41627c478bd9Sstevel@tonic-gate 	nfs4_fname_t *parent, *tmpfnp;
41637c478bd9Sstevel@tonic-gate 	ssize_t newlen;
41647c478bd9Sstevel@tonic-gate 	nfs4_fname_t key;
41657c478bd9Sstevel@tonic-gate 	avl_index_t where;
41667c478bd9Sstevel@tonic-gate 
41677c478bd9Sstevel@tonic-gate 	/*
41687c478bd9Sstevel@tonic-gate 	 * This assert exists to catch the client trying to rename
41697c478bd9Sstevel@tonic-gate 	 * a dir to be a child of itself.  This happened at a recent
41707c478bd9Sstevel@tonic-gate 	 * bakeoff against a 3rd party (broken) server which allowed
41717c478bd9Sstevel@tonic-gate 	 * the rename to succeed.  If it trips it means that:
41727c478bd9Sstevel@tonic-gate 	 *	a) the code in nfs4rename that detects this case is broken
41737c478bd9Sstevel@tonic-gate 	 *	b) the server is broken (since it allowed the bogus rename)
41747c478bd9Sstevel@tonic-gate 	 *
41757c478bd9Sstevel@tonic-gate 	 * For non-DEBUG kernels, prepare for a recursive mutex_enter
41767c478bd9Sstevel@tonic-gate 	 * panic below from:  mutex_enter(&newparent->fn_lock);
41777c478bd9Sstevel@tonic-gate 	 */
41787c478bd9Sstevel@tonic-gate 	ASSERT(fnp != newparent);
41797c478bd9Sstevel@tonic-gate 
41807c478bd9Sstevel@tonic-gate 	/*
41817c478bd9Sstevel@tonic-gate 	 * Remove fnp from its current parent, change its name, then add it
41827c478bd9Sstevel@tonic-gate 	 * to newparent.
41837c478bd9Sstevel@tonic-gate 	 */
41847c478bd9Sstevel@tonic-gate 	mutex_enter(&fnp->fn_lock);
41857c478bd9Sstevel@tonic-gate 	parent = fnp->fn_parent;
41867c478bd9Sstevel@tonic-gate 	mutex_enter(&parent->fn_lock);
41877c478bd9Sstevel@tonic-gate 	avl_remove(&parent->fn_children, fnp);
41887c478bd9Sstevel@tonic-gate 	mutex_exit(&parent->fn_lock);
41897c478bd9Sstevel@tonic-gate 	fn_rele(&fnp->fn_parent);
41907c478bd9Sstevel@tonic-gate 
41917c478bd9Sstevel@tonic-gate 	newlen = strlen(newname);
41927c478bd9Sstevel@tonic-gate 	if (newlen != fnp->fn_len) {
41937c478bd9Sstevel@tonic-gate 		ASSERT(newlen < MAXNAMELEN);
41947c478bd9Sstevel@tonic-gate 		kmem_free(fnp->fn_name, fnp->fn_len + 1);
41957c478bd9Sstevel@tonic-gate 		fnp->fn_name = kmem_alloc(newlen + 1, KM_SLEEP);
41967c478bd9Sstevel@tonic-gate 		fnp->fn_len = newlen;
41977c478bd9Sstevel@tonic-gate 	}
41987c478bd9Sstevel@tonic-gate 	(void) strcpy(fnp->fn_name, newname);
41997c478bd9Sstevel@tonic-gate 
42007c478bd9Sstevel@tonic-gate again:
42017c478bd9Sstevel@tonic-gate 	mutex_enter(&newparent->fn_lock);
42027c478bd9Sstevel@tonic-gate 	key.fn_name = fnp->fn_name;
42037c478bd9Sstevel@tonic-gate 	tmpfnp = avl_find(&newparent->fn_children, &key, &where);
42047c478bd9Sstevel@tonic-gate 	if (tmpfnp != NULL) {
42057c478bd9Sstevel@tonic-gate 		/*
42067c478bd9Sstevel@tonic-gate 		 * This could be due to a file that was unlinked while
42077c478bd9Sstevel@tonic-gate 		 * open, or perhaps the rnode is in the free list.  Remove
42087c478bd9Sstevel@tonic-gate 		 * it from newparent and let it go away on its own.  The
42097c478bd9Sstevel@tonic-gate 		 * contorted code is to deal with lock order issues and
42107c478bd9Sstevel@tonic-gate 		 * race conditions.
42117c478bd9Sstevel@tonic-gate 		 */
42127c478bd9Sstevel@tonic-gate 		fn_hold(tmpfnp);
42137c478bd9Sstevel@tonic-gate 		mutex_exit(&newparent->fn_lock);
42147c478bd9Sstevel@tonic-gate 		mutex_enter(&tmpfnp->fn_lock);
42157c478bd9Sstevel@tonic-gate 		if (tmpfnp->fn_parent == newparent) {
42167c478bd9Sstevel@tonic-gate 			mutex_enter(&newparent->fn_lock);
42177c478bd9Sstevel@tonic-gate 			avl_remove(&newparent->fn_children, tmpfnp);
42187c478bd9Sstevel@tonic-gate 			mutex_exit(&newparent->fn_lock);
42197c478bd9Sstevel@tonic-gate 			fn_rele(&tmpfnp->fn_parent);
42207c478bd9Sstevel@tonic-gate 		}
42217c478bd9Sstevel@tonic-gate 		mutex_exit(&tmpfnp->fn_lock);
42227c478bd9Sstevel@tonic-gate 		fn_rele(&tmpfnp);
42237c478bd9Sstevel@tonic-gate 		goto again;
42247c478bd9Sstevel@tonic-gate 	}
42257c478bd9Sstevel@tonic-gate 	fnp->fn_parent = newparent;
42267c478bd9Sstevel@tonic-gate 	fn_hold(newparent);
42277c478bd9Sstevel@tonic-gate 	avl_insert(&newparent->fn_children, fnp, where);
42287c478bd9Sstevel@tonic-gate 	mutex_exit(&newparent->fn_lock);
42297c478bd9Sstevel@tonic-gate 	mutex_exit(&fnp->fn_lock);
42307c478bd9Sstevel@tonic-gate }
42317c478bd9Sstevel@tonic-gate 
42327c478bd9Sstevel@tonic-gate #ifdef DEBUG
42337c478bd9Sstevel@tonic-gate /*
42347c478bd9Sstevel@tonic-gate  * Return non-zero if the type information makes sense for the given vnode.
42357c478bd9Sstevel@tonic-gate  * Otherwise panic.
42367c478bd9Sstevel@tonic-gate  */
42377c478bd9Sstevel@tonic-gate int
42387c478bd9Sstevel@tonic-gate nfs4_consistent_type(vnode_t *vp)
42397c478bd9Sstevel@tonic-gate {
42407c478bd9Sstevel@tonic-gate 	rnode4_t *rp = VTOR4(vp);
42417c478bd9Sstevel@tonic-gate 
42427c478bd9Sstevel@tonic-gate 	if (nfs4_vtype_debug && vp->v_type != VNON &&
42437c478bd9Sstevel@tonic-gate 	    rp->r_attr.va_type != VNON && vp->v_type != rp->r_attr.va_type) {
42447c478bd9Sstevel@tonic-gate 		cmn_err(CE_PANIC, "vnode %p type mismatch; v_type=%d, "
42457c478bd9Sstevel@tonic-gate 			"rnode attr type=%d", (void *)vp, vp->v_type,
42467c478bd9Sstevel@tonic-gate 			rp->r_attr.va_type);
42477c478bd9Sstevel@tonic-gate 	}
42487c478bd9Sstevel@tonic-gate 
42497c478bd9Sstevel@tonic-gate 	return (1);
42507c478bd9Sstevel@tonic-gate }
42517c478bd9Sstevel@tonic-gate #endif /* DEBUG */
4252