/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */

/*
 *	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
 *	All Rights Reserved
 */

#include <sys/param.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/thread.h>
#include <sys/t_lock.h>
#include <sys/time.h>
#include <sys/vnode.h>
#include <sys/vfs.h>
#include <sys/errno.h>
#include <sys/buf.h>
#include <sys/stat.h>
#include <sys/cred.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/dnlc.h>
#include <sys/vmsystm.h>
#include <sys/flock.h>
#include <sys/share.h>
#include <sys/cmn_err.h>
#include <sys/tiuser.h>
#include <sys/sysmacros.h>
#include <sys/callb.h>
#include <sys/acl.h>
#include <sys/kstat.h>
#include <sys/signal.h>
#include <sys/disp.h>
#include <sys/atomic.h>
#include <sys/list.h>
#include <sys/sdt.h>

#include <rpc/types.h>
#include <rpc/xdr.h>
#include <rpc/auth.h>
#include <rpc/clnt.h>

#include <nfs/nfs.h>
#include <nfs/nfs_clnt.h>
#include <nfs/nfs_acl.h>

#include <nfs/nfs4.h>
#include <nfs/rnode4.h>
#include <nfs/nfs4_clnt.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/page.h>
#include <vm/pvn.h>
#include <vm/seg.h>
#include <vm/seg_map.h>
#include <vm/seg_vn.h>

#include <sys/ddi.h>

/*
 * Arguments to page-flush thread.
 */
typedef struct {
	vnode_t *vp;
	cred_t *cr;
} pgflush_t;

#ifdef DEBUG
int nfs4_client_lease_debug;
int nfs4_sharedfh_debug;
int nfs4_fname_debug;

/* temporary: panic if v_type is inconsistent with r_attr va_type */
int nfs4_vtype_debug;

uint_t nfs4_tsd_key;
#endif

static time_t	nfs4_client_resumed = 0;
static	callb_id_t cid = 0;

static int	nfs4renew(nfs4_server_t *);
static void	nfs4_attrcache_va(vnode_t *, nfs4_ga_res_t *, int);
static void	nfs4_pgflush_thread(pgflush_t *);

static boolean_t nfs4_client_cpr_callb(void *, int);

struct mi4_globals {
	kmutex_t	mig_lock;  /* lock protecting mig_list */
	list_t		mig_list;  /* list of NFS v4 mounts in zone */
	boolean_t	mig_destructor_called;
};

static zone_key_t mi4_list_key;

/*
 * Attributes caching:
 *
 * Attributes are cached in the rnode in struct vattr form.
 * There is a time associated with the cached attributes (r_time_attr_inval)
 * which tells whether the attributes are valid. The time is initialized
 * to the difference between current time and the modify time of the vnode
 * when new attributes are cached. This allows the attributes for
 * files that have changed recently to be timed out sooner than for files
 * that have not changed for a long time. There are minimum and maximum
 * timeout values that can be set per mount point.
 */
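/*
 * For example, if acregmin is 3 seconds and acregmax is 60 seconds, the
 * attributes of a file whose last detected change was 10 seconds ago are
 * cached for 10 seconds, while a file that has not changed for several
 * minutes gets the full 60-second timeout.  See nfs4_attrcache_va() below
 * for the exact computation.
 */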

/*
 * If a cache purge is in progress, wait for it to finish.
 *
 * The current thread must not be in the middle of an
 * nfs4_start_op/nfs4_end_op region.  Otherwise, there could be a deadlock
 * between this thread, a recovery thread, and the page flush thread.
 */
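/*
 * The wait on r_cv is interruptible when the mount allows it (MI4_INT);
 * if the wait is interrupted, EINTR is returned, otherwise 0.
 */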
int
nfs4_waitfor_purge_complete(vnode_t *vp)
{
	rnode4_t *rp;
	k_sigset_t smask;

	rp = VTOR4(vp);
	if ((rp->r_serial != NULL && rp->r_serial != curthread) ||
	    ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread)) {
		mutex_enter(&rp->r_statelock);
		sigintr(&smask, VTOMI4(vp)->mi_flags & MI4_INT);
		while ((rp->r_serial != NULL && rp->r_serial != curthread) ||
		    ((rp->r_flags & R4PGFLUSH) &&
		    rp->r_pgflush != curthread)) {
			if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) {
				sigunintr(&smask);
				mutex_exit(&rp->r_statelock);
				return (EINTR);
			}
		}
		sigunintr(&smask);
		mutex_exit(&rp->r_statelock);
	}
	return (0);
}

/*
 * Validate caches by checking cached attributes. If they have timed out,
 * then get new attributes from the server.  As a side effect, cache
 * invalidation is done if the attributes have changed.
 *
 * If the attributes have not timed out and if there is a cache
 * invalidation being done by some other thread, then wait until that
 * thread has completed the cache invalidation.
 */
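/*
 * Note that when the attributes have timed out, the over-the-wire getattr
 * (nfs4_getattr_otw()) refreshes the attribute cache as a side effect via
 * nfs4_attr_cache().
 */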
int
nfs4_validate_caches(vnode_t *vp, cred_t *cr)
{
	int error;
	nfs4_ga_res_t gar;

	if (ATTRCACHE4_VALID(vp)) {
		error = nfs4_waitfor_purge_complete(vp);
		if (error)
			return (error);
		return (0);
	}

	return (nfs4_getattr_otw(vp, &gar, cr, 0));
}

/*
 * Fill in attribute from the cache.
 * If valid, then return 0 to indicate that no error occurred,
 * otherwise return 1 to indicate that an error occurred.
 */
static int
nfs4_getattr_cache(vnode_t *vp, struct vattr *vap)
{
	rnode4_t *rp;

	rp = VTOR4(vp);
	mutex_enter(&rp->r_statelock);
	mutex_enter(&rp->r_statev4_lock);
	if (ATTRCACHE4_VALID(vp)) {
		mutex_exit(&rp->r_statev4_lock);
		/*
		 * Cached attributes are valid
		 */
		*vap = rp->r_attr;
		mutex_exit(&rp->r_statelock);
		return (0);
	}
	mutex_exit(&rp->r_statev4_lock);
	mutex_exit(&rp->r_statelock);
	return (1);
}


/*
 * If the returned error is ESTALE, flush all caches.  The nfs4_purge_caches()
 * call is synchronous because all the pages were invalidated by the
 * nfs4_invalidate_pages() call.
 */
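/*
 * The rnode is also marked R4STALE and the error is latched in r_error so
 * that later operations on this vnode can detect the stale handle.
 */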
void
nfs4_purge_stale_fh(int errno, vnode_t *vp, cred_t *cr)
{
	struct rnode4 *rp = VTOR4(vp);

	/* Ensure that the ..._end_op() call has been done */
	ASSERT(tsd_get(nfs4_tsd_key) == NULL);

	if (errno != ESTALE)
		return;

	mutex_enter(&rp->r_statelock);
	rp->r_flags |= R4STALE;
	if (!rp->r_error)
		rp->r_error = errno;
	mutex_exit(&rp->r_statelock);
	if (nfs4_has_pages(vp))
		nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
	nfs4_purge_caches(vp, NFS4_PURGE_DNLC, cr, FALSE);
}

/*
 * Purge all of the various NFS `data' caches.  If "asyncpg" is TRUE, the
 * page purge is done asynchronously.
 */
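/*
 * For the asynchronous case the actual page flush is handed off to
 * nfs4_pgflush_thread() via zthread_create(); the R4PGFLUSH flag ensures
 * that at most one flush thread is active per rnode.
 */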
void
nfs4_purge_caches(vnode_t *vp, int purge_dnlc, cred_t *cr, int asyncpg)
{
	rnode4_t *rp;
	char *contents;
	vnode_t *xattr;
	int size;
	int pgflush;			/* are we the page flush thread? */

	/*
	 * Purge the DNLC for any entries which refer to this file.
	 */
	if (vp->v_count > 1 &&
	    (vp->v_type == VDIR || purge_dnlc == NFS4_PURGE_DNLC))
		dnlc_purge_vp(vp);

	/*
	 * Clear any readdir state bits and purge the readlink response cache.
	 */
	rp = VTOR4(vp);
	mutex_enter(&rp->r_statelock);
	rp->r_flags &= ~R4LOOKUP;
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;

	xattr = rp->r_xattr_dir;
	rp->r_xattr_dir = NULL;

	/*
	 * Purge pathconf cache too.
	 */
	rp->r_pathconf.pc4_xattr_valid = 0;
	rp->r_pathconf.pc4_cache_valid = 0;

	pgflush = (curthread == rp->r_pgflush);
	mutex_exit(&rp->r_statelock);

	if (contents != NULL) {
		kmem_free((void *)contents, size);
	}

	if (xattr != NULL)
		VN_RELE(xattr);

	/*
	 * Flush the page cache.  If the current thread is the page flush
	 * thread, don't initiate a new page flush.  There's no need for
	 * it, and doing it correctly is hard.
	 */
	if (nfs4_has_pages(vp) && !pgflush) {
		if (!asyncpg) {
			(void) nfs4_waitfor_purge_complete(vp);
			nfs4_flush_pages(vp, cr);
		} else {
			pgflush_t *args;

			/*
			 * We don't hold r_statelock while creating the
			 * thread, in case the call blocks.  So we use a
			 * flag to indicate that a page flush thread is
			 * active.
			 */
			mutex_enter(&rp->r_statelock);
			if (rp->r_flags & R4PGFLUSH) {
				mutex_exit(&rp->r_statelock);
			} else {
				rp->r_flags |= R4PGFLUSH;
				mutex_exit(&rp->r_statelock);

				args = kmem_alloc(sizeof (pgflush_t),
				    KM_SLEEP);
				args->vp = vp;
				VN_HOLD(args->vp);
				args->cr = cr;
				crhold(args->cr);
				(void) zthread_create(NULL, 0,
				    nfs4_pgflush_thread, args, 0,
				    minclsyspri);
			}
		}
	}

	/*
	 * Flush the readdir response cache.
	 */
	nfs4_purge_rddir_cache(vp);
}

/*
 * Invalidate all pages for the given file, after writing back the dirty
 * ones.
 */

void
nfs4_flush_pages(vnode_t *vp, cred_t *cr)
{
	int error;
	rnode4_t *rp = VTOR4(vp);

	error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_INVAL, cr, NULL);
	if (error == ENOSPC || error == EDQUOT) {
		mutex_enter(&rp->r_statelock);
		if (!rp->r_error)
			rp->r_error = error;
		mutex_exit(&rp->r_statelock);
	}
}

/*
 * Page flush thread.
 */

static void
nfs4_pgflush_thread(pgflush_t *args)
{
	rnode4_t *rp = VTOR4(args->vp);

	/* remember which thread we are, so we don't deadlock ourselves */
	mutex_enter(&rp->r_statelock);
	ASSERT(rp->r_pgflush == NULL);
	rp->r_pgflush = curthread;
	mutex_exit(&rp->r_statelock);

	nfs4_flush_pages(args->vp, args->cr);

	mutex_enter(&rp->r_statelock);
	rp->r_pgflush = NULL;
	rp->r_flags &= ~R4PGFLUSH;
	cv_broadcast(&rp->r_cv);
	mutex_exit(&rp->r_statelock);

	VN_RELE(args->vp);
	crfree(args->cr);
	kmem_free(args, sizeof (pgflush_t));
	zthread_exit();
}

/*
 * Purge the readdir cache of all entries which are not currently
 * being filled.
 */
void
nfs4_purge_rddir_cache(vnode_t *vp)
{
	rnode4_t *rp;

	rp = VTOR4(vp);

	mutex_enter(&rp->r_statelock);
	rp->r_direof = NULL;
	rp->r_flags &= ~R4LOOKUP;
	rp->r_flags |= R4READDIRWATTR;
	rddir4_cache_purge(rp);
	mutex_exit(&rp->r_statelock);
}

/*
 * Set attributes cache for given vnode using virtual attributes.  There is
 * no cache validation, but if the attributes are deemed to be stale, they
 * are ignored.  This corresponds to nfs3_attrcache().
 *
 * Set the timeout value on the attribute cache and fill it
 * with the passed in attributes.
 */
void
nfs4_attrcache_noinval(vnode_t *vp, nfs4_ga_res_t *garp, hrtime_t t)
{
	rnode4_t *rp = VTOR4(vp);

	mutex_enter(&rp->r_statelock);
	if (rp->r_time_attr_saved <= t)
		nfs4_attrcache_va(vp, garp, FALSE);
	mutex_exit(&rp->r_statelock);
}

/*
 * Use the passed in virtual attributes to check to see whether the
 * data and metadata caches are valid, cache the new attributes, and
 * then do the cache invalidation if required.
 *
 * The cache validation and caching of the new attributes is done
 * atomically via the use of the mutex, r_statelock.  If required,
 * the cache invalidation is done atomically w.r.t. the cache
 * validation and caching of the attributes via the pseudo lock,
 * r_serial.
 *
 * This routine is used to do cache validation and attributes caching
 * for operations with a single set of post operation attributes.
 */
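/*
 * Rough sequence, as implemented below: under r_statelock the new
 * attributes are stored and the need for invalidation is determined; if
 * caches must be flushed, r_serial is set to curthread, the lock is
 * dropped, the caches are purged, and finally r_serial is cleared and
 * waiters on r_cv are woken (see nfs4_waitfor_purge_complete()).
 */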

void
nfs4_attr_cache(vnode_t *vp, nfs4_ga_res_t *garp,
    hrtime_t t, cred_t *cr, int async,
    change_info4 *cinfo)
{
	rnode4_t *rp;
	int mtime_changed = 0;
	int ctime_changed = 0;
	vsecattr_t *vsp;
	int was_serial, set_time_cache_inval, recov;
	vattr_t *vap = &garp->n4g_va;
	mntinfo4_t *mi = VTOMI4(vp);
	len_t preattr_rsize;
	boolean_t writemodify_set = B_FALSE;
	boolean_t cachepurge_set = B_FALSE;

	ASSERT(mi->mi_vfsp->vfs_dev == garp->n4g_va.va_fsid);

	/* Is curthread the recovery thread? */
	mutex_enter(&mi->mi_lock);
	recov = (VTOMI4(vp)->mi_recovthread == curthread);
	mutex_exit(&mi->mi_lock);

	rp = VTOR4(vp);
	mutex_enter(&rp->r_statelock);
	was_serial = (rp->r_serial == curthread);
	if (rp->r_serial != NULL && !was_serial) {
		/*
		 * Purge current attrs and bail out to avoid potential deadlock
		 * between another thread caching attrs (r_serial thread), this
		 * thread, and a thread trying to read or write pages.
		 */
		PURGE_ATTRCACHE4_LOCKED(rp);
		mutex_exit(&rp->r_statelock);
		return;
	}

	/*
	 * If there is a page flush thread, the current thread needs to
	 * bail out, to prevent a possible deadlock between the current
	 * thread (which might be in a start_op/end_op region), the
	 * recovery thread, and the page flush thread.  Expire the
	 * attribute cache, so that any attributes the current thread was
	 * going to set are not lost.
	 */
	if ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread) {
		PURGE_ATTRCACHE4_LOCKED(rp);
		mutex_exit(&rp->r_statelock);
		return;
	}

	if (rp->r_time_attr_saved > t) {
		/*
		 * Attributes have been cached since these attributes were
		 * generated.  If there is an inconsistency in what is
		 * cached, mark the cache invalid.  If not, don't act on them.
		 */
		if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size))
			PURGE_ATTRCACHE4_LOCKED(rp);
		mutex_exit(&rp->r_statelock);
		return;
	}
	set_time_cache_inval = 0;
	if (cinfo) {
		/*
		 * Only directory modifying callers pass non-NULL cinfo.
		 */
		ASSERT(vp->v_type == VDIR);
		/*
		 * If the cache timeout either doesn't exist or hasn't expired,
		 * and the directory didn't change on the server before the
		 * dirmod op, and the directory didn't change after the dirmod
		 * op but before the getattr, then there's a chance that the
		 * client's cached data for this object is current (not stale).
		 * No immediate cache flush is required.
		 */
		if ((! rp->r_time_cache_inval || t < rp->r_time_cache_inval) &&
		    cinfo->before == rp->r_change &&
		    (garp->n4g_change_valid &&
		    cinfo->after == garp->n4g_change)) {

			/*
			 * If atomic isn't set, then the before/after info
			 * cannot be blindly trusted.  For this case, we tell
			 * nfs4_attrcache_va to cache the attrs but also
			 * establish an absolute maximum cache timeout.  When
			 * the timeout is reached, caches will be flushed.
			 */
			if (! cinfo->atomic)
				set_time_cache_inval = 1;
		} else {

			/*
			 * We're not sure exactly what changed, but we know
			 * what to do.  flush all caches for dir.  remove the
			 * attr timeout.
			 *
			 * a) timeout expired.  flush all caches.
			 * b) r_change != cinfo.before.  flush all caches.
			 * c) r_change == cinfo.before, but cinfo.after !=
			 *    post-op getattr(change).  flush all caches.
			 * d) post-op getattr(change) not provided by server.
			 *    flush all caches.
			 */
			mtime_changed = 1;
			ctime_changed = 1;
			rp->r_time_cache_inval = 0;
		}
	} else {
		/*
		 * A write thread, after writing data to the file on the
		 * remote server, always sets R4WRITEMODIFIED to indicate
		 * that the file on the remote server was modified with a
		 * WRITE operation and has already marked the attribute
		 * cache as timed out.  If R4WRITEMODIFIED is set, do not
		 * check for mtime and ctime changes.
		 */
		if (!(rp->r_flags & R4WRITEMODIFIED)) {
			if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size))
				mtime_changed = 1;

			if (rp->r_attr.va_ctime.tv_sec !=
			    vap->va_ctime.tv_sec ||
			    rp->r_attr.va_ctime.tv_nsec !=
			    vap->va_ctime.tv_nsec)
				ctime_changed = 1;

			/*
			 * If the change attribute was not provided by server
			 * or it differs, then flush all caches.
			 */
			if (!garp->n4g_change_valid ||
			    rp->r_change != garp->n4g_change) {
				mtime_changed = 1;
				ctime_changed = 1;
			}
		} else {
			writemodify_set = B_TRUE;
		}
	}

	preattr_rsize = rp->r_size;

	nfs4_attrcache_va(vp, garp, set_time_cache_inval);

	/*
	 * If we have updated the file size in nfs4_attrcache_va, then as
	 * soon as we drop r_statelock we will be in the middle of purging
	 * and refreshing all of our caches.  It is possible for another
	 * thread to pick up the new file size and read in zeroed data.
	 * Stall other threads until the cache purge is complete.
	 */
	if ((!cinfo) && (rp->r_size != preattr_rsize)) {
		/*
		 * If R4WRITEMODIFIED was set and we have updated the file
		 * size, the file size returned by the server is not
		 * necessarily the result of this client's WRITE.  We need
		 * to purge all caches.
		 */
		if (writemodify_set)
			mtime_changed = 1;

		if (mtime_changed && !(rp->r_flags & R4INCACHEPURGE)) {
			rp->r_flags |= R4INCACHEPURGE;
			cachepurge_set = B_TRUE;
		}
	}

	if (!mtime_changed && !ctime_changed) {
		mutex_exit(&rp->r_statelock);
		return;
	}

	rp->r_serial = curthread;

	mutex_exit(&rp->r_statelock);

	/*
	 * If we're the recov thread, then force async nfs4_purge_caches
	 * to avoid potential deadlock.
	 */
	if (mtime_changed)
		nfs4_purge_caches(vp, NFS4_NOPURGE_DNLC, cr, recov ? 1 : async);

	if ((rp->r_flags & R4INCACHEPURGE) && cachepurge_set) {
		mutex_enter(&rp->r_statelock);
		rp->r_flags &= ~R4INCACHEPURGE;
		cv_broadcast(&rp->r_cv);
		mutex_exit(&rp->r_statelock);
		cachepurge_set = B_FALSE;
	}

	if (ctime_changed) {
		(void) nfs4_access_purge_rp(rp);
		if (rp->r_secattr != NULL) {
			mutex_enter(&rp->r_statelock);
			vsp = rp->r_secattr;
			rp->r_secattr = NULL;
			mutex_exit(&rp->r_statelock);
			if (vsp != NULL)
				nfs4_acl_free_cache(vsp);
		}
	}

	if (!was_serial) {
		mutex_enter(&rp->r_statelock);
		rp->r_serial = NULL;
		cv_broadcast(&rp->r_cv);
		mutex_exit(&rp->r_statelock);
	}
}

/*
 * Set attributes cache for given vnode using virtual attributes.
 *
 * Set the timeout value on the attribute cache and fill it
 * with the passed in attributes.
 *
 * The caller must be holding r_statelock.
 */
static void
nfs4_attrcache_va(vnode_t *vp, nfs4_ga_res_t *garp, int set_cache_timeout)
{
	rnode4_t *rp;
	mntinfo4_t *mi;
	hrtime_t delta;
	hrtime_t now;
	vattr_t *vap = &garp->n4g_va;

	rp = VTOR4(vp);

	ASSERT(MUTEX_HELD(&rp->r_statelock));
	ASSERT(vap->va_mask == AT_ALL);

	/* Switch to master before checking v_flag */
	if (IS_SHADOW(vp, rp))
		vp = RTOV4(rp);

	now = gethrtime();

	mi = VTOMI4(vp);

	/*
	 * Only establish a new cache timeout (if requested).  Never
	 * extend a timeout.  Never clear a timeout.  Clearing a timeout
	 * is done by nfs4_update_dircaches (ancestor in our call chain)
	 */
	if (set_cache_timeout && ! rp->r_time_cache_inval)
		rp->r_time_cache_inval = now + mi->mi_acdirmax;

	/*
	 * Delta is the number of nanoseconds that we will
	 * cache the attributes of the file.  It is based on
	 * the number of nanoseconds since the last time that
	 * we detected a change.  The assumption is that files
	 * that changed recently are likely to change again.
	 * There is a minimum and a maximum for regular files
	 * and for directories which is enforced though.
	 *
	 * Using the time since last change was detected
	 * eliminates direct comparison or calculation
	 * using mixed client and server times.  NFS does
	 * not make any assumptions regarding the client
	 * and server clocks being synchronized.
	 */
	if (vap->va_mtime.tv_sec != rp->r_attr.va_mtime.tv_sec ||
	    vap->va_mtime.tv_nsec != rp->r_attr.va_mtime.tv_nsec ||
	    vap->va_size != rp->r_attr.va_size) {
		rp->r_time_attr_saved = now;
	}

	if ((mi->mi_flags & MI4_NOAC) || (vp->v_flag & VNOCACHE))
		delta = 0;
	else {
		delta = now - rp->r_time_attr_saved;
		if (vp->v_type == VDIR) {
			if (delta < mi->mi_acdirmin)
				delta = mi->mi_acdirmin;
			else if (delta > mi->mi_acdirmax)
				delta = mi->mi_acdirmax;
		} else {
			if (delta < mi->mi_acregmin)
				delta = mi->mi_acregmin;
			else if (delta > mi->mi_acregmax)
				delta = mi->mi_acregmax;
		}
	}
	rp->r_time_attr_inval = now + delta;

	rp->r_attr = *vap;
	if (garp->n4g_change_valid)
		rp->r_change = garp->n4g_change;

	/*
	 * The attributes that were returned may be valid and can
	 * be used, but they may not be allowed to be cached.
	 * Reset the timers to cause immediate invalidation and
	 * clear r_change so no VERIFY operations will succeed
	 */
	if (garp->n4g_attrwhy == NFS4_GETATTR_NOCACHE_OK) {
		rp->r_time_attr_inval = now;
		rp->r_time_attr_saved = now;
		rp->r_change = 0;
	}

	/*
	 * If mounted_on_fileid returned AND the object is a stub,
	 * then set object's va_nodeid to the mounted over fid
	 * returned by server.
	 *
	 * If mounted_on_fileid not provided/supported, then
	 * just set it to 0 for now.  Eventually it would be
	 * better to set it to a hashed version of FH.  This
	 * would probably be good enough to provide a unique
	 * fid/d_ino within a dir.
	 *
	 * We don't need to carry mounted_on_fileid in the
	 * rnode as long as the client never requests fileid
	 * without also requesting mounted_on_fileid.  For
	 * now, it stays.
	 */
	if (garp->n4g_mon_fid_valid) {
		rp->r_mntd_fid = garp->n4g_mon_fid;

		if (RP_ISSTUB(rp))
			rp->r_attr.va_nodeid = rp->r_mntd_fid;
	}

	/*
	 * Check to see if there are valid pathconf bits to
	 * cache in the rnode.
	 */
	if (garp->n4g_ext_res) {
		if (garp->n4g_ext_res->n4g_pc4.pc4_cache_valid) {
			rp->r_pathconf = garp->n4g_ext_res->n4g_pc4;
		} else {
			if (garp->n4g_ext_res->n4g_pc4.pc4_xattr_valid) {
				rp->r_pathconf.pc4_xattr_valid = TRUE;
				rp->r_pathconf.pc4_xattr_exists =
				    garp->n4g_ext_res->n4g_pc4.pc4_xattr_exists;
			}
		}
	}
	/*
	 * Update the size of the file if there is no cached data or if
	 * the cached data is clean and there is no data being written
	 * out.
	 */
	if (rp->r_size != vap->va_size &&
	    (!vn_has_cached_data(vp) ||
	    (!(rp->r_flags & R4DIRTY) && rp->r_count == 0))) {
		rp->r_size = vap->va_size;
	}
	nfs_setswaplike(vp, vap);
	rp->r_flags &= ~R4WRITEMODIFIED;
}

/*
 * Get attributes over-the-wire and update attributes cache
 * if no error occurred in the over-the-wire operation.
 * Return 0 if successful, otherwise error.
 */
int
nfs4_getattr_otw(vnode_t *vp, nfs4_ga_res_t *garp, cred_t *cr, int get_acl)
{
	mntinfo4_t *mi = VTOMI4(vp);
	hrtime_t t;
	nfs4_recov_state_t recov_state;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

	/* Save the original mount point security flavor */
	(void) save_mnt_secinfo(mi->mi_curr_serv);

recov_retry:

	if ((e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR,
	    &recov_state, NULL))) {
		(void) check_mnt_secinfo(mi->mi_curr_serv, vp);
		return (e.error);
	}

	t = gethrtime();

	nfs4_getattr_otw_norecovery(vp, garp, &e, cr, get_acl);

	if (nfs4_needs_recovery(&e, FALSE, vp->v_vfsp)) {
		if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL,
		    NULL, OP_GETATTR, NULL, NULL, NULL) == FALSE) {
			nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR,
			    &recov_state, 1);
			goto recov_retry;
		}
	}

	nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 0);

	if (!e.error) {
		if (e.stat == NFS4_OK) {
			nfs4_attr_cache(vp, garp, t, cr, FALSE, NULL);
		} else {
			e.error = geterrno4(e.stat);

			nfs4_purge_stale_fh(e.error, vp, cr);
		}
	}

	/*
	 * If we did a getattr on a node that is a stub for a crossed
	 * mount point, keep the original secinfo flavor for
	 * the current file system, not the crossed one.
	 */
	(void) check_mnt_secinfo(mi->mi_curr_serv, vp);

	return (e.error);
}

/*
 * Generate a compound to get attributes over-the-wire.
 */
void
nfs4_getattr_otw_norecovery(vnode_t *vp, nfs4_ga_res_t *garp,
    nfs4_error_t *ep, cred_t *cr, int get_acl)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue;
	rnode4_t *rp = VTOR4(vp);
	nfs_argop4 argop[2];

	args.ctag = TAG_GETATTR;

	args.array_len = 2;
	args.array = argop;

	/* putfh */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

	/* getattr */
	/*
	 * Unlike nfs version 2 and 3, where getattr returns all the
	 * attributes, nfs version 4 returns only the ones explicitly
	 * asked for. This creates problems, as some system functions
	 * (e.g. cache check) require certain attributes and if the
	 * cached node lacks some attributes such as uid/gid, it can
	 * affect system utilities (e.g. "ls") that rely on the information
	 * to be there. This can lead to anything from system crashes to
	 * corrupted information processed by user apps.
	 * So to ensure that all bases are covered, request at least
	 * the AT_ALL attribute mask.
	 */
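	/*
	 * NFS4_VATTR_MASK covers the attributes needed to fill in a vattr;
	 * FATTR4_ACL_MASK is OR'ed in below only when the caller asked for
	 * the ACL (get_acl != 0).
	 */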
	argop[1].argop = OP_GETATTR;
	argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	if (get_acl)
		argop[1].nfs_argop4_u.opgetattr.attr_request |= FATTR4_ACL_MASK;
	argop[1].nfs_argop4_u.opgetattr.mi = VTOMI4(vp);

	doqueue = 1;

	rfs4call(VTOMI4(vp), &args, &res, cr, &doqueue, 0, ep);

	if (ep->error)
		return;

	if (res.status != NFS4_OK) {
		xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}

	*garp = res.array[1].nfs_resop4_u.opgetattr.ga_res;

	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}

/*
 * Return either cached or remote attributes.  If we fetch remote
 * attributes, use them to check and invalidate the caches, then cache
 * the new attributes.
9217c478bd9Sstevel@tonic-gate  */
9227c478bd9Sstevel@tonic-gate int
nfs4getattr(vnode_t * vp,vattr_t * vap,cred_t * cr)9237c478bd9Sstevel@tonic-gate nfs4getattr(vnode_t *vp, vattr_t *vap, cred_t *cr)
9247c478bd9Sstevel@tonic-gate {
9257c478bd9Sstevel@tonic-gate 	int error;
9267c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
9277c478bd9Sstevel@tonic-gate 	nfs4_ga_res_t gar;
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate 	ASSERT(nfs4_consistent_type(vp));
9307c478bd9Sstevel@tonic-gate 
9317c478bd9Sstevel@tonic-gate 	/*
9327c478bd9Sstevel@tonic-gate 	 * If we've got cached attributes, we're done, otherwise go
9337c478bd9Sstevel@tonic-gate 	 * to the server to get attributes, which will update the cache
934b9238976Sth 	 * in the process. Either way, use the cached attributes for
935b9238976Sth 	 * the caller's vattr_t.
936b9238976Sth 	 *
937b9238976Sth 	 * Note that we ignore the gar set by the OTW call: the attr caching
938b9238976Sth 	 * code may make adjustments when storing to the rnode, and we want
939b9238976Sth 	 * to see those changes here.
9407c478bd9Sstevel@tonic-gate 	 */
9417c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
942b9238976Sth 	error = 0;
9437c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
944b9238976Sth 	if (!ATTRCACHE4_VALID(vp)) {
9457c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
946b9238976Sth 		error = nfs4_getattr_otw(vp, &gar, cr, 0);
947b9238976Sth 		mutex_enter(&rp->r_statelock);
9487c478bd9Sstevel@tonic-gate 	}
9497c478bd9Sstevel@tonic-gate 
9507c478bd9Sstevel@tonic-gate 	if (!error)
951b9238976Sth 		*vap = rp->r_attr;
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 	/* Return the client's view of file size */
9547c478bd9Sstevel@tonic-gate 	vap->va_size = rp->r_size;
955b9238976Sth 
9567c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
9577c478bd9Sstevel@tonic-gate 
9587c478bd9Sstevel@tonic-gate 	ASSERT(nfs4_consistent_type(vp));
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	return (error);
9617c478bd9Sstevel@tonic-gate }
9627c478bd9Sstevel@tonic-gate 
9637c478bd9Sstevel@tonic-gate int
nfs4_attr_otw(vnode_t * vp,nfs4_tag_type_t tag_type,nfs4_ga_res_t * garp,bitmap4 reqbitmap,cred_t * cr)9647c478bd9Sstevel@tonic-gate nfs4_attr_otw(vnode_t *vp, nfs4_tag_type_t tag_type,
965b9238976Sth     nfs4_ga_res_t *garp, bitmap4 reqbitmap, cred_t *cr)
9667c478bd9Sstevel@tonic-gate {
9677c478bd9Sstevel@tonic-gate 	COMPOUND4args_clnt args;
9687c478bd9Sstevel@tonic-gate 	COMPOUND4res_clnt res;
9697c478bd9Sstevel@tonic-gate 	int doqueue;
9707c478bd9Sstevel@tonic-gate 	nfs_argop4 argop[2];
9717c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VTOMI4(vp);
9727c478bd9Sstevel@tonic-gate 	bool_t needrecov = FALSE;
9737c478bd9Sstevel@tonic-gate 	nfs4_recov_state_t recov_state;
9747c478bd9Sstevel@tonic-gate 	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
9757c478bd9Sstevel@tonic-gate 	nfs4_ga_ext_res_t *gerp;
9767c478bd9Sstevel@tonic-gate 
9777c478bd9Sstevel@tonic-gate 	recov_state.rs_flags = 0;
9787c478bd9Sstevel@tonic-gate 	recov_state.rs_num_retry_despite_err = 0;
9797c478bd9Sstevel@tonic-gate 
9807c478bd9Sstevel@tonic-gate recov_retry:
9817c478bd9Sstevel@tonic-gate 	args.ctag = tag_type;
9827c478bd9Sstevel@tonic-gate 
9837c478bd9Sstevel@tonic-gate 	args.array_len = 2;
9847c478bd9Sstevel@tonic-gate 	args.array = argop;
9857c478bd9Sstevel@tonic-gate 
9867c478bd9Sstevel@tonic-gate 	e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR, &recov_state, NULL);
9877c478bd9Sstevel@tonic-gate 	if (e.error)
9887c478bd9Sstevel@tonic-gate 		return (e.error);
9897c478bd9Sstevel@tonic-gate 
9907c478bd9Sstevel@tonic-gate 	/* putfh */
9917c478bd9Sstevel@tonic-gate 	argop[0].argop = OP_CPUTFH;
9927c478bd9Sstevel@tonic-gate 	argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(vp)->r_fh;
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate 	/* getattr */
9957c478bd9Sstevel@tonic-gate 	argop[1].argop = OP_GETATTR;
9967c478bd9Sstevel@tonic-gate 	argop[1].nfs_argop4_u.opgetattr.attr_request = reqbitmap;
9977c478bd9Sstevel@tonic-gate 	argop[1].nfs_argop4_u.opgetattr.mi = mi;
9987c478bd9Sstevel@tonic-gate 
9997c478bd9Sstevel@tonic-gate 	doqueue = 1;
10007c478bd9Sstevel@tonic-gate 
10017c478bd9Sstevel@tonic-gate 	NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
10027c478bd9Sstevel@tonic-gate 	    "nfs4_attr_otw: %s call, rp %s", needrecov ? "recov" : "first",
10037c478bd9Sstevel@tonic-gate 	    rnode4info(VTOR4(vp))));
10047c478bd9Sstevel@tonic-gate 
10057c478bd9Sstevel@tonic-gate 	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate 	needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp);
10087c478bd9Sstevel@tonic-gate 	if (!needrecov && e.error) {
10097c478bd9Sstevel@tonic-gate 		nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
1010b9238976Sth 		    needrecov);
10117c478bd9Sstevel@tonic-gate 		return (e.error);
10127c478bd9Sstevel@tonic-gate 	}
10137c478bd9Sstevel@tonic-gate 
10147c478bd9Sstevel@tonic-gate 	if (needrecov) {
10157c478bd9Sstevel@tonic-gate 		bool_t abort;
10167c478bd9Sstevel@tonic-gate 
10177c478bd9Sstevel@tonic-gate 		NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
10187c478bd9Sstevel@tonic-gate 		    "nfs4_attr_otw: initiating recovery\n"));
10197c478bd9Sstevel@tonic-gate 
10207c478bd9Sstevel@tonic-gate 		abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL,
10212f172c55SRobert Thurlow 		    NULL, OP_GETATTR, NULL, NULL, NULL);
10227c478bd9Sstevel@tonic-gate 		nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
1023b9238976Sth 		    needrecov);
10247c478bd9Sstevel@tonic-gate 		if (!e.error) {
1025a17ce845SMarcel Telka 			xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
10267c478bd9Sstevel@tonic-gate 			e.error = geterrno4(res.status);
10277c478bd9Sstevel@tonic-gate 		}
10287c478bd9Sstevel@tonic-gate 		if (abort == FALSE)
10297c478bd9Sstevel@tonic-gate 			goto recov_retry;
10307c478bd9Sstevel@tonic-gate 		return (e.error);
10317c478bd9Sstevel@tonic-gate 	}
10327c478bd9Sstevel@tonic-gate 
10337c478bd9Sstevel@tonic-gate 	if (res.status) {
10347c478bd9Sstevel@tonic-gate 		e.error = geterrno4(res.status);
10357c478bd9Sstevel@tonic-gate 	} else {
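		/*
		 * Keep the caller's n4g_ext_res pointer: the structure copy
		 * below would otherwise overwrite it, so save it first,
		 * restore it afterwards, and copy the extended results into
		 * the caller's buffer separately.
		 */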
10367c478bd9Sstevel@tonic-gate 		gerp = garp->n4g_ext_res;
10377c478bd9Sstevel@tonic-gate 		bcopy(&res.array[1].nfs_resop4_u.opgetattr.ga_res,
1038b9238976Sth 		    garp, sizeof (nfs4_ga_res_t));
10397c478bd9Sstevel@tonic-gate 		garp->n4g_ext_res = gerp;
10407c478bd9Sstevel@tonic-gate 		if (garp->n4g_ext_res &&
10417c478bd9Sstevel@tonic-gate 		    res.array[1].nfs_resop4_u.opgetattr.ga_res.n4g_ext_res)
10427c478bd9Sstevel@tonic-gate 			bcopy(res.array[1].nfs_resop4_u.opgetattr.
1043b9238976Sth 			    ga_res.n4g_ext_res,
1044b9238976Sth 			    garp->n4g_ext_res, sizeof (nfs4_ga_ext_res_t));
10457c478bd9Sstevel@tonic-gate 	}
1046a17ce845SMarcel Telka 	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
10477c478bd9Sstevel@tonic-gate 	nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
1048b9238976Sth 	    needrecov);
10497c478bd9Sstevel@tonic-gate 	return (e.error);
10507c478bd9Sstevel@tonic-gate }
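
/*
 * Illustrative usage sketch, not part of the original source: a caller that
 * only needs the decoded attributes might invoke nfs4_attr_otw() roughly as
 * below.  TAG_GETATTR, NFS4_VATTR_MASK and the n4g_va field are assumptions
 * about the surrounding client headers rather than a definitive interface.
 *
 *	nfs4_ga_res_t gar;
 *
 *	gar.n4g_ext_res = NULL;
 *	if (nfs4_attr_otw(vp, TAG_GETATTR, &gar, NFS4_VATTR_MASK, cr) == 0)
 *		*vap = gar.n4g_va;
 */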
10517c478bd9Sstevel@tonic-gate 
10527c478bd9Sstevel@tonic-gate /*
10537c478bd9Sstevel@tonic-gate  * Asynchronous I/O parameters.  nfs_async_threads is the high-water mark
10547c478bd9Sstevel@tonic-gate  * for the demand-based allocation of async threads per-mount.  The
10557c478bd9Sstevel@tonic-gate  * nfs_async_timeout is the amount of time a thread will live after it
10567c478bd9Sstevel@tonic-gate  * becomes idle, unless new I/O requests are received before the thread
10577c478bd9Sstevel@tonic-gate  * dies.  See nfs4_async_putpage and nfs4_async_start.
10587c478bd9Sstevel@tonic-gate  */
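
/*
 * For reference (a sketch, not the original code): the idle lifetime
 * described above is enforced by each worker waiting on the per-mount work
 * condition variable with a relative timeout and exiting when the wait
 * times out with no new request queued.  The exact field and tunable names
 * are assumptions based on definitions elsewhere in this file.
 *
 *	if (cv_reltimedwait(&mi->mi_async_work_cv[NFS4_ASYNC_QUEUE],
 *	    &mi->mi_async_lock, nfs_async_timeout, TR_CLOCK_TICK) <= 0)
 *		break;	(timed out while idle; let this worker thread exit)
 */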
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate static void	nfs4_async_start(struct vfs *);
10610776f5e6SVallish Vaidyeshwara static void	nfs4_async_pgops_start(struct vfs *);
10620776f5e6SVallish Vaidyeshwara static void	nfs4_async_common_start(struct vfs *, int);
10637c478bd9Sstevel@tonic-gate 
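/*
 * Release the bookkeeping taken when an async request was queued: drop the
 * rnode's r_count (and r_awcount for page-out/page-io requests), wake any
 * thread waiting for those counts to drain, release the vnode and cred
 * holds, and free the request structure itself.
 */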
10647c478bd9Sstevel@tonic-gate static void
10657c478bd9Sstevel@tonic-gate free_async_args4(struct nfs4_async_reqs *args)
10667c478bd9Sstevel@tonic-gate {
10677c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 	if (args->a_io != NFS4_INACTIVE) {
10707c478bd9Sstevel@tonic-gate 		rp = VTOR4(args->a_vp);
10717c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
10727c478bd9Sstevel@tonic-gate 		rp->r_count--;
10737c478bd9Sstevel@tonic-gate 		if (args->a_io == NFS4_PUTAPAGE ||
10747c478bd9Sstevel@tonic-gate 		    args->a_io == NFS4_PAGEIO)
10757c478bd9Sstevel@tonic-gate 			rp->r_awcount--;
10767c478bd9Sstevel@tonic-gate 		cv_broadcast(&rp->r_cv);
10777c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
10787c478bd9Sstevel@tonic-gate 		VN_RELE(args->a_vp);
10797c478bd9Sstevel@tonic-gate 	}
10807c478bd9Sstevel@tonic-gate 	crfree(args->a_cred);
10817c478bd9Sstevel@tonic-gate 	kmem_free(args, sizeof (*args));
10827c478bd9Sstevel@tonic-gate }
10837c478bd9Sstevel@tonic-gate 
10847c478bd9Sstevel@tonic-gate /*
10857c478bd9Sstevel@tonic-gate  * Cross-zone thread creation and NFS access are disallowed, yet fsflush() and
10867c478bd9Sstevel@tonic-gate  * pageout(), running in the global zone, have legitimate reasons to do
10877c478bd9Sstevel@tonic-gate  * VOP_PUTPAGE(B_ASYNC) on other zones' NFS mounts.  We avoid the problem by
10887c478bd9Sstevel@tonic-gate  * use of a per-mount "asynchronous requests manager thread" which is
10897c478bd9Sstevel@tonic-gate  * signaled by the various asynchronous work routines when there is
10907c478bd9Sstevel@tonic-gate  * asynchronous work to be done.  It is responsible for creating new
10917c478bd9Sstevel@tonic-gate  * worker threads if necessary, and notifying existing worker threads
10927c478bd9Sstevel@tonic-gate  * that there is work to be done.
10937c478bd9Sstevel@tonic-gate  *
10947c478bd9Sstevel@tonic-gate  * In other words, it will "take the specifications from the customers and
10957c478bd9Sstevel@tonic-gate  * give them to the engineers."
10967c478bd9Sstevel@tonic-gate  *
10977c478bd9Sstevel@tonic-gate  * Worker threads die off of their own accord if they are no longer
10987c478bd9Sstevel@tonic-gate  * needed.
10997c478bd9Sstevel@tonic-gate  *
11007c478bd9Sstevel@tonic-gate  * This thread is killed when the zone is going away or the filesystem
11017c478bd9Sstevel@tonic-gate  * is being unmounted.
11027c478bd9Sstevel@tonic-gate  */
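
/*
 * Illustrative producer-side handshake, not part of the original source:
 * the async work routines described above queue a request and then nudge
 * this manager roughly as follows.  The mi_async_reqs list and the
 * mi_async_reqs_cv condition variable are assumptions about the mntinfo4_t
 * layout used elsewhere in this file.
 *
 *	mutex_enter(&mi->mi_async_lock);
 *	(link the request onto the appropriate mi->mi_async_reqs[] queue)
 *	cv_signal(&mi->mi_async_reqs_cv);	wake the manager thread
 *	mutex_exit(&mi->mi_async_lock);
 */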
11037c478bd9Sstevel@tonic-gate void
11047c478bd9Sstevel@tonic-gate nfs4_async_manager(vfs_t *vfsp)
11057c478bd9Sstevel@tonic-gate {
11067c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
11077c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
11087c478bd9Sstevel@tonic-gate 	uint_t max_threads;
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 	mi = VFTOMI4(vfsp);
11117c478bd9Sstevel@tonic-gate 
11127c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr,
1113b9238976Sth 	    "nfs4_async_manager");
11147c478bd9Sstevel@tonic-gate 
11157c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_async_lock);
11167c478bd9Sstevel@tonic-gate 	/*
11177c478bd9Sstevel@tonic-gate 	 * We want to stash the max number of threads that this mount was
11187c478bd9Sstevel@tonic-gate 	 * allowed so we can use it later when the variable is set to zero as
11197c478bd9Sstevel@tonic-gate 	 * part of the zone/mount going away.
11207c478bd9Sstevel@tonic-gate 	 *
11217c478bd9Sstevel@tonic-gate 	 * We want to be able to create at least one thread to handle
1122388e50fcSMarcel Telka 	 * asynchronous inactive calls.
11237c478bd9Sstevel@tonic-gate 	 */
11247c478bd9Sstevel@tonic-gate 	max_threads = MAX(mi->mi_max_threads, 1);
11257c478bd9Sstevel@tonic-gate 	/*
11267c478bd9Sstevel@tonic-gate 	 * We don't want to wait for mi_max_threads to go to zero, since that
11277c478bd9Sstevel@tonic-gate 	 * happens as part of a failed unmount, but this thread should only
11287c478bd9Sstevel@tonic-gate 	 * exit when the mount is really going away.
11297c478bd9Sstevel@tonic-gate 	 *
11307c478bd9Sstevel@tonic-gate 	 * Once MI4_ASYNC_MGR_STOP is set, no more async operations will be
11317c478bd9Sstevel@tonic-gate 	 * attempted: the various _async_*() functions know to do things
1132