xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs4_rnode.c (revision e010bda9)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
59f9e2373Sjwahlig  * Common Development and Distribution License (the "License").
69f9e2373Sjwahlig  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2272dd5e52SMarcel Telka  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  *  	Copyright (c) 1983,1984,1985,1986,1987,1988,1989  AT&T.
287c478bd9Sstevel@tonic-gate  *	All Rights Reserved
297c478bd9Sstevel@tonic-gate  */
307c478bd9Sstevel@tonic-gate 
31f5654033SAlexander Eremin /*
32f5654033SAlexander Eremin  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
33ade42b55SSebastien Roy  * Copyright (c) 2017 by Delphix. All rights reserved.
34f5654033SAlexander Eremin  */
357c478bd9Sstevel@tonic-gate 
367c478bd9Sstevel@tonic-gate #include <sys/param.h>
377c478bd9Sstevel@tonic-gate #include <sys/types.h>
387c478bd9Sstevel@tonic-gate #include <sys/systm.h>
397c478bd9Sstevel@tonic-gate #include <sys/cred.h>
407c478bd9Sstevel@tonic-gate #include <sys/proc.h>
417c478bd9Sstevel@tonic-gate #include <sys/user.h>
427c478bd9Sstevel@tonic-gate #include <sys/time.h>
437c478bd9Sstevel@tonic-gate #include <sys/buf.h>
447c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
457c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
467c478bd9Sstevel@tonic-gate #include <sys/socket.h>
477c478bd9Sstevel@tonic-gate #include <sys/uio.h>
487c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
497c478bd9Sstevel@tonic-gate #include <sys/swap.h>
507c478bd9Sstevel@tonic-gate #include <sys/errno.h>
517c478bd9Sstevel@tonic-gate #include <sys/debug.h>
527c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
537c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
547c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
557c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
567c478bd9Sstevel@tonic-gate #include <sys/session.h>
577c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
587c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
597c478bd9Sstevel@tonic-gate #include <sys/acl.h>
607c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
617c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
627c478bd9Sstevel@tonic-gate #include <sys/flock.h>
637c478bd9Sstevel@tonic-gate #include <sys/dirent.h>
647c478bd9Sstevel@tonic-gate #include <sys/flock.h>
657c478bd9Sstevel@tonic-gate #include <sys/callb.h>
666962f5b8SThomas Haynes #include <sys/sdt.h>
677c478bd9Sstevel@tonic-gate 
68f8bbc571SPavel Filipensky #include <vm/pvn.h>
69f8bbc571SPavel Filipensky 
707c478bd9Sstevel@tonic-gate #include <rpc/types.h>
717c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
727c478bd9Sstevel@tonic-gate #include <rpc/auth.h>
737c478bd9Sstevel@tonic-gate #include <rpc/rpcsec_gss.h>
747c478bd9Sstevel@tonic-gate #include <rpc/clnt.h>
757c478bd9Sstevel@tonic-gate 
767c478bd9Sstevel@tonic-gate #include <nfs/nfs.h>
777c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h>
787c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h>
797c478bd9Sstevel@tonic-gate 
807c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
817c478bd9Sstevel@tonic-gate #include <nfs/rnode4.h>
827c478bd9Sstevel@tonic-gate #include <nfs/nfs4_clnt.h>
837c478bd9Sstevel@tonic-gate 
/*
 * The hash queues for the access to active and cached rnodes
 * are organized as doubly linked lists.  A reader/writer lock
 * for each hash bucket is used to control access and to synchronize
 * lookups, additions, and deletions from the hash queue.
 *
 * The rnode freelist is organized as a doubly linked list with
 * a head pointer.  Additions and deletions are synchronized via
 * a single mutex.
 *
 * In order to add an rnode to the free list, it must be hashed into
 * a hash queue and the exclusive lock to the hash queue be held.
 * If an rnode is not hashed into a hash queue, then it is destroyed
 * because it represents no valuable information that can be reused
 * about the file.  The exclusive lock to the hash queue must be
 * held in order to prevent a lookup in the hash queue from finding
 * the rnode and using it and assuming that the rnode is not on the
 * freelist.  The lookup in the hash queue will have the hash queue
 * locked, either exclusive or shared.
 *
 * The vnode reference count for each rnode is not allowed to drop
 * below 1.  This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * rnode is looked up in the hash queues, then either the rnode
 * is removed from the freelist and that reference is transferred to
 * the new reference or the vnode reference count must be incremented
 * accordingly.  The mutex for the freelist must be held in order to
 * accurately test to see if the rnode is on the freelist or not.
 * The hash queue lock might be held shared and it is possible that
 * two different threads may race to remove the rnode from the
 * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist.  It can not be
 * placed on the freelist due to the requirement that the thread
 * putting the rnode on the freelist must hold the exclusive lock
 * to the hash queue and the thread doing the lookup in the hash
 * queue is holding either a shared or exclusive lock to the hash
 * queue.
 *
 * The lock ordering is:
 *
 *	hash bucket lock -> vnode lock
 *	hash bucket lock -> freelist lock -> r_statelock
 */
1307c478bd9Sstevel@tonic-gate r4hashq_t *rtable4;
1317c478bd9Sstevel@tonic-gate 
1327c478bd9Sstevel@tonic-gate static kmutex_t rp4freelist_lock;
1337c478bd9Sstevel@tonic-gate static rnode4_t *rp4freelist = NULL;
1347c478bd9Sstevel@tonic-gate static long rnode4_new = 0;
1357c478bd9Sstevel@tonic-gate int rtable4size;
1367c478bd9Sstevel@tonic-gate static int rtable4mask;
1377c478bd9Sstevel@tonic-gate static struct kmem_cache *rnode4_cache;
1387c478bd9Sstevel@tonic-gate static int rnode4_hashlen = 4;
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate static void	r4inactive(rnode4_t *, cred_t *);
1417c478bd9Sstevel@tonic-gate static vnode_t	*make_rnode4(nfs4_sharedfh_t *, r4hashq_t *, struct vfs *,
1427c478bd9Sstevel@tonic-gate 		    struct vnodeops *,
1437c478bd9Sstevel@tonic-gate 		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
1447c478bd9Sstevel@tonic-gate 		    cred_t *),
1457c478bd9Sstevel@tonic-gate 		    int *, cred_t *);
1467c478bd9Sstevel@tonic-gate static void	rp4_rmfree(rnode4_t *);
1477c478bd9Sstevel@tonic-gate int		nfs4_free_data_reclaim(rnode4_t *);
1487c478bd9Sstevel@tonic-gate static int	nfs4_active_data_reclaim(rnode4_t *);
1497c478bd9Sstevel@tonic-gate static int	nfs4_free_reclaim(void);
1507c478bd9Sstevel@tonic-gate static int	nfs4_active_reclaim(void);
1517c478bd9Sstevel@tonic-gate static int	nfs4_rnode_reclaim(void);
1527c478bd9Sstevel@tonic-gate static void	nfs4_reclaim(void *);
1537c478bd9Sstevel@tonic-gate static int	isrootfh(nfs4_sharedfh_t *, rnode4_t *);
1547c478bd9Sstevel@tonic-gate static void	uninit_rnode4(rnode4_t *);
1557c478bd9Sstevel@tonic-gate static void	destroy_rnode4(rnode4_t *);
156b9238976Sth static void	r4_stub_set(rnode4_t *, nfs4_stub_type_t);
1577c478bd9Sstevel@tonic-gate 
1587c478bd9Sstevel@tonic-gate #ifdef DEBUG
1597c478bd9Sstevel@tonic-gate static int r4_check_for_dups = 0; /* Flag to enable dup rnode detection. */
1607c478bd9Sstevel@tonic-gate static int nfs4_rnode_debug = 0;
1617c478bd9Sstevel@tonic-gate /* if nonzero, kmem_cache_free() rnodes rather than place on freelist */
1627c478bd9Sstevel@tonic-gate static int nfs4_rnode_nofreelist = 0;
1637c478bd9Sstevel@tonic-gate /* give messages on colliding shared filehandles */
1647c478bd9Sstevel@tonic-gate static void	r4_dup_check(rnode4_t *, vfs_t *);
1657c478bd9Sstevel@tonic-gate #endif
1667c478bd9Sstevel@tonic-gate 
1677c478bd9Sstevel@tonic-gate /*
1689f9e2373Sjwahlig  * If the vnode has pages, run the list and check for any that are
1699f9e2373Sjwahlig  * still dangling.  We call this routine before putting an rnode on
1709f9e2373Sjwahlig  * the free list.
1719f9e2373Sjwahlig  */
1729f9e2373Sjwahlig static int
nfs4_dross_pages(vnode_t * vp)1739f9e2373Sjwahlig nfs4_dross_pages(vnode_t *vp)
1749f9e2373Sjwahlig {
1759f9e2373Sjwahlig 	page_t *pp;
1769f9e2373Sjwahlig 	kmutex_t *vphm;
1779f9e2373Sjwahlig 
1789f9e2373Sjwahlig 	vphm = page_vnode_mutex(vp);
1799f9e2373Sjwahlig 	mutex_enter(vphm);
1809f9e2373Sjwahlig 	if ((pp = vp->v_pages) != NULL) {
1819f9e2373Sjwahlig 		do {
182f8bbc571SPavel Filipensky 			if (pp->p_hash != PVN_VPLIST_HASH_TAG &&
183f8bbc571SPavel Filipensky 			    pp->p_fsdata != C_NOCOMMIT) {
1849f9e2373Sjwahlig 				mutex_exit(vphm);
1859f9e2373Sjwahlig 				return (1);
1869f9e2373Sjwahlig 			}
1879f9e2373Sjwahlig 		} while ((pp = pp->p_vpnext) != vp->v_pages);
1889f9e2373Sjwahlig 	}
1899f9e2373Sjwahlig 	mutex_exit(vphm);
1909f9e2373Sjwahlig 
1919f9e2373Sjwahlig 	return (0);
1929f9e2373Sjwahlig }
1939f9e2373Sjwahlig 
1949f9e2373Sjwahlig /*
1959f9e2373Sjwahlig  * Flush any pages left on this rnode.
1967c478bd9Sstevel@tonic-gate  */
1977c478bd9Sstevel@tonic-gate static void
r4flushpages(rnode4_t * rp,cred_t * cr)1989f9e2373Sjwahlig r4flushpages(rnode4_t *rp, cred_t *cr)
1997c478bd9Sstevel@tonic-gate {
2007c478bd9Sstevel@tonic-gate 	vnode_t *vp;
2017c478bd9Sstevel@tonic-gate 	int error;
2027c478bd9Sstevel@tonic-gate 
2037c478bd9Sstevel@tonic-gate 	/*
2047c478bd9Sstevel@tonic-gate 	 * Before freeing anything, wait until all asynchronous
2057c478bd9Sstevel@tonic-gate 	 * activity is done on this rnode.  This will allow all
2067c478bd9Sstevel@tonic-gate 	 * asynchronous read ahead and write behind i/o's to
2077c478bd9Sstevel@tonic-gate 	 * finish.
2087c478bd9Sstevel@tonic-gate 	 */
2097c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2107c478bd9Sstevel@tonic-gate 	while (rp->r_count > 0)
2117c478bd9Sstevel@tonic-gate 		cv_wait(&rp->r_cv, &rp->r_statelock);
2127c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2137c478bd9Sstevel@tonic-gate 
2147c478bd9Sstevel@tonic-gate 	/*
2157c478bd9Sstevel@tonic-gate 	 * Flush and invalidate all pages associated with the vnode.
2167c478bd9Sstevel@tonic-gate 	 */
2177c478bd9Sstevel@tonic-gate 	vp = RTOV4(rp);
2187c478bd9Sstevel@tonic-gate 	if (nfs4_has_pages(vp)) {
2197c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type != VCHR);
2207c478bd9Sstevel@tonic-gate 		if ((rp->r_flags & R4DIRTY) && !rp->r_error) {
221da6c28aaSamw 			error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
2227c478bd9Sstevel@tonic-gate 			if (error && (error == ENOSPC || error == EDQUOT)) {
2237c478bd9Sstevel@tonic-gate 				mutex_enter(&rp->r_statelock);
2247c478bd9Sstevel@tonic-gate 				if (!rp->r_error)
2257c478bd9Sstevel@tonic-gate 					rp->r_error = error;
2267c478bd9Sstevel@tonic-gate 				mutex_exit(&rp->r_statelock);
2277c478bd9Sstevel@tonic-gate 			}
2287c478bd9Sstevel@tonic-gate 		}
2297c478bd9Sstevel@tonic-gate 		nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
2307c478bd9Sstevel@tonic-gate 	}
2319f9e2373Sjwahlig }
2329f9e2373Sjwahlig 
2339f9e2373Sjwahlig /*
2349f9e2373Sjwahlig  * Free the resources associated with an rnode.
2359f9e2373Sjwahlig  */
2369f9e2373Sjwahlig static void
r4inactive(rnode4_t * rp,cred_t * cr)2379f9e2373Sjwahlig r4inactive(rnode4_t *rp, cred_t *cr)
2389f9e2373Sjwahlig {
2399f9e2373Sjwahlig 	vnode_t *vp;
2409f9e2373Sjwahlig 	char *contents;
2419f9e2373Sjwahlig 	int size;
2429f9e2373Sjwahlig 	vsecattr_t *vsp;
2439f9e2373Sjwahlig 	vnode_t *xattr;
2449f9e2373Sjwahlig 
2459f9e2373Sjwahlig 	r4flushpages(rp, cr);
2469f9e2373Sjwahlig 
2479f9e2373Sjwahlig 	vp = RTOV4(rp);
2487c478bd9Sstevel@tonic-gate 
2497c478bd9Sstevel@tonic-gate 	/*
2507c478bd9Sstevel@tonic-gate 	 * Free any held caches which may be
2517c478bd9Sstevel@tonic-gate 	 * associated with this rnode.
2527c478bd9Sstevel@tonic-gate 	 */
2537c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
2547c478bd9Sstevel@tonic-gate 	contents = rp->r_symlink.contents;
2557c478bd9Sstevel@tonic-gate 	size = rp->r_symlink.size;
2567c478bd9Sstevel@tonic-gate 	rp->r_symlink.contents = NULL;
2577c478bd9Sstevel@tonic-gate 	vsp = rp->r_secattr;
2587c478bd9Sstevel@tonic-gate 	rp->r_secattr = NULL;
2597c478bd9Sstevel@tonic-gate 	xattr = rp->r_xattr_dir;
2607c478bd9Sstevel@tonic-gate 	rp->r_xattr_dir = NULL;
2617c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
2627c478bd9Sstevel@tonic-gate 
2637c478bd9Sstevel@tonic-gate 	/*
2647c478bd9Sstevel@tonic-gate 	 * Free the access cache entries.
2657c478bd9Sstevel@tonic-gate 	 */
2667c478bd9Sstevel@tonic-gate 	(void) nfs4_access_purge_rp(rp);
2677c478bd9Sstevel@tonic-gate 
2687c478bd9Sstevel@tonic-gate 	/*
2697c478bd9Sstevel@tonic-gate 	 * Free the readdir cache entries.
2707c478bd9Sstevel@tonic-gate 	 */
2717c478bd9Sstevel@tonic-gate 	nfs4_purge_rddir_cache(vp);
2727c478bd9Sstevel@tonic-gate 
2737c478bd9Sstevel@tonic-gate 	/*
2747c478bd9Sstevel@tonic-gate 	 * Free the symbolic link cache.
2757c478bd9Sstevel@tonic-gate 	 */
2767c478bd9Sstevel@tonic-gate 	if (contents != NULL) {
2777c478bd9Sstevel@tonic-gate 
2787c478bd9Sstevel@tonic-gate 		kmem_free((void *)contents, size);
2797c478bd9Sstevel@tonic-gate 	}
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate 	/*
2827c478bd9Sstevel@tonic-gate 	 * Free any cached ACL.
2837c478bd9Sstevel@tonic-gate 	 */
2847c478bd9Sstevel@tonic-gate 	if (vsp != NULL)
2857c478bd9Sstevel@tonic-gate 		nfs4_acl_free_cache(vsp);
2867c478bd9Sstevel@tonic-gate 
2877c478bd9Sstevel@tonic-gate 	/*
2887c478bd9Sstevel@tonic-gate 	 * Release the cached xattr_dir
2897c478bd9Sstevel@tonic-gate 	 */
2907c478bd9Sstevel@tonic-gate 	if (xattr != NULL)
2917c478bd9Sstevel@tonic-gate 		VN_RELE(xattr);
2927c478bd9Sstevel@tonic-gate }
2937c478bd9Sstevel@tonic-gate 
2947c478bd9Sstevel@tonic-gate /*
2957c478bd9Sstevel@tonic-gate  * We have seen a case that the fh passed in is for "." which
2967c478bd9Sstevel@tonic-gate  * should be a VROOT node, however, the fh is different from the
2977c478bd9Sstevel@tonic-gate  * root fh stored in the mntinfo4_t. The invalid fh might be
2987c478bd9Sstevel@tonic-gate  * from a misbehaved server and will panic the client system at
2997c478bd9Sstevel@tonic-gate  * a later time. To avoid the panic, we drop the bad fh, use
3007c478bd9Sstevel@tonic-gate  * the root fh from mntinfo4_t, and print an error message
3017c478bd9Sstevel@tonic-gate  * for attention.
3027c478bd9Sstevel@tonic-gate  */
3037c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *
badrootfh_check(nfs4_sharedfh_t * fh,nfs4_fname_t * nm,mntinfo4_t * mi,int * wasbad)3047c478bd9Sstevel@tonic-gate badrootfh_check(nfs4_sharedfh_t *fh, nfs4_fname_t *nm, mntinfo4_t *mi,
3057c478bd9Sstevel@tonic-gate     int *wasbad)
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate 	char *s;
3087c478bd9Sstevel@tonic-gate 
3097c478bd9Sstevel@tonic-gate 	*wasbad = 0;
3107c478bd9Sstevel@tonic-gate 	s = fn_name(nm);
3117c478bd9Sstevel@tonic-gate 	ASSERT(strcmp(s, "..") != 0);
3127c478bd9Sstevel@tonic-gate 
3137c478bd9Sstevel@tonic-gate 	if ((s[0] == '.' && s[1] == '\0') && fh &&
314b9238976Sth 	    !SFH4_SAME(mi->mi_rootfh, fh)) {
3157c478bd9Sstevel@tonic-gate #ifdef DEBUG
3167c478bd9Sstevel@tonic-gate 		nfs4_fhandle_t fhandle;
3177c478bd9Sstevel@tonic-gate 
3187c478bd9Sstevel@tonic-gate 		zcmn_err(mi->mi_zone->zone_id, CE_WARN,
3197c478bd9Sstevel@tonic-gate 		    "Server %s returns a different "
3207c478bd9Sstevel@tonic-gate 		    "root filehandle for the path %s:",
3217c478bd9Sstevel@tonic-gate 		    mi->mi_curr_serv->sv_hostname,
3227c478bd9Sstevel@tonic-gate 		    mi->mi_curr_serv->sv_path);
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 		/* print the bad fh */
3257c478bd9Sstevel@tonic-gate 		fhandle.fh_len = fh->sfh_fh.nfs_fh4_len;
3267c478bd9Sstevel@tonic-gate 		bcopy(fh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
327b9238976Sth 		    fhandle.fh_len);
3287c478bd9Sstevel@tonic-gate 		nfs4_printfhandle(&fhandle);
3297c478bd9Sstevel@tonic-gate 
3307c478bd9Sstevel@tonic-gate 		/* print mi_rootfh */
3317c478bd9Sstevel@tonic-gate 		fhandle.fh_len = mi->mi_rootfh->sfh_fh.nfs_fh4_len;
3327c478bd9Sstevel@tonic-gate 		bcopy(mi->mi_rootfh->sfh_fh.nfs_fh4_val, fhandle.fh_buf,
333b9238976Sth 		    fhandle.fh_len);
3347c478bd9Sstevel@tonic-gate 		nfs4_printfhandle(&fhandle);
3357c478bd9Sstevel@tonic-gate #endif
3367c478bd9Sstevel@tonic-gate 		/* use mi_rootfh instead; fh will be rele by the caller */
3377c478bd9Sstevel@tonic-gate 		fh = mi->mi_rootfh;
3387c478bd9Sstevel@tonic-gate 		*wasbad = 1;
3397c478bd9Sstevel@tonic-gate 	}
3407c478bd9Sstevel@tonic-gate 
3417c478bd9Sstevel@tonic-gate 	kmem_free(s, MAXNAMELEN);
3427c478bd9Sstevel@tonic-gate 	return (fh);
3437c478bd9Sstevel@tonic-gate }
3447c478bd9Sstevel@tonic-gate 
3457c478bd9Sstevel@tonic-gate void
r4_do_attrcache(vnode_t * vp,nfs4_ga_res_t * garp,int newnode,hrtime_t t,cred_t * cr,int index)3467c478bd9Sstevel@tonic-gate r4_do_attrcache(vnode_t *vp, nfs4_ga_res_t *garp, int newnode,
3477c478bd9Sstevel@tonic-gate     hrtime_t t, cred_t *cr, int index)
3487c478bd9Sstevel@tonic-gate {
349b9238976Sth 	int is_stub;
3507c478bd9Sstevel@tonic-gate 	vattr_t *attr;
3517c478bd9Sstevel@tonic-gate 	/*
3527c478bd9Sstevel@tonic-gate 	 * Don't add to attrcache if time overflow, but
3537c478bd9Sstevel@tonic-gate 	 * no need to check because either attr is null or the time
3547c478bd9Sstevel@tonic-gate 	 * values in it were processed by nfs4_time_ntov(), which checks
3557c478bd9Sstevel@tonic-gate 	 * for time overflows.
3567c478bd9Sstevel@tonic-gate 	 */
3577c478bd9Sstevel@tonic-gate 	attr = garp ? &garp->n4g_va : NULL;
3587c478bd9Sstevel@tonic-gate 
3597c478bd9Sstevel@tonic-gate 	if (attr) {
3607c478bd9Sstevel@tonic-gate 		if (!newnode) {
3617c478bd9Sstevel@tonic-gate 			rw_exit(&rtable4[index].r_lock);
3627c478bd9Sstevel@tonic-gate #ifdef DEBUG
3637c478bd9Sstevel@tonic-gate 			if (vp->v_type != attr->va_type &&
3647c478bd9Sstevel@tonic-gate 			    vp->v_type != VNON && attr->va_type != VNON) {
3657c478bd9Sstevel@tonic-gate 				zcmn_err(VTOMI4(vp)->mi_zone->zone_id, CE_WARN,
366b9238976Sth 				    "makenfs4node: type (%d) doesn't "
367b9238976Sth 				    "match type of found node at %p (%d)",
368b9238976Sth 				    attr->va_type, (void *)vp, vp->v_type);
3697c478bd9Sstevel@tonic-gate 			}
3707c478bd9Sstevel@tonic-gate #endif
3717c478bd9Sstevel@tonic-gate 			nfs4_attr_cache(vp, garp, t, cr, TRUE, NULL);
3727c478bd9Sstevel@tonic-gate 		} else {
3737c478bd9Sstevel@tonic-gate 			rnode4_t *rp = VTOR4(vp);
3747c478bd9Sstevel@tonic-gate 
3757c478bd9Sstevel@tonic-gate 			vp->v_type = attr->va_type;
3767c478bd9Sstevel@tonic-gate 			vp->v_rdev = attr->va_rdev;
3777c478bd9Sstevel@tonic-gate 
3787c478bd9Sstevel@tonic-gate 			/*
3797c478bd9Sstevel@tonic-gate 			 * Turn this object into a "stub" object if we
380b9238976Sth 			 * crossed an underlying server fs boundary.
381b9238976Sth 			 * To make this check, during mount we save the
3827c478bd9Sstevel@tonic-gate 			 * fsid of the server object being mounted.
3837c478bd9Sstevel@tonic-gate 			 * Here we compare this object's server fsid
3847c478bd9Sstevel@tonic-gate 			 * with the fsid we saved at mount.  If they
3857c478bd9Sstevel@tonic-gate 			 * are different, we crossed server fs boundary.
3867c478bd9Sstevel@tonic-gate 			 *
387b9238976Sth 			 * The stub type is set (or not) at rnode
3887c478bd9Sstevel@tonic-gate 			 * creation time and it never changes for life
389b9238976Sth 			 * of the rnode.
3907c478bd9Sstevel@tonic-gate 			 *
3912f172c55SRobert Thurlow 			 * This stub will be for a mirror-mount, rather than
3922f172c55SRobert Thurlow 			 * a referral (the latter also sets R4SRVSTUB).
3932f172c55SRobert Thurlow 			 *
394b9238976Sth 			 * The stub type is also set during RO failover,
395b9238976Sth 			 * nfs4_remap_file().
396b9238976Sth 			 *
397b9238976Sth 			 * We don't bother with taking r_state_lock to
398b9238976Sth 			 * set the stub type because this is a new rnode
399b9238976Sth 			 * and we're holding the hash bucket r_lock RW_WRITER.
400b9238976Sth 			 * No other thread could have obtained access
401b9238976Sth 			 * to this rnode.
4027c478bd9Sstevel@tonic-gate 			 */
403b9238976Sth 			is_stub = 0;
4047c478bd9Sstevel@tonic-gate 			if (garp->n4g_fsid_valid) {
405b9238976Sth 				fattr4_fsid ga_fsid = garp->n4g_fsid;
406b9238976Sth 				servinfo4_t *svp = rp->r_server;
4077c478bd9Sstevel@tonic-gate 
408b9238976Sth 				rp->r_srv_fsid = ga_fsid;
4097c478bd9Sstevel@tonic-gate 
410b9238976Sth 				(void) nfs_rw_enter_sig(&svp->sv_lock,
411b9238976Sth 				    RW_READER, 0);
412b9238976Sth 				if (!FATTR4_FSID_EQ(&ga_fsid, &svp->sv_fsid))
413b9238976Sth 					is_stub = 1;
414b9238976Sth 				nfs_rw_exit(&svp->sv_lock);
4157c478bd9Sstevel@tonic-gate 			}
4167c478bd9Sstevel@tonic-gate 
417b9238976Sth 			if (is_stub)
418b9238976Sth 				r4_stub_mirrormount(rp);
419b9238976Sth 			else
420b9238976Sth 				r4_stub_none(rp);
421b9238976Sth 
4227c478bd9Sstevel@tonic-gate 			/* Can not cache partial attr */
4237c478bd9Sstevel@tonic-gate 			if (attr->va_mask == AT_ALL)
4247c478bd9Sstevel@tonic-gate 				nfs4_attrcache_noinval(vp, garp, t);
4257c478bd9Sstevel@tonic-gate 			else
4267c478bd9Sstevel@tonic-gate 				PURGE_ATTRCACHE4(vp);
4277c478bd9Sstevel@tonic-gate 
4287c478bd9Sstevel@tonic-gate 			rw_exit(&rtable4[index].r_lock);
4297c478bd9Sstevel@tonic-gate 		}
4307c478bd9Sstevel@tonic-gate 	} else {
4317c478bd9Sstevel@tonic-gate 		if (newnode) {
4327c478bd9Sstevel@tonic-gate 			PURGE_ATTRCACHE4(vp);
4337c478bd9Sstevel@tonic-gate 		}
4347c478bd9Sstevel@tonic-gate 		rw_exit(&rtable4[index].r_lock);
4357c478bd9Sstevel@tonic-gate 	}
4367c478bd9Sstevel@tonic-gate }
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate /*
4397c478bd9Sstevel@tonic-gate  * Find or create an rnode based primarily on filehandle.  To be
4407c478bd9Sstevel@tonic-gate  * used when dvp (vnode for parent directory) is not available;
4417c478bd9Sstevel@tonic-gate  * otherwise, makenfs4node() should be used.
4427c478bd9Sstevel@tonic-gate  *
4437c478bd9Sstevel@tonic-gate  * The nfs4_fname_t argument *npp is consumed and nulled out.
4447c478bd9Sstevel@tonic-gate  */
4457c478bd9Sstevel@tonic-gate 
4467c478bd9Sstevel@tonic-gate vnode_t *
makenfs4node_by_fh(nfs4_sharedfh_t * sfh,nfs4_sharedfh_t * psfh,nfs4_fname_t ** npp,nfs4_ga_res_t * garp,mntinfo4_t * mi,cred_t * cr,hrtime_t t)4477c478bd9Sstevel@tonic-gate makenfs4node_by_fh(nfs4_sharedfh_t *sfh, nfs4_sharedfh_t *psfh,
448b9238976Sth     nfs4_fname_t **npp, nfs4_ga_res_t *garp,
449b9238976Sth     mntinfo4_t *mi, cred_t *cr, hrtime_t t)
4507c478bd9Sstevel@tonic-gate {
4517c478bd9Sstevel@tonic-gate 	vfs_t *vfsp = mi->mi_vfsp;
4527c478bd9Sstevel@tonic-gate 	int newnode = 0;
4537c478bd9Sstevel@tonic-gate 	vnode_t *vp;
4547c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
4557c478bd9Sstevel@tonic-gate 	svnode_t *svp;
456bbf2a467SNagakiran Rajashekar 	nfs4_fname_t *name, *svpname;
4577c478bd9Sstevel@tonic-gate 	int index;
4587c478bd9Sstevel@tonic-gate 
4597c478bd9Sstevel@tonic-gate 	ASSERT(npp && *npp);
4607c478bd9Sstevel@tonic-gate 	name = *npp;
4617c478bd9Sstevel@tonic-gate 	*npp = NULL;
4627c478bd9Sstevel@tonic-gate 
4637c478bd9Sstevel@tonic-gate 	index = rtable4hash(sfh);
4647c478bd9Sstevel@tonic-gate 	rw_enter(&rtable4[index].r_lock, RW_READER);
4657c478bd9Sstevel@tonic-gate 
4667c478bd9Sstevel@tonic-gate 	vp = make_rnode4(sfh, &rtable4[index], vfsp,
4677c478bd9Sstevel@tonic-gate 	    nfs4_vnodeops, nfs4_putapage, &newnode, cr);
468bbf2a467SNagakiran Rajashekar 
469bbf2a467SNagakiran Rajashekar 	svp = VTOSV(vp);
470bbf2a467SNagakiran Rajashekar 	rp = VTOR4(vp);
4717c478bd9Sstevel@tonic-gate 	if (newnode) {
4727c478bd9Sstevel@tonic-gate 		svp->sv_forw = svp->sv_back = svp;
4737c478bd9Sstevel@tonic-gate 		svp->sv_name = name;
4747c478bd9Sstevel@tonic-gate 		if (psfh != NULL)
4757c478bd9Sstevel@tonic-gate 			sfh4_hold(psfh);
4767c478bd9Sstevel@tonic-gate 		svp->sv_dfh = psfh;
4774151f947SPavel Filipensky 	} else {
478bbf2a467SNagakiran Rajashekar 		/*
479bbf2a467SNagakiran Rajashekar 		 * It is possible that due to a server
480bbf2a467SNagakiran Rajashekar 		 * side rename fnames have changed.
481bbf2a467SNagakiran Rajashekar 		 * update the fname here.
482bbf2a467SNagakiran Rajashekar 		 */
483bbf2a467SNagakiran Rajashekar 		mutex_enter(&rp->r_svlock);
484bbf2a467SNagakiran Rajashekar 		svpname = svp->sv_name;
485bbf2a467SNagakiran Rajashekar 		if (svp->sv_name != name) {
486bbf2a467SNagakiran Rajashekar 			svp->sv_name = name;
487bbf2a467SNagakiran Rajashekar 			mutex_exit(&rp->r_svlock);
488bbf2a467SNagakiran Rajashekar 			fn_rele(&svpname);
489bbf2a467SNagakiran Rajashekar 		} else {
490bbf2a467SNagakiran Rajashekar 			mutex_exit(&rp->r_svlock);
491bbf2a467SNagakiran Rajashekar 			fn_rele(&name);
492bbf2a467SNagakiran Rajashekar 		}
4937c478bd9Sstevel@tonic-gate 	}
4947c478bd9Sstevel@tonic-gate 
4957c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
4967c478bd9Sstevel@tonic-gate 	r4_do_attrcache(vp, garp, newnode, t, cr, index);
4977c478bd9Sstevel@tonic-gate 	ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
4987c478bd9Sstevel@tonic-gate 
4997c478bd9Sstevel@tonic-gate 	return (vp);
5007c478bd9Sstevel@tonic-gate }
5017c478bd9Sstevel@tonic-gate 
5027c478bd9Sstevel@tonic-gate /*
5037c478bd9Sstevel@tonic-gate  * Find or create a vnode for the given filehandle, filesystem, parent, and
5047c478bd9Sstevel@tonic-gate  * name.  The reference to nm is consumed, so the caller must first do an
5057c478bd9Sstevel@tonic-gate  * fn_hold() if it wants to continue using nm after this call.
5067c478bd9Sstevel@tonic-gate  */
5077c478bd9Sstevel@tonic-gate vnode_t *
makenfs4node(nfs4_sharedfh_t * fh,nfs4_ga_res_t * garp,struct vfs * vfsp,hrtime_t t,cred_t * cr,vnode_t * dvp,nfs4_fname_t * nm)5087c478bd9Sstevel@tonic-gate makenfs4node(nfs4_sharedfh_t *fh, nfs4_ga_res_t *garp, struct vfs *vfsp,
509b9238976Sth     hrtime_t t, cred_t *cr, vnode_t *dvp, nfs4_fname_t *nm)
5107c478bd9Sstevel@tonic-gate {
5117c478bd9Sstevel@tonic-gate 	vnode_t *vp;
5127c478bd9Sstevel@tonic-gate 	int newnode;
5137c478bd9Sstevel@tonic-gate 	int index;
5147c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi = VFTOMI4(vfsp);
5157c478bd9Sstevel@tonic-gate 	int had_badfh = 0;
5167c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
5177c478bd9Sstevel@tonic-gate 
5187c478bd9Sstevel@tonic-gate 	ASSERT(dvp != NULL);
5197c478bd9Sstevel@tonic-gate 
5207c478bd9Sstevel@tonic-gate 	fh = badrootfh_check(fh, nm, mi, &had_badfh);
5217c478bd9Sstevel@tonic-gate 
5227c478bd9Sstevel@tonic-gate 	index = rtable4hash(fh);
5237c478bd9Sstevel@tonic-gate 	rw_enter(&rtable4[index].r_lock, RW_READER);
5247c478bd9Sstevel@tonic-gate 
5257c478bd9Sstevel@tonic-gate 	/*
5267c478bd9Sstevel@tonic-gate 	 * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
5277c478bd9Sstevel@tonic-gate 	 */
5287c478bd9Sstevel@tonic-gate 	vp = make_rnode4(fh, &rtable4[index], vfsp, nfs4_vnodeops,
5297c478bd9Sstevel@tonic-gate 	    nfs4_putapage, &newnode, cr);
5307c478bd9Sstevel@tonic-gate 
5317c478bd9Sstevel@tonic-gate 	rp = VTOR4(vp);
5327c478bd9Sstevel@tonic-gate 	sv_activate(&vp, dvp, &nm, newnode);
5337c478bd9Sstevel@tonic-gate 	if (dvp->v_flag & V_XATTRDIR) {
5347c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
5357c478bd9Sstevel@tonic-gate 		rp->r_flags |= R4ISXATTR;
5367c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
5377c478bd9Sstevel@tonic-gate 	}
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 	/* if getting a bad file handle, do not cache the attributes. */
5407c478bd9Sstevel@tonic-gate 	if (had_badfh) {
5417c478bd9Sstevel@tonic-gate 		rw_exit(&rtable4[index].r_lock);
5427c478bd9Sstevel@tonic-gate 		return (vp);
5437c478bd9Sstevel@tonic-gate 	}
5447c478bd9Sstevel@tonic-gate 
5457c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&rtable4[index].r_lock));
5467c478bd9Sstevel@tonic-gate 	r4_do_attrcache(vp, garp, newnode, t, cr, index);
5477c478bd9Sstevel@tonic-gate 	ASSERT(rw_owner(&rtable4[index].r_lock) != curthread);
5487c478bd9Sstevel@tonic-gate 
5497c478bd9Sstevel@tonic-gate 	return (vp);
5507c478bd9Sstevel@tonic-gate }
5517c478bd9Sstevel@tonic-gate 
5527c478bd9Sstevel@tonic-gate /*
5537c478bd9Sstevel@tonic-gate  * Hash on address of filehandle object.
5547c478bd9Sstevel@tonic-gate  * XXX totally untuned.
5557c478bd9Sstevel@tonic-gate  */
5567c478bd9Sstevel@tonic-gate 
5577c478bd9Sstevel@tonic-gate int
rtable4hash(nfs4_sharedfh_t * fh)5587c478bd9Sstevel@tonic-gate rtable4hash(nfs4_sharedfh_t *fh)
5597c478bd9Sstevel@tonic-gate {
5607c478bd9Sstevel@tonic-gate 	return (((uintptr_t)fh / sizeof (*fh)) & rtable4mask);
5617c478bd9Sstevel@tonic-gate }
5627c478bd9Sstevel@tonic-gate 
5637c478bd9Sstevel@tonic-gate /*
5647c478bd9Sstevel@tonic-gate  * Find or create the vnode for the given filehandle and filesystem.
5657c478bd9Sstevel@tonic-gate  * *newnode is set to zero if the vnode already existed; non-zero if it had
5667c478bd9Sstevel@tonic-gate  * to be created.
5677c478bd9Sstevel@tonic-gate  *
5687c478bd9Sstevel@tonic-gate  * Note: make_rnode4() may upgrade the hash bucket lock to exclusive.
5697c478bd9Sstevel@tonic-gate  */
5707c478bd9Sstevel@tonic-gate 
5717c478bd9Sstevel@tonic-gate static vnode_t *
make_rnode4(nfs4_sharedfh_t * fh,r4hashq_t * rhtp,struct vfs * vfsp,struct vnodeops * vops,int (* putapage)(vnode_t *,page_t *,u_offset_t *,size_t *,int,cred_t *),int * newnode,cred_t * cr)5727c478bd9Sstevel@tonic-gate make_rnode4(nfs4_sharedfh_t *fh, r4hashq_t *rhtp, struct vfs *vfsp,
5737c478bd9Sstevel@tonic-gate     struct vnodeops *vops,
5747c478bd9Sstevel@tonic-gate     int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
5757c478bd9Sstevel@tonic-gate     int *newnode, cred_t *cr)
5767c478bd9Sstevel@tonic-gate {
5777c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
5787c478bd9Sstevel@tonic-gate 	rnode4_t *trp;
5797c478bd9Sstevel@tonic-gate 	vnode_t *vp;
5807c478bd9Sstevel@tonic-gate 	mntinfo4_t *mi;
5817c478bd9Sstevel@tonic-gate 
5827c478bd9Sstevel@tonic-gate 	ASSERT(RW_READ_HELD(&rhtp->r_lock));
5837c478bd9Sstevel@tonic-gate 
5847c478bd9Sstevel@tonic-gate 	mi = VFTOMI4(vfsp);
5857c478bd9Sstevel@tonic-gate 
5867c478bd9Sstevel@tonic-gate start:
5877c478bd9Sstevel@tonic-gate 	if ((rp = r4find(rhtp, fh, vfsp)) != NULL) {
5887c478bd9Sstevel@tonic-gate 		vp = RTOV4(rp);
5897c478bd9Sstevel@tonic-gate 		*newnode = 0;
5907c478bd9Sstevel@tonic-gate 		return (vp);
5917c478bd9Sstevel@tonic-gate 	}
5927c478bd9Sstevel@tonic-gate 	rw_exit(&rhtp->r_lock);
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate 	mutex_enter(&rp4freelist_lock);
5957c478bd9Sstevel@tonic-gate 
5967c478bd9Sstevel@tonic-gate 	if (rp4freelist != NULL && rnode4_new >= nrnode) {
5977c478bd9Sstevel@tonic-gate 		rp = rp4freelist;
5987c478bd9Sstevel@tonic-gate 		rp4_rmfree(rp);
5997c478bd9Sstevel@tonic-gate 		mutex_exit(&rp4freelist_lock);
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate 		vp = RTOV4(rp);
6027c478bd9Sstevel@tonic-gate 
6037c478bd9Sstevel@tonic-gate 		if (rp->r_flags & R4HASHED) {
6047c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
6057c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
6067c478bd9Sstevel@tonic-gate 			if (vp->v_count > 1) {
607ade42b55SSebastien Roy 				VN_RELE_LOCKED(vp);
6087c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
6097c478bd9Sstevel@tonic-gate 				rw_exit(&rp->r_hashq->r_lock);
6107c478bd9Sstevel@tonic-gate 				rw_enter(&rhtp->r_lock, RW_READER);
6117c478bd9Sstevel@tonic-gate 				goto start;
6127c478bd9Sstevel@tonic-gate 			}
6137c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
6147c478bd9Sstevel@tonic-gate 			rp4_rmhash_locked(rp);
6157c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
6167c478bd9Sstevel@tonic-gate 		}
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate 		r4inactive(rp, cr);
6197c478bd9Sstevel@tonic-gate 
6207c478bd9Sstevel@tonic-gate 		mutex_enter(&vp->v_lock);
6217c478bd9Sstevel@tonic-gate 		if (vp->v_count > 1) {
622ade42b55SSebastien Roy 			VN_RELE_LOCKED(vp);
6237c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
6247c478bd9Sstevel@tonic-gate 			rw_enter(&rhtp->r_lock, RW_READER);
6257c478bd9Sstevel@tonic-gate 			goto start;
6267c478bd9Sstevel@tonic-gate 		}
6277c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
6287c478bd9Sstevel@tonic-gate 		vn_invalid(vp);
6297c478bd9Sstevel@tonic-gate 
6307c478bd9Sstevel@tonic-gate 		/*
6317c478bd9Sstevel@tonic-gate 		 * destroy old locks before bzero'ing and
6327c478bd9Sstevel@tonic-gate 		 * recreating the locks below.
6337c478bd9Sstevel@tonic-gate 		 */
6347c478bd9Sstevel@tonic-gate 		uninit_rnode4(rp);
6357c478bd9Sstevel@tonic-gate 
6367c478bd9Sstevel@tonic-gate 		/*
6377c478bd9Sstevel@tonic-gate 		 * Make sure that if rnode is recycled then
6387c478bd9Sstevel@tonic-gate 		 * VFS count is decremented properly before
6397c478bd9Sstevel@tonic-gate 		 * reuse.
6407c478bd9Sstevel@tonic-gate 		 */
6417c478bd9Sstevel@tonic-gate 		VFS_RELE(vp->v_vfsp);
6427c478bd9Sstevel@tonic-gate 		vn_reinit(vp);
6437c478bd9Sstevel@tonic-gate 	} else {
6447c478bd9Sstevel@tonic-gate 		vnode_t *new_vp;
6457c478bd9Sstevel@tonic-gate 
6467c478bd9Sstevel@tonic-gate 		mutex_exit(&rp4freelist_lock);
6477c478bd9Sstevel@tonic-gate 
6487c478bd9Sstevel@tonic-gate 		rp = kmem_cache_alloc(rnode4_cache, KM_SLEEP);
6497c478bd9Sstevel@tonic-gate 		new_vp = vn_alloc(KM_SLEEP);
6507c478bd9Sstevel@tonic-gate 
6511a5e258fSJosef 'Jeff' Sipek 		atomic_inc_ulong((ulong_t *)&rnode4_new);
6527c478bd9Sstevel@tonic-gate #ifdef DEBUG
6537c478bd9Sstevel@tonic-gate 		clstat4_debug.nrnode.value.ui64++;
6547c478bd9Sstevel@tonic-gate #endif
6557c478bd9Sstevel@tonic-gate 		vp = new_vp;
6567c478bd9Sstevel@tonic-gate 	}
6577c478bd9Sstevel@tonic-gate 
6587c478bd9Sstevel@tonic-gate 	bzero(rp, sizeof (*rp));
6597c478bd9Sstevel@tonic-gate 	rp->r_vnode = vp;
6607c478bd9Sstevel@tonic-gate 	nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
6617c478bd9Sstevel@tonic-gate 	nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
6627c478bd9Sstevel@tonic-gate 	mutex_init(&rp->r_svlock, NULL, MUTEX_DEFAULT, NULL);
6637c478bd9Sstevel@tonic-gate 	mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
6647c478bd9Sstevel@tonic-gate 	mutex_init(&rp->r_statev4_lock, NULL, MUTEX_DEFAULT, NULL);
6657c478bd9Sstevel@tonic-gate 	mutex_init(&rp->r_os_lock, NULL, MUTEX_DEFAULT, NULL);
6667c478bd9Sstevel@tonic-gate 	rp->created_v4 = 0;
6677c478bd9Sstevel@tonic-gate 	list_create(&rp->r_open_streams, sizeof (nfs4_open_stream_t),
6687c478bd9Sstevel@tonic-gate 	    offsetof(nfs4_open_stream_t, os_node));
6697c478bd9Sstevel@tonic-gate 	rp->r_lo_head.lo_prev_rnode = &rp->r_lo_head;
6707c478bd9Sstevel@tonic-gate 	rp->r_lo_head.lo_next_rnode = &rp->r_lo_head;
6717c478bd9Sstevel@tonic-gate 	cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
6727c478bd9Sstevel@tonic-gate 	cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
6737c478bd9Sstevel@tonic-gate 	rp->r_flags = R4READDIRWATTR;
6747c478bd9Sstevel@tonic-gate 	rp->r_fh = fh;
6757c478bd9Sstevel@tonic-gate 	rp->r_hashq = rhtp;
6767c478bd9Sstevel@tonic-gate 	sfh4_hold(rp->r_fh);
6777c478bd9Sstevel@tonic-gate 	rp->r_server = mi->mi_curr_serv;
6787c478bd9Sstevel@tonic-gate 	rp->r_deleg_type = OPEN_DELEGATE_NONE;
6797c478bd9Sstevel@tonic-gate 	rp->r_deleg_needs_recovery = OPEN_DELEGATE_NONE;
6807c478bd9Sstevel@tonic-gate 	nfs_rw_init(&rp->r_deleg_recall_lock, NULL, RW_DEFAULT, NULL);
6817c478bd9Sstevel@tonic-gate 
6827c478bd9Sstevel@tonic-gate 	rddir4_cache_create(rp);
6837c478bd9Sstevel@tonic-gate 	rp->r_putapage = putapage;
6847c478bd9Sstevel@tonic-gate 	vn_setops(vp, vops);
6857c478bd9Sstevel@tonic-gate 	vp->v_data = (caddr_t)rp;
6867c478bd9Sstevel@tonic-gate 	vp->v_vfsp = vfsp;
6877c478bd9Sstevel@tonic-gate 	VFS_HOLD(vfsp);
6887c478bd9Sstevel@tonic-gate 	vp->v_type = VNON;
689f8bbc571SPavel Filipensky 	vp->v_flag |= VMODSORT;
6907c478bd9Sstevel@tonic-gate 	if (isrootfh(fh, rp))
6917c478bd9Sstevel@tonic-gate 		vp->v_flag = VROOT;
6927c478bd9Sstevel@tonic-gate 	vn_exists(vp);
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate 	/*
6957c478bd9Sstevel@tonic-gate 	 * There is a race condition if someone else
6967c478bd9Sstevel@tonic-gate 	 * alloc's the rnode while no locks are held, so we
6977c478bd9Sstevel@tonic-gate 	 * check again and recover if found.
6987c478bd9Sstevel@tonic-gate 	 */
6997c478bd9Sstevel@tonic-gate 	rw_enter(&rhtp->r_lock, RW_WRITER);
7007c478bd9Sstevel@tonic-gate 	if ((trp = r4find(rhtp, fh, vfsp)) != NULL) {
7017c478bd9Sstevel@tonic-gate 		vp = RTOV4(trp);
7027c478bd9Sstevel@tonic-gate 		*newnode = 0;
7037c478bd9Sstevel@tonic-gate 		rw_exit(&rhtp->r_lock);
7047c478bd9Sstevel@tonic-gate 		rp4_addfree(rp, cr);
7057c478bd9Sstevel@tonic-gate 		rw_enter(&rhtp->r_lock, RW_READER);
7067c478bd9Sstevel@tonic-gate 		return (vp);
7077c478bd9Sstevel@tonic-gate 	}
7087c478bd9Sstevel@tonic-gate 	rp4_addhash(rp);
7097c478bd9Sstevel@tonic-gate 	*newnode = 1;
7107c478bd9Sstevel@tonic-gate 	return (vp);
7117c478bd9Sstevel@tonic-gate }
7127c478bd9Sstevel@tonic-gate 
7137c478bd9Sstevel@tonic-gate static void
uninit_rnode4(rnode4_t * rp)7147c478bd9Sstevel@tonic-gate uninit_rnode4(rnode4_t *rp)
7157c478bd9Sstevel@tonic-gate {
7167c478bd9Sstevel@tonic-gate 	vnode_t *vp = RTOV4(rp);
7177c478bd9Sstevel@tonic-gate 
7187c478bd9Sstevel@tonic-gate 	ASSERT(rp != NULL);
7197c478bd9Sstevel@tonic-gate 	ASSERT(vp != NULL);
7207c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_count == 1);
7217c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_count == 0);
7227c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_mapcnt == 0);
7237c478bd9Sstevel@tonic-gate 	if (rp->r_flags & R4LODANGLERS) {
7247c478bd9Sstevel@tonic-gate 		nfs4_flush_lock_owners(rp);
7257c478bd9Sstevel@tonic-gate 	}
7267c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_lo_head.lo_next_rnode == &rp->r_lo_head);
7277c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_lo_head.lo_prev_rnode == &rp->r_lo_head);
7287c478bd9Sstevel@tonic-gate 	ASSERT(!(rp->r_flags & R4HASHED));
7297c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
7307c478bd9Sstevel@tonic-gate 	nfs4_clear_open_streams(rp);
7317c478bd9Sstevel@tonic-gate 	list_destroy(&rp->r_open_streams);
7327c478bd9Sstevel@tonic-gate 
7337c478bd9Sstevel@tonic-gate 	/*
7347c478bd9Sstevel@tonic-gate 	 * Destroy the rddir cache first since we need to grab the r_statelock.
7357c478bd9Sstevel@tonic-gate 	 */
7367c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
7377c478bd9Sstevel@tonic-gate 	rddir4_cache_destroy(rp);
7387c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
7397c478bd9Sstevel@tonic-gate 	sv_uninit(&rp->r_svnode);
7407c478bd9Sstevel@tonic-gate 	sfh4_rele(&rp->r_fh);
7417c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&rp->r_rwlock);
7427c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&rp->r_lkserlock);
7437c478bd9Sstevel@tonic-gate 	mutex_destroy(&rp->r_statelock);
7447c478bd9Sstevel@tonic-gate 	mutex_destroy(&rp->r_statev4_lock);
7457c478bd9Sstevel@tonic-gate 	mutex_destroy(&rp->r_os_lock);
7467c478bd9Sstevel@tonic-gate 	cv_destroy(&rp->r_cv);
7477c478bd9Sstevel@tonic-gate 	cv_destroy(&rp->r_commit.c_cv);
7487c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&rp->r_deleg_recall_lock);
7497c478bd9Sstevel@tonic-gate 	if (rp->r_flags & R4DELMAPLIST)
7507c478bd9Sstevel@tonic-gate 		list_destroy(&rp->r_indelmap);
7517c478bd9Sstevel@tonic-gate }
7527c478bd9Sstevel@tonic-gate 
7537c478bd9Sstevel@tonic-gate /*
7547c478bd9Sstevel@tonic-gate  * Put an rnode on the free list.
7557c478bd9Sstevel@tonic-gate  *
7567c478bd9Sstevel@tonic-gate  * Rnodes which were allocated above and beyond the normal limit
7577c478bd9Sstevel@tonic-gate  * are immediately freed.
7587c478bd9Sstevel@tonic-gate  */
7597c478bd9Sstevel@tonic-gate void
rp4_addfree(rnode4_t * rp,cred_t * cr)7607c478bd9Sstevel@tonic-gate rp4_addfree(rnode4_t *rp, cred_t *cr)
7617c478bd9Sstevel@tonic-gate {
7627c478bd9Sstevel@tonic-gate 	vnode_t *vp;
7637c478bd9Sstevel@tonic-gate 	vnode_t *xattr;
7647c478bd9Sstevel@tonic-gate 	struct vfs *vfsp;
7657c478bd9Sstevel@tonic-gate 
7667c478bd9Sstevel@tonic-gate 	vp = RTOV4(rp);
7677c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_count >= 1);
7687c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
7697c478bd9Sstevel@tonic-gate 
7707c478bd9Sstevel@tonic-gate 	/*
7717c478bd9Sstevel@tonic-gate 	 * If we have too many rnodes allocated and there are no
7727c478bd9Sstevel@tonic-gate 	 * references to this rnode, or if the rnode is no longer
7737c478bd9Sstevel@tonic-gate 	 * accessible by it does not reside in the hash queues,
7747c478bd9Sstevel@tonic-gate 	 * or if an i/o error occurred while writing to the file,
7757c478bd9Sstevel@tonic-gate 	 * then just free it instead of putting it on the rnode
7767c478bd9Sstevel@tonic-gate 	 * freelist.
7777c478bd9Sstevel@tonic-gate 	 */
7787c478bd9Sstevel@tonic-gate 	vfsp = vp->v_vfsp;
7797c478bd9Sstevel@tonic-gate 	if (((rnode4_new > nrnode || !(rp->r_flags & R4HASHED) ||
7807c478bd9Sstevel@tonic-gate #ifdef DEBUG
7817c478bd9Sstevel@tonic-gate 	    (nfs4_rnode_nofreelist != 0) ||
7827c478bd9Sstevel@tonic-gate #endif
7837c478bd9Sstevel@tonic-gate 	    rp->r_error || (rp->r_flags & R4RECOVERR) ||
7847c478bd9Sstevel@tonic-gate 	    (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
7857c478bd9Sstevel@tonic-gate 		if (rp->r_flags & R4HASHED) {
7867c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
7877c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
7887c478bd9Sstevel@tonic-gate 			if (vp->v_count > 1) {
789ade42b55SSebastien Roy 				VN_RELE_LOCKED(vp);
7907c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
7917c478bd9Sstevel@tonic-gate 				rw_exit(&rp->r_hashq->r_lock);
7927c478bd9Sstevel@tonic-gate 				return;
7937c478bd9Sstevel@tonic-gate 			}
7947c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
7957c478bd9Sstevel@tonic-gate 			rp4_rmhash_locked(rp);
7967c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
7977c478bd9Sstevel@tonic-gate 		}
7987c478bd9Sstevel@tonic-gate 
7997c478bd9Sstevel@tonic-gate 		/*
8007c478bd9Sstevel@tonic-gate 		 * Make sure we don't have a delegation on this rnode
8017c478bd9Sstevel@tonic-gate 		 * before destroying it.
8027c478bd9Sstevel@tonic-gate 		 */
8037c478bd9Sstevel@tonic-gate 		if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
8047c478bd9Sstevel@tonic-gate 			(void) nfs4delegreturn(rp,
805b9238976Sth 			    NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
8067c478bd9Sstevel@tonic-gate 		}
8077c478bd9Sstevel@tonic-gate 
8087c478bd9Sstevel@tonic-gate 		r4inactive(rp, cr);
8097c478bd9Sstevel@tonic-gate 
8107c478bd9Sstevel@tonic-gate 		/*
8117c478bd9Sstevel@tonic-gate 		 * Recheck the vnode reference count.  We need to
8127c478bd9Sstevel@tonic-gate 		 * make sure that another reference has not been
8137c478bd9Sstevel@tonic-gate 		 * acquired while we were not holding v_lock.  The
8147c478bd9Sstevel@tonic-gate 		 * rnode is not in the rnode hash queues; one
8157c478bd9Sstevel@tonic-gate 		 * way for a reference to have been acquired
8167c478bd9Sstevel@tonic-gate 		 * is for a VOP_PUTPAGE because the rnode was marked
8177c478bd9Sstevel@tonic-gate 		 * with R4DIRTY or for a modified page.  This
8187c478bd9Sstevel@tonic-gate 		 * reference may have been acquired before our call
8197c478bd9Sstevel@tonic-gate 		 * to r4inactive.  The i/o may have been completed,
8207c478bd9Sstevel@tonic-gate 		 * thus allowing r4inactive to complete, but the
8217c478bd9Sstevel@tonic-gate 		 * reference to the vnode may not have been released
8227c478bd9Sstevel@tonic-gate 		 * yet.  In any case, the rnode can not be destroyed
8237c478bd9Sstevel@tonic-gate 		 * until the other references to this vnode have been
8247c478bd9Sstevel@tonic-gate 		 * released.  The other references will take care of
8257c478bd9Sstevel@tonic-gate 		 * either destroying the rnode or placing it on the
8267c478bd9Sstevel@tonic-gate 		 * rnode freelist.  If there are no other references,
8277c478bd9Sstevel@tonic-gate 		 * then the rnode may be safely destroyed.
8287c478bd9Sstevel@tonic-gate 		 */
8297c478bd9Sstevel@tonic-gate 		mutex_enter(&vp->v_lock);
8307c478bd9Sstevel@tonic-gate 		if (vp->v_count > 1) {
831ade42b55SSebastien Roy 			VN_RELE_LOCKED(vp);
8327c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
8337c478bd9Sstevel@tonic-gate 			return;
8347c478bd9Sstevel@tonic-gate 		}
8357c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
8367c478bd9Sstevel@tonic-gate 
8377c478bd9Sstevel@tonic-gate 		destroy_rnode4(rp);
8387c478bd9Sstevel@tonic-gate 		return;
8397c478bd9Sstevel@tonic-gate 	}
8407c478bd9Sstevel@tonic-gate 
8417c478bd9Sstevel@tonic-gate 	/*
8427c478bd9Sstevel@tonic-gate 	 * Lock the hash queue and then recheck the reference count
8437c478bd9Sstevel@tonic-gate 	 * to ensure that no other threads have acquired a reference
8447c478bd9Sstevel@tonic-gate 	 * to indicate that the rnode should not be placed on the
8457c478bd9Sstevel@tonic-gate 	 * freelist.  If another reference has been acquired, then
8467c478bd9Sstevel@tonic-gate 	 * just release this one and let the other thread complete
8477c478bd9Sstevel@tonic-gate 	 * the processing of adding this rnode to the freelist.
8487c478bd9Sstevel@tonic-gate 	 */
8497c478bd9Sstevel@tonic-gate again:
8507c478bd9Sstevel@tonic-gate 	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
8517c478bd9Sstevel@tonic-gate 
8527c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
8537c478bd9Sstevel@tonic-gate 	if (vp->v_count > 1) {
854ade42b55SSebastien Roy 		VN_RELE_LOCKED(vp);
8557c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
8567c478bd9Sstevel@tonic-gate 		rw_exit(&rp->r_hashq->r_lock);
8577c478bd9Sstevel@tonic-gate 		return;
8587c478bd9Sstevel@tonic-gate 	}
8597c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
8607c478bd9Sstevel@tonic-gate 
8617c478bd9Sstevel@tonic-gate 	/*
8627c478bd9Sstevel@tonic-gate 	 * Make sure we don't put an rnode with a delegation
8637c478bd9Sstevel@tonic-gate 	 * on the free list.
8647c478bd9Sstevel@tonic-gate 	 */
8657c478bd9Sstevel@tonic-gate 	if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
8667c478bd9Sstevel@tonic-gate 		rw_exit(&rp->r_hashq->r_lock);
8677c478bd9Sstevel@tonic-gate 		(void) nfs4delegreturn(rp,
868b9238976Sth 		    NFS4_DR_FORCE|NFS4_DR_PUSH|NFS4_DR_REOPEN);
8697c478bd9Sstevel@tonic-gate 		goto again;
8707c478bd9Sstevel@tonic-gate 	}
8717c478bd9Sstevel@tonic-gate 
8727c478bd9Sstevel@tonic-gate 	/*
8737c478bd9Sstevel@tonic-gate 	 * Now that we have the hash queue lock, and we know there
8747c478bd9Sstevel@tonic-gate 	 * are not anymore references on the vnode, check to make
8757c478bd9Sstevel@tonic-gate 	 * sure there aren't any open streams still on the rnode.
8767c478bd9Sstevel@tonic-gate 	 * If so, drop the hash queue lock, remove the open streams,
8777c478bd9Sstevel@tonic-gate 	 * and recheck the v_count.
8787c478bd9Sstevel@tonic-gate 	 */
8797c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_os_lock);
8807c478bd9Sstevel@tonic-gate 	if (list_head(&rp->r_open_streams) != NULL) {
8817c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_os_lock);
8827c478bd9Sstevel@tonic-gate 		rw_exit(&rp->r_hashq->r_lock);
883108322fbScarlsonj 		if (nfs_zone() != VTOMI4(vp)->mi_zone)
8847c478bd9Sstevel@tonic-gate 			nfs4_clear_open_streams(rp);
8857c478bd9Sstevel@tonic-gate 		else
8867c478bd9Sstevel@tonic-gate 			(void) nfs4close_all(vp, cr);
8877c478bd9Sstevel@tonic-gate 		goto again;
8887c478bd9Sstevel@tonic-gate 	}
8897c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_os_lock);
8907c478bd9Sstevel@tonic-gate 
8919f9e2373Sjwahlig 	/*
8929f9e2373Sjwahlig 	 * Before we put it on the freelist, make sure there are no pages.
8939f9e2373Sjwahlig 	 * If there are, flush and commit of all of the dirty and
8949f9e2373Sjwahlig 	 * uncommitted pages, assuming the file system isn't read only.
8959f9e2373Sjwahlig 	 */
8969f9e2373Sjwahlig 	if (!(vp->v_vfsp->vfs_flag & VFS_RDONLY) && nfs4_dross_pages(vp)) {
8979f9e2373Sjwahlig 		rw_exit(&rp->r_hashq->r_lock);
8989f9e2373Sjwahlig 		r4flushpages(rp, cr);
8999f9e2373Sjwahlig 		goto again;
9009f9e2373Sjwahlig 	}
9019f9e2373Sjwahlig 
9027c478bd9Sstevel@tonic-gate 	/*
9037c478bd9Sstevel@tonic-gate 	 * Before we put it on the freelist, make sure there is no
9047c478bd9Sstevel@tonic-gate 	 * active xattr directory cached, the freelist will not
9057c478bd9Sstevel@tonic-gate 	 * have its entries r4inactive'd if there is still an active
9067c478bd9Sstevel@tonic-gate 	 * rnode, thus nothing in the freelist can hold another
9077c478bd9Sstevel@tonic-gate 	 * rnode active.
9087c478bd9Sstevel@tonic-gate 	 */
9097c478bd9Sstevel@tonic-gate 	xattr = rp->r_xattr_dir;
9107c478bd9Sstevel@tonic-gate 	rp->r_xattr_dir = NULL;
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate 	/*
9137c478bd9Sstevel@tonic-gate 	 * If there is no cached data or metadata for this file, then
9147c478bd9Sstevel@tonic-gate 	 * put the rnode on the front of the freelist so that it will
9157c478bd9Sstevel@tonic-gate 	 * be reused before other rnodes which may have cached data or
9167c478bd9Sstevel@tonic-gate 	 * metadata associated with them.
9177c478bd9Sstevel@tonic-gate 	 */
9187c478bd9Sstevel@tonic-gate 	mutex_enter(&rp4freelist_lock);
9197c478bd9Sstevel@tonic-gate 	if (rp4freelist == NULL) {
9207c478bd9Sstevel@tonic-gate 		rp->r_freef = rp;
9217c478bd9Sstevel@tonic-gate 		rp->r_freeb = rp;
9227c478bd9Sstevel@tonic-gate 		rp4freelist = rp;
9237c478bd9Sstevel@tonic-gate 	} else {
9247c478bd9Sstevel@tonic-gate 		rp->r_freef = rp4freelist;
9257c478bd9Sstevel@tonic-gate 		rp->r_freeb = rp4freelist->r_freeb;
9267c478bd9Sstevel@tonic-gate 		rp4freelist->r_freeb->r_freef = rp;
9277c478bd9Sstevel@tonic-gate 		rp4freelist->r_freeb = rp;
9287c478bd9Sstevel@tonic-gate 		if (!nfs4_has_pages(vp) && rp->r_dir == NULL &&
9299f9e2373Sjwahlig 		    rp->r_symlink.contents == NULL && rp->r_secattr == NULL)
9307c478bd9Sstevel@tonic-gate 			rp4freelist = rp;
9317c478bd9Sstevel@tonic-gate 	}
9327c478bd9Sstevel@tonic-gate 	mutex_exit(&rp4freelist_lock);
9337c478bd9Sstevel@tonic-gate 
9347c478bd9Sstevel@tonic-gate 	rw_exit(&rp->r_hashq->r_lock);
9357c478bd9Sstevel@tonic-gate 
9367c478bd9Sstevel@tonic-gate 	if (xattr)
9377c478bd9Sstevel@tonic-gate 		VN_RELE(xattr);
9387c478bd9Sstevel@tonic-gate }
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate /*
9417c478bd9Sstevel@tonic-gate  * Remove an rnode from the free list.
9427c478bd9Sstevel@tonic-gate  *
9437c478bd9Sstevel@tonic-gate  * The caller must be holding rp4freelist_lock and the rnode
9447c478bd9Sstevel@tonic-gate  * must be on the freelist.
9457c478bd9Sstevel@tonic-gate  */
9467c478bd9Sstevel@tonic-gate static void
rp4_rmfree(rnode4_t * rp)9477c478bd9Sstevel@tonic-gate rp4_rmfree(rnode4_t *rp)
9487c478bd9Sstevel@tonic-gate {
9497c478bd9Sstevel@tonic-gate 
9507c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&rp4freelist_lock));
9517c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 	if (rp == rp4freelist) {
9547c478bd9Sstevel@tonic-gate 		rp4freelist = rp->r_freef;
9557c478bd9Sstevel@tonic-gate 		if (rp == rp4freelist)
9567c478bd9Sstevel@tonic-gate 			rp4freelist = NULL;
9577c478bd9Sstevel@tonic-gate 	}
9587c478bd9Sstevel@tonic-gate 	rp->r_freeb->r_freef = rp->r_freef;
9597c478bd9Sstevel@tonic-gate 	rp->r_freef->r_freeb = rp->r_freeb;
9607c478bd9Sstevel@tonic-gate 
9617c478bd9Sstevel@tonic-gate 	rp->r_freef = rp->r_freeb = NULL;
9627c478bd9Sstevel@tonic-gate }
9637c478bd9Sstevel@tonic-gate 
9647c478bd9Sstevel@tonic-gate /*
9657c478bd9Sstevel@tonic-gate  * Put a rnode in the hash table.
9667c478bd9Sstevel@tonic-gate  *
9677c478bd9Sstevel@tonic-gate  * The caller must be holding the exclusive hash queue lock
9687c478bd9Sstevel@tonic-gate  */
9697c478bd9Sstevel@tonic-gate void
rp4_addhash(rnode4_t * rp)9707c478bd9Sstevel@tonic-gate rp4_addhash(rnode4_t *rp)
9717c478bd9Sstevel@tonic-gate {
972*e010bda9SMarcel Telka 	mntinfo4_t *mi;
973*e010bda9SMarcel Telka 
9747c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
9757c478bd9Sstevel@tonic-gate 	ASSERT(!(rp->r_flags & R4HASHED));
9767c478bd9Sstevel@tonic-gate 
9777c478bd9Sstevel@tonic-gate #ifdef DEBUG
9787c478bd9Sstevel@tonic-gate 	r4_dup_check(rp, RTOV4(rp)->v_vfsp);
9797c478bd9Sstevel@tonic-gate #endif
9807c478bd9Sstevel@tonic-gate 
9817c478bd9Sstevel@tonic-gate 	rp->r_hashf = rp->r_hashq->r_hashf;
9827c478bd9Sstevel@tonic-gate 	rp->r_hashq->r_hashf = rp;
9837c478bd9Sstevel@tonic-gate 	rp->r_hashb = (rnode4_t *)rp->r_hashq;
9847c478bd9Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp;
9857c478bd9Sstevel@tonic-gate 
9867c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
9877c478bd9Sstevel@tonic-gate 	rp->r_flags |= R4HASHED;
9887c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
989*e010bda9SMarcel Telka 
990*e010bda9SMarcel Telka 	mi = VTOMI4(RTOV4(rp));
991*e010bda9SMarcel Telka 	mutex_enter(&mi->mi_rnodes_lock);
992*e010bda9SMarcel Telka 	list_insert_tail(&mi->mi_rnodes, rp);
993*e010bda9SMarcel Telka 	mutex_exit(&mi->mi_rnodes_lock);
9947c478bd9Sstevel@tonic-gate }
9957c478bd9Sstevel@tonic-gate 
9967c478bd9Sstevel@tonic-gate /*
9977c478bd9Sstevel@tonic-gate  * Remove a rnode from the hash table.
9987c478bd9Sstevel@tonic-gate  *
9997c478bd9Sstevel@tonic-gate  * The caller must be holding the hash queue lock.
10007c478bd9Sstevel@tonic-gate  */
10017c478bd9Sstevel@tonic-gate void
rp4_rmhash_locked(rnode4_t * rp)10027c478bd9Sstevel@tonic-gate rp4_rmhash_locked(rnode4_t *rp)
10037c478bd9Sstevel@tonic-gate {
1004*e010bda9SMarcel Telka 	mntinfo4_t *mi;
1005*e010bda9SMarcel Telka 
10067c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
10077c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_flags & R4HASHED);
10087c478bd9Sstevel@tonic-gate 
10097c478bd9Sstevel@tonic-gate 	rp->r_hashb->r_hashf = rp->r_hashf;
10107c478bd9Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp->r_hashb;
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
10137c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~R4HASHED;
10147c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
1015*e010bda9SMarcel Telka 
1016*e010bda9SMarcel Telka 	mi = VTOMI4(RTOV4(rp));
1017*e010bda9SMarcel Telka 	mutex_enter(&mi->mi_rnodes_lock);
1018*e010bda9SMarcel Telka 	if (list_link_active(&rp->r_mi_link))
1019*e010bda9SMarcel Telka 		list_remove(&mi->mi_rnodes, rp);
1020*e010bda9SMarcel Telka 	mutex_exit(&mi->mi_rnodes_lock);
10217c478bd9Sstevel@tonic-gate }
10227c478bd9Sstevel@tonic-gate 
10237c478bd9Sstevel@tonic-gate /*
10247c478bd9Sstevel@tonic-gate  * Remove a rnode from the hash table.
10257c478bd9Sstevel@tonic-gate  *
10267c478bd9Sstevel@tonic-gate  * The caller must not be holding the hash queue lock.
10277c478bd9Sstevel@tonic-gate  */
10287c478bd9Sstevel@tonic-gate void
rp4_rmhash(rnode4_t * rp)10297c478bd9Sstevel@tonic-gate rp4_rmhash(rnode4_t *rp)
10307c478bd9Sstevel@tonic-gate {
10317c478bd9Sstevel@tonic-gate 	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
10327c478bd9Sstevel@tonic-gate 	rp4_rmhash_locked(rp);
10337c478bd9Sstevel@tonic-gate 	rw_exit(&rp->r_hashq->r_lock);
10347c478bd9Sstevel@tonic-gate }
10357c478bd9Sstevel@tonic-gate 
10367c478bd9Sstevel@tonic-gate /*
10377c478bd9Sstevel@tonic-gate  * Lookup a rnode by fhandle.  Ignores rnodes that had failed recovery.
10387c478bd9Sstevel@tonic-gate  * Returns NULL if no match.  If an rnode is returned, the reference count
10397c478bd9Sstevel@tonic-gate  * on the master vnode is incremented.
10407c478bd9Sstevel@tonic-gate  *
10417c478bd9Sstevel@tonic-gate  * The caller must be holding the hash queue lock, either shared or exclusive.
10427c478bd9Sstevel@tonic-gate  */
10437c478bd9Sstevel@tonic-gate rnode4_t *
r4find(r4hashq_t * rhtp,nfs4_sharedfh_t * fh,struct vfs * vfsp)10447c478bd9Sstevel@tonic-gate r4find(r4hashq_t *rhtp, nfs4_sharedfh_t *fh, struct vfs *vfsp)
10457c478bd9Sstevel@tonic-gate {
10467c478bd9Sstevel@tonic-gate 	rnode4_t *rp;
10477c478bd9Sstevel@tonic-gate 	vnode_t *vp;
10487c478bd9Sstevel@tonic-gate 
10497c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
10507c478bd9Sstevel@tonic-gate 
10517c478bd9Sstevel@tonic-gate 	for (rp = rhtp->r_hashf; rp != (rnode4_t *)rhtp; rp = rp->r_hashf) {
10527c478bd9Sstevel@tonic-gate 		vp = RTOV4(rp);
10537c478bd9Sstevel@tonic-gate 		if (vp->v_vfsp == vfsp && SFH4_SAME(rp->r_fh, fh)) {
10547c478bd9Sstevel@tonic-gate 
10557c478bd9Sstevel@tonic-gate 			mutex_enter(&rp->r_statelock);
10567c478bd9Sstevel@tonic-gate 			if (rp->r_flags & R4RECOVERR) {
10577c478bd9Sstevel@tonic-gate 				mutex_exit(&rp->r_statelock);
10587c478bd9Sstevel@tonic-gate 				continue;
10597c478bd9Sstevel@tonic-gate 			}
10607c478bd9Sstevel@tonic-gate 			mutex_exit(&rp->r_statelock);
10617c478bd9Sstevel@tonic-gate #ifdef DEBUG
10627c478bd9Sstevel@tonic-gate 			r4_dup_check(rp, vfsp);
10637c478bd9Sstevel@tonic-gate #endif
10647c478bd9Sstevel@tonic-gate 			if (rp->r_freef != NULL) {
10657c478bd9Sstevel@tonic-gate 				mutex_enter(&rp4freelist_lock);
10667c478bd9Sstevel@tonic-gate 				/*
10677c478bd9Sstevel@tonic-gate 				 * If the rnode is on the freelist,
10687c478bd9Sstevel@tonic-gate 				 * then remove it and use that reference
10697c478bd9Sstevel@tonic-gate 				 * as the new reference.  Otherwise,
10707c478bd9Sstevel@tonic-gate 				 * need to increment the reference count.
10717c478bd9Sstevel@tonic-gate 				 */
10727c478bd9Sstevel@tonic-gate 				if (rp->r_freef != NULL) {
10737c478bd9Sstevel@tonic-gate 					rp4_rmfree(rp);
10747c478bd9Sstevel@tonic-gate 					mutex_exit(&rp4freelist_lock);
10757c478bd9Sstevel@tonic-gate 				} else {
10767c478bd9Sstevel@tonic-gate 					mutex_exit(&rp4freelist_lock);
10777c478bd9Sstevel@tonic-gate 					VN_HOLD(vp);
10787c478bd9Sstevel@tonic-gate 				}
10797c478bd9Sstevel@tonic-gate 			} else
10807c478bd9Sstevel@tonic-gate 				VN_HOLD(vp);
10817c478bd9Sstevel@tonic-gate 
10827c478bd9Sstevel@tonic-gate 			/*
10837c478bd9Sstevel@tonic-gate 			 * if root vnode, set v_flag to indicate that
10847c478bd9Sstevel@tonic-gate 			 */
10857c478bd9Sstevel@tonic-gate 			if (isrootfh(fh, rp)) {
10867c478bd9Sstevel@tonic-gate 				if (!(vp->v_flag & VROOT)) {
10877c478bd9Sstevel@tonic-gate 					mutex_enter(&vp->v_lock);
10887c478bd9Sstevel@tonic-gate 					vp->v_flag |= VROOT;
10897c478bd9Sstevel@tonic-gate 					mutex_exit(&vp->v_lock);
10907c478bd9Sstevel@tonic-gate 				}
10917c478bd9Sstevel@tonic-gate 			}
10927c478bd9Sstevel@tonic-gate 			return (rp);
10937c478bd9Sstevel@tonic-gate 		}
10947c478bd9Sstevel@tonic-gate 	}
10957c478bd9Sstevel@tonic-gate 	return (NULL);
10967c478bd9Sstevel@tonic-gate }
10977c478bd9Sstevel@tonic-gate 
10987c478bd9Sstevel@tonic-gate /*
10997c478bd9Sstevel@tonic-gate  * Lookup an rnode by fhandle. Just a wrapper for r4find()
11007c478bd9Sstevel@tonic-gate  * that assumes the caller hasn't already got the lock
11017c478bd9Sstevel@tonic-gate  * on the hash bucket.
11027c478bd9Sstevel@tonic-gate  */
11037c478bd9Sstevel@tonic-gate rnode4_t *
r4find_unlocked(nfs4_sharedfh_t * fh,struct vfs * vfsp)11047c478bd9Sstevel@tonic-gate r4find_unlocked(nfs4_sharedfh_t *fh, struct