xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs_subr.c (revision f8bbc571)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*f8bbc571SPavel Filipensky  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate #include <sys/param.h>
277c478bd9Sstevel@tonic-gate #include <sys/types.h>
287c478bd9Sstevel@tonic-gate #include <sys/systm.h>
2967dbe2beSCasper H.S. Dik #include <sys/cred.h>
307c478bd9Sstevel@tonic-gate #include <sys/proc.h>
317c478bd9Sstevel@tonic-gate #include <sys/user.h>
327c478bd9Sstevel@tonic-gate #include <sys/time.h>
337c478bd9Sstevel@tonic-gate #include <sys/buf.h>
347c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
357c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
367c478bd9Sstevel@tonic-gate #include <sys/socket.h>
377c478bd9Sstevel@tonic-gate #include <sys/uio.h>
387c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
397c478bd9Sstevel@tonic-gate #include <sys/swap.h>
407c478bd9Sstevel@tonic-gate #include <sys/errno.h>
417c478bd9Sstevel@tonic-gate #include <sys/debug.h>
427c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
437c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
447c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
457c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
467c478bd9Sstevel@tonic-gate #include <sys/session.h>
477c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
487c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
497c478bd9Sstevel@tonic-gate #include <sys/acl.h>
507c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
517c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
527c478bd9Sstevel@tonic-gate #include <sys/flock.h>
537c478bd9Sstevel@tonic-gate #include <sys/dirent.h>
547c478bd9Sstevel@tonic-gate #include <sys/flock.h>
557c478bd9Sstevel@tonic-gate #include <sys/callb.h>
567c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
577c478bd9Sstevel@tonic-gate #include <sys/list.h>
5845916cd2Sjpk #include <sys/tsol/tnet.h>
5945916cd2Sjpk #include <sys/priv.h>
6003986916Sjarrett #include <sys/sdt.h>
6193aeed83Smarks #include <sys/attr.h>
6245916cd2Sjpk 
6345916cd2Sjpk #include <inet/ip6.h>
647c478bd9Sstevel@tonic-gate 
657c478bd9Sstevel@tonic-gate #include <rpc/types.h>
667c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
677c478bd9Sstevel@tonic-gate #include <rpc/auth.h>
687c478bd9Sstevel@tonic-gate #include <rpc/clnt.h>
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate #include <nfs/nfs.h>
717c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
727c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h>
737c478bd9Sstevel@tonic-gate #include <nfs/rnode.h>
747c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h>
757c478bd9Sstevel@tonic-gate 
7603986916Sjarrett #include <sys/tsol/label.h>
7703986916Sjarrett 
/*
 * The hash queues for the access to active and cached rnodes
 * are organized as doubly linked lists.  A reader/writer lock
 * for each hash bucket is used to control access and to synchronize
 * lookups, additions, and deletions from the hash queue.
 *
 * The rnode freelist is organized as a doubly linked list with
 * a head pointer.  Additions and deletions are synchronized via
 * a single mutex.
 *
 * In order to add an rnode to the free list, it must be hashed into
 * a hash queue and the exclusive lock to the hash queue must be held.
 * If an rnode is not hashed into a hash queue, then it is destroyed
 * because it represents no valuable information that can be reused
 * about the file.  The exclusive lock to the hash queue must be
 * held in order to prevent a lookup in the hash queue from finding
 * the rnode and using it and assuming that the rnode is not on the
 * freelist.  The lookup in the hash queue will have the hash queue
 * locked, either exclusive or shared.
 *
 * The vnode reference count for each rnode is not allowed to drop
 * below 1.  This prevents external entities, such as the VM
 * subsystem, from acquiring references to vnodes already on the
 * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
 * rnode is looked up in the hash queues, then either the rnode
 * is removed from the freelist and that reference is transferred to
 * the new reference or the vnode reference count must be incremented
 * accordingly.  The mutex for the freelist must be held in order to
 * accurately test to see if the rnode is on the freelist or not.
 * The hash queue lock might be held shared and it is possible that
 * two different threads may race to remove the rnode from the
 * freelist.  This race can be resolved by holding the mutex for the
 * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist.  It cannot be
 * placed on the freelist due to the requirement that the thread
 * putting the rnode on the freelist must hold the exclusive lock
 * to the hash queue and the thread doing the lookup in the hash
 * queue is holding either a shared or exclusive lock to the hash
 * queue.
 *
 * The lock ordering is:
 *
 *	hash bucket lock -> vnode lock
 *	hash bucket lock -> freelist lock
 */
1247c478bd9Sstevel@tonic-gate static rhashq_t *rtable;
1257c478bd9Sstevel@tonic-gate 
1267c478bd9Sstevel@tonic-gate static kmutex_t rpfreelist_lock;
1277c478bd9Sstevel@tonic-gate static rnode_t *rpfreelist = NULL;
1287c478bd9Sstevel@tonic-gate static long rnew = 0;
1297c478bd9Sstevel@tonic-gate long nrnode = 0;
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate static int rtablesize;
1327c478bd9Sstevel@tonic-gate static int rtablemask;
1337c478bd9Sstevel@tonic-gate 
1347c478bd9Sstevel@tonic-gate static int hashlen = 4;
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate static struct kmem_cache *rnode_cache;
1377c478bd9Sstevel@tonic-gate 
1387c478bd9Sstevel@tonic-gate /*
1397c478bd9Sstevel@tonic-gate  * Mutex to protect the following variables:
1407c478bd9Sstevel@tonic-gate  *	nfs_major
1417c478bd9Sstevel@tonic-gate  *	nfs_minor
1427c478bd9Sstevel@tonic-gate  */
1437c478bd9Sstevel@tonic-gate kmutex_t nfs_minor_lock;
1447c478bd9Sstevel@tonic-gate int nfs_major;
1457c478bd9Sstevel@tonic-gate int nfs_minor;
1467c478bd9Sstevel@tonic-gate 
1477c478bd9Sstevel@tonic-gate /* Do we allow preepoch (negative) time values otw? */
1487c478bd9Sstevel@tonic-gate bool_t nfs_allow_preepoch_time = FALSE;	/* default: do not allow preepoch */
1497c478bd9Sstevel@tonic-gate 
1507c478bd9Sstevel@tonic-gate /*
1517c478bd9Sstevel@tonic-gate  * Access cache
1527c478bd9Sstevel@tonic-gate  */
1537c478bd9Sstevel@tonic-gate static acache_hash_t *acache;
1547c478bd9Sstevel@tonic-gate static long nacache;	/* used strictly to size the number of hash queues */
1557c478bd9Sstevel@tonic-gate 
1567c478bd9Sstevel@tonic-gate static int acachesize;
1577c478bd9Sstevel@tonic-gate static int acachemask;
1587c478bd9Sstevel@tonic-gate static struct kmem_cache *acache_cache;
1597c478bd9Sstevel@tonic-gate 
1607c478bd9Sstevel@tonic-gate /*
1617c478bd9Sstevel@tonic-gate  * Client side utilities
1627c478bd9Sstevel@tonic-gate  */
1637c478bd9Sstevel@tonic-gate 
1647c478bd9Sstevel@tonic-gate /*
1657c478bd9Sstevel@tonic-gate  * client side statistics
1667c478bd9Sstevel@tonic-gate  */
1677c478bd9Sstevel@tonic-gate static const struct clstat clstat_tmpl = {
1687c478bd9Sstevel@tonic-gate 	{ "calls",	KSTAT_DATA_UINT64 },
1697c478bd9Sstevel@tonic-gate 	{ "badcalls",	KSTAT_DATA_UINT64 },
1707c478bd9Sstevel@tonic-gate 	{ "clgets",	KSTAT_DATA_UINT64 },
1717c478bd9Sstevel@tonic-gate 	{ "cltoomany",	KSTAT_DATA_UINT64 },
1727c478bd9Sstevel@tonic-gate #ifdef DEBUG
1737c478bd9Sstevel@tonic-gate 	{ "clalloc",	KSTAT_DATA_UINT64 },
1747c478bd9Sstevel@tonic-gate 	{ "noresponse",	KSTAT_DATA_UINT64 },
1757c478bd9Sstevel@tonic-gate 	{ "failover",	KSTAT_DATA_UINT64 },
1767c478bd9Sstevel@tonic-gate 	{ "remap",	KSTAT_DATA_UINT64 },
1777c478bd9Sstevel@tonic-gate #endif
1787c478bd9Sstevel@tonic-gate };
1797c478bd9Sstevel@tonic-gate 
/*
 * The following are statistics that describe the behavior of the system as
 * a whole and do not correspond to any one particular zone.  They are only
 * compiled into DEBUG builds.
 */
#ifdef DEBUG
static struct clstat_debug {
	kstat_named_t	nrnode;			/* number of allocated rnodes */
	kstat_named_t	access;			/* size of access cache */
	kstat_named_t	dirent;			/* size of readdir cache */
	kstat_named_t	dirents;		/* size of readdir buf cache */
	kstat_named_t	reclaim;		/* number of reclaims */
	kstat_named_t	clreclaim;		/* number of cl reclaims */
	kstat_named_t	f_reclaim;		/* number of free reclaims */
	kstat_named_t	a_reclaim;		/* number of active reclaims */
	kstat_named_t	r_reclaim;		/* number of rnode reclaims */
	kstat_named_t	rpath;			/* bytes used to store rpaths */
} clstat_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	/* Exported name is "r_path" although the member is rpath. */
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif	/* DEBUG */
2097c478bd9Sstevel@tonic-gate 
2107c478bd9Sstevel@tonic-gate /*
2117c478bd9Sstevel@tonic-gate  * We keep a global list of per-zone client data, so we can clean up all zones
2127c478bd9Sstevel@tonic-gate  * if we get low on memory.
2137c478bd9Sstevel@tonic-gate  */
2147c478bd9Sstevel@tonic-gate static list_t nfs_clnt_list;
2157c478bd9Sstevel@tonic-gate static kmutex_t nfs_clnt_list_lock;
2167c478bd9Sstevel@tonic-gate static zone_key_t nfsclnt_zone_key;
2177c478bd9Sstevel@tonic-gate 
2187c478bd9Sstevel@tonic-gate static struct kmem_cache *chtab_cache;
2197c478bd9Sstevel@tonic-gate 
/*
 * Some servers do not properly update the attributes of the
 * directory when changes are made.  To allow interoperability
 * with these broken servers, the nfs_disable_rddir_cache
 * parameter must be set in /etc/system.  The default of 0 leaves
 * the parameter unset.
 */
int nfs_disable_rddir_cache = 0;
2277c478bd9Sstevel@tonic-gate 
2287c478bd9Sstevel@tonic-gate int		clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
2297c478bd9Sstevel@tonic-gate 		    struct chtab **);
2307c478bd9Sstevel@tonic-gate void		clfree(CLIENT *, struct chtab *);
2317c478bd9Sstevel@tonic-gate static int	acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
2327c478bd9Sstevel@tonic-gate 		    struct chtab **, struct nfs_clnt *);
2337c478bd9Sstevel@tonic-gate static int	nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
2347c478bd9Sstevel@tonic-gate 		    struct chtab **, struct nfs_clnt *);
2357c478bd9Sstevel@tonic-gate static void	clreclaim(void *);
2367c478bd9Sstevel@tonic-gate static int	nfs_feedback(int, int, mntinfo_t *);
2377c478bd9Sstevel@tonic-gate static int	rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
2387c478bd9Sstevel@tonic-gate 		    caddr_t, cred_t *, int *, enum clnt_stat *, int,
2397c478bd9Sstevel@tonic-gate 		    failinfo_t *);
2407c478bd9Sstevel@tonic-gate static int	aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
2417c478bd9Sstevel@tonic-gate 		    caddr_t, cred_t *, int *, int, failinfo_t *);
2427c478bd9Sstevel@tonic-gate static void	rinactive(rnode_t *, cred_t *);
2437c478bd9Sstevel@tonic-gate static int	rtablehash(nfs_fhandle *);
2447c478bd9Sstevel@tonic-gate static vnode_t	*make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
2457c478bd9Sstevel@tonic-gate 		    struct vnodeops *,
2467c478bd9Sstevel@tonic-gate 		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
2477c478bd9Sstevel@tonic-gate 			cred_t *),
2487c478bd9Sstevel@tonic-gate 		    int (*)(const void *, const void *), int *, cred_t *,
2497c478bd9Sstevel@tonic-gate 		    char *, char *);
2507c478bd9Sstevel@tonic-gate static void	rp_rmfree(rnode_t *);
2517c478bd9Sstevel@tonic-gate static void	rp_addhash(rnode_t *);
2527c478bd9Sstevel@tonic-gate static void	rp_rmhash_locked(rnode_t *);
2537c478bd9Sstevel@tonic-gate static rnode_t	*rfind(rhashq_t *, nfs_fhandle *, struct vfs *);
2547c478bd9Sstevel@tonic-gate static void	destroy_rnode(rnode_t *);
2557c478bd9Sstevel@tonic-gate static void	rddir_cache_free(rddir_cache *);
2567c478bd9Sstevel@tonic-gate static int	nfs_free_data_reclaim(rnode_t *);
2577c478bd9Sstevel@tonic-gate static int	nfs_active_data_reclaim(rnode_t *);
2587c478bd9Sstevel@tonic-gate static int	nfs_free_reclaim(void);
2597c478bd9Sstevel@tonic-gate static int	nfs_active_reclaim(void);
2607c478bd9Sstevel@tonic-gate static int	nfs_rnode_reclaim(void);
2617c478bd9Sstevel@tonic-gate static void	nfs_reclaim(void *);
2627c478bd9Sstevel@tonic-gate static int	failover_safe(failinfo_t *);
2637c478bd9Sstevel@tonic-gate static void	failover_newserver(mntinfo_t *mi);
2647c478bd9Sstevel@tonic-gate static void	failover_thread(mntinfo_t *mi);
2657c478bd9Sstevel@tonic-gate static int	failover_wait(mntinfo_t *);
2667c478bd9Sstevel@tonic-gate static int	failover_remap(failinfo_t *);
2677c478bd9Sstevel@tonic-gate static int	failover_lookup(char *, vnode_t *,
2687c478bd9Sstevel@tonic-gate 		    int (*)(vnode_t *, char *, vnode_t **,
2697c478bd9Sstevel@tonic-gate 			struct pathname *, int, vnode_t *, cred_t *, int),
2707c478bd9Sstevel@tonic-gate 		    int (*)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
2717c478bd9Sstevel@tonic-gate 		    vnode_t **);
2727c478bd9Sstevel@tonic-gate static void	nfs_free_r_path(rnode_t *);
2737c478bd9Sstevel@tonic-gate static void	nfs_set_vroot(vnode_t *);
2747c478bd9Sstevel@tonic-gate static char	*nfs_getsrvnames(mntinfo_t *, size_t *);
2757c478bd9Sstevel@tonic-gate 
2767c478bd9Sstevel@tonic-gate /*
2777c478bd9Sstevel@tonic-gate  * from rpcsec module (common/rpcsec)
2787c478bd9Sstevel@tonic-gate  */
2797c478bd9Sstevel@tonic-gate extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
2807c478bd9Sstevel@tonic-gate extern void sec_clnt_freeh(AUTH *);
2817c478bd9Sstevel@tonic-gate extern void sec_clnt_freeinfo(struct sec_data *);
2827c478bd9Sstevel@tonic-gate 
28345916cd2Sjpk /*
28445916cd2Sjpk  * used in mount policy
28545916cd2Sjpk  */
28645916cd2Sjpk extern ts_label_t *getflabel_cipso(vfs_t *);
28745916cd2Sjpk 
/*
 * EIO or EINTR are not recoverable errors.
 *
 * Evaluates to non-zero when `error' is neither EINTR nor EIO.  The argument
 * and the whole expansion are parenthesized so that the macro is safe with
 * compound arguments (e.g. `a & b') and inside larger expressions.  Note that
 * `error' is evaluated twice, so it must not have side effects.
 */
#define	IS_RECOVERABLE_ERROR(error)	\
	(!(((error) == EINTR) || ((error) == EIO)))
2927c478bd9Sstevel@tonic-gate 
/*
 * Console message formats used while a server is unresponsive.  The DEBUG
 * variants take an extra leading %d (printed right after "NFS") ahead of
 * the %s server name, so callers must pass arguments that match the build
 * flavor.
 */
#ifdef DEBUG
#define	SRV_QFULL_MSG	"send queue to NFS%d server %s is full; still trying\n"
#define	SRV_NOTRESP_MSG	"NFS%d server %s not responding still trying\n"
#else
#define	SRV_QFULL_MSG	"send queue to NFS server %s is full still trying\n"
#define	SRV_NOTRESP_MSG	"NFS server %s not responding still trying\n"
#endif
3007c478bd9Sstevel@tonic-gate /*
3017c478bd9Sstevel@tonic-gate  * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
3027c478bd9Sstevel@tonic-gate  */
3037c478bd9Sstevel@tonic-gate static int
3047c478bd9Sstevel@tonic-gate clget_impl(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
3057c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
3067c478bd9Sstevel@tonic-gate {
3077c478bd9Sstevel@tonic-gate 	struct chhead *ch, *newch;
3087c478bd9Sstevel@tonic-gate 	struct chhead **plistp;
3097c478bd9Sstevel@tonic-gate 	struct chtab *cp;
3107c478bd9Sstevel@tonic-gate 	int error;
3117c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
3127c478bd9Sstevel@tonic-gate 
3137c478bd9Sstevel@tonic-gate 	if (newcl == NULL || chp == NULL || ci == NULL)
3147c478bd9Sstevel@tonic-gate 		return (EINVAL);
3157c478bd9Sstevel@tonic-gate 
3167c478bd9Sstevel@tonic-gate 	*newcl = NULL;
3177c478bd9Sstevel@tonic-gate 	*chp = NULL;
3187c478bd9Sstevel@tonic-gate 
3197c478bd9Sstevel@tonic-gate 	/*
3207c478bd9Sstevel@tonic-gate 	 * Find an unused handle or create one
3217c478bd9Sstevel@tonic-gate 	 */
3227c478bd9Sstevel@tonic-gate 	newch = NULL;
3237c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.clgets.value.ui64++;
3247c478bd9Sstevel@tonic-gate top:
3257c478bd9Sstevel@tonic-gate 	/*
3267c478bd9Sstevel@tonic-gate 	 * Find the correct entry in the cache to check for free
3277c478bd9Sstevel@tonic-gate 	 * client handles.  The search is based on the RPC program
3287c478bd9Sstevel@tonic-gate 	 * number, program version number, dev_t for the transport
3297c478bd9Sstevel@tonic-gate 	 * device, and the protocol family.
3307c478bd9Sstevel@tonic-gate 	 */
3317c478bd9Sstevel@tonic-gate 	mutex_enter(&nfscl->nfscl_chtable_lock);
3327c478bd9Sstevel@tonic-gate 	plistp = &nfscl->nfscl_chtable;
3337c478bd9Sstevel@tonic-gate 	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
3347c478bd9Sstevel@tonic-gate 		if (ch->ch_prog == ci->cl_prog &&
3357c478bd9Sstevel@tonic-gate 		    ch->ch_vers == ci->cl_vers &&
3367c478bd9Sstevel@tonic-gate 		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
3377c478bd9Sstevel@tonic-gate 		    (strcmp(ch->ch_protofmly,
3387106075aSmarks 		    svp->sv_knconf->knc_protofmly) == 0))
3397c478bd9Sstevel@tonic-gate 			break;
3407c478bd9Sstevel@tonic-gate 		plistp = &ch->ch_next;
3417c478bd9Sstevel@tonic-gate 	}
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 	/*
3447c478bd9Sstevel@tonic-gate 	 * If we didn't find a cache entry for this quadruple, then
3457c478bd9Sstevel@tonic-gate 	 * create one.  If we don't have one already preallocated,
3467c478bd9Sstevel@tonic-gate 	 * then drop the cache lock, create one, and then start over.
3477c478bd9Sstevel@tonic-gate 	 * If we did have a preallocated entry, then just add it to
3487c478bd9Sstevel@tonic-gate 	 * the front of the list.
3497c478bd9Sstevel@tonic-gate 	 */
3507c478bd9Sstevel@tonic-gate 	if (ch == NULL) {
3517c478bd9Sstevel@tonic-gate 		if (newch == NULL) {
3527c478bd9Sstevel@tonic-gate 			mutex_exit(&nfscl->nfscl_chtable_lock);
3537c478bd9Sstevel@tonic-gate 			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
3547c478bd9Sstevel@tonic-gate 			newch->ch_timesused = 0;
3557c478bd9Sstevel@tonic-gate 			newch->ch_prog = ci->cl_prog;
3567c478bd9Sstevel@tonic-gate 			newch->ch_vers = ci->cl_vers;
3577c478bd9Sstevel@tonic-gate 			newch->ch_dev = svp->sv_knconf->knc_rdev;
3587c478bd9Sstevel@tonic-gate 			newch->ch_protofmly = kmem_alloc(
3597c478bd9Sstevel@tonic-gate 			    strlen(svp->sv_knconf->knc_protofmly) + 1,
3607c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
3617c478bd9Sstevel@tonic-gate 			(void) strcpy(newch->ch_protofmly,
3627c478bd9Sstevel@tonic-gate 			    svp->sv_knconf->knc_protofmly);
3637c478bd9Sstevel@tonic-gate 			newch->ch_list = NULL;
3647c478bd9Sstevel@tonic-gate 			goto top;
3657c478bd9Sstevel@tonic-gate 		}
3667c478bd9Sstevel@tonic-gate 		ch = newch;
3677c478bd9Sstevel@tonic-gate 		newch = NULL;
3687c478bd9Sstevel@tonic-gate 		ch->ch_next = nfscl->nfscl_chtable;
3697c478bd9Sstevel@tonic-gate 		nfscl->nfscl_chtable = ch;
3707c478bd9Sstevel@tonic-gate 	/*
3717c478bd9Sstevel@tonic-gate 	 * We found a cache entry, but if it isn't on the front of the
3727c478bd9Sstevel@tonic-gate 	 * list, then move it to the front of the list to try to take
3737c478bd9Sstevel@tonic-gate 	 * advantage of locality of operations.
3747c478bd9Sstevel@tonic-gate 	 */
3757c478bd9Sstevel@tonic-gate 	} else if (ch != nfscl->nfscl_chtable) {
3767c478bd9Sstevel@tonic-gate 		*plistp = ch->ch_next;
3777c478bd9Sstevel@tonic-gate 		ch->ch_next = nfscl->nfscl_chtable;
3787c478bd9Sstevel@tonic-gate 		nfscl->nfscl_chtable = ch;
3797c478bd9Sstevel@tonic-gate 	}
3807c478bd9Sstevel@tonic-gate 
3817c478bd9Sstevel@tonic-gate 	/*
3827c478bd9Sstevel@tonic-gate 	 * If there was a free client handle cached, then remove it
3837c478bd9Sstevel@tonic-gate 	 * from the list, init it, and use it.
3847c478bd9Sstevel@tonic-gate 	 */
3857c478bd9Sstevel@tonic-gate 	if (ch->ch_list != NULL) {
3867c478bd9Sstevel@tonic-gate 		cp = ch->ch_list;
3877c478bd9Sstevel@tonic-gate 		ch->ch_list = cp->ch_list;
3887c478bd9Sstevel@tonic-gate 		mutex_exit(&nfscl->nfscl_chtable_lock);
3897c478bd9Sstevel@tonic-gate 		if (newch != NULL) {
3907c478bd9Sstevel@tonic-gate 			kmem_free(newch->ch_protofmly,
3917c478bd9Sstevel@tonic-gate 			    strlen(newch->ch_protofmly) + 1);
3927c478bd9Sstevel@tonic-gate 			kmem_free(newch, sizeof (*newch));
3937c478bd9Sstevel@tonic-gate 		}
3947c478bd9Sstevel@tonic-gate 		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
3957c478bd9Sstevel@tonic-gate 		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
3967c478bd9Sstevel@tonic-gate 		error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
3977c478bd9Sstevel@tonic-gate 		    &cp->ch_client->cl_auth);
3987c478bd9Sstevel@tonic-gate 		if (error || cp->ch_client->cl_auth == NULL) {
3997c478bd9Sstevel@tonic-gate 			CLNT_DESTROY(cp->ch_client);
4007c478bd9Sstevel@tonic-gate 			kmem_cache_free(chtab_cache, cp);
4017c478bd9Sstevel@tonic-gate 			return ((error != 0) ? error : EINTR);
4027c478bd9Sstevel@tonic-gate 		}
4037c478bd9Sstevel@tonic-gate 		ch->ch_timesused++;
4047c478bd9Sstevel@tonic-gate 		*newcl = cp->ch_client;
4057c478bd9Sstevel@tonic-gate 		*chp = cp;
4067c478bd9Sstevel@tonic-gate 		return (0);
4077c478bd9Sstevel@tonic-gate 	}
4087c478bd9Sstevel@tonic-gate 
4097c478bd9Sstevel@tonic-gate 	/*
4107c478bd9Sstevel@tonic-gate 	 * There weren't any free client handles which fit, so allocate
4117c478bd9Sstevel@tonic-gate 	 * a new one and use that.
4127c478bd9Sstevel@tonic-gate 	 */
4137c478bd9Sstevel@tonic-gate #ifdef DEBUG
4147c478bd9Sstevel@tonic-gate 	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, 1);
4157c478bd9Sstevel@tonic-gate #endif
4167c478bd9Sstevel@tonic-gate 	mutex_exit(&nfscl->nfscl_chtable_lock);
4177c478bd9Sstevel@tonic-gate 
4187c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.cltoomany.value.ui64++;
4197c478bd9Sstevel@tonic-gate 	if (newch != NULL) {
4207c478bd9Sstevel@tonic-gate 		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
4217c478bd9Sstevel@tonic-gate 		kmem_free(newch, sizeof (*newch));
4227c478bd9Sstevel@tonic-gate 	}
4237c478bd9Sstevel@tonic-gate 
4247c478bd9Sstevel@tonic-gate 	cp = kmem_cache_alloc(chtab_cache, KM_SLEEP);
4257c478bd9Sstevel@tonic-gate 	cp->ch_head = ch;
4267c478bd9Sstevel@tonic-gate 
4277c478bd9Sstevel@tonic-gate 	sigintr(&smask, (int)ci->cl_flags & MI_INT);
4287c478bd9Sstevel@tonic-gate 	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
4297c478bd9Sstevel@tonic-gate 	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
4307c478bd9Sstevel@tonic-gate 	sigunintr(&smask);
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 	if (error != 0) {
4337c478bd9Sstevel@tonic-gate 		kmem_cache_free(chtab_cache, cp);
4347c478bd9Sstevel@tonic-gate #ifdef DEBUG
4357c478bd9Sstevel@tonic-gate 		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
4367c478bd9Sstevel@tonic-gate #endif
4377c478bd9Sstevel@tonic-gate 		/*
4387c478bd9Sstevel@tonic-gate 		 * Warning is unnecessary if error is EINTR.
4397c478bd9Sstevel@tonic-gate 		 */
4407c478bd9Sstevel@tonic-gate 		if (error != EINTR) {
4417c478bd9Sstevel@tonic-gate 			nfs_cmn_err(error, CE_WARN,
4427c478bd9Sstevel@tonic-gate 			    "clget: couldn't create handle: %m\n");
4437c478bd9Sstevel@tonic-gate 		}
4447c478bd9Sstevel@tonic-gate 		return (error);
4457c478bd9Sstevel@tonic-gate 	}
4467c478bd9Sstevel@tonic-gate 	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
4477c478bd9Sstevel@tonic-gate 	auth_destroy(cp->ch_client->cl_auth);
4487c478bd9Sstevel@tonic-gate 	error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
4497c478bd9Sstevel@tonic-gate 	    &cp->ch_client->cl_auth);
4507c478bd9Sstevel@tonic-gate 	if (error || cp->ch_client->cl_auth == NULL) {
4517c478bd9Sstevel@tonic-gate 		CLNT_DESTROY(cp->ch_client);
4527c478bd9Sstevel@tonic-gate 		kmem_cache_free(chtab_cache, cp);
4537c478bd9Sstevel@tonic-gate #ifdef DEBUG
4547c478bd9Sstevel@tonic-gate 		atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -1);
4557c478bd9Sstevel@tonic-gate #endif
4567c478bd9Sstevel@tonic-gate 		return ((error != 0) ? error : EINTR);
4577c478bd9Sstevel@tonic-gate 	}
4587c478bd9Sstevel@tonic-gate 	ch->ch_timesused++;
4597c478bd9Sstevel@tonic-gate 	*newcl = cp->ch_client;
4607c478bd9Sstevel@tonic-gate 	ASSERT(cp->ch_client->cl_nosignal == FALSE);
4617c478bd9Sstevel@tonic-gate 	*chp = cp;
4627c478bd9Sstevel@tonic-gate 	return (0);
4637c478bd9Sstevel@tonic-gate }
4647c478bd9Sstevel@tonic-gate 
4657c478bd9Sstevel@tonic-gate int
4667c478bd9Sstevel@tonic-gate clget(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
4677c478bd9Sstevel@tonic-gate     struct chtab **chp)
4687c478bd9Sstevel@tonic-gate {
4697c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
4707c478bd9Sstevel@tonic-gate 
471108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
4727c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate 	return (clget_impl(ci, svp, cr, newcl, chp, nfscl));
4757c478bd9Sstevel@tonic-gate }
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate static int
4787c478bd9Sstevel@tonic-gate acl_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
4797c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
4807c478bd9Sstevel@tonic-gate {
4817c478bd9Sstevel@tonic-gate 	clinfo_t ci;
4827c478bd9Sstevel@tonic-gate 	int error;
4837c478bd9Sstevel@tonic-gate 
4847c478bd9Sstevel@tonic-gate 	/*
4857c478bd9Sstevel@tonic-gate 	 * Set read buffer size to rsize
4867c478bd9Sstevel@tonic-gate 	 * and add room for RPC headers.
4877c478bd9Sstevel@tonic-gate 	 */
4887c478bd9Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
4897c478bd9Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
4907c478bd9Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
4917c478bd9Sstevel@tonic-gate 
4927c478bd9Sstevel@tonic-gate 	/*
4937c478bd9Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
4947c478bd9Sstevel@tonic-gate 	 * meaning: do not retransmit.
4957c478bd9Sstevel@tonic-gate 	 */
4967c478bd9Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
4977c478bd9Sstevel@tonic-gate 		ci.cl_retrans = 0;
4987c478bd9Sstevel@tonic-gate 	else
4997c478bd9Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
5007c478bd9Sstevel@tonic-gate 
5017c478bd9Sstevel@tonic-gate 	ci.cl_prog = NFS_ACL_PROGRAM;
5027c478bd9Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
5037c478bd9Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
5047c478bd9Sstevel@tonic-gate 
5057c478bd9Sstevel@tonic-gate 	/*
5067c478bd9Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
5077c478bd9Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
5087c478bd9Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
5097c478bd9Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
5107c478bd9Sstevel@tonic-gate 	 */
5117c478bd9Sstevel@tonic-gate 	do {
5127c478bd9Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
5137c478bd9Sstevel@tonic-gate 
5147c478bd9Sstevel@tonic-gate 		if (error == 0)
5157c478bd9Sstevel@tonic-gate 			break;
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate 		/*
5187c478bd9Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
5197c478bd9Sstevel@tonic-gate 		 */
5207c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
5217c478bd9Sstevel@tonic-gate 			error = EIO;
5227c478bd9Sstevel@tonic-gate 			break;
5237c478bd9Sstevel@tonic-gate 		}
5247c478bd9Sstevel@tonic-gate 
5257c478bd9Sstevel@tonic-gate 		/* do not retry for softmount */
5267c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
5277c478bd9Sstevel@tonic-gate 			break;
5287c478bd9Sstevel@tonic-gate 
5297c478bd9Sstevel@tonic-gate 		/* let the caller deal with the failover case */
5307c478bd9Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
5317c478bd9Sstevel@tonic-gate 			break;
5327c478bd9Sstevel@tonic-gate 
5337c478bd9Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
5347c478bd9Sstevel@tonic-gate 
5357c478bd9Sstevel@tonic-gate 	return (error);
5367c478bd9Sstevel@tonic-gate }
5377c478bd9Sstevel@tonic-gate 
5387c478bd9Sstevel@tonic-gate static int
5397c478bd9Sstevel@tonic-gate nfs_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
5407c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
5417c478bd9Sstevel@tonic-gate {
5427c478bd9Sstevel@tonic-gate 	clinfo_t ci;
5437c478bd9Sstevel@tonic-gate 	int error;
5447c478bd9Sstevel@tonic-gate 
5457c478bd9Sstevel@tonic-gate 	/*
5467c478bd9Sstevel@tonic-gate 	 * Set read buffer size to rsize
5477c478bd9Sstevel@tonic-gate 	 * and add room for RPC headers.
5487c478bd9Sstevel@tonic-gate 	 */
5497c478bd9Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
5507c478bd9Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
5517c478bd9Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
5527c478bd9Sstevel@tonic-gate 
5537c478bd9Sstevel@tonic-gate 	/*
5547c478bd9Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
5557c478bd9Sstevel@tonic-gate 	 * meaning: do not retransmit.
5567c478bd9Sstevel@tonic-gate 	 */
5577c478bd9Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
5587c478bd9Sstevel@tonic-gate 		ci.cl_retrans = 0;
5597c478bd9Sstevel@tonic-gate 	else
5607c478bd9Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
5617c478bd9Sstevel@tonic-gate 
5627c478bd9Sstevel@tonic-gate 	ci.cl_prog = mi->mi_prog;
5637c478bd9Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
5647c478bd9Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
5657c478bd9Sstevel@tonic-gate 
5667c478bd9Sstevel@tonic-gate 	/*
5677c478bd9Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
5687c478bd9Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
5697c478bd9Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
5707c478bd9Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
5717c478bd9Sstevel@tonic-gate 	 */
5727c478bd9Sstevel@tonic-gate 	do {
5737c478bd9Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
5747c478bd9Sstevel@tonic-gate 
5757c478bd9Sstevel@tonic-gate 		if (error == 0)
5767c478bd9Sstevel@tonic-gate 			break;
5777c478bd9Sstevel@tonic-gate 
5787c478bd9Sstevel@tonic-gate 		/*
5797c478bd9Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
5807c478bd9Sstevel@tonic-gate 		 */
5817c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
5827c478bd9Sstevel@tonic-gate 			error = EIO;
5837c478bd9Sstevel@tonic-gate 			break;
5847c478bd9Sstevel@tonic-gate 		}
5857c478bd9Sstevel@tonic-gate 
5867c478bd9Sstevel@tonic-gate 		/* do not retry for softmount */
5877c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
5887c478bd9Sstevel@tonic-gate 			break;
5897c478bd9Sstevel@tonic-gate 
5907c478bd9Sstevel@tonic-gate 		/* let the caller deal with the failover case */
5917c478bd9Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
5927c478bd9Sstevel@tonic-gate 			break;
5937c478bd9Sstevel@tonic-gate 
5947c478bd9Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
5957c478bd9Sstevel@tonic-gate 
5967c478bd9Sstevel@tonic-gate 	return (error);
5977c478bd9Sstevel@tonic-gate }
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate static void
6007c478bd9Sstevel@tonic-gate clfree_impl(CLIENT *cl, struct chtab *cp, struct nfs_clnt *nfscl)
6017c478bd9Sstevel@tonic-gate {
6027c478bd9Sstevel@tonic-gate 	if (cl->cl_auth != NULL) {
6037c478bd9Sstevel@tonic-gate 		sec_clnt_freeh(cl->cl_auth);
6047c478bd9Sstevel@tonic-gate 		cl->cl_auth = NULL;
6057c478bd9Sstevel@tonic-gate 	}
6067c478bd9Sstevel@tonic-gate 
6077c478bd9Sstevel@tonic-gate 	/*
6087c478bd9Sstevel@tonic-gate 	 * Timestamp this cache entry so that we know when it was last
6097c478bd9Sstevel@tonic-gate 	 * used.
6107c478bd9Sstevel@tonic-gate 	 */
6117c478bd9Sstevel@tonic-gate 	cp->ch_freed = gethrestime_sec();
6127c478bd9Sstevel@tonic-gate 
6137c478bd9Sstevel@tonic-gate 	/*
6147c478bd9Sstevel@tonic-gate 	 * Add the free client handle to the front of the list.
6157c478bd9Sstevel@tonic-gate 	 * This way, the list will be sorted in youngest to oldest
6167c478bd9Sstevel@tonic-gate 	 * order.
6177c478bd9Sstevel@tonic-gate 	 */
6187c478bd9Sstevel@tonic-gate 	mutex_enter(&nfscl->nfscl_chtable_lock);
6197c478bd9Sstevel@tonic-gate 	cp->ch_list = cp->ch_head->ch_list;
6207c478bd9Sstevel@tonic-gate 	cp->ch_head->ch_list = cp;
6217c478bd9Sstevel@tonic-gate 	mutex_exit(&nfscl->nfscl_chtable_lock);
6227c478bd9Sstevel@tonic-gate }
6237c478bd9Sstevel@tonic-gate 
6247c478bd9Sstevel@tonic-gate void
6257c478bd9Sstevel@tonic-gate clfree(CLIENT *cl, struct chtab *cp)
6267c478bd9Sstevel@tonic-gate {
6277c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
6287c478bd9Sstevel@tonic-gate 
629108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
6307c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate 	clfree_impl(cl, cp, nfscl);
6337c478bd9Sstevel@tonic-gate }
6347c478bd9Sstevel@tonic-gate 
6357c478bd9Sstevel@tonic-gate #define	CL_HOLDTIME	60	/* time to hold client handles */
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate static void
6387c478bd9Sstevel@tonic-gate clreclaim_zone(struct nfs_clnt *nfscl, uint_t cl_holdtime)
6397c478bd9Sstevel@tonic-gate {
6407c478bd9Sstevel@tonic-gate 	struct chhead *ch;
6417c478bd9Sstevel@tonic-gate 	struct chtab *cp;	/* list of objects that can be reclaimed */
6427c478bd9Sstevel@tonic-gate 	struct chtab *cpe;
6437c478bd9Sstevel@tonic-gate 	struct chtab *cpl;
6447c478bd9Sstevel@tonic-gate 	struct chtab **cpp;
6457c478bd9Sstevel@tonic-gate #ifdef DEBUG
6467c478bd9Sstevel@tonic-gate 	int n = 0;
6477c478bd9Sstevel@tonic-gate #endif
6487c478bd9Sstevel@tonic-gate 
6497c478bd9Sstevel@tonic-gate 	/*
6507c478bd9Sstevel@tonic-gate 	 * Need to reclaim some memory, so step through the cache
6517c478bd9Sstevel@tonic-gate 	 * looking through the lists for entries which can be freed.
6527c478bd9Sstevel@tonic-gate 	 */
6537c478bd9Sstevel@tonic-gate 	cp = NULL;
6547c478bd9Sstevel@tonic-gate 
6557c478bd9Sstevel@tonic-gate 	mutex_enter(&nfscl->nfscl_chtable_lock);
6567c478bd9Sstevel@tonic-gate 
6577c478bd9Sstevel@tonic-gate 	/*
6587c478bd9Sstevel@tonic-gate 	 * Here we step through each non-NULL quadruple and start to
6597c478bd9Sstevel@tonic-gate 	 * construct the reclaim list pointed to by cp.  Note that
6607c478bd9Sstevel@tonic-gate 	 * cp will contain all eligible chtab entries.  When this traversal
6617c478bd9Sstevel@tonic-gate 	 * completes, chtab entries from the last quadruple will be at the
6627c478bd9Sstevel@tonic-gate 	 * front of cp and entries from previously inspected quadruples have
6637c478bd9Sstevel@tonic-gate 	 * been appended to the rear of cp.
6647c478bd9Sstevel@tonic-gate 	 */
6657c478bd9Sstevel@tonic-gate 	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
6667c478bd9Sstevel@tonic-gate 		if (ch->ch_list == NULL)
6677c478bd9Sstevel@tonic-gate 			continue;
6687c478bd9Sstevel@tonic-gate 		/*
6697c478bd9Sstevel@tonic-gate 		 * Search each list for entries older then
6707c478bd9Sstevel@tonic-gate 		 * cl_holdtime seconds.  The lists are maintained
6717c478bd9Sstevel@tonic-gate 		 * in youngest to oldest order so that when the
6727c478bd9Sstevel@tonic-gate 		 * first entry is found which is old enough, then
6737c478bd9Sstevel@tonic-gate 		 * all of the rest of the entries on the list will
6747c478bd9Sstevel@tonic-gate 		 * be old enough as well.
6757c478bd9Sstevel@tonic-gate 		 */
6767c478bd9Sstevel@tonic-gate 		cpl = ch->ch_list;
6777c478bd9Sstevel@tonic-gate 		cpp = &ch->ch_list;
6787c478bd9Sstevel@tonic-gate 		while (cpl != NULL &&
6797106075aSmarks 		    cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
6807c478bd9Sstevel@tonic-gate 			cpp = &cpl->ch_list;
6817c478bd9Sstevel@tonic-gate 			cpl = cpl->ch_list;
6827c478bd9Sstevel@tonic-gate 		}
6837c478bd9Sstevel@tonic-gate 		if (cpl != NULL) {
6847c478bd9Sstevel@tonic-gate 			*cpp = NULL;
6857c478bd9Sstevel@tonic-gate 			if (cp != NULL) {
6867c478bd9Sstevel@tonic-gate 				cpe = cpl;
6877c478bd9Sstevel@tonic-gate 				while (cpe->ch_list != NULL)
6887c478bd9Sstevel@tonic-gate 					cpe = cpe->ch_list;
6897c478bd9Sstevel@tonic-gate 				cpe->ch_list = cp;
6907c478bd9Sstevel@tonic-gate 			}
6917c478bd9Sstevel@tonic-gate 			cp = cpl;
6927c478bd9Sstevel@tonic-gate 		}
6937c478bd9Sstevel@tonic-gate 	}
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate 	mutex_exit(&nfscl->nfscl_chtable_lock);
6967c478bd9Sstevel@tonic-gate 
6977c478bd9Sstevel@tonic-gate 	/*
6987c478bd9Sstevel@tonic-gate 	 * If cp is empty, then there is nothing to reclaim here.
6997c478bd9Sstevel@tonic-gate 	 */
7007c478bd9Sstevel@tonic-gate 	if (cp == NULL)
7017c478bd9Sstevel@tonic-gate 		return;
7027c478bd9Sstevel@tonic-gate 
7037c478bd9Sstevel@tonic-gate 	/*
7047c478bd9Sstevel@tonic-gate 	 * Step through the list of entries to free, destroying each client
7057c478bd9Sstevel@tonic-gate 	 * handle and kmem_free'ing the memory for each entry.
7067c478bd9Sstevel@tonic-gate 	 */
7077c478bd9Sstevel@tonic-gate 	while (cp != NULL) {
7087c478bd9Sstevel@tonic-gate #ifdef DEBUG
7097c478bd9Sstevel@tonic-gate 		n++;
7107c478bd9Sstevel@tonic-gate #endif
7117c478bd9Sstevel@tonic-gate 		CLNT_DESTROY(cp->ch_client);
7127c478bd9Sstevel@tonic-gate 		cpl = cp->ch_list;
7137c478bd9Sstevel@tonic-gate 		kmem_cache_free(chtab_cache, cp);
7147c478bd9Sstevel@tonic-gate 		cp = cpl;
7157c478bd9Sstevel@tonic-gate 	}
7167c478bd9Sstevel@tonic-gate 
7177c478bd9Sstevel@tonic-gate #ifdef DEBUG
7187c478bd9Sstevel@tonic-gate 	/*
7197c478bd9Sstevel@tonic-gate 	 * Update clalloc so that nfsstat shows the current number
7207c478bd9Sstevel@tonic-gate 	 * of allocated client handles.
7217c478bd9Sstevel@tonic-gate 	 */
7227c478bd9Sstevel@tonic-gate 	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
7237c478bd9Sstevel@tonic-gate #endif
7247c478bd9Sstevel@tonic-gate }
7257c478bd9Sstevel@tonic-gate 
7267c478bd9Sstevel@tonic-gate /* ARGSUSED */
7277c478bd9Sstevel@tonic-gate static void
7287c478bd9Sstevel@tonic-gate clreclaim(void *all)
7297c478bd9Sstevel@tonic-gate {
7307c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
7317c478bd9Sstevel@tonic-gate 
7327c478bd9Sstevel@tonic-gate #ifdef DEBUG
7337c478bd9Sstevel@tonic-gate 	clstat_debug.clreclaim.value.ui64++;
7347c478bd9Sstevel@tonic-gate #endif
7357c478bd9Sstevel@tonic-gate 	/*
7367c478bd9Sstevel@tonic-gate 	 * The system is low on memory; go through and try to reclaim some from
7377c478bd9Sstevel@tonic-gate 	 * every zone on the system.
7387c478bd9Sstevel@tonic-gate 	 */
7397c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
7407c478bd9Sstevel@tonic-gate 	nfscl = list_head(&nfs_clnt_list);
7417c478bd9Sstevel@tonic-gate 	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl))
7427c478bd9Sstevel@tonic-gate 		clreclaim_zone(nfscl, CL_HOLDTIME);
7437c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
7447c478bd9Sstevel@tonic-gate }
7457c478bd9Sstevel@tonic-gate 
/*
 * Minimum time-out values, indexed by call type.
 * The units are eighths of a second, which lets users scale by hz and
 * shift right by 3 instead of multiplying out fractions.
 */
static unsigned int minimum_timeo[] = {
	6,
	7,
	10
};

/*
 * Retransmission back-off.  MAXTIMO is expressed in clock ticks (hz per
 * second).  backoff() doubles a timeout that is still below MAXTIMO,
 * capping the result at MAXTIMO; a timeout already at or above MAXTIMO
 * is returned unchanged.
 */
#define	MAXTIMO	(20*hz)
#define	dobackoff(tim)	((((tim) << 1) <= MAXTIMO) ? ((tim) << 1) : MAXTIMO)
#define	backoff(tim)	(((tim) >= MAXTIMO) ? (tim) : dobackoff(tim))

#define	MIN_NFS_TSIZE 512	/* minimum "chunk" of NFS IO */
#define	REDUCE_NFS_TIME (hz/2)	/* rtxcur we try to keep under */
#define	INCREASE_NFS_TIME (hz/3*8) /* srtt we try to keep under (scaled*8) */
7647c478bd9Sstevel@tonic-gate 
7657c478bd9Sstevel@tonic-gate /*
7667c478bd9Sstevel@tonic-gate  * Function called when rfscall notices that we have been
7677c478bd9Sstevel@tonic-gate  * re-transmitting, or when we get a response without retransmissions.
7687c478bd9Sstevel@tonic-gate  * Return 1 if the transfer size was adjusted down - 0 if no change.
7697c478bd9Sstevel@tonic-gate  */
7707c478bd9Sstevel@tonic-gate static int
7717c478bd9Sstevel@tonic-gate nfs_feedback(int flag, int which, mntinfo_t *mi)
7727c478bd9Sstevel@tonic-gate {
7737c478bd9Sstevel@tonic-gate 	int kind;
7747c478bd9Sstevel@tonic-gate 	int r = 0;
7757c478bd9Sstevel@tonic-gate 
7767c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
7777c478bd9Sstevel@tonic-gate 	if (flag == FEEDBACK_REXMIT1) {
7787c478bd9Sstevel@tonic-gate 		if (mi->mi_timers[NFS_CALLTYPES].rt_rtxcur != 0 &&
7797c478bd9Sstevel@tonic-gate 		    mi->mi_timers[NFS_CALLTYPES].rt_rtxcur < REDUCE_NFS_TIME)
7807c478bd9Sstevel@tonic-gate 			goto done;
7817c478bd9Sstevel@tonic-gate 		if (mi->mi_curread > MIN_NFS_TSIZE) {
7827c478bd9Sstevel@tonic-gate 			mi->mi_curread /= 2;
7837c478bd9Sstevel@tonic-gate 			if (mi->mi_curread < MIN_NFS_TSIZE)
7847c478bd9Sstevel@tonic-gate 				mi->mi_curread = MIN_NFS_TSIZE;
7857c478bd9Sstevel@tonic-gate 			r = 1;
7867c478bd9Sstevel@tonic-gate 		}
7877c478bd9Sstevel@tonic-gate 
7887c478bd9Sstevel@tonic-gate 		if (mi->mi_curwrite > MIN_NFS_TSIZE) {
7897c478bd9Sstevel@tonic-gate 			mi->mi_curwrite /= 2;
7907c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite < MIN_NFS_TSIZE)
7917c478bd9Sstevel@tonic-gate 				mi->mi_curwrite = MIN_NFS_TSIZE;
7927c478bd9Sstevel@tonic-gate 			r = 1;
7937c478bd9Sstevel@tonic-gate 		}
7947c478bd9Sstevel@tonic-gate 	} else if (flag == FEEDBACK_OK) {
7957c478bd9Sstevel@tonic-gate 		kind = mi->mi_timer_type[which];
7967c478bd9Sstevel@tonic-gate 		if (kind == 0 ||
7977c478bd9Sstevel@tonic-gate 		    mi->mi_timers[kind].rt_srtt >= INCREASE_NFS_TIME)
7987c478bd9Sstevel@tonic-gate 			goto done;
7997c478bd9Sstevel@tonic-gate 		if (kind == 1) {
8007c478bd9Sstevel@tonic-gate 			if (mi->mi_curread >= mi->mi_tsize)
8017c478bd9Sstevel@tonic-gate 				goto done;
8027c478bd9Sstevel@tonic-gate 			mi->mi_curread +=  MIN_NFS_TSIZE;
8037c478bd9Sstevel@tonic-gate 			if (mi->mi_curread > mi->mi_tsize/2)
8047c478bd9Sstevel@tonic-gate 				mi->mi_curread = mi->mi_tsize;
8057c478bd9Sstevel@tonic-gate 		} else if (kind == 2) {
8067c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite >= mi->mi_stsize)
8077c478bd9Sstevel@tonic-gate 				goto done;
8087c478bd9Sstevel@tonic-gate 			mi->mi_curwrite += MIN_NFS_TSIZE;
8097c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite > mi->mi_stsize/2)
8107c478bd9Sstevel@tonic-gate 				mi->mi_curwrite = mi->mi_stsize;
8117c478bd9Sstevel@tonic-gate 		}
8127c478bd9Sstevel@tonic-gate 	}
8137c478bd9Sstevel@tonic-gate done:
8147c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
8157c478bd9Sstevel@tonic-gate 	return (r);
8167c478bd9Sstevel@tonic-gate }
8177c478bd9Sstevel@tonic-gate 
8187c478bd9Sstevel@tonic-gate #ifdef DEBUG
8197c478bd9Sstevel@tonic-gate static int rfs2call_hits = 0;
8207c478bd9Sstevel@tonic-gate static int rfs2call_misses = 0;
8217c478bd9Sstevel@tonic-gate #endif
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate int
8247c478bd9Sstevel@tonic-gate rfs2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
8257c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
8267c478bd9Sstevel@tonic-gate     enum nfsstat *statusp, int flags, failinfo_t *fi)
8277c478bd9Sstevel@tonic-gate {
8287c478bd9Sstevel@tonic-gate 	int rpcerror;
8297c478bd9Sstevel@tonic-gate 	enum clnt_stat rpc_status;
8307c478bd9Sstevel@tonic-gate 
8317c478bd9Sstevel@tonic-gate 	ASSERT(statusp != NULL);
8327c478bd9Sstevel@tonic-gate 
8337c478bd9Sstevel@tonic-gate 	rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
8347c478bd9Sstevel@tonic-gate 	    cr, douprintf, &rpc_status, flags, fi);
8357c478bd9Sstevel@tonic-gate 	if (!rpcerror) {
8367c478bd9Sstevel@tonic-gate 		/*
8377c478bd9Sstevel@tonic-gate 		 * See crnetadjust() for comments.
8387c478bd9Sstevel@tonic-gate 		 */
8397c478bd9Sstevel@tonic-gate 		if (*statusp == NFSERR_ACCES &&
8407c478bd9Sstevel@tonic-gate 		    (cr = crnetadjust(cr)) != NULL) {
8417c478bd9Sstevel@tonic-gate #ifdef DEBUG
8427c478bd9Sstevel@tonic-gate 			rfs2call_hits++;
8437c478bd9Sstevel@tonic-gate #endif
8447c478bd9Sstevel@tonic-gate 			rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
8457c478bd9Sstevel@tonic-gate 			    resp, cr, douprintf, NULL, flags, fi);
8467c478bd9Sstevel@tonic-gate 			crfree(cr);
8477c478bd9Sstevel@tonic-gate #ifdef DEBUG
8487c478bd9Sstevel@tonic-gate 			if (*statusp == NFSERR_ACCES)
8497c478bd9Sstevel@tonic-gate 				rfs2call_misses++;
8507c478bd9Sstevel@tonic-gate #endif
8517c478bd9Sstevel@tonic-gate 		}
8527c478bd9Sstevel@tonic-gate 	} else if (rpc_status == RPC_PROCUNAVAIL) {
8537c478bd9Sstevel@tonic-gate 		*statusp = NFSERR_OPNOTSUPP;
8547c478bd9Sstevel@tonic-gate 		rpcerror = 0;
8557c478bd9Sstevel@tonic-gate 	}
8567c478bd9Sstevel@tonic-gate 
8577c478bd9Sstevel@tonic-gate 	return (rpcerror);
8587c478bd9Sstevel@tonic-gate }
8597c478bd9Sstevel@tonic-gate 
/*
 * Default jukebox retry delay: ten seconds' worth of clock ticks.  The
 * replacement list is parenthesized so that the macro expands as a single
 * value inside a larger expression (e.g. x / NFS3_JUKEBOX_DELAY).
 */
#define	NFS3_JUKEBOX_DELAY	(10 * hz)

/*
 * Delay, in ticks, that rfs3call() passes to delay() before retrying a
 * call that returned NFS3ERR_JUKEBOX.
 */
static clock_t nfs3_jukebox_delay = 0;
8637c478bd9Sstevel@tonic-gate 
8647c478bd9Sstevel@tonic-gate #ifdef DEBUG
8657c478bd9Sstevel@tonic-gate static int rfs3call_hits = 0;
8667c478bd9Sstevel@tonic-gate static int rfs3call_misses = 0;
8677c478bd9Sstevel@tonic-gate #endif
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate int
8707c478bd9Sstevel@tonic-gate rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
8717c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
8727c478bd9Sstevel@tonic-gate     nfsstat3 *statusp, int flags, failinfo_t *fi)
8737c478bd9Sstevel@tonic-gate {
8747c478bd9Sstevel@tonic-gate 	int rpcerror;
8757c478bd9Sstevel@tonic-gate 	int user_informed;
8767c478bd9Sstevel@tonic-gate 
8777c478bd9Sstevel@tonic-gate 	user_informed = 0;
8787c478bd9Sstevel@tonic-gate 	do {
8797c478bd9Sstevel@tonic-gate 		rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
8807c478bd9Sstevel@tonic-gate 		    cr, douprintf, NULL, flags, fi);
8817c478bd9Sstevel@tonic-gate 		if (!rpcerror) {
8827c478bd9Sstevel@tonic-gate 			cred_t *crr;
8837c478bd9Sstevel@tonic-gate 			if (*statusp == NFS3ERR_JUKEBOX) {
8847c478bd9Sstevel@tonic-gate 				if (ttoproc(curthread) == &p0) {
8857c478bd9Sstevel@tonic-gate 					rpcerror = EAGAIN;
8867c478bd9Sstevel@tonic-gate 					break;
8877c478bd9Sstevel@tonic-gate 				}
8887c478bd9Sstevel@tonic-gate 				if (!user_informed) {
8897c478bd9Sstevel@tonic-gate 					user_informed = 1;
8907c478bd9Sstevel@tonic-gate 					uprintf(
8917c478bd9Sstevel@tonic-gate 		"file temporarily unavailable on the server, retrying...\n");
8927c478bd9Sstevel@tonic-gate 				}
8937c478bd9Sstevel@tonic-gate 				delay(nfs3_jukebox_delay);
8947c478bd9Sstevel@tonic-gate 			}
8957c478bd9Sstevel@tonic-gate 			/*
8967c478bd9Sstevel@tonic-gate 			 * See crnetadjust() for comments.
8977c478bd9Sstevel@tonic-gate 			 */
8987c478bd9Sstevel@tonic-gate 			else if (*statusp == NFS3ERR_ACCES &&
8997c478bd9Sstevel@tonic-gate 			    (crr = crnetadjust(cr)) != NULL) {
9007c478bd9Sstevel@tonic-gate #ifdef DEBUG
9017c478bd9Sstevel@tonic-gate 				rfs3call_hits++;
9027c478bd9Sstevel@tonic-gate #endif
9037c478bd9Sstevel@tonic-gate 				rpcerror = rfscall(mi, which, xdrargs, argsp,
9047c478bd9Sstevel@tonic-gate 				    xdrres, resp, crr, douprintf,
9057c478bd9Sstevel@tonic-gate 				    NULL, flags, fi);
9067c478bd9Sstevel@tonic-gate 
9077c478bd9Sstevel@tonic-gate 				crfree(crr);
9087c478bd9Sstevel@tonic-gate #ifdef DEBUG
9097c478bd9Sstevel@tonic-gate 				if (*statusp == NFS3ERR_ACCES)
9107c478bd9Sstevel@tonic-gate 					rfs3call_misses++;
9117c478bd9Sstevel@tonic-gate #endif
9127c478bd9Sstevel@tonic-gate 			}
9137c478bd9Sstevel@tonic-gate 		}
9147c478bd9Sstevel@tonic-gate 	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);
9157c478bd9Sstevel@tonic-gate 
9167c478bd9Sstevel@tonic-gate 	return (rpcerror);
9177c478bd9Sstevel@tonic-gate }
9187c478bd9Sstevel@tonic-gate 
/*
 * VALID_FH: true when the server recorded in the rnode of fi->vp is the
 * current server (mi_curr_serv) of the mount that vnode belongs to.
 */
#define	VALID_FH(fi)	\
	(VTOR((fi)->vp)->r_server == VTOMI((fi)->vp)->mi_curr_serv)

/*
 * INC_READERS/DEC_READERS: adjust mi_readers; DEC_READERS broadcasts on
 * mi_failover_cv when the count reaches zero.  The uses in rfscall() hold
 * mi_lock around them.
 *
 * Both are wrapped in do { } while (0) so that they behave as a single
 * statement (e.g. as the body of an if that has an else), and the
 * argument is parenthesized so that any pointer expression may be passed.
 */
#define	INC_READERS(mi)		do { \
	(mi)->mi_readers++; \
} while (0)
#define	DEC_READERS(mi)		do { \
	(mi)->mi_readers--; \
	if ((mi)->mi_readers == 0) \
		cv_broadcast(&(mi)->mi_failover_cv); \
} while (0)
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate static int
9307c478bd9Sstevel@tonic-gate rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
93145916cd2Sjpk     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
9327c478bd9Sstevel@tonic-gate     enum clnt_stat *rpc_status, int flags, failinfo_t *fi)
9337c478bd9Sstevel@tonic-gate {
9347c478bd9Sstevel@tonic-gate 	CLIENT *client;
9357c478bd9Sstevel@tonic-gate 	struct chtab *ch;
93645916cd2Sjpk 	cred_t *cr = icr;
9377c478bd9Sstevel@tonic-gate 	enum clnt_stat status;
938e280ed37SDai Ngo 	struct rpc_err rpcerr, rpcerr_tmp;
9397c478bd9Sstevel@tonic-gate 	struct timeval wait;
9407c478bd9Sstevel@tonic-gate 	int timeo;		/* in units of hz */
9417c478bd9Sstevel@tonic-gate 	int my_rsize, my_wsize;
9427c478bd9Sstevel@tonic-gate 	bool_t tryagain;
94345916cd2Sjpk 	bool_t cred_cloned = FALSE;
9447c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
9457c478bd9Sstevel@tonic-gate 	servinfo_t *svp;
9467c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
9477c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
948e280ed37SDai Ngo 	char *msg;
9497c478bd9Sstevel@tonic-gate #ifdef DEBUG
9507c478bd9Sstevel@tonic-gate 	char *bufp;
9517c478bd9Sstevel@tonic-gate #endif
9527c478bd9Sstevel@tonic-gate 
9537c478bd9Sstevel@tonic-gate 
9547c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
9557106075aSmarks 	    "rfscall_start:which %d mi %p", which, mi);
9567c478bd9Sstevel@tonic-gate 
957108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
9587c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
9597c478bd9Sstevel@tonic-gate 
9607c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.calls.value.ui64++;
9617c478bd9Sstevel@tonic-gate 	mi->mi_reqs[which].value.ui64++;
9627c478bd9Sstevel@tonic-gate 
9637c478bd9Sstevel@tonic-gate 	rpcerr.re_status = RPC_SUCCESS;
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 	/*
9667c478bd9Sstevel@tonic-gate 	 * In case of forced unmount or zone shutdown, return EIO.
9677c478bd9Sstevel@tonic-gate 	 */
9687c478bd9Sstevel@tonic-gate 
9697c478bd9Sstevel@tonic-gate 	if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
9707c478bd9Sstevel@tonic-gate 		rpcerr.re_status = RPC_FAILED;
9717c478bd9Sstevel@tonic-gate 		rpcerr.re_errno = EIO;
9727c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
9737c478bd9Sstevel@tonic-gate 	}
9747c478bd9Sstevel@tonic-gate 
9757c478bd9Sstevel@tonic-gate 	/*
9767c478bd9Sstevel@tonic-gate 	 * Remember the transfer sizes in case
9777c478bd9Sstevel@tonic-gate 	 * nfs_feedback changes them underneath us.
9787c478bd9Sstevel@tonic-gate 	 */
9797c478bd9Sstevel@tonic-gate 	my_rsize = mi->mi_curread;
9807c478bd9Sstevel@tonic-gate 	my_wsize = mi->mi_curwrite;
9817c478bd9Sstevel@tonic-gate 
9827c478bd9Sstevel@tonic-gate 	/*
9837c478bd9Sstevel@tonic-gate 	 * NFS client failover support
9847c478bd9Sstevel@tonic-gate 	 *
9857c478bd9Sstevel@tonic-gate 	 * If this rnode is not in sync with the current server (VALID_FH),
9867c478bd9Sstevel@tonic-gate 	 * we'd like to do a remap to get in sync.  We can be interrupted
9877c478bd9Sstevel@tonic-gate 	 * in failover_remap(), and if so we'll bail.  Otherwise, we'll
9887c478bd9Sstevel@tonic-gate 	 * use the best info we have to try the RPC.  Part of that is
9897c478bd9Sstevel@tonic-gate 	 * unconditionally updating the filehandle copy kept for V3.
9907c478bd9Sstevel@tonic-gate 	 *
9917c478bd9Sstevel@tonic-gate 	 * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible
9927c478bd9Sstevel@tonic-gate 	 * rw_enter(); we're trying to keep the current server from being
9937c478bd9Sstevel@tonic-gate 	 * changed on us until we're done with the remapping and have a
9947c478bd9Sstevel@tonic-gate 	 * matching client handle.  We don't want to sending a filehandle
9957c478bd9Sstevel@tonic-gate 	 * to the wrong host.
9967c478bd9Sstevel@tonic-gate 	 */
9977c478bd9Sstevel@tonic-gate failoverretry:
9987c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
9997c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10007c478bd9Sstevel@tonic-gate 		if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10017c478bd9Sstevel@tonic-gate 			if (failover_wait(mi)) {
10027c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
10037c478bd9Sstevel@tonic-gate 				return (EINTR);
10047c478bd9Sstevel@tonic-gate 			}
10057c478bd9Sstevel@tonic-gate 		}
10067c478bd9Sstevel@tonic-gate 		INC_READERS(mi);
10077c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10087c478bd9Sstevel@tonic-gate 		if (fi) {
10097c478bd9Sstevel@tonic-gate 			if (!VALID_FH(fi) &&
10107c478bd9Sstevel@tonic-gate 			    !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10117c478bd9Sstevel@tonic-gate 				int remaperr;
10127c478bd9Sstevel@tonic-gate 
10137c478bd9Sstevel@tonic-gate 				svp = mi->mi_curr_serv;
10147c478bd9Sstevel@tonic-gate 				remaperr = failover_remap(fi);
10157c478bd9Sstevel@tonic-gate 				if (remaperr != 0) {
10167c478bd9Sstevel@tonic-gate #ifdef DEBUG
10177c478bd9Sstevel@tonic-gate 					if (remaperr != EINTR)
10187c478bd9Sstevel@tonic-gate 						nfs_cmn_err(remaperr, CE_WARN,
10197c478bd9Sstevel@tonic-gate 					    "rfscall couldn't failover: %m");
10207c478bd9Sstevel@tonic-gate #endif
10217c478bd9Sstevel@tonic-gate 					mutex_enter(&mi->mi_lock);
10227c478bd9Sstevel@tonic-gate 					DEC_READERS(mi);
10237c478bd9Sstevel@tonic-gate 					mutex_exit(&mi->mi_lock);
10247c478bd9Sstevel@tonic-gate 					/*
10257c478bd9Sstevel@tonic-gate 					 * If failover_remap returns ETIMEDOUT
10267c478bd9Sstevel@tonic-gate 					 * and the filesystem is hard mounted
10277c478bd9Sstevel@tonic-gate 					 * we have to retry the call with a new
10287c478bd9Sstevel@tonic-gate 					 * server.
10297c478bd9Sstevel@tonic-gate 					 */
10307c478bd9Sstevel@tonic-gate 					if ((mi->mi_flags & MI_HARD) &&
10317c478bd9Sstevel@tonic-gate 					    IS_RECOVERABLE_ERROR(remaperr)) {
10327c478bd9Sstevel@tonic-gate 						if (svp == mi->mi_curr_serv)
10337c478bd9Sstevel@tonic-gate 							failover_newserver(mi);
10347c478bd9Sstevel@tonic-gate 						rpcerr.re_status = RPC_SUCCESS;
10357c478bd9Sstevel@tonic-gate 						goto failoverretry;
10367c478bd9Sstevel@tonic-gate 					}
10377c478bd9Sstevel@tonic-gate 					rpcerr.re_errno = remaperr;
10387c478bd9Sstevel@tonic-gate 					return (remaperr);
10397c478bd9Sstevel@tonic-gate 				}
10407c478bd9Sstevel@tonic-gate 			}
10417c478bd9Sstevel@tonic-gate 			if (fi->fhp && fi->copyproc)
10427c478bd9Sstevel@tonic-gate 				(*fi->copyproc)(fi->fhp, fi->vp);
10437c478bd9Sstevel@tonic-gate 		}
10447c478bd9Sstevel@tonic-gate 	}
10457c478bd9Sstevel@tonic-gate 
104645916cd2Sjpk 	/* For TSOL, use a new cred which has net_mac_aware flag */
104745916cd2Sjpk 	if (!cred_cloned && is_system_labeled()) {
104845916cd2Sjpk 		cred_cloned = TRUE;
104945916cd2Sjpk 		cr = crdup(icr);
105045916cd2Sjpk 		(void) setpflags(NET_MAC_AWARE, 1, cr);
105145916cd2Sjpk 	}
105245916cd2Sjpk 
10537c478bd9Sstevel@tonic-gate 	/*
10547c478bd9Sstevel@tonic-gate 	 * clget() calls clnt_tli_kinit() which clears the xid, so we
10557c478bd9Sstevel@tonic-gate 	 * are guaranteed to reprocess the retry as a new request.
10567c478bd9Sstevel@tonic-gate 	 */
10577c478bd9Sstevel@tonic-gate 	svp = mi->mi_curr_serv;
10587c478bd9Sstevel@tonic-gate 	rpcerr.re_errno = nfs_clget(mi, svp, cr, &client, &ch, nfscl);
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
10617c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10627c478bd9Sstevel@tonic-gate 		DEC_READERS(mi);
10637c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10647c478bd9Sstevel@tonic-gate 
10657c478bd9Sstevel@tonic-gate 		if ((rpcerr.re_errno == ETIMEDOUT ||
10667106075aSmarks 		    rpcerr.re_errno == ECONNRESET) &&
10677106075aSmarks 		    failover_safe(fi)) {
10687c478bd9Sstevel@tonic-gate 			if (svp == mi->mi_curr_serv)
10697c478bd9Sstevel@tonic-gate 				failover_newserver(mi);
10707c478bd9Sstevel@tonic-gate 			goto failoverretry;
10717c478bd9Sstevel@tonic-gate 		}
10727c478bd9Sstevel@tonic-gate 	}
10737c478bd9Sstevel@tonic-gate 	if (rpcerr.re_errno != 0)
10747c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
10757c478bd9Sstevel@tonic-gate 
10767c478bd9Sstevel@tonic-gate 	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
10777c478bd9Sstevel@tonic-gate 	    svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
10787c478bd9Sstevel@tonic-gate 		timeo = (mi->mi_timeo * hz) / 10;
10797c478bd9Sstevel@tonic-gate 	} else {
10807c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10817c478bd9Sstevel@tonic-gate 		timeo = CLNT_SETTIMERS(client,
10827c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[mi->mi_timer_type[which]]),
10837c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[NFS_CALLTYPES]),
10847c478bd9Sstevel@tonic-gate 		    (minimum_timeo[mi->mi_call_type[which]]*hz)>>3,
10857c478bd9Sstevel@tonic-gate 		    (void (*)())NULL, (caddr_t)mi, 0);
10867c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10877c478bd9Sstevel@tonic-gate 	}
10887c478bd9Sstevel@tonic-gate 
10897c478bd9Sstevel@tonic-gate 	/*
10907c478bd9Sstevel@tonic-gate 	 * If hard mounted fs, retry call forever unless hard error occurs.
10917c478bd9Sstevel@tonic-gate 	 */
10927c478bd9Sstevel@tonic-gate 	do {
10937c478bd9Sstevel@tonic-gate 		tryagain = FALSE;
10947c478bd9Sstevel@tonic-gate 
10957c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
10967c478bd9Sstevel@tonic-gate 			status = RPC_FAILED;
10977c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_FAILED;
10987c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EIO;
10997c478bd9Sstevel@tonic-gate 			break;
11007c478bd9Sstevel@tonic-gate 		}
11017c478bd9Sstevel@tonic-gate 
11027c478bd9Sstevel@tonic-gate 		TICK_TO_TIMEVAL(timeo, &wait);
11037c478bd9Sstevel@tonic-gate 
11047c478bd9Sstevel@tonic-gate 		/*
11057c478bd9Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
11067c478bd9Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
11077c478bd9Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
11087c478bd9Sstevel@tonic-gate 		 */
11097c478bd9Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
11107c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
11117c478bd9Sstevel@tonic-gate 			client->cl_nosignal = TRUE;
11127c478bd9Sstevel@tonic-gate 
11137c478bd9Sstevel@tonic-gate 		/*
11147c478bd9Sstevel@tonic-gate 		 * If there is a current signal, then don't bother
11157c478bd9Sstevel@tonic-gate 		 * even trying to send out the request because we
11167c478bd9Sstevel@tonic-gate 		 * won't be able to block waiting for the response.
11177c478bd9Sstevel@tonic-gate 		 * Simply assume RPC_INTR and get on with it.
11187c478bd9Sstevel@tonic-gate 		 */
11197c478bd9Sstevel@tonic-gate 		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
11207c478bd9Sstevel@tonic-gate 			status = RPC_INTR;
11217c478bd9Sstevel@tonic-gate 		else {
11227c478bd9Sstevel@tonic-gate 			status = CLNT_CALL(client, which, xdrargs, argsp,
11237c478bd9Sstevel@tonic-gate 			    xdrres, resp, wait);
11247c478bd9Sstevel@tonic-gate 		}
11257c478bd9Sstevel@tonic-gate 
11267c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
11277c478bd9Sstevel@tonic-gate 			client->cl_nosignal = FALSE;
11287c478bd9Sstevel@tonic-gate 		/*
11297c478bd9Sstevel@tonic-gate 		 * restore original signal mask
11307c478bd9Sstevel@tonic-gate 		 */
11317c478bd9Sstevel@tonic-gate 		sigunintr(&smask);
11327c478bd9Sstevel@tonic-gate 
11337c478bd9Sstevel@tonic-gate 		switch (status) {
11347c478bd9Sstevel@tonic-gate 		case RPC_SUCCESS:
11357c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
11367c478bd9Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
11377c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
11387c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize))
11397c478bd9Sstevel@tonic-gate 				(void) nfs_feedback(FEEDBACK_OK, which, mi);
11407c478bd9Sstevel@tonic-gate 			break;
11417c478bd9Sstevel@tonic-gate 
11427c478bd9Sstevel@tonic-gate 		case RPC_INTR:
11437c478bd9Sstevel@tonic-gate 			/*
11447c478bd9Sstevel@tonic-gate 			 * There is no way to recover from this error,
11457c478bd9Sstevel@tonic-gate 			 * even if mount option nointr is specified.
11467c478bd9Sstevel@tonic-gate 			 * SIGKILL, for example, cannot be blocked.
11477c478bd9Sstevel@tonic-gate 			 */
11487c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_INTR;
11497c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EINTR;
11507c478bd9Sstevel@tonic-gate 			break;
11517c478bd9Sstevel@tonic-gate 
11527c478bd9Sstevel@tonic-gate 		case RPC_UDERROR:
11537c478bd9Sstevel@tonic-gate 			/*
11547c478bd9Sstevel@tonic-gate 			 * If the NFS server is local (vold) and
11557c478bd9Sstevel@tonic-gate 			 * it goes away then we get RPC_UDERROR.
11567c478bd9Sstevel@tonic-gate 			 * This is a retryable error, so we would
11577c478bd9Sstevel@tonic-gate 			 * loop, so check to see if the specific
11587c478bd9Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that
11597c478bd9Sstevel@tonic-gate 			 * target did not exist at all.  If so,
11607c478bd9Sstevel@tonic-gate 			 * return with RPC_PROGUNAVAIL and
11617c478bd9Sstevel@tonic-gate 			 * ECONNRESET to indicate why.
11627c478bd9Sstevel@tonic-gate 			 */
11637c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
11647c478bd9Sstevel@tonic-gate 			if (rpcerr.re_errno == ECONNRESET) {
11657c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_PROGUNAVAIL;
11667c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = ECONNRESET;
11677c478bd9Sstevel@tonic-gate 				break;
11687c478bd9Sstevel@tonic-gate 			}
11697c478bd9Sstevel@tonic-gate 			/*FALLTHROUGH*/
11707c478bd9Sstevel@tonic-gate 
11717c478bd9Sstevel@tonic-gate 		default:		/* probably RPC_TIMEDOUT */
11727c478bd9Sstevel@tonic-gate 			if (IS_UNRECOVERABLE_RPC(status))
11737c478bd9Sstevel@tonic-gate 				break;
11747c478bd9Sstevel@tonic-gate 
11757c478bd9Sstevel@tonic-gate 			/*
11767c478bd9Sstevel@tonic-gate 			 * increment server not responding count
11777c478bd9Sstevel@tonic-gate 			 */
11787c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
11797c478bd9Sstevel@tonic-gate 			mi->mi_noresponse++;
11807c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
11817c478bd9Sstevel@tonic-gate #ifdef DEBUG
11827c478bd9Sstevel@tonic-gate 			nfscl->nfscl_stat.noresponse.value.ui64++;
11837c478bd9Sstevel@tonic-gate #endif
11847c478bd9Sstevel@tonic-gate 
11857c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_HARD)) {
11867c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_SEMISOFT) ||
11877c478bd9Sstevel@tonic-gate 				    (mi->mi_ss_call_type[which] == 0))
11887c478bd9Sstevel@tonic-gate 					break;
11897c478bd9Sstevel@tonic-gate 			}
11907c478bd9Sstevel@tonic-gate 
11917c478bd9Sstevel@tonic-gate 			/*
11927c478bd9Sstevel@tonic-gate 			 * The call is in progress (over COTS).
11937c478bd9Sstevel@tonic-gate 			 * Try the CLNT_CALL again, but don't
11947c478bd9Sstevel@tonic-gate 			 * print a noisy error message.
11957c478bd9Sstevel@tonic-gate 			 */
11967c478bd9Sstevel@tonic-gate 			if (status == RPC_INPROGRESS) {
11977c478bd9Sstevel@tonic-gate 				tryagain = TRUE;
11987c478bd9Sstevel@tonic-gate 				break;
11997c478bd9Sstevel@tonic-gate 			}
12007c478bd9Sstevel@tonic-gate 
12017c478bd9Sstevel@tonic-gate 			if (flags & RFSCALL_SOFT)
12027c478bd9Sstevel@tonic-gate 				break;
12037c478bd9Sstevel@tonic-gate 
12047c478bd9Sstevel@tonic-gate 			/*
12057c478bd9Sstevel@tonic-gate 			 * On zone shutdown, just move on.
12067c478bd9Sstevel@tonic-gate 			 */
12077c478bd9Sstevel@tonic-gate 			if (zone_status_get(curproc->p_zone) >=
12087c478bd9Sstevel@tonic-gate 			    ZONE_IS_SHUTTING_DOWN) {
12097c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_FAILED;
12107c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
12117c478bd9Sstevel@tonic-gate 				break;
12127c478bd9Sstevel@tonic-gate 			}
12137c478bd9Sstevel@tonic-gate 
12147c478bd9Sstevel@tonic-gate 			/*
12157c478bd9Sstevel@tonic-gate 			 * NFS client failover support
12167c478bd9Sstevel@tonic-gate 			 *
12177c478bd9Sstevel@tonic-gate 			 * If the current server just failed us, we'll
12187c478bd9Sstevel@tonic-gate 			 * start the process of finding a new server.
12197c478bd9Sstevel@tonic-gate 			 * After that, we can just retry.
12207c478bd9Sstevel@tonic-gate 			 */
12217c478bd9Sstevel@tonic-gate 			if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
12227c478bd9Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv)
12237c478bd9Sstevel@tonic-gate 					failover_newserver(mi);
12247c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
12257c478bd9Sstevel@tonic-gate 				goto failoverretry;
12267c478bd9Sstevel@tonic-gate 			}
12277c478bd9Sstevel@tonic-gate 
12287c478bd9Sstevel@tonic-gate 			tryagain = TRUE;
12297c478bd9Sstevel@tonic-gate 			timeo = backoff(timeo);
1230e280ed37SDai Ngo 
1231e280ed37SDai Ngo 			CLNT_GETERR(client, &rpcerr_tmp);
1232e280ed37SDai Ngo 			if ((status == RPC_CANTSEND) &&
1233e280ed37SDai Ngo 			    (rpcerr_tmp.re_errno == ENOBUFS))
1234e280ed37SDai Ngo 				msg = SRV_QFULL_MSG;
1235e280ed37SDai Ngo 			else
1236e280ed37SDai Ngo 				msg = SRV_NOTRESP_MSG;
1237e280ed37SDai Ngo 
12387c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
12397c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_PRINTED)) {
12407c478bd9Sstevel@tonic-gate 				mi->mi_flags |= MI_PRINTED;
12417c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
12427c478bd9Sstevel@tonic-gate #ifdef DEBUG
1243e280ed37SDai Ngo 				zprintf(zoneid, msg, mi->mi_vers,
12447c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
1245e280ed37SDai Ngo #else
1246e280ed37SDai Ngo 				zprintf(zoneid, msg, svp->sv_hostname);
12477c478bd9Sstevel@tonic-gate #endif
12487c478bd9Sstevel@tonic-gate 			} else
12497c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
12509acbbeafSnn 			if (*douprintf && nfs_has_ctty()) {
12517c478bd9Sstevel@tonic-gate 				*douprintf = 0;
12527c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
12537c478bd9Sstevel@tonic-gate #ifdef DEBUG
1254e280ed37SDai Ngo 					uprintf(msg, mi->mi_vers,
12557c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
1256e280ed37SDai Ngo #else
1257e280ed37SDai Ngo 					uprintf(msg, svp->sv_hostname);
12587c478bd9Sstevel@tonic-gate #endif
12597c478bd9Sstevel@tonic-gate 			}
12607c478bd9Sstevel@tonic-gate 
12617c478bd9Sstevel@tonic-gate 			/*
12627c478bd9Sstevel@tonic-gate 			 * If doing dynamic adjustment of transfer
12637c478bd9Sstevel@tonic-gate 			 * size and if it's a read or write call
12647c478bd9Sstevel@tonic-gate 			 * and if the transfer size changed while
12657c478bd9Sstevel@tonic-gate 			 * retransmitting or if the feedback routine
12667c478bd9Sstevel@tonic-gate 			 * changed the transfer size,
12677c478bd9Sstevel@tonic-gate 			 * then exit rfscall so that the transfer
12687c478bd9Sstevel@tonic-gate 			 * size can be adjusted at the vnops level.
12697c478bd9Sstevel@tonic-gate 			 */
12707c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
12717c478bd9Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
12727c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
12737c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize ||
12747c478bd9Sstevel@tonic-gate 			    nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
12757c478bd9Sstevel@tonic-gate 				/*
12767c478bd9Sstevel@tonic-gate 				 * On read or write calls, return
12777c478bd9Sstevel@tonic-gate 				 * back to the vnode ops level if
12787c478bd9Sstevel@tonic-gate 				 * the transfer size changed.
12797c478bd9Sstevel@tonic-gate 				 */
12807c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
128145916cd2Sjpk 				if (cred_cloned)
128245916cd2Sjpk 					crfree(cr);
12837c478bd9Sstevel@tonic-gate 				return (ENFS_TRYAGAIN);
12847c478bd9Sstevel@tonic-gate 			}
12857c478bd9Sstevel@tonic-gate 		}
12867c478bd9Sstevel@tonic-gate 	} while (tryagain);
12877c478bd9Sstevel@tonic-gate 
12887c478bd9Sstevel@tonic-gate 	if (status != RPC_SUCCESS) {
12897c478bd9Sstevel@tonic-gate 		/*
12907c478bd9Sstevel@tonic-gate 		 * Let soft mounts use the timed out message.
12917c478bd9Sstevel@tonic-gate 		 */
12927c478bd9Sstevel@tonic-gate 		if (status == RPC_INPROGRESS)
12937c478bd9Sstevel@tonic-gate 			status = RPC_TIMEDOUT;
12947c478bd9Sstevel@tonic-gate 		nfscl->nfscl_stat.badcalls.value.ui64++;
12957c478bd9Sstevel@tonic-gate 		if (status != RPC_INTR) {
12967c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
12977c478bd9Sstevel@tonic-gate 			mi->mi_flags |= MI_DOWN;
12987c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
12997c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
13007c478bd9Sstevel@tonic-gate #ifdef DEBUG
13017c478bd9Sstevel@tonic-gate 			bufp = clnt_sperror(client, svp->sv_hostname);
13027c478bd9Sstevel@tonic-gate 			zprintf(zoneid, "NFS%d %s failed for %s\n",
13037c478bd9Sstevel@tonic-gate 			    mi->mi_vers, mi->mi_rfsnames[which], bufp);
13049acbbeafSnn 			if (nfs_has_ctty()) {
13057c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
13067c478bd9Sstevel@tonic-gate 					uprintf("NFS%d %s failed for %s\n",
13077c478bd9Sstevel@tonic-gate 					    mi->mi_vers, mi->mi_rfsnames[which],
13087c478bd9Sstevel@tonic-gate 					    bufp);
13097c478bd9Sstevel@tonic-gate 				}
13107c478bd9Sstevel@tonic-gate 			}
13117c478bd9Sstevel@tonic-gate 			kmem_free(bufp, MAXPATHLEN);
13127c478bd9Sstevel@tonic-gate #else
13137c478bd9Sstevel@tonic-gate 			zprintf(zoneid,
13147c478bd9Sstevel@tonic-gate 			    "NFS %s failed for server %s: error %d (%s)\n",
13157c478bd9Sstevel@tonic-gate 			    mi->mi_rfsnames[which], svp->sv_hostname,
13167c478bd9Sstevel@tonic-gate 			    status, clnt_sperrno(status));
13179acbbeafSnn 			if (nfs_has_ctty()) {
13187c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
13197c478bd9Sstevel@tonic-gate 					uprintf(
13207c478bd9Sstevel@tonic-gate 				"NFS %s failed for server %s: error %d (%s)\n",
13217c478bd9Sstevel@tonic-gate 					    mi->mi_rfsnames[which],
13227c478bd9Sstevel@tonic-gate 					    svp->sv_hostname, status,
13237c478bd9Sstevel@tonic-gate 					    clnt_sperrno(status));
13247c478bd9Sstevel@tonic-gate 				}
13257c478bd9Sstevel@tonic-gate 			}
13267c478bd9Sstevel@tonic-gate #endif
13277c478bd9Sstevel@tonic-gate 			/*
13287c478bd9Sstevel@tonic-gate 			 * when CLNT_CALL() fails with RPC_AUTHERROR,
13297c478bd9Sstevel@tonic-gate 			 * re_errno is set appropriately depending on
13307c478bd9Sstevel@tonic-gate 			 * the authentication error
13317c478bd9Sstevel@tonic-gate 			 */
13327c478bd9Sstevel@tonic-gate 			if (status == RPC_VERSMISMATCH ||
13337c478bd9Sstevel@tonic-gate 			    status == RPC_PROGVERSMISMATCH)
13347c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
13357c478bd9Sstevel@tonic-gate 		}
13367c478bd9Sstevel@tonic-gate 	} else {
13377c478bd9Sstevel@tonic-gate 		/*
13387c478bd9Sstevel@tonic-gate 		 * Test the value of mi_down and mi_printed without
13397c478bd9Sstevel@tonic-gate 		 * holding the mi_lock mutex.  If they are both zero,
13407c478bd9Sstevel@tonic-gate 		 * then it is okay to skip the down and printed
13417c478bd9Sstevel@tonic-gate 		 * processing.  This saves on a mutex_enter and
13427c478bd9Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC.
13437c478bd9Sstevel@tonic-gate 		 * This was just complete overhead.
13447c478bd9Sstevel@tonic-gate 		 */
13457c478bd9Sstevel@tonic-gate 		if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
13467c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
13477c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~MI_DOWN;
13487c478bd9Sstevel@tonic-gate 			if (mi->mi_flags & MI_PRINTED) {
13497c478bd9Sstevel@tonic-gate 				mi->mi_flags &= ~MI_PRINTED;
13507c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
13517c478bd9Sstevel@tonic-gate #ifdef DEBUG
13527c478bd9Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13537c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS%d server %s ok\n",
13547c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
13557c478bd9Sstevel@tonic-gate #else
13567c478bd9Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13577c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS server %s ok\n",
13587c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
13597c478bd9Sstevel@tonic-gate #endif
13607c478bd9Sstevel@tonic-gate 			} else
13617c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
13627c478bd9Sstevel@tonic-gate 		}
13637c478bd9Sstevel@tonic-gate 
13647c478bd9Sstevel@tonic-gate 		if (*douprintf == 0) {
13657c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_NOPRINT))
13667c478bd9Sstevel@tonic-gate #ifdef DEBUG
13677c478bd9Sstevel@tonic-gate 				if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13687c478bd9Sstevel@tonic-gate 					uprintf("NFS%d server %s ok\n",
13697c478bd9Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
13707c478bd9Sstevel@tonic-gate #else
13717c478bd9Sstevel@tonic-gate 			if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED))
13727c478bd9Sstevel@tonic-gate 				uprintf("NFS server %s ok\n", svp->sv_hostname);
13737c478bd9Sstevel@tonic-gate #endif
13747c478bd9Sstevel@tonic-gate 			*douprintf = 1;
13757c478bd9Sstevel@tonic-gate 		}
13767c478bd9Sstevel@tonic-gate 	}
13777c478bd9Sstevel@tonic-gate 
13787c478bd9Sstevel@tonic-gate 	clfree_impl(client, ch, nfscl);
137945916cd2Sjpk 	if (cred_cloned)
138045916cd2Sjpk 		crfree(cr);
13817c478bd9Sstevel@tonic-gate 
13827c478bd9Sstevel@tonic-gate 	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
13837c478bd9Sstevel@tonic-gate 
13847c478bd9Sstevel@tonic-gate 	if (rpc_status != NULL)
13857c478bd9Sstevel@tonic-gate 		*rpc_status = rpcerr.re_status;
13867c478bd9Sstevel@tonic-gate 
13877c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
13887c478bd9Sstevel@tonic-gate 	    rpcerr.re_errno);
13897c478bd9Sstevel@tonic-gate 
13907c478bd9Sstevel@tonic-gate 	return (rpcerr.re_errno);
13917c478bd9Sstevel@tonic-gate }
13927c478bd9Sstevel@tonic-gate 
13937c478bd9Sstevel@tonic-gate #ifdef DEBUG
13947c478bd9Sstevel@tonic-gate static int acl2call_hits = 0;
13957c478bd9Sstevel@tonic-gate static int acl2call_misses = 0;
13967c478bd9Sstevel@tonic-gate #endif
13977c478bd9Sstevel@tonic-gate 
13987c478bd9Sstevel@tonic-gate int
13997c478bd9Sstevel@tonic-gate acl2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
14007c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
14017c478bd9Sstevel@tonic-gate     enum nfsstat *statusp, int flags, failinfo_t *fi)
14027c478bd9Sstevel@tonic-gate {
14037c478bd9Sstevel@tonic-gate 	int rpcerror;
14047c478bd9Sstevel@tonic-gate 
14057c478bd9Sstevel@tonic-gate 	rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
14067c478bd9Sstevel@tonic-gate 	    cr, douprintf, flags, fi);
14077c478bd9Sstevel@tonic-gate 	if (!rpcerror) {
14087c478bd9Sstevel@tonic-gate 		/*
14097c478bd9Sstevel@tonic-gate 		 * See comments with crnetadjust().
14107c478bd9Sstevel@tonic-gate 		 */
14117c478bd9Sstevel@tonic-gate 		if (*statusp == NFSERR_ACCES &&
14127c478bd9Sstevel@tonic-gate 		    (cr = crnetadjust(cr)) != NULL) {
14137c478bd9Sstevel@tonic-gate #ifdef DEBUG
14147c478bd9Sstevel@tonic-gate 			acl2call_hits++;
14157c478bd9Sstevel@tonic-gate #endif
14167c478bd9Sstevel@tonic-gate 			rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres,
14177c478bd9Sstevel@tonic-gate 			    resp, cr, douprintf, flags, fi);
14187c478bd9Sstevel@tonic-gate 			crfree(cr);
14197c478bd9Sstevel@tonic-gate #ifdef DEBUG
14207c478bd9Sstevel@tonic-gate 			if (*statusp == NFSERR_ACCES)
14217c478bd9Sstevel@tonic-gate 				acl2call_misses++;
14227c478bd9Sstevel@tonic-gate #endif
14237c478bd9Sstevel@tonic-gate 		}
14247c478bd9Sstevel@tonic-gate 	}
14257c478bd9Sstevel@tonic-gate 
14267c478bd9Sstevel@tonic-gate 	return (rpcerror);
14277c478bd9Sstevel@tonic-gate }
14287c478bd9Sstevel@tonic-gate 
14297c478bd9Sstevel@tonic-gate #ifdef DEBUG
14307c478bd9Sstevel@tonic-gate static int acl3call_hits = 0;
14317c478bd9Sstevel@tonic-gate static int acl3call_misses = 0;
14327c478bd9Sstevel@tonic-gate #endif
14337c478bd9Sstevel@tonic-gate 
14347c478bd9Sstevel@tonic-gate int
14357c478bd9Sstevel@tonic-gate acl3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
14367c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
14377c478bd9Sstevel@tonic-gate     nfsstat3 *statusp, int flags, failinfo_t *fi)
14387c478bd9Sstevel@tonic-gate {
14397c478bd9Sstevel@tonic-gate 	int rpcerror;
14407c478bd9Sstevel@tonic-gate 	int user_informed;
14417c478bd9Sstevel@tonic-gate 
14427c478bd9Sstevel@tonic-gate 	user_informed = 0;
14437c478bd9Sstevel@tonic-gate 
14447c478bd9Sstevel@tonic-gate 	do {
14457c478bd9Sstevel@tonic-gate 		rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
14467c478bd9Sstevel@tonic-gate 		    cr, douprintf, flags, fi);
14477c478bd9Sstevel@tonic-gate 		if (!rpcerror) {
14487c478bd9Sstevel@tonic-gate 			cred_t *crr;
14497c478bd9Sstevel@tonic-gate 			if (*statusp == NFS3ERR_JUKEBOX) {
14507c478bd9Sstevel@tonic-gate 				if (!user_informed) {
14517c478bd9Sstevel@tonic-gate 					user_informed = 1;
14527c478bd9Sstevel@tonic-gate 					uprintf(
14537c478bd9Sstevel@tonic-gate 		"file temporarily unavailable on the server, retrying...\n");
14547c478bd9Sstevel@tonic-gate 				}
14557c478bd9Sstevel@tonic-gate 				delay(nfs3_jukebox_delay);
14567c478bd9Sstevel@tonic-gate 			}
14577c478bd9Sstevel@tonic-gate 			/*
14587c478bd9Sstevel@tonic-gate 			 * See crnetadjust() for comments.
14597c478bd9Sstevel@tonic-gate 			 */
14607c478bd9Sstevel@tonic-gate 			else if (*statusp == NFS3ERR_ACCES &&
14617c478bd9Sstevel@tonic-gate 			    (crr = crnetadjust(cr)) != NULL) {
14627c478bd9Sstevel@tonic-gate #ifdef DEBUG
14637c478bd9Sstevel@tonic-gate 				acl3call_hits++;
14647c478bd9Sstevel@tonic-gate #endif
14657c478bd9Sstevel@tonic-gate 				rpcerror = aclcall(mi, which, xdrargs, argsp,
14667c478bd9Sstevel@tonic-gate 				    xdrres, resp, crr, douprintf, flags, fi);
14677c478bd9Sstevel@tonic-gate 
14687c478bd9Sstevel@tonic-gate 				crfree(crr);
14697c478bd9Sstevel@tonic-gate #ifdef DEBUG
14707c478bd9Sstevel@tonic-gate 				if (*statusp == NFS3ERR_ACCES)
14717c478bd9Sstevel@tonic-gate 					acl3call_misses++;
14727c478bd9Sstevel@tonic-gate #endif
14737c478bd9Sstevel@tonic-gate 			}
14747c478bd9Sstevel@tonic-gate 		}
14757c478bd9Sstevel@tonic-gate 	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);
14767c478bd9Sstevel@tonic-gate 
14777c478bd9Sstevel@tonic-gate 	return (rpcerror);
14787c478bd9Sstevel@tonic-gate }
14797c478bd9Sstevel@tonic-gate 
14807c478bd9Sstevel@tonic-gate static int
14817c478bd9Sstevel@tonic-gate aclcall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
148245916cd2Sjpk     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
14837c478bd9Sstevel@tonic-gate     int flags, failinfo_t *fi)
14847c478bd9Sstevel@tonic-gate {
14857c478bd9Sstevel@tonic-gate 	CLIENT *client;
14867c478bd9Sstevel@tonic-gate 	struct chtab *ch;
148745916cd2Sjpk 	cred_t *cr = icr;
148845916cd2Sjpk 	bool_t cred_cloned = FALSE;
14897c478bd9Sstevel@tonic-gate 	enum clnt_stat status;
14907c478bd9Sstevel@tonic-gate 	struct rpc_err rpcerr;
14917c478bd9Sstevel@tonic-gate 	struct timeval wait;
14927c478bd9Sstevel@tonic-gate 	int timeo;		/* in units of hz */
14937c478bd9Sstevel@tonic-gate #if 0 /* notyet */
14947c478bd9Sstevel@tonic-gate 	int my_rsize, my_wsize;
14957c478bd9Sstevel@tonic-gate #endif
14967c478bd9Sstevel@tonic-gate 	bool_t tryagain;
14977c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
14987c478bd9Sstevel@tonic-gate 	servinfo_t *svp;
14997c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
15007c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
15017c478bd9Sstevel@tonic-gate #ifdef DEBUG
15027c478bd9Sstevel@tonic-gate 	char *bufp;
15037c478bd9Sstevel@tonic-gate #endif
15047c478bd9Sstevel@tonic-gate 
15057c478bd9Sstevel@tonic-gate #if 0 /* notyet */
15067c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
15077106075aSmarks 	    "rfscall_start:which %d mi %p", which, mi);
15087c478bd9Sstevel@tonic-gate #endif
15097c478bd9Sstevel@tonic-gate 
1510108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
15117c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
15127c478bd9Sstevel@tonic-gate 
15137c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.calls.value.ui64++;
15147c478bd9Sstevel@tonic-gate 	mi->mi_aclreqs[which].value.ui64++;
15157c478bd9Sstevel@tonic-gate 
15167c478bd9Sstevel@tonic-gate 	rpcerr.re_status = RPC_SUCCESS;
15177c478bd9Sstevel@tonic-gate 
15187c478bd9Sstevel@tonic-gate 	if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
15197c478bd9Sstevel@tonic-gate 		rpcerr.re_status = RPC_FAILED;
15207c478bd9Sstevel@tonic-gate 		rpcerr.re_errno = EIO;
15217c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
15227c478bd9Sstevel@tonic-gate 	}
15237c478bd9Sstevel@tonic-gate 
15247c478bd9Sstevel@tonic-gate #if 0 /* notyet */
15257c478bd9Sstevel@tonic-gate 	/*
15267c478bd9Sstevel@tonic-gate 	 * Remember the transfer sizes in case
15277c478bd9Sstevel@tonic-gate 	 * nfs_feedback changes them underneath us.
15287c478bd9Sstevel@tonic-gate 	 */
15297c478bd9Sstevel@tonic-gate 	my_rsize = mi->mi_curread;
15307c478bd9Sstevel@tonic-gate 	my_wsize = mi->mi_curwrite;
15317c478bd9Sstevel@tonic-gate #endif
15327c478bd9Sstevel@tonic-gate 
15337c478bd9Sstevel@tonic-gate 	/*
15347c478bd9Sstevel@tonic-gate 	 * NFS client failover support
15357c478bd9Sstevel@tonic-gate 	 *
15367c478bd9Sstevel@tonic-gate 	 * If this rnode is not in sync with the current server (VALID_FH),
15377c478bd9Sstevel@tonic-gate 	 * we'd like to do a remap to get in sync.  We can be interrupted
15387c478bd9Sstevel@tonic-gate 	 * in failover_remap(), and if so we'll bail.  Otherwise, we'll
15397c478bd9Sstevel@tonic-gate 	 * use the best info we have to try the RPC.  Part of that is
15407c478bd9Sstevel@tonic-gate 	 * unconditionally updating the filehandle copy kept for V3.
15417c478bd9Sstevel@tonic-gate 	 *
15427c478bd9Sstevel@tonic-gate 	 * Locking: INC_READERS/DEC_READERS is a poor man's interruptible
15437c478bd9Sstevel@tonic-gate 	 * rw_enter(); we're trying to keep the current server from being
15447c478bd9Sstevel@tonic-gate 	 * changed on us until we're done with the remapping and have a
15457c478bd9Sstevel@tonic-gate 	 * matching client handle.  We don't want to send a filehandle
15467c478bd9Sstevel@tonic-gate 	 * to the wrong host.
15477c478bd9Sstevel@tonic-gate 	 */
15487c478bd9Sstevel@tonic-gate failoverretry:
15497c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
15507c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
15517c478bd9Sstevel@tonic-gate 		if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
15527c478bd9Sstevel@tonic-gate 			if (failover_wait(mi)) {
15537c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
15547c478bd9Sstevel@tonic-gate 				return (EINTR);
15557c478bd9Sstevel@tonic-gate 			}
15567c478bd9Sstevel@tonic-gate 		}
15577c478bd9Sstevel@tonic-gate 		INC_READERS(mi);
15587c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
15597c478bd9Sstevel@tonic-gate 		if (fi) {
15607c478bd9Sstevel@tonic-gate 			if (!VALID_FH(fi) &&
15617c478bd9Sstevel@tonic-gate 			    !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
15627c478bd9Sstevel@tonic-gate 				int remaperr;
15637c478bd9Sstevel@tonic-gate 
15647c478bd9Sstevel@tonic-gate 				svp = mi->mi_curr_serv;
15657c478bd9Sstevel@tonic-gate 				remaperr = failover_remap(fi);
15667c478bd9Sstevel@tonic-gate 				if (remaperr != 0) {
15677c478bd9Sstevel@tonic-gate #ifdef DEBUG
15687c478bd9Sstevel@tonic-gate 					if (remaperr != EINTR)
15697c478bd9Sstevel@tonic-gate 						nfs_cmn_err(remaperr, CE_WARN,
15707c478bd9Sstevel@tonic-gate 					    "aclcall couldn't failover: %m");
15717c478bd9Sstevel@tonic-gate #endif
15727c478bd9Sstevel@tonic-gate 					mutex_enter(&mi->mi_lock);
15737c478bd9Sstevel@tonic-gate 					DEC_READERS(mi);
15747c478bd9Sstevel@tonic-gate 					mutex_exit(&mi->mi_lock);
15757c478bd9Sstevel@tonic-gate 
15767c478bd9Sstevel@tonic-gate 					/*
15777c478bd9Sstevel@tonic-gate 					 * If failover_remap returns ETIMEDOUT
15787c478bd9Sstevel@tonic-gate 					 * and the filesystem is hard mounted
15797c478bd9Sstevel@tonic-gate 					 * we have to retry the call with a new
15807c478bd9Sstevel@tonic-gate 					 * server.
15817c478bd9Sstevel@tonic-gate 					 */
15827c478bd9Sstevel@tonic-gate 					if ((mi->mi_flags & MI_HARD) &&
15837c478bd9Sstevel@tonic-gate 					    IS_RECOVERABLE_ERROR(remaperr)) {
15847c478bd9Sstevel@tonic-gate 						if (svp == mi->mi_curr_serv)
15857c478bd9Sstevel@tonic-gate 							failover_newserver(mi);
15867c478bd9Sstevel@tonic-gate 						rpcerr.re_status = RPC_SUCCESS;
15877c478bd9Sstevel@tonic-gate 						goto failoverretry;
15887c478bd9Sstevel@tonic-gate 					}
15897c478bd9Sstevel@tonic-gate 					return (remaperr);
15907c478bd9Sstevel@tonic-gate 				}
15917c478bd9Sstevel@tonic-gate 			}
15927c478bd9Sstevel@tonic-gate 			if (fi->fhp && fi->copyproc)
15937c478bd9Sstevel@tonic-gate 				(*fi->copyproc)(fi->fhp, fi->vp);
15947c478bd9Sstevel@tonic-gate 		}
15957c478bd9Sstevel@tonic-gate 	}
15967c478bd9Sstevel@tonic-gate 
159745916cd2Sjpk 	/* For TSOL, use a new cred which has net_mac_aware flag */
159845916cd2Sjpk 	if (!cred_cloned && is_system_labeled()) {
159945916cd2Sjpk 		cred_cloned = TRUE;
160045916cd2Sjpk 		cr = crdup(icr);
160145916cd2Sjpk 		(void) setpflags(NET_MAC_AWARE, 1, cr);
160245916cd2Sjpk 	}
160345916cd2Sjpk 
16047c478bd9Sstevel@tonic-gate 	/*
16057c478bd9Sstevel@tonic-gate 	 * acl_clget() calls clnt_tli_kinit() which clears the xid, so we
16067c478bd9Sstevel@tonic-gate 	 * are guaranteed to reprocess the retry as a new request.
16077c478bd9Sstevel@tonic-gate 	 */
16087c478bd9Sstevel@tonic-gate 	svp = mi->mi_curr_serv;
16097c478bd9Sstevel@tonic-gate 	rpcerr.re_errno = acl_clget(mi, svp, cr, &client, &ch, nfscl);
16107c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
16117c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
16127c478bd9Sstevel@tonic-gate 		DEC_READERS(mi);
16137c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
16147c478bd9Sstevel@tonic-gate 
16157c478bd9Sstevel@tonic-gate 		if ((rpcerr.re_errno == ETIMEDOUT ||
16167106075aSmarks 		    rpcerr.re_errno == ECONNRESET) &&
16177106075aSmarks 		    failover_safe(fi)) {
16187c478bd9Sstevel@tonic-gate 			if (svp == mi->mi_curr_serv)
16197c478bd9Sstevel@tonic-gate 				failover_newserver(mi);
16207c478bd9Sstevel@tonic-gate 			goto failoverretry;
16217c478bd9Sstevel@tonic-gate 		}
16227c478bd9Sstevel@tonic-gate 	}
162345916cd2Sjpk 	if (rpcerr.re_errno != 0) {
162445916cd2Sjpk 		if (cred_cloned)
162545916cd2Sjpk 			crfree(cr);
16267c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
162745916cd2Sjpk 	}
16287c478bd9Sstevel@tonic-gate 
16297c478bd9Sstevel@tonic-gate 	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
16307c478bd9Sstevel@tonic-gate 	    svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
16317c478bd9Sstevel@tonic-gate 		timeo = (mi->mi_timeo * hz) / 10;
16327c478bd9Sstevel@tonic-gate 	} else {
16337c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
16347c478bd9Sstevel@tonic-gate 		timeo = CLNT_SETTIMERS(client,
16357c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[mi->mi_acl_timer_type[which]]),
16367c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[NFS_CALLTYPES]),
16377c478bd9Sstevel@tonic-gate 		    (minimum_timeo[mi->mi_acl_call_type[which]]*hz)>>3,
16387c478bd9Sstevel@tonic-gate 		    (void (*)()) 0, (caddr_t)mi, 0);
16397c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
16407c478bd9Sstevel@tonic-gate 	}
16417c478bd9Sstevel@tonic-gate 
16427c478bd9Sstevel@tonic-gate 	/*
16437c478bd9Sstevel@tonic-gate 	 * If hard mounted fs, retry call forever unless hard error occurs.
16447c478bd9Sstevel@tonic-gate 	 */
16457c478bd9Sstevel@tonic-gate 	do {
16467c478bd9Sstevel@tonic-gate 		tryagain = FALSE;
16477c478bd9Sstevel@tonic-gate 
16487c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
16497c478bd9Sstevel@tonic-gate 			status = RPC_FAILED;
16507c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_FAILED;
16517c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EIO;
16527c478bd9Sstevel@tonic-gate 			break;
16537c478bd9Sstevel@tonic-gate 		}
16547c478bd9Sstevel@tonic-gate 
16557c478bd9Sstevel@tonic-gate 		TICK_TO_TIMEVAL(timeo, &wait);
16567c478bd9Sstevel@tonic-gate 
16577c478bd9Sstevel@tonic-gate 		/*
16587c478bd9Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
16597c478bd9Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
16607c478bd9Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
16617c478bd9Sstevel@tonic-gate 		 */
16627c478bd9Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
16637c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
16647c478bd9Sstevel@tonic-gate 			client->cl_nosignal = TRUE;
16657c478bd9Sstevel@tonic-gate 
16667c478bd9Sstevel@tonic-gate 		/*
16677c478bd9Sstevel@tonic-gate 		 * If there is a current signal, then don't bother
16687c478bd9Sstevel@tonic-gate 		 * even trying to send out the request because we
16697c478bd9Sstevel@tonic-gate 		 * won't be able to block waiting for the response.
16707c478bd9Sstevel@tonic-gate 		 * Simply assume RPC_INTR and get on with it.
16717c478bd9Sstevel@tonic-gate 		 */
16727c478bd9Sstevel@tonic-gate 		if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING))
16737c478bd9Sstevel@tonic-gate 			status = RPC_INTR;
16747c478bd9Sstevel@tonic-gate 		else {
16757c478bd9Sstevel@tonic-gate 			status = CLNT_CALL(client, which, xdrargs, argsp,
16767c478bd9Sstevel@tonic-gate 			    xdrres, resp, wait);
16777c478bd9Sstevel@tonic-gate 		}
16787c478bd9Sstevel@tonic-gate 
16797c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_INT))
16807c478bd9Sstevel@tonic-gate 			client->cl_nosignal = FALSE;
16817c478bd9Sstevel@tonic-gate 		/*
16827c478bd9Sstevel@tonic-gate 		 * restore original signal mask
16837c478bd9Sstevel@tonic-gate 		 */
16847c478bd9Sstevel@tonic-gate 		sigunintr(&smask);
16857c478bd9Sstevel@tonic-gate 
16867c478bd9Sstevel@tonic-gate 		switch (status) {
16877c478bd9Sstevel@tonic-gate 		case RPC_SUCCESS:
16887c478bd9Sstevel@tonic-gate #if 0 /* notyet */
16897c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
16907c478bd9Sstevel@tonic-gate 			    mi->mi_timer_type[which] != 0 &&
16917c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
16927c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize))
16937c478bd9Sstevel@tonic-gate 				(void) nfs_feedback(FEEDBACK_OK, which, mi);
16947c478bd9Sstevel@tonic-gate #endif
16957c478bd9Sstevel@tonic-gate 			break;
16967c478bd9Sstevel@tonic-gate 
16977c478bd9Sstevel@tonic-gate 		/*
16987c478bd9Sstevel@tonic-gate 		 * Unfortunately, there are servers in the world which
16997c478bd9Sstevel@tonic-gate 		 * are not coded correctly.  They are not prepared to
17007c478bd9Sstevel@tonic-gate 		 * handle RPC requests to the NFS port which are not
17017c478bd9Sstevel@tonic-gate 		 * NFS requests.  Thus, they may try to process the
17027c478bd9Sstevel@tonic-gate 		 * NFS_ACL request as if it were an NFS request.  This
17037c478bd9Sstevel@tonic-gate 		 * does not work.  Generally, an error will be generated
17047c478bd9Sstevel@tonic-gate 		 * on the client because it will not be able to decode
17057c478bd9Sstevel@tonic-gate 		 * the response from the server.  However, it seems
17067c478bd9Sstevel@tonic-gate 		 * possible that the server may not be able to decode
17077c478bd9Sstevel@tonic-gate 		 * the arguments.  Thus, the criteria for deciding
17087c478bd9Sstevel@tonic-gate 		 * whether the server supports NFS_ACL or not is whether
17097c478bd9Sstevel@tonic-gate 		 * the following RPC errors are returned from CLNT_CALL.
17107c478bd9Sstevel@tonic-gate 		 */
17117c478bd9Sstevel@tonic-gate 		case RPC_CANTDECODERES:
17127c478bd9Sstevel@tonic-gate 		case RPC_PROGUNAVAIL:
17137c478bd9Sstevel@tonic-gate 		case RPC_CANTDECODEARGS:
17147c478bd9Sstevel@tonic-gate 		case RPC_PROGVERSMISMATCH:
17157c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
17167c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~(MI_ACL | MI_EXTATTR);
17177c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
17187c478bd9Sstevel@tonic-gate 			break;
17197c478bd9Sstevel@tonic-gate 
17207c478bd9Sstevel@tonic-gate 		/*
17217c478bd9Sstevel@tonic-gate 		 * If the server supports NFS_ACL but not the new ops
17227c478bd9Sstevel@tonic-gate 		 * for extended attributes, make sure we don't retry.
17237c478bd9Sstevel@tonic-gate 		 */
17247c478bd9Sstevel@tonic-gate 		case RPC_PROCUNAVAIL:
17257c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
17267c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~MI_EXTATTR;
17277c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
17287c478bd9Sstevel@tonic-gate 			break;
17297c478bd9Sstevel@tonic-gate 
17307c478bd9Sstevel@tonic-gate 		case RPC_INTR:
17317c478bd9Sstevel@tonic-gate 			/*
17327c478bd9Sstevel@tonic-gate 			 * There is no way to recover from this error,
17337c478bd9Sstevel@tonic-gate 			 * even if mount option nointr is specified.
17347c478bd9Sstevel@tonic-gate 			 * SIGKILL, for example, cannot be blocked.
17357c478bd9Sstevel@tonic-gate 			 */
17367c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_INTR;
17377c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EINTR;
17387c478bd9Sstevel@tonic-gate 			break;
17397c478bd9Sstevel@tonic-gate 
17407c478bd9Sstevel@tonic-gate 		case RPC_UDERROR:
17417c478bd9Sstevel@tonic-gate 			/*
17427c478bd9Sstevel@tonic-gate 			 * If the NFS server is local (vold) and
17437c478bd9Sstevel@tonic-gate 			 * it goes away then we get RPC_UDERROR.
17447c478bd9Sstevel@tonic-gate 			 * This is a retryable error, so we would
17457c478bd9Sstevel@tonic-gate 			 * loop, so check to see if the specific
17467c478bd9Sstevel@tonic-gate 			 * error was ECONNRESET, indicating that
17477c478bd9Sstevel@tonic-gate 			 * target did not exist at all.  If so,
17487c478bd9Sstevel@tonic-gate 			 * return with RPC_PROGUNAVAIL and
17497c478bd9Sstevel@tonic-gate 			 * ECONNRESET to indicate why.
17507c478bd9Sstevel@tonic-gate 			 */
17517c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
17527c478bd9Sstevel@tonic-gate 			if (rpcerr.re_errno == ECONNRESET) {
17537c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_PROGUNAVAIL;
17547c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = ECONNRESET;
17557c478bd9Sstevel@tonic-gate 				break;
17567c478bd9Sstevel@tonic-gate 			}
17577c478bd9Sstevel@tonic-gate 			/*FALLTHROUGH*/
17587c478bd9Sstevel@tonic-gate 
17597c478bd9Sstevel@tonic-gate 		default:		/* probably RPC_TIMEDOUT */
17607c478bd9Sstevel@tonic-gate 			if (IS_UNRECOVERABLE_RPC(status))
17617c478bd9Sstevel@tonic-gate 				break;
17627c478bd9Sstevel@tonic-gate 
17637c478bd9Sstevel@tonic-gate 			/*
17647c478bd9Sstevel@tonic-gate 			 * increment server not responding count
17657c478bd9Sstevel@tonic-gate 			 */
17667c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
17677c478bd9Sstevel@tonic-gate 			mi->mi_noresponse++;
17687c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
17697c478bd9Sstevel@tonic-gate #ifdef DEBUG
17707c478bd9Sstevel@tonic-gate 			nfscl->nfscl_stat.noresponse.value.ui64++;
17717c478bd9Sstevel@tonic-gate #endif
17727c478bd9Sstevel@tonic-gate 
17737c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_HARD)) {
17747c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_SEMISOFT) ||
17757c478bd9Sstevel@tonic-gate 				    (mi->mi_acl_ss_call_type[which] == 0))
17767c478bd9Sstevel@tonic-gate 					break;
17777c478bd9Sstevel@tonic-gate 			}
17787c478bd9Sstevel@tonic-gate 
17797c478bd9Sstevel@tonic-gate 			/*
17807c478bd9Sstevel@tonic-gate 			 * The call is in progress (over COTS).
17817c478bd9Sstevel@tonic-gate 			 * Try the CLNT_CALL again, but don't
17827c478bd9Sstevel@tonic-gate 			 * print a noisy error message.
17837c478bd9Sstevel@tonic-gate 			 */
17847c478bd9Sstevel@tonic-gate 			if (status == RPC_INPROGRESS) {
17857c478bd9Sstevel@tonic-gate 				tryagain = TRUE;
17867c478bd9Sstevel@tonic-gate 				break;
17877c478bd9Sstevel@tonic-gate 			}
17887c478bd9Sstevel@tonic-gate 
17897c478bd9Sstevel@tonic-gate 			if (flags & RFSCALL_SOFT)
17907c478bd9Sstevel@tonic-gate 				break;
17917c478bd9Sstevel@tonic-gate 
17927c478bd9Sstevel@tonic-gate 			/*
17937c478bd9Sstevel@tonic-gate 			 * On zone shutdown, just move on.
17947c478bd9Sstevel@tonic-gate 			 */
17957c478bd9Sstevel@tonic-gate 			if (zone_status_get(curproc->p_zone) >=
17967c478bd9Sstevel@tonic-gate 			    ZONE_IS_SHUTTING_DOWN) {
17977c478bd9Sstevel@tonic-gate 				rpcerr.re_status = RPC_FAILED;
17987c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
17997c478bd9Sstevel@tonic-gate 				break;
18007c478bd9Sstevel@tonic-gate 			}
18017c478bd9Sstevel@tonic-gate 
18027c478bd9Sstevel@tonic-gate 			/*
18037c478bd9Sstevel@tonic-gate 			 * NFS client failover support
18047c478bd9Sstevel@tonic-gate 			 *
18057c478bd9Sstevel@tonic-gate 			 * If the current server just failed us, we'll
18067c478bd9Sstevel@tonic-gate 			 * start the process of finding a new server.
18077c478bd9Sstevel@tonic-gate 			 * After that, we can just retry.
18087c478bd9Sstevel@tonic-gate 			 */
18097c478bd9Sstevel@tonic-gate 			if (FAILOVER_MOUNT(mi) && failover_safe(fi)) {
18107c478bd9Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv)
18117c478bd9Sstevel@tonic-gate 					failover_newserver(mi);
18127c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
18137c478bd9Sstevel@tonic-gate 				goto failoverretry;
18147c478bd9Sstevel@tonic-gate 			}
18157c478bd9Sstevel@tonic-gate 
18167c478bd9Sstevel@tonic-gate 			tryagain = TRUE;
18177c478bd9Sstevel@tonic-gate 			timeo = backoff(timeo);
18187c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
18197c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_PRINTED)) {
18207c478bd9Sstevel@tonic-gate 				mi->mi_flags |= MI_PRINTED;
18217c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
18227c478bd9Sstevel@tonic-gate #ifdef DEBUG
18237c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
18247c478bd9Sstevel@tonic-gate 			"NFS_ACL%d server %s not responding still trying\n",
18257c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
18267c478bd9Sstevel@tonic-gate #else
18277c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
18287c478bd9Sstevel@tonic-gate 			    "NFS server %s not responding still trying\n",
18297c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
18307c478bd9Sstevel@tonic-gate #endif
18317c478bd9Sstevel@tonic-gate 			} else
18327c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
18339acbbeafSnn 			if (*douprintf && nfs_has_ctty()) {
18347c478bd9Sstevel@tonic-gate 				*douprintf = 0;
18357c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
18367c478bd9Sstevel@tonic-gate #ifdef DEBUG
18377c478bd9Sstevel@tonic-gate 					uprintf(
18387c478bd9Sstevel@tonic-gate 			"NFS_ACL%d server %s not responding still trying\n",
18397c478bd9Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
18407c478bd9Sstevel@tonic-gate #else
18417c478bd9Sstevel@tonic-gate 					uprintf(
18427c478bd9Sstevel@tonic-gate 			    "NFS server %s not responding still trying\n",
18437c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
18447c478bd9Sstevel@tonic-gate #endif
18457c478bd9Sstevel@tonic-gate 			}
18467c478bd9Sstevel@tonic-gate 
18477c478bd9Sstevel@tonic-gate #if 0 /* notyet */
18487c478bd9Sstevel@tonic-gate 			/*
18497c478bd9Sstevel@tonic-gate 			 * If doing dynamic adjustment of transfer
18507c478bd9Sstevel@tonic-gate 			 * size and if it's a read or write call
18517c478bd9Sstevel@tonic-gate 			 * and if the transfer size changed while
18527c478bd9Sstevel@tonic-gate 			 * retransmitting or if the feedback routine
18537c478bd9Sstevel@tonic-gate 			 * changed the transfer size,
18547c478bd9Sstevel@tonic-gate 			 * then exit rfscall so that the transfer
18557c478bd9Sstevel@tonic-gate 			 * size can be adjusted at the vnops level.
18567c478bd9Sstevel@tonic-gate 			 */
18577c478bd9Sstevel@tonic-gate 			if ((mi->mi_flags & MI_DYNAMIC) &&
18587c478bd9Sstevel@tonic-gate 			    mi->mi_acl_timer_type[which] != 0 &&
18597c478bd9Sstevel@tonic-gate 			    (mi->mi_curread != my_rsize ||
18607c478bd9Sstevel@tonic-gate 			    mi->mi_curwrite != my_wsize ||
18617c478bd9Sstevel@tonic-gate 			    nfs_feedback(FEEDBACK_REXMIT1, which, mi))) {
18627c478bd9Sstevel@tonic-gate 				/*
18637c478bd9Sstevel@tonic-gate 				 * On read or write calls, return
18647c478bd9Sstevel@tonic-gate 				 * back to the vnode ops level if
18657c478bd9Sstevel@tonic-gate 				 * the transfer size changed.
18667c478bd9Sstevel@tonic-gate 				 */
18677c478bd9Sstevel@tonic-gate 				clfree_impl(client, ch, nfscl);
186845916cd2Sjpk 				if (cred_cloned)
186945916cd2Sjpk 					crfree(cr);
18707c478bd9Sstevel@tonic-gate 				return (ENFS_TRYAGAIN);
18717c478bd9Sstevel@tonic-gate 			}
18727c478bd9Sstevel@tonic-gate #endif
18737c478bd9Sstevel@tonic-gate 		}
18747c478bd9Sstevel@tonic-gate 	} while (tryagain);
18757c478bd9Sstevel@tonic-gate 
18767c478bd9Sstevel@tonic-gate 	if (status != RPC_SUCCESS) {
18777c478bd9Sstevel@tonic-gate 		/*
18787c478bd9Sstevel@tonic-gate 		 * Let soft mounts use the timed out message.
18797c478bd9Sstevel@tonic-gate 		 */
18807c478bd9Sstevel@tonic-gate 		if (status == RPC_INPROGRESS)
18817c478bd9Sstevel@tonic-gate 			status = RPC_TIMEDOUT;
18827c478bd9Sstevel@tonic-gate 		nfscl->nfscl_stat.badcalls.value.ui64++;
18837c478bd9Sstevel@tonic-gate 		if (status == RPC_CANTDECODERES ||
18847c478bd9Sstevel@tonic-gate 		    status == RPC_PROGUNAVAIL ||
18857c478bd9Sstevel@tonic-gate 		    status == RPC_PROCUNAVAIL ||
18867c478bd9Sstevel@tonic-gate 		    status == RPC_CANTDECODEARGS ||
18877c478bd9Sstevel@tonic-gate 		    status == RPC_PROGVERSMISMATCH)
18887c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
18897c478bd9Sstevel@tonic-gate 		else if (status != RPC_INTR) {
18907c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
18917c478bd9Sstevel@tonic-gate 			mi->mi_flags |= MI_DOWN;
18927c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
18937c478bd9Sstevel@tonic-gate 			CLNT_GETERR(client, &rpcerr);
18947c478bd9Sstevel@tonic-gate #ifdef DEBUG
18957c478bd9Sstevel@tonic-gate 			bufp = clnt_sperror(client, svp->sv_hostname);
18967c478bd9Sstevel@tonic-gate 			zprintf(zoneid, "NFS_ACL%d %s failed for %s\n",
18977c478bd9Sstevel@tonic-gate 			    mi->mi_vers, mi->mi_aclnames[which], bufp);
18989acbbeafSnn 			if (nfs_has_ctty()) {
18997c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT)) {
19007c478bd9Sstevel@tonic-gate 					uprintf("NFS_ACL%d %s failed for %s\n",
19017c478bd9Sstevel@tonic-gate 					    mi->mi_vers, mi->mi_aclnames[which],
19027c478bd9Sstevel@tonic-gate 					    bufp);
19037c478bd9Sstevel@tonic-gate 				}
19047c478bd9Sstevel@tonic-gate 			}
19057c478bd9Sstevel@tonic-gate 			kmem_free(bufp, MAXPATHLEN);
19067c478bd9Sstevel@tonic-gate #else
19077c478bd9Sstevel@tonic-gate 			zprintf(zoneid,
19087c478bd9Sstevel@tonic-gate 			    "NFS %s failed for server %s: error %d (%s)\n",
19097c478bd9Sstevel@tonic-gate 			    mi->mi_aclnames[which], svp->sv_hostname,
19107c478bd9Sstevel@tonic-gate 			    status, clnt_sperrno(status));
19119acbbeafSnn 			if (nfs_has_ctty()) {
19127c478bd9Sstevel@tonic-gate 				if (!(mi->mi_flags & MI_NOPRINT))
19137c478bd9Sstevel@tonic-gate 					uprintf(
19147c478bd9Sstevel@tonic-gate 				"NFS %s failed for server %s: error %d (%s)\n",
19157c478bd9Sstevel@tonic-gate 					    mi->mi_aclnames[which],
19167c478bd9Sstevel@tonic-gate 					    svp->sv_hostname, status,
19177c478bd9Sstevel@tonic-gate 					    clnt_sperrno(status));
19187c478bd9Sstevel@tonic-gate 			}
19197c478bd9Sstevel@tonic-gate #endif
19207c478bd9Sstevel@tonic-gate 			/*
19217c478bd9Sstevel@tonic-gate 			 * when CLNT_CALL() fails with RPC_AUTHERROR,
19227c478bd9Sstevel@tonic-gate 			 * re_errno is set appropriately depending on
19237c478bd9Sstevel@tonic-gate 			 * the authentication error
19247c478bd9Sstevel@tonic-gate 			 */
19257c478bd9Sstevel@tonic-gate 			if (status == RPC_VERSMISMATCH ||
19267c478bd9Sstevel@tonic-gate 			    status == RPC_PROGVERSMISMATCH)
19277c478bd9Sstevel@tonic-gate 				rpcerr.re_errno = EIO;
19287c478bd9Sstevel@tonic-gate 		}
19297c478bd9Sstevel@tonic-gate 	} else {
19307c478bd9Sstevel@tonic-gate 		/*
19317c478bd9Sstevel@tonic-gate 		 * Test the value of mi_down and mi_printed without
19327c478bd9Sstevel@tonic-gate 		 * holding the mi_lock mutex.  If they are both zero,
19337c478bd9Sstevel@tonic-gate 		 * then it is okay to skip the down and printed
19347c478bd9Sstevel@tonic-gate 		 * processing.  This saves on a mutex_enter and
19357c478bd9Sstevel@tonic-gate 		 * mutex_exit pair for a normal, successful RPC.
19367c478bd9Sstevel@tonic-gate 		 * This was just complete overhead.
19377c478bd9Sstevel@tonic-gate 		 */
19387c478bd9Sstevel@tonic-gate 		if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) {
19397c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
19407c478bd9Sstevel@tonic-gate 			mi->mi_flags &= ~MI_DOWN;
19417c478bd9Sstevel@tonic-gate 			if (mi->mi_flags & MI_PRINTED) {
19427c478bd9Sstevel@tonic-gate 				mi->mi_flags &= ~MI_PRINTED;
19437c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
19447c478bd9Sstevel@tonic-gate #ifdef DEBUG
19457c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS_ACL%d server %s ok\n",
19467c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
19477c478bd9Sstevel@tonic-gate #else
19487c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS server %s ok\n",
19497c478bd9Sstevel@tonic-gate 				    svp->sv_hostname);
19507c478bd9Sstevel@tonic-gate #endif
19517c478bd9Sstevel@tonic-gate 			} else
19527c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
19537c478bd9Sstevel@tonic-gate 		}
19547c478bd9Sstevel@tonic-gate 
19557c478bd9Sstevel@tonic-gate 		if (*douprintf == 0) {
19567c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_NOPRINT))
19577c478bd9Sstevel@tonic-gate #ifdef DEBUG
19587c478bd9Sstevel@tonic-gate 				uprintf("NFS_ACL%d server %s ok\n",
19597c478bd9Sstevel@tonic-gate 				    mi->mi_vers, svp->sv_hostname);
19607c478bd9Sstevel@tonic-gate #else
19617c478bd9Sstevel@tonic-gate 				uprintf("NFS server %s ok\n", svp->sv_hostname);
19627c478bd9Sstevel@tonic-gate #endif
19637c478bd9Sstevel@tonic-gate 			*douprintf = 1;
19647c478bd9Sstevel@tonic-gate 		}
19657c478bd9Sstevel@tonic-gate 	}
19667c478bd9Sstevel@tonic-gate 
19677c478bd9Sstevel@tonic-gate 	clfree_impl(client, ch, nfscl);
196845916cd2Sjpk 	if (cred_cloned)
196945916cd2Sjpk 		crfree(cr);
19707c478bd9Sstevel@tonic-gate 
19717c478bd9Sstevel@tonic-gate 	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);
19727c478bd9Sstevel@tonic-gate 
19737c478bd9Sstevel@tonic-gate #if 0 /* notyet */
19747c478bd9Sstevel@tonic-gate 	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
19757c478bd9Sstevel@tonic-gate 	    rpcerr.re_errno);
19767c478bd9Sstevel@tonic-gate #endif
19777c478bd9Sstevel@tonic-gate 
19787c478bd9Sstevel@tonic-gate 	return (rpcerr.re_errno);
19797c478bd9Sstevel@tonic-gate }
19807c478bd9Sstevel@tonic-gate 
19817c478bd9Sstevel@tonic-gate int
19827c478bd9Sstevel@tonic-gate vattr_to_sattr(struct vattr *vap, struct nfssattr *sa)
19837c478bd9Sstevel@tonic-gate {
19847c478bd9Sstevel@tonic-gate 	uint_t mask = vap->va_mask;
19857c478bd9Sstevel@tonic-gate 
19867c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MODE))
19877c478bd9Sstevel@tonic-gate 		sa->sa_mode = (uint32_t)-1;
19887c478bd9Sstevel@tonic-gate 	else
19897c478bd9Sstevel@tonic-gate 		sa->sa_mode = vap->va_mode;
19907c478bd9Sstevel@tonic-gate 	if (!(mask & AT_UID))
19917c478bd9Sstevel@tonic-gate 		sa->sa_uid = (uint32_t)-1;
19927c478bd9Sstevel@tonic-gate 	else
19937c478bd9Sstevel@tonic-gate 		sa->sa_uid = (uint32_t)vap->va_uid;
19947c478bd9Sstevel@tonic-gate 	if (!(mask & AT_GID))
19957c478bd9Sstevel@tonic-gate 		sa->sa_gid = (uint32_t)-1;
19967c478bd9Sstevel@tonic-gate 	else
19977c478bd9Sstevel@tonic-gate 		sa->sa_gid = (uint32_t)vap->va_gid;
19987c478bd9Sstevel@tonic-gate 	if (!(mask & AT_SIZE))
19997c478bd9Sstevel@tonic-gate 		sa->sa_size = (uint32_t)-1;
20007c478bd9Sstevel@tonic-gate 	else
20017c478bd9Sstevel@tonic-gate 		sa->sa_size = (uint32_t)vap->va_size;
20027c478bd9Sstevel@tonic-gate 	if (!(mask & AT_ATIME))
20037c478bd9Sstevel@tonic-gate 		sa->sa_atime.tv_sec = sa->sa_atime.tv_usec = (int32_t)-1;
20047c478bd9Sstevel@tonic-gate 	else {
20057c478bd9Sstevel@tonic-gate 		/* check time validity */
20067c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
20077c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
20087c478bd9Sstevel@tonic-gate 		}
20097c478bd9Sstevel@tonic-gate 		sa->sa_atime.tv_sec = vap->va_atime.tv_sec;
20107c478bd9Sstevel@tonic-gate 		sa->sa_atime.tv_usec = vap->va_atime.tv_nsec / 1000;
20117c478bd9Sstevel@tonic-gate 	}
20127c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MTIME))
20137c478bd9Sstevel@tonic-gate 		sa->sa_mtime.tv_sec = sa->sa_mtime.tv_usec = (int32_t)-1;
20147c478bd9Sstevel@tonic-gate 	else {
20157c478bd9Sstevel@tonic-gate 		/* check time validity */
20167c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
20177c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
20187c478bd9Sstevel@tonic-gate 		}
20197c478bd9Sstevel@tonic-gate 		sa->sa_mtime.tv_sec = vap->va_mtime.tv_sec;
20207c478bd9Sstevel@tonic-gate 		sa->sa_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000;
20217c478bd9Sstevel@tonic-gate 	}
20227c478bd9Sstevel@tonic-gate 	return (0);
20237c478bd9Sstevel@tonic-gate }
20247c478bd9Sstevel@tonic-gate 
20257c478bd9Sstevel@tonic-gate int
20267c478bd9Sstevel@tonic-gate vattr_to_sattr3(struct vattr *vap, sattr3 *sa)
20277c478bd9Sstevel@tonic-gate {
20287c478bd9Sstevel@tonic-gate 	uint_t mask = vap->va_mask;
20297c478bd9Sstevel@tonic-gate 
20307c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MODE))
20317c478bd9Sstevel@tonic-gate 		sa->mode.set_it = FALSE;
20327c478bd9Sstevel@tonic-gate 	else {
20337c478bd9Sstevel@tonic-gate 		sa->mode.set_it = TRUE;
20347c478bd9Sstevel@tonic-gate 		sa->mode.mode = (mode3)vap->va_mode;
20357c478bd9Sstevel@tonic-gate 	}
20367c478bd9Sstevel@tonic-gate 	if (!(mask & AT_UID))
20377c478bd9Sstevel@tonic-gate 		sa->uid.set_it = FALSE;
20387c478bd9Sstevel@tonic-gate 	else {
20397c478bd9Sstevel@tonic-gate 		sa->uid.set_it = TRUE;
20407c478bd9Sstevel@tonic-gate 		sa->uid.uid = (uid3)vap->va_uid;
20417c478bd9Sstevel@tonic-gate 	}
20427c478bd9Sstevel@tonic-gate 	if (!(mask & AT_GID))
20437c478bd9Sstevel@tonic-gate 		sa->gid.set_it = FALSE;
20447c478bd9Sstevel@tonic-gate 	else {
20457c478bd9Sstevel@tonic-gate 		sa->gid.set_it = TRUE;
20467c478bd9Sstevel@tonic-gate 		sa->gid.gid = (gid3)vap->va_gid;
20477c478bd9Sstevel@tonic-gate 	}
20487c478bd9Sstevel@tonic-gate 	if (!(mask & AT_SIZE))
20497c478bd9Sstevel@tonic-gate 		sa->size.set_it = FALSE;
20507c478bd9Sstevel@tonic-gate 	else {
20517c478bd9Sstevel@tonic-gate 		sa->size.set_it = TRUE;
20527c478bd9Sstevel@tonic-gate 		sa->size.size = (size3)vap->va_size;
20537c478bd9Sstevel@tonic-gate 	}
20547c478bd9Sstevel@tonic-gate 	if (!(mask & AT_ATIME))
20557c478bd9Sstevel@tonic-gate 		sa->atime.set_it = DONT_CHANGE;
20567c478bd9Sstevel@tonic-gate 	else {
20577c478bd9Sstevel@tonic-gate 		/* check time validity */
20587c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) {
20597c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
20607c478bd9Sstevel@tonic-gate 		}
20617c478bd9Sstevel@tonic-gate 		sa->atime.set_it = SET_TO_CLIENT_TIME;
20627c478bd9Sstevel@tonic-gate 		sa->atime.atime.seconds = (uint32)vap->va_atime.tv_sec;
20637c478bd9Sstevel@tonic-gate 		sa->atime.atime.nseconds = (uint32)vap->va_atime.tv_nsec;
20647c478bd9Sstevel@tonic-gate 	}
20657c478bd9Sstevel@tonic-gate 	if (!(mask & AT_MTIME))
20667c478bd9Sstevel@tonic-gate 		sa->mtime.set_it = DONT_CHANGE;
20677c478bd9Sstevel@tonic-gate 	else {
20687c478bd9Sstevel@tonic-gate 		/* check time validity */
20697c478bd9Sstevel@tonic-gate 		if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) {
20707c478bd9Sstevel@tonic-gate 			return (EOVERFLOW);
20717c478bd9Sstevel@tonic-gate 		}
20727c478bd9Sstevel@tonic-gate 		sa->mtime.set_it = SET_TO_CLIENT_TIME;
20737c478bd9Sstevel@tonic-gate 		sa->mtime.mtime.seconds = (uint32)vap->va_mtime.tv_sec;
20747c478bd9Sstevel@tonic-gate 		sa->mtime.mtime.nseconds = (uint32)vap->va_mtime.tv_nsec;
20757c478bd9Sstevel@tonic-gate 	}
20767c478bd9Sstevel@tonic-gate 	return (0);
20777c478bd9Sstevel@tonic-gate }
20787c478bd9Sstevel@tonic-gate 
20797c478bd9Sstevel@tonic-gate void
20807c478bd9Sstevel@tonic-gate setdiropargs(struct nfsdiropargs *da, char *nm, vnode_t *dvp)
20817c478bd9Sstevel@tonic-gate {
20827c478bd9Sstevel@tonic-gate 
20837c478bd9Sstevel@tonic-gate 	da->da_fhandle = VTOFH(dvp);
20847c478bd9Sstevel@tonic-gate 	da->da_name = nm;
20857c478bd9Sstevel@tonic-gate 	da->da_flags = 0;
20867c478bd9Sstevel@tonic-gate }
20877c478bd9Sstevel@tonic-gate 
20887c478bd9Sstevel@tonic-gate void
20897c478bd9Sstevel@tonic-gate setdiropargs3(diropargs3 *da, char *nm, vnode_t *dvp)
20907c478bd9Sstevel@tonic-gate {
20917c478bd9Sstevel@tonic-gate 
20927c478bd9Sstevel@tonic-gate 	da->dirp = VTOFH3(dvp);
20937c478bd9Sstevel@tonic-gate 	da->name = nm;
20947c478bd9Sstevel@tonic-gate }
20957c478bd9Sstevel@tonic-gate 
20967c478bd9Sstevel@tonic-gate int
20977c478bd9Sstevel@tonic-gate setdirgid(vnode_t *dvp, gid_t *gidp, cred_t *cr)
20987c478bd9Sstevel@tonic-gate {
20997c478bd9Sstevel@tonic-gate 	int error;
21007c478bd9Sstevel@tonic-gate 	rnode_t *rp;
21017c478bd9Sstevel@tonic-gate 	struct vattr va;
21027c478bd9Sstevel@tonic-gate 
21037c478bd9Sstevel@tonic-gate 	va.va_mask = AT_MODE | AT_GID;
2104da6c28aaSamw 	error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
21057c478bd9Sstevel@tonic-gate 	if (error)
21067c478bd9Sstevel@tonic-gate 		return (error);
21077c478bd9Sstevel@tonic-gate 
21087c478bd9Sstevel@tonic-gate 	/*
21097c478bd9Sstevel@tonic-gate 	 * To determine the expected group-id of the created file:
21107c478bd9Sstevel@tonic-gate 	 *  1)	If the filesystem was not mounted with the Old-BSD-compatible
21117c478bd9Sstevel@tonic-gate 	 *	GRPID option, and the directory's set-gid bit is clear,
21127c478bd9Sstevel@tonic-gate 	 *	then use the process's gid.
21137c478bd9Sstevel@tonic-gate 	 *  2)	Otherwise, set the group-id to the gid of the parent directory.
21147c478bd9Sstevel@tonic-gate 	 */
21157c478bd9Sstevel@tonic-gate 	rp = VTOR(dvp);
21167c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
21177c478bd9Sstevel@tonic-gate 	if (!(VTOMI(dvp)->mi_flags & MI_GRPID) && !(va.va_mode & VSGID))
21187c478bd9Sstevel@tonic-gate 		*gidp = crgetgid(cr);
21197c478bd9Sstevel@tonic-gate 	else
21207c478bd9Sstevel@tonic-gate 		*gidp = va.va_gid;
21217c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
21227c478bd9Sstevel@tonic-gate 	return (0);
21237c478bd9Sstevel@tonic-gate }
21247c478bd9Sstevel@tonic-gate 
21257c478bd9Sstevel@tonic-gate int
21267c478bd9Sstevel@tonic-gate setdirmode(vnode_t *dvp, mode_t *omp, cred_t *cr)
21277c478bd9Sstevel@tonic-gate {
21287c478bd9Sstevel@tonic-gate 	int error;
21297c478bd9Sstevel@tonic-gate 	struct vattr va;
21307c478bd9Sstevel@tonic-gate 
21317c478bd9Sstevel@tonic-gate 	va.va_mask = AT_MODE;
2132da6c28aaSamw 	error = VOP_GETATTR(dvp, &va, 0, cr, NULL);
21337c478bd9Sstevel@tonic-gate 	if (error)
21347c478bd9Sstevel@tonic-gate 		return (error);
21357c478bd9Sstevel@tonic-gate 
21367c478bd9Sstevel@tonic-gate 	/*
21377c478bd9Sstevel@tonic-gate 	 * Modify the expected mode (om) so that the set-gid bit matches
21387c478bd9Sstevel@tonic-gate 	 * that of the parent directory (dvp).
21397c478bd9Sstevel@tonic-gate 	 */
21407c478bd9Sstevel@tonic-gate 	if (va.va_mode & VSGID)
21417c478bd9Sstevel@tonic-gate 		*omp |= VSGID;
21427c478bd9Sstevel@tonic-gate 	else
21437c478bd9Sstevel@tonic-gate 		*omp &= ~VSGID;
21447c478bd9Sstevel@tonic-gate 	return (0);
21457c478bd9Sstevel@tonic-gate }
21467c478bd9Sstevel@tonic-gate 
21477c478bd9Sstevel@tonic-gate void
21487c478bd9Sstevel@tonic-gate nfs_setswaplike(vnode_t *vp, vattr_t *vap)
21497c478bd9Sstevel@tonic-gate {
21507c478bd9Sstevel@tonic-gate 
21517c478bd9Sstevel@tonic-gate 	if (vp->v_type == VREG && (vap->va_mode & (VEXEC | VSVTX)) == VSVTX) {
21527c478bd9Sstevel@tonic-gate 		if (!(vp->v_flag & VSWAPLIKE)) {
21537c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
21547c478bd9Sstevel@tonic-gate 			vp->v_flag |= VSWAPLIKE;
21557c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
21567c478bd9Sstevel@tonic-gate 		}
21577c478bd9Sstevel@tonic-gate 	} else {
21587c478bd9Sstevel@tonic-gate 		if (vp->v_flag & VSWAPLIKE) {
21597c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
21607c478bd9Sstevel@tonic-gate 			vp->v_flag &= ~VSWAPLIKE;
21617c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
21627c478bd9Sstevel@tonic-gate 		}
21637c478bd9Sstevel@tonic-gate 	}
21647c478bd9Sstevel@tonic-gate }
21657c478bd9Sstevel@tonic-gate 
21667c478bd9Sstevel@tonic-gate /*
21677c478bd9Sstevel@tonic-gate  * Free the resources associated with an rnode.
21687c478bd9Sstevel@tonic-gate  */
21697c478bd9Sstevel@tonic-gate static void
21707c478bd9Sstevel@tonic-gate rinactive(rnode_t *rp, cred_t *cr)
21717c478bd9Sstevel@tonic-gate {
21727c478bd9Sstevel@tonic-gate 	vnode_t *vp;
21737c478bd9Sstevel@tonic-gate 	cred_t *cred;
21747c478bd9Sstevel@tonic-gate 	char *contents;
21757c478bd9Sstevel@tonic-gate 	int size;
21767c478bd9Sstevel@tonic-gate 	vsecattr_t *vsp;
21777c478bd9Sstevel@tonic-gate 	int error;
21787c478bd9Sstevel@tonic-gate 	nfs3_pathconf_info *info;
21797c478bd9Sstevel@tonic-gate 
21807c478bd9Sstevel@tonic-gate 	/*
21817c478bd9Sstevel@tonic-gate 	 * Before freeing anything, wait until all asynchronous
21827c478bd9Sstevel@tonic-gate 	 * activity is done on this rnode.  This will allow all
21837c478bd9Sstevel@tonic-gate 	 * asynchronous read ahead and write behind i/o's to
21847c478bd9Sstevel@tonic-gate 	 * finish.
21857c478bd9Sstevel@tonic-gate 	 */
21867c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
21877c478bd9Sstevel@tonic-gate 	while (rp->r_count > 0)
21887c478bd9Sstevel@tonic-gate 		cv_wait(&rp->r_cv, &rp->r_statelock);
21897c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
21907c478bd9Sstevel@tonic-gate 
21917c478bd9Sstevel@tonic-gate 	/*
21927c478bd9Sstevel@tonic-gate 	 * Flush and invalidate all pages associated with the vnode.
21937c478bd9Sstevel@tonic-gate 	 */
21947c478bd9Sstevel@tonic-gate 	vp = RTOV(rp);
21957c478bd9Sstevel@tonic-gate 	if (vn_has_cached_data(vp)) {
21967c478bd9Sstevel@tonic-gate 		ASSERT(vp->v_type != VCHR);
21977c478bd9Sstevel@tonic-gate 		if ((rp->r_flags & RDIRTY) && !rp->r_error) {
2198da6c28aaSamw 			error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL);
21997c478bd9Sstevel@tonic-gate 			if (error && (error == ENOSPC || error == EDQUOT)) {
22007c478bd9Sstevel@tonic-gate 				mutex_enter(&rp->r_statelock);
22017c478bd9Sstevel@tonic-gate 				if (!rp->r_error)
22027c478bd9Sstevel@tonic-gate 					rp->r_error = error;
22037c478bd9Sstevel@tonic-gate 				mutex_exit(&rp->r_statelock);
22047c478bd9Sstevel@tonic-gate 			}
22057c478bd9Sstevel@tonic-gate 		}
22067c478bd9Sstevel@tonic-gate 		nfs_invalidate_pages(vp, (u_offset_t)0, cr);
22077c478bd9Sstevel@tonic-gate 	}
22087c478bd9Sstevel@tonic-gate 
22097c478bd9Sstevel@tonic-gate 	/*
22107c478bd9Sstevel@tonic-gate 	 * Free any held credentials and caches which may be associated
22117c478bd9Sstevel@tonic-gate 	 * with this rnode.
22127c478bd9Sstevel@tonic-gate 	 */
22137c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
22147c478bd9Sstevel@tonic-gate 	cred = rp->r_cred;
22157c478bd9Sstevel@tonic-gate 	rp->r_cred = NULL;
22167c478bd9Sstevel@tonic-gate 	contents = rp->r_symlink.contents;
22177c478bd9Sstevel@tonic-gate 	size = rp->r_symlink.size;
22187c478bd9Sstevel@tonic-gate 	rp->r_symlink.contents = NULL;
22197c478bd9Sstevel@tonic-gate 	vsp = rp->r_secattr;
22207c478bd9Sstevel@tonic-gate 	rp->r_secattr = NULL;
22217c478bd9Sstevel@tonic-gate 	info = rp->r_pathconf;
22227c478bd9Sstevel@tonic-gate 	rp->r_pathconf = NULL;
22237c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
22247c478bd9Sstevel@tonic-gate 
22257c478bd9Sstevel@tonic-gate 	/*
22267c478bd9Sstevel@tonic-gate 	 * Free the held credential.
22277c478bd9Sstevel@tonic-gate 	 */
22287c478bd9Sstevel@tonic-gate 	if (cred != NULL)
22297c478bd9Sstevel@tonic-gate 		crfree(cred);
22307c478bd9Sstevel@tonic-gate 
22317c478bd9Sstevel@tonic-gate 	/*
22327c478bd9Sstevel@tonic-gate 	 * Free the access cache entries.
22337c478bd9Sstevel@tonic-gate 	 */
22347c478bd9Sstevel@tonic-gate 	(void) nfs_access_purge_rp(rp);
22357c478bd9Sstevel@tonic-gate 
22367c478bd9Sstevel@tonic-gate 	/*
22377c478bd9Sstevel@tonic-gate 	 * Free the readdir cache entries.
22387c478bd9Sstevel@tonic-gate 	 */
22397c478bd9Sstevel@tonic-gate 	if (HAVE_RDDIR_CACHE(rp))
22407c478bd9Sstevel@tonic-gate 		nfs_purge_rddir_cache(vp);
22417c478bd9Sstevel@tonic-gate 
22427c478bd9Sstevel@tonic-gate 	/*
22437c478bd9Sstevel@tonic-gate 	 * Free the symbolic link cache.
22447c478bd9Sstevel@tonic-gate 	 */
22457c478bd9Sstevel@tonic-gate 	if (contents != NULL) {
22467c478bd9Sstevel@tonic-gate 
22477c478bd9Sstevel@tonic-gate 		kmem_free((void *)contents, size);
22487c478bd9Sstevel@tonic-gate 	}
22497c478bd9Sstevel@tonic-gate 
22507c478bd9Sstevel@tonic-gate 	/*
22517c478bd9Sstevel@tonic-gate 	 * Free any cached ACL.
22527c478bd9Sstevel@tonic-gate 	 */
22537c478bd9Sstevel@tonic-gate 	if (vsp != NULL)
22547c478bd9Sstevel@tonic-gate 		nfs_acl_free(vsp);
22557c478bd9Sstevel@tonic-gate 
22567c478bd9Sstevel@tonic-gate 	/*
22577c478bd9Sstevel@tonic-gate 	 * Free any cached pathconf information.
22587c478bd9Sstevel@tonic-gate 	 */
22597c478bd9Sstevel@tonic-gate 	if (info != NULL)
22607c478bd9Sstevel@tonic-gate 		kmem_free(info, sizeof (*info));
22617c478bd9Sstevel@tonic-gate }
22627c478bd9Sstevel@tonic-gate 
22637c478bd9Sstevel@tonic-gate /*
22647c478bd9Sstevel@tonic-gate  * Return a vnode for the given NFS Version 2 file handle.
22657c478bd9Sstevel@tonic-gate  * If no rnode exists for this fhandle, create one and put it
22667c478bd9Sstevel@tonic-gate  * into the hash queues.  If the rnode for this fhandle
22677c478bd9Sstevel@tonic-gate  * already exists, return it.
22687c478bd9Sstevel@tonic-gate  *
22697c478bd9Sstevel@tonic-gate  * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
22707c478bd9Sstevel@tonic-gate  */
22717c478bd9Sstevel@tonic-gate vnode_t *
22727c478bd9Sstevel@tonic-gate makenfsnode(fhandle_t *fh, struct nfsfattr *attr, struct vfs *vfsp,
22737c478bd9Sstevel@tonic-gate     hrtime_t t, cred_t *cr, char *dnm, char *nm)
22747c478bd9Sstevel@tonic-gate {
22757c478bd9Sstevel@tonic-gate 	int newnode;
22767c478bd9Sstevel@tonic-gate 	int index;
22777c478bd9Sstevel@tonic-gate 	vnode_t *vp;
22787c478bd9Sstevel@tonic-gate 	nfs_fhandle nfh;
22797c478bd9Sstevel@tonic-gate 	vattr_t va;
22807c478bd9Sstevel@tonic-gate 
22817c478bd9Sstevel@tonic-gate 	nfh.fh_len = NFS_FHSIZE;
22827c478bd9Sstevel@tonic-gate 	bcopy(fh, nfh.fh_buf, NFS_FHSIZE);
22837c478bd9Sstevel@tonic-gate 
22847c478bd9Sstevel@tonic-gate 	index = rtablehash(&nfh);
22857c478bd9Sstevel@tonic-gate 	rw_enter(&rtable[index].r_lock, RW_READER);
22867c478bd9Sstevel@tonic-gate 
22877c478bd9Sstevel@tonic-gate 	vp = make_rnode(&nfh, &rtable[index], vfsp, nfs_vnodeops,
22887c478bd9Sstevel@tonic-gate 	    nfs_putapage, nfs_rddir_compar, &newnode, cr, dnm, nm);
22897c478bd9Sstevel@tonic-gate 
22907c478bd9Sstevel@tonic-gate 	if (attr != NULL) {
22917c478bd9Sstevel@tonic-gate 		if (!newnode) {
22927c478bd9Sstevel@tonic-gate 			rw_exit(&rtable[index].r_lock);
22937c478bd9Sstevel@tonic-gate 			(void) nfs_cache_fattr(vp, attr, &va, t, cr);
22947c478bd9Sstevel@tonic-gate 		} else {
22957c478bd9Sstevel@tonic-gate 			if (attr->na_type < NFNON || attr->na_type > NFSOC)
22967c478bd9Sstevel@tonic-gate 				vp->v_type = VBAD;
22977c478bd9Sstevel@tonic-gate 			else
22987c478bd9Sstevel@tonic-gate 				vp->v_type = n2v_type(attr);
22997c478bd9Sstevel@tonic-gate 			/*
23007c478bd9Sstevel@tonic-gate 			 * A translation here seems to be necessary
23017c478bd9Sstevel@tonic-gate 			 * because this function can be called
23027c478bd9Sstevel@tonic-gate 			 * with `attr' that has come from the wire,
23037c478bd9Sstevel@tonic-gate 			 * and been operated on by vattr_to_nattr().
23047c478bd9Sstevel@tonic-gate 			 * See nfsrootvp()->VOP_GETTATTR()->nfsgetattr()
23057c478bd9Sstevel@tonic-gate 			 * ->nfs_getattr_otw()->rfscall()->vattr_to_nattr()
23067c478bd9Sstevel@tonic-gate 			 * ->makenfsnode().
23077c478bd9Sstevel@tonic-gate 			 */
23087c478bd9Sstevel@tonic-gate 			if ((attr->na_rdev & 0xffff0000) == 0)
23097c478bd9Sstevel@tonic-gate 				vp->v_rdev = nfsv2_expdev(attr->na_rdev);
23107c478bd9Sstevel@tonic-gate 			else
23117c478bd9Sstevel@tonic-gate 				vp->v_rdev = expldev(n2v_rdev(attr));
23127c478bd9Sstevel@tonic-gate 			nfs_attrcache(vp, attr, t);
23137c478bd9Sstevel@tonic-gate 			rw_exit(&rtable[index].r_lock);
23147c478bd9Sstevel@tonic-gate 		}
23157c478bd9Sstevel@tonic-gate 	} else {
23167c478bd9Sstevel@tonic-gate 		if (newnode) {
23177c478bd9Sstevel@tonic-gate 			PURGE_ATTRCACHE(vp);
23187c478bd9Sstevel@tonic-gate 		}
23197c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
23207c478bd9Sstevel@tonic-gate 	}
23217c478bd9Sstevel@tonic-gate 
23227c478bd9Sstevel@tonic-gate 	return (vp);
23237c478bd9Sstevel@tonic-gate }
23247c478bd9Sstevel@tonic-gate 
23257c478bd9Sstevel@tonic-gate /*
23267c478bd9Sstevel@tonic-gate  * Return a vnode for the given NFS Version 3 file handle.
23277c478bd9Sstevel@tonic-gate  * If no rnode exists for this fhandle, create one and put it
23287c478bd9Sstevel@tonic-gate  * into the hash queues.  If the rnode for this fhandle
23297c478bd9Sstevel@tonic-gate  * already exists, return it.
23307c478bd9Sstevel@tonic-gate  *
23317c478bd9Sstevel@tonic-gate  * Note: make_rnode() may upgrade the hash bucket lock to exclusive.
23327c478bd9Sstevel@tonic-gate  */
23337c478bd9Sstevel@tonic-gate vnode_t *
23347c478bd9Sstevel@tonic-gate makenfs3node_va(nfs_fh3 *fh, vattr_t *vap, struct vfs *vfsp, hrtime_t t,
23357c478bd9Sstevel@tonic-gate     cred_t *cr, char *dnm, char *nm)
23367c478bd9Sstevel@tonic-gate {
23377c478bd9Sstevel@tonic-gate 	int newnode;
23387c478bd9Sstevel@tonic-gate 	int index;
23397c478bd9Sstevel@tonic-gate 	vnode_t *vp;
23407c478bd9Sstevel@tonic-gate 
23417c478bd9Sstevel@tonic-gate 	index = rtablehash((nfs_fhandle *)fh);
23427c478bd9Sstevel@tonic-gate 	rw_enter(&rtable[index].r_lock, RW_READER);
23437c478bd9Sstevel@tonic-gate 
23447c478bd9Sstevel@tonic-gate 	vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
23457c478bd9Sstevel@tonic-gate 	    nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
23467c478bd9Sstevel@tonic-gate 	    dnm, nm);
23477c478bd9Sstevel@tonic-gate 
23487c478bd9Sstevel@tonic-gate 	if (vap == NULL) {
23497c478bd9Sstevel@tonic-gate 		if (newnode) {
23507c478bd9Sstevel@tonic-gate 			PURGE_ATTRCACHE(vp);
23517c478bd9Sstevel@tonic-gate 		}
23527c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
23537c478bd9Sstevel@tonic-gate 		return (vp);
23547c478bd9Sstevel@tonic-gate 	}
23557c478bd9Sstevel@tonic-gate 
23567c478bd9Sstevel@tonic-gate 	if (!newnode) {
23577c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
23587c478bd9Sstevel@tonic-gate 		nfs_attr_cache(vp, vap, t, cr);
23597c478bd9Sstevel@tonic-gate 	} else {
23607c478bd9Sstevel@tonic-gate 		rnode_t *rp = VTOR(vp);
23617c478bd9Sstevel@tonic-gate 
23627c478bd9Sstevel@tonic-gate 		vp->v_type = vap->va_type;
23637c478bd9Sstevel@tonic-gate 		vp->v_rdev = vap->va_rdev;
23647c478bd9Sstevel@tonic-gate 
23657c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
23667c478bd9Sstevel@tonic-gate 		if (rp->r_mtime <= t)
23677c478bd9Sstevel@tonic-gate 			nfs_attrcache_va(vp, vap);
23687c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
23697c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
23707c478bd9Sstevel@tonic-gate 	}
23717c478bd9Sstevel@tonic-gate 
23727c478bd9Sstevel@tonic-gate 	return (vp);
23737c478bd9Sstevel@tonic-gate }
23747c478bd9Sstevel@tonic-gate 
23757c478bd9Sstevel@tonic-gate vnode_t *
23767c478bd9Sstevel@tonic-gate makenfs3node(nfs_fh3 *fh, fattr3 *attr, struct vfs *vfsp, hrtime_t t,
23777c478bd9Sstevel@tonic-gate     cred_t *cr, char *dnm, char *nm)
23787c478bd9Sstevel@tonic-gate {
23797c478bd9Sstevel@tonic-gate 	int newnode;
23807c478bd9Sstevel@tonic-gate 	int index;
23817c478bd9Sstevel@tonic-gate 	vnode_t *vp;
23827c478bd9Sstevel@tonic-gate 	vattr_t va;
23837c478bd9Sstevel@tonic-gate 
23847c478bd9Sstevel@tonic-gate 	index = rtablehash((nfs_fhandle *)fh);
23857c478bd9Sstevel@tonic-gate 	rw_enter(&rtable[index].r_lock, RW_READER);
23867c478bd9Sstevel@tonic-gate 
23877c478bd9Sstevel@tonic-gate 	vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
23887c478bd9Sstevel@tonic-gate 	    nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
23897c478bd9Sstevel@tonic-gate 	    dnm, nm);
23907c478bd9Sstevel@tonic-gate 
23917c478bd9Sstevel@tonic-gate 	if (attr == NULL) {
23927c478bd9Sstevel@tonic-gate 		if (newnode) {
23937c478bd9Sstevel@tonic-gate 			PURGE_ATTRCACHE(vp);
23947c478bd9Sstevel@tonic-gate 		}
23957c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
23967c478bd9Sstevel@tonic-gate 		return (vp);
23977c478bd9Sstevel@tonic-gate 	}
23987c478bd9Sstevel@tonic-gate 
23997c478bd9Sstevel@tonic-gate 	if (!newnode) {
24007c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
24017c478bd9Sstevel@tonic-gate 		(void) nfs3_cache_fattr3(vp, attr, &va, t, cr);
24027c478bd9Sstevel@tonic-gate 	} else {
24037c478bd9Sstevel@tonic-gate 		if (attr->type < NF3REG || attr->type > NF3FIFO)
24047c478bd9Sstevel@tonic-gate 			vp->v_type = VBAD;
24057c478bd9Sstevel@tonic-gate 		else
24067c478bd9Sstevel@tonic-gate 			vp->v_type = nf3_to_vt[attr->type];
24077c478bd9Sstevel@tonic-gate 		vp->v_rdev = makedevice(attr->rdev.specdata1,
24087106075aSmarks 		    attr->rdev.specdata2);
24097c478bd9Sstevel@tonic-gate 		nfs3_attrcache(vp, attr, t);
24107c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
24117c478bd9Sstevel@tonic-gate 	}
24127c478bd9Sstevel@tonic-gate 
24137c478bd9Sstevel@tonic-gate 	return (vp);
24147c478bd9Sstevel@tonic-gate }
24157c478bd9Sstevel@tonic-gate 
24167c478bd9Sstevel@tonic-gate /*
24177c478bd9Sstevel@tonic-gate  * Read this comment before making changes to rtablehash()!
24187c478bd9Sstevel@tonic-gate  * This is a hash function in which seemingly obvious and harmless
24197c478bd9Sstevel@tonic-gate  * changes can cause escalations costing million dollars!
24207c478bd9Sstevel@tonic-gate  * Know what you are doing.
24217c478bd9Sstevel@tonic-gate  *
24227c478bd9Sstevel@tonic-gate  * rtablehash() implements Jenkins' one-at-a-time hash algorithm.  The
24237c478bd9Sstevel@tonic-gate  * algorithm is currently detailed here:
24247c478bd9Sstevel@tonic-gate  *
24257c478bd9Sstevel@tonic-gate  *   http://burtleburtle.net/bob/hash/doobs.html
24267c478bd9Sstevel@tonic-gate  *
24277c478bd9Sstevel@tonic-gate  * Of course, the above link may not be valid by the time you are reading
24287c478bd9Sstevel@tonic-gate  * this, but suffice it to say that the one-at-a-time algorithm works well in
24297c478bd9Sstevel@tonic-gate  * almost all cases.  If you are changing the algorithm be sure to verify that
24307c478bd9Sstevel@tonic-gate  * the hash algorithm still provides even distribution in all cases and with
24317c478bd9Sstevel@tonic-gate  * any server returning filehandles in whatever order (sequential or random).
24327c478bd9Sstevel@tonic-gate  */
24337c478bd9Sstevel@tonic-gate static int
24347c478bd9Sstevel@tonic-gate rtablehash(nfs_fhandle *fh)
24357c478bd9Sstevel@tonic-gate {
24367c478bd9Sstevel@tonic-gate 	ulong_t hash, len, i;
24377c478bd9Sstevel@tonic-gate 	char *key;
24387c478bd9Sstevel@tonic-gate 
24397c478bd9Sstevel@tonic-gate 	key = fh->fh_buf;
24407c478bd9Sstevel@tonic-gate 	len = (ulong_t)fh->fh_len;
24417c478bd9Sstevel@tonic-gate 	for (hash = 0, i = 0; i < len; i++) {
24427c478bd9Sstevel@tonic-gate 		hash += key[i];
24437c478bd9Sstevel@tonic-gate 		hash += (hash << 10);
24447c478bd9Sstevel@tonic-gate 		hash ^= (hash >> 6);
24457c478bd9Sstevel@tonic-gate 	}
24467c478bd9Sstevel@tonic-gate 	hash += (hash << 3);
24477c478bd9Sstevel@tonic-gate 	hash ^= (hash >> 11);
24487c478bd9Sstevel@tonic-gate 	hash += (hash << 15);
24497c478bd9Sstevel@tonic-gate 	return (hash & rtablemask);
24507c478bd9Sstevel@tonic-gate }
24517c478bd9Sstevel@tonic-gate 
24527c478bd9Sstevel@tonic-gate static vnode_t *
24537c478bd9Sstevel@tonic-gate make_rnode(nfs_fhandle *fh, rhashq_t *rhtp, struct vfs *vfsp,
24547c478bd9Sstevel@tonic-gate     struct vnodeops *vops,
24557c478bd9Sstevel@tonic-gate     int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
24567c478bd9Sstevel@tonic-gate     int (*compar)(const void *, const void *),
24577c478bd9Sstevel@tonic-gate     int *newnode, cred_t *cr, char *dnm, char *nm)
24587c478bd9Sstevel@tonic-gate {
24597c478bd9Sstevel@tonic-gate 	rnode_t *rp;
24607c478bd9Sstevel@tonic-gate 	rnode_t *trp;
24617c478bd9Sstevel@tonic-gate 	vnode_t *vp;
24627c478bd9Sstevel@tonic-gate 	mntinfo_t *mi;
24637c478bd9Sstevel@tonic-gate 
24647c478bd9Sstevel@tonic-gate 	ASSERT(RW_READ_HELD(&rhtp->r_lock));
24657c478bd9Sstevel@tonic-gate 
24667c478bd9Sstevel@tonic-gate 	mi = VFTOMI(vfsp);
24677c478bd9Sstevel@tonic-gate start:
24687c478bd9Sstevel@tonic-gate 	if ((rp = rfind(rhtp, fh, vfsp)) != NULL) {
24697c478bd9Sstevel@tonic-gate 		vp = RTOV(rp);
24707c478bd9Sstevel@tonic-gate 		nfs_set_vroot(vp);
24717c478bd9Sstevel@tonic-gate 		*newnode = 0;
24727c478bd9Sstevel@tonic-gate 		return (vp);
24737c478bd9Sstevel@tonic-gate 	}
24747c478bd9Sstevel@tonic-gate 	rw_exit(&rhtp->r_lock);
24757c478bd9Sstevel@tonic-gate 
24767c478bd9Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
24777c478bd9Sstevel@tonic-gate 	if (rpfreelist != NULL && rnew >= nrnode) {
24787c478bd9Sstevel@tonic-gate 		rp = rpfreelist;
24797c478bd9Sstevel@tonic-gate 		rp_rmfree(rp);
24807c478bd9Sstevel@tonic-gate 		mutex_exit(&rpfreelist_lock);
24817c478bd9Sstevel@tonic-gate 
24827c478bd9Sstevel@tonic-gate 		vp = RTOV(rp);
24837c478bd9Sstevel@tonic-gate 
24847c478bd9Sstevel@tonic-gate 		if (rp->r_flags & RHASHED) {
24857c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
24867c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
24877c478bd9Sstevel@tonic-gate 			if (vp->v_count > 1) {
24887c478bd9Sstevel@tonic-gate 				vp->v_count--;
24897c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
24907c478bd9Sstevel@tonic-gate 				rw_exit(&rp->r_hashq->r_lock);
24917c478bd9Sstevel@tonic-gate 				rw_enter(&rhtp->r_lock, RW_READER);
24927c478bd9Sstevel@tonic-gate 				goto start;
24937c478bd9Sstevel@tonic-gate 			}
24947c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
24957c478bd9Sstevel@tonic-gate 			rp_rmhash_locked(rp);
24967c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
24977c478bd9Sstevel@tonic-gate 		}
24987c478bd9Sstevel@tonic-gate 
24997c478bd9Sstevel@tonic-gate 		rinactive(rp, cr);
25007c478bd9Sstevel@tonic-gate 
25017c478bd9Sstevel@tonic-gate 		mutex_enter(&vp->v_lock);
25027c478bd9Sstevel@tonic-gate 		if (vp->v_count > 1) {
25037c478bd9Sstevel@tonic-gate 			vp->v_count--;
25047c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
25057c478bd9Sstevel@tonic-gate 			rw_enter(&rhtp->r_lock, RW_READER);
25067c478bd9Sstevel@tonic-gate 			goto start;
25077c478bd9Sstevel@tonic-gate 		}
25087c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
25097c478bd9Sstevel@tonic-gate 		vn_invalid(vp);
25107c478bd9Sstevel@tonic-gate 		/*
25117c478bd9Sstevel@tonic-gate 		 * destroy old locks before bzero'ing and
25127c478bd9Sstevel@tonic-gate 		 * recreating the locks below.
25137c478bd9Sstevel@tonic-gate 		 */
25147c478bd9Sstevel@tonic-gate 		nfs_rw_destroy(&rp->r_rwlock);
25157c478bd9Sstevel@tonic-gate 		nfs_rw_destroy(&rp->r_lkserlock);
25167c478bd9Sstevel@tonic-gate 		mutex_destroy(&rp->r_statelock);
25177c478bd9Sstevel@tonic-gate 		cv_destroy(&rp->r_cv);
25187c478bd9Sstevel@tonic-gate 		cv_destroy(&rp->r_commit.c_cv);
25197c478bd9Sstevel@tonic-gate 		nfs_free_r_path(rp);
25207c478bd9Sstevel@tonic-gate 		avl_destroy(&rp->r_dir);
25217c478bd9Sstevel@tonic-gate 		/*
25227c478bd9Sstevel@tonic-gate 		 * Make sure that if rnode is recycled then
25237c478bd9Sstevel@tonic-gate 		 * VFS count is decremented properly before
25247c478bd9Sstevel@tonic-gate 		 * reuse.
25257c478bd9Sstevel@tonic-gate 		 */
25267c478bd9Sstevel@tonic-gate 		VFS_RELE(vp->v_vfsp);
25277c478bd9Sstevel@tonic-gate 		vn_reinit(vp);
25287c478bd9Sstevel@tonic-gate 	} else {
25297c478bd9Sstevel@tonic-gate 		vnode_t *new_vp;
25307c478bd9Sstevel@tonic-gate 
25317c478bd9Sstevel@tonic-gate 		mutex_exit(&rpfreelist_lock);
25327c478bd9Sstevel@tonic-gate 
25337c478bd9Sstevel@tonic-gate 		rp = kmem_cache_alloc(rnode_cache, KM_SLEEP);
25347c478bd9Sstevel@tonic-gate 		new_vp = vn_alloc(KM_SLEEP);
25357c478bd9Sstevel@tonic-gate 
25367c478bd9Sstevel@tonic-gate 		atomic_add_long((ulong_t *)&rnew, 1);
25377c478bd9Sstevel@tonic-gate #ifdef DEBUG
25387c478bd9Sstevel@tonic-gate 		clstat_debug.nrnode.value.ui64++;
25397c478bd9Sstevel@tonic-gate #endif
25407c478bd9Sstevel@tonic-gate 		vp = new_vp;
25417c478bd9Sstevel@tonic-gate 	}
25427c478bd9Sstevel@tonic-gate 
25437c478bd9Sstevel@tonic-gate 	bzero(rp, sizeof (*rp));
25447c478bd9Sstevel@tonic-gate 	rp->r_vnode = vp;
25457c478bd9Sstevel@tonic-gate 	nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
25467c478bd9Sstevel@tonic-gate 	nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
25477c478bd9Sstevel@tonic-gate 	mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
25487c478bd9Sstevel@tonic-gate 	cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
25497c478bd9Sstevel@tonic-gate 	cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
25507c478bd9Sstevel@tonic-gate 	rp->r_fh.fh_len = fh->fh_len;
25517c478bd9Sstevel@tonic-gate 	bcopy(fh->fh_buf, rp->r_fh.fh_buf, fh->fh_len);
25527c478bd9Sstevel@tonic-gate 	rp->r_server = mi->mi_curr_serv;
25537c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
25547c478bd9Sstevel@tonic-gate 		/*
25557c478bd9Sstevel@tonic-gate 		 * If replicated servers, stash pathnames
25567c478bd9Sstevel@tonic-gate 		 */
25577c478bd9Sstevel@tonic-gate 		if (dnm != NULL && nm != NULL) {
25587c478bd9Sstevel@tonic-gate 			char *s, *p;
25597c478bd9Sstevel@tonic-gate 			uint_t len;
25607c478bd9Sstevel@tonic-gate 
25617c478bd9Sstevel@tonic-gate 			len = (uint_t)(strlen(dnm) + strlen(nm) + 2);
25627c478bd9Sstevel@tonic-gate 			rp->r_path = kmem_alloc(len, KM_SLEEP);
25637c478bd9Sstevel@tonic-gate #ifdef DEBUG
25647c478bd9Sstevel@tonic-gate 			clstat_debug.rpath.value.ui64 += len;
25657c478bd9Sstevel@tonic-gate #endif
25667c478bd9Sstevel@tonic-gate 			s = rp->r_path;
25677c478bd9Sstevel@tonic-gate 			for (p = dnm; *p; p++)
25687c478bd9Sstevel@tonic-gate 				*s++ = *p;
25697c478bd9Sstevel@tonic-gate 			*s++ = '/';
25707c478bd9Sstevel@tonic-gate 			for (p = nm; *p; p++)
25717c478bd9Sstevel@tonic-gate 				*s++ = *p;
25727c478bd9Sstevel@tonic-gate 			*s = '\0';
25737c478bd9Sstevel@tonic-gate 		} else {
25747c478bd9Sstevel@tonic-gate 			/* special case for root */
25757c478bd9Sstevel@tonic-gate 			rp->r_path = kmem_alloc(2, KM_SLEEP);
25767c478bd9Sstevel@tonic-gate #ifdef DEBUG
25777c478bd9Sstevel@tonic-gate 			clstat_debug.rpath.value.ui64 += 2;
25787c478bd9Sstevel@tonic-gate #endif
25797c478bd9Sstevel@tonic-gate 			*rp->r_path = '.';
25807c478bd9Sstevel@tonic-gate 			*(rp->r_path + 1) = '\0';
25817c478bd9Sstevel@tonic-gate 		}
25827c478bd9Sstevel@tonic-gate 	}
25837c478bd9Sstevel@tonic-gate 	VFS_HOLD(vfsp);
25847c478bd9Sstevel@tonic-gate 	rp->r_putapage = putapage;
25857c478bd9Sstevel@tonic-gate 	rp->r_hashq = rhtp;
25867c478bd9Sstevel@tonic-gate 	rp->r_flags = RREADDIRPLUS;
25877c478bd9Sstevel@tonic-gate 	avl_create(&rp->r_dir, compar, sizeof (rddir_cache),
25887c478bd9Sstevel@tonic-gate 	    offsetof(rddir_cache, tree));
25897c478bd9Sstevel@tonic-gate 	vn_setops(vp, vops);
25907c478bd9Sstevel@tonic-gate 	vp->v_data = (caddr_t)rp;
25917c478bd9Sstevel@tonic-gate 	vp->v_vfsp = vfsp;
25927c478bd9Sstevel@tonic-gate 	vp->v_type = VNON;
2593*f8bbc571SPavel Filipensky 	vp->v_flag |= VMODSORT;
25947c478bd9Sstevel@tonic-gate 	nfs_set_vroot(vp);
25957c478bd9Sstevel@tonic-gate 
25967c478bd9Sstevel@tonic-gate 	/*
25977c478bd9Sstevel@tonic-gate 	 * There is a race condition if someone else
25987c478bd9Sstevel@tonic-gate 	 * alloc's the rnode while no locks are held, so we
25997c478bd9Sstevel@tonic-gate 	 * check again and recover if found.
26007c478bd9Sstevel@tonic-gate 	 */
26017c478bd9Sstevel@tonic-gate 	rw_enter(&rhtp->r_lock, RW_WRITER);
26027c478bd9Sstevel@tonic-gate 	if ((trp = rfind(rhtp, fh, vfsp)) != NULL) {
26037c478bd9Sstevel@tonic-gate 		vp = RTOV(trp);
26047c478bd9Sstevel@tonic-gate 		nfs_set_vroot(vp);
26057c478bd9Sstevel@tonic-gate 		*newnode = 0;
26067c478bd9Sstevel@tonic-gate 		rw_exit(&rhtp->r_lock);
26077c478bd9Sstevel@tonic-gate 		rp_addfree(rp, cr);
26087c478bd9Sstevel@tonic-gate 		rw_enter(&rhtp->r_lock, RW_READER);
26097c478bd9Sstevel@tonic-gate 		return (vp);
26107c478bd9Sstevel@tonic-gate 	}
26117c478bd9Sstevel@tonic-gate 	rp_addhash(rp);
26127c478bd9Sstevel@tonic-gate 	*newnode = 1;
26137c478bd9Sstevel@tonic-gate 	return (vp);
26147c478bd9Sstevel@tonic-gate }
26157c478bd9Sstevel@tonic-gate 
2616*f8bbc571SPavel Filipensky /*
2617*f8bbc571SPavel Filipensky  * Callback function to check if the page should be marked as
2618*f8bbc571SPavel Filipensky  * modified. In the positive case, p_fsdata is set to C_NOCOMMIT.
2619*f8bbc571SPavel Filipensky  */
2620*f8bbc571SPavel Filipensky int
2621*f8bbc571SPavel Filipensky nfs_setmod_check(page_t *pp)
2622*f8bbc571SPavel Filipensky {
2623*f8bbc571SPavel Filipensky 	if (pp->p_fsdata != C_NOCOMMIT) {
2624*f8bbc571SPavel Filipensky 		pp->p_fsdata = C_NOCOMMIT;
2625*f8bbc571SPavel Filipensky 		return (1);
2626*f8bbc571SPavel Filipensky 	}
2627*f8bbc571SPavel Filipensky 	return (0);
2628*f8bbc571SPavel Filipensky }
2629*f8bbc571SPavel Filipensky 
26307c478bd9Sstevel@tonic-gate static void
26317c478bd9Sstevel@tonic-gate nfs_set_vroot(vnode_t *vp)
26327c478bd9Sstevel@tonic-gate {
26337c478bd9Sstevel@tonic-gate 	rnode_t *rp;
26347c478bd9Sstevel@tonic-gate 	nfs_fhandle *rootfh;
26357c478bd9Sstevel@tonic-gate 
26367c478bd9Sstevel@tonic-gate 	rp = VTOR(vp);
26377c478bd9Sstevel@tonic-gate 	rootfh = &rp->r_server->sv_fhandle;
26387c478bd9Sstevel@tonic-gate 	if (rootfh->fh_len == rp->r_fh.fh_len &&
26397c478bd9Sstevel@tonic-gate 	    bcmp(rootfh->fh_buf, rp->r_fh.fh_buf, rp->r_fh.fh_len) == 0) {
26407c478bd9Sstevel@tonic-gate 		if (!(vp->v_flag & VROOT)) {
26417c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
26427c478bd9Sstevel@tonic-gate 			vp->v_flag |= VROOT;
26437c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
26447c478bd9Sstevel@tonic-gate 		}
26457c478bd9Sstevel@tonic-gate 	}
26467c478bd9Sstevel@tonic-gate }
26477c478bd9Sstevel@tonic-gate 
26487c478bd9Sstevel@tonic-gate static void
26497c478bd9Sstevel@tonic-gate nfs_free_r_path(rnode_t *rp)
26507c478bd9Sstevel@tonic-gate {
26517c478bd9Sstevel@tonic-gate 	char *path;
26527c478bd9Sstevel@tonic-gate 	size_t len;
26537c478bd9Sstevel@tonic-gate 
26547c478bd9Sstevel@tonic-gate 	path = rp->r_path;
26557c478bd9Sstevel@tonic-gate 	if (path) {
26567c478bd9Sstevel@tonic-gate 		rp->r_path = NULL;
26577c478bd9Sstevel@tonic-gate 		len = strlen(path) + 1;
26587c478bd9Sstevel@tonic-gate 		kmem_free(path, len);
26597c478bd9Sstevel@tonic-gate #ifdef DEBUG
26607c478bd9Sstevel@tonic-gate 		clstat_debug.rpath.value.ui64 -= len;
26617c478bd9Sstevel@tonic-gate #endif
26627c478bd9Sstevel@tonic-gate 	}
26637c478bd9Sstevel@tonic-gate }
26647c478bd9Sstevel@tonic-gate 
26657c478bd9Sstevel@tonic-gate /*
26667c478bd9Sstevel@tonic-gate  * Put an rnode on the free list.
26677c478bd9Sstevel@tonic-gate  *
26687c478bd9Sstevel@tonic-gate  * Rnodes which were allocated above and beyond the normal limit
26697c478bd9Sstevel@tonic-gate  * are immediately freed.
26707c478bd9Sstevel@tonic-gate  */
26717c478bd9Sstevel@tonic-gate void
26727c478bd9Sstevel@tonic-gate rp_addfree(rnode_t *rp, cred_t *cr)
26737c478bd9Sstevel@tonic-gate {
26747c478bd9Sstevel@tonic-gate 	vnode_t *vp;
26757c478bd9Sstevel@tonic-gate 	struct vfs *vfsp;
26767c478bd9Sstevel@tonic-gate 
26777c478bd9Sstevel@tonic-gate 	vp = RTOV(rp);
26787c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_count >= 1);
26797c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
26807c478bd9Sstevel@tonic-gate 
26817c478bd9Sstevel@tonic-gate 	/*
26827c478bd9Sstevel@tonic-gate 	 * If we have too many rnodes allocated and there are no
26837c478bd9Sstevel@tonic-gate 	 * references to this rnode, or if the rnode is no longer
26847c478bd9Sstevel@tonic-gate 	 * accessible by it does not reside in the hash queues,
26857c478bd9Sstevel@tonic-gate 	 * or if an i/o error occurred while writing to the file,
26867c478bd9Sstevel@tonic-gate 	 * then just free it instead of putting it on the rnode
26877c478bd9Sstevel@tonic-gate 	 * freelist.
26887c478bd9Sstevel@tonic-gate 	 */
26897c478bd9Sstevel@tonic-gate 	vfsp = vp->v_vfsp;
26907c478bd9Sstevel@tonic-gate 	if (((rnew > nrnode || !(rp->r_flags & RHASHED) || rp->r_error ||
26917c478bd9Sstevel@tonic-gate 	    (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
26927c478bd9Sstevel@tonic-gate 		if (rp->r_flags & RHASHED) {
26937c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
26947c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
26957c478bd9Sstevel@tonic-gate 			if (vp->v_count > 1) {
26967c478bd9Sstevel@tonic-gate 				vp->v_count--;
26977c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
26987c478bd9Sstevel@tonic-gate 				rw_exit(&rp->r_hashq->r_lock);
26997c478bd9Sstevel@tonic-gate 				return;
27007c478bd9Sstevel@tonic-gate 			}
27017c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
27027c478bd9Sstevel@tonic-gate 			rp_rmhash_locked(rp);
27037c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
27047c478bd9Sstevel@tonic-gate 		}
27057c478bd9Sstevel@tonic-gate 
27067c478bd9Sstevel@tonic-gate 		rinactive(rp, cr);
27077c478bd9Sstevel@tonic-gate 
27087c478bd9Sstevel@tonic-gate 		/*
27097c478bd9Sstevel@tonic-gate 		 * Recheck the vnode reference count.  We need to
27107c478bd9Sstevel@tonic-gate 		 * make sure that another reference has not been
27117c478bd9Sstevel@tonic-gate 		 * acquired while we were not holding v_lock.  The
27127c478bd9Sstevel@tonic-gate 		 * rnode is not in the rnode hash queues, so the
27137c478bd9Sstevel@tonic-gate 		 * only way for a reference to have been acquired
27147c478bd9Sstevel@tonic-gate 		 * is for a VOP_PUTPAGE because the rnode was marked
27157c478bd9Sstevel@tonic-gate 		 * with RDIRTY or for a modified page.  This
27167c478bd9Sstevel@tonic-gate 		 * reference may have been acquired before our call
27177c478bd9Sstevel@tonic-gate 		 * to rinactive.  The i/o may have been completed,
27187c478bd9Sstevel@tonic-gate 		 * thus allowing rinactive to complete, but the
27197c478bd9Sstevel@tonic-gate 		 * reference to the vnode may not have been released
27207c478bd9Sstevel@tonic-gate 		 * yet.  In any case, the rnode can not be destroyed
27217c478bd9Sstevel@tonic-gate 		 * until the other references to this vnode have been
27227c478bd9Sstevel@tonic-gate 		 * released.  The other references will take care of
27237c478bd9Sstevel@tonic-gate 		 * either destroying the rnode or placing it on the
27247c478bd9Sstevel@tonic-gate 		 * rnode freelist.  If there are no other references,
27257c478bd9Sstevel@tonic-gate 		 * then the rnode may be safely destroyed.
27267c478bd9Sstevel@tonic-gate 		 */
27277c478bd9Sstevel@tonic-gate 		mutex_enter(&vp->v_lock);
27287c478bd9Sstevel@tonic-gate 		if (vp->v_count > 1) {
27297c478bd9Sstevel@tonic-gate 			vp->v_count--;
27307c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
27317c478bd9Sstevel@tonic-gate 			return;
27327c478bd9Sstevel@tonic-gate 		}
27337c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
27347c478bd9Sstevel@tonic-gate 
27357c478bd9Sstevel@tonic-gate 		destroy_rnode(rp);
27367c478bd9Sstevel@tonic-gate 		return;
27377c478bd9Sstevel@tonic-gate 	}
27387c478bd9Sstevel@tonic-gate 
27397c478bd9Sstevel@tonic-gate 	/*
27407c478bd9Sstevel@tonic-gate 	 * Lock the hash queue and then recheck the reference count
27417c478bd9Sstevel@tonic-gate 	 * to ensure that no other threads have acquired a reference
27427c478bd9Sstevel@tonic-gate 	 * to indicate that the rnode should not be placed on the
27437c478bd9Sstevel@tonic-gate 	 * freelist.  If another reference has been acquired, then
27447c478bd9Sstevel@tonic-gate 	 * just release this one and let the other thread complete
27457c478bd9Sstevel@tonic-gate 	 * the processing of adding this rnode to the freelist.
27467c478bd9Sstevel@tonic-gate 	 */
27477c478bd9Sstevel@tonic-gate 	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
27487c478bd9Sstevel@tonic-gate 
27497c478bd9Sstevel@tonic-gate 	mutex_enter(&vp->v_lock);
27507c478bd9Sstevel@tonic-gate 	if (vp->v_count > 1) {
27517c478bd9Sstevel@tonic-gate 		vp->v_count--;
27527c478bd9Sstevel@tonic-gate 		mutex_exit(&vp->v_lock);
27537c478bd9Sstevel@tonic-gate 		rw_exit(&rp->r_hashq->r_lock);
27547c478bd9Sstevel@tonic-gate 		return;
27557c478bd9Sstevel@tonic-gate 	}
27567c478bd9Sstevel@tonic-gate 	mutex_exit(&vp->v_lock);
27577c478bd9Sstevel@tonic-gate 
27587c478bd9Sstevel@tonic-gate 	/*
27597c478bd9Sstevel@tonic-gate 	 * If there is no cached data or metadata for this file, then
27607c478bd9Sstevel@tonic-gate 	 * put the rnode on the front of the freelist so that it will
27617c478bd9Sstevel@tonic-gate 	 * be reused before other rnodes which may have cached data or
27627c478bd9Sstevel@tonic-gate 	 * metadata associated with them.
27637c478bd9Sstevel@tonic-gate 	 */
27647c478bd9Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
27657c478bd9Sstevel@tonic-gate 	if (rpfreelist == NULL) {
27667c478bd9Sstevel@tonic-gate 		rp->r_freef = rp;
27677c478bd9Sstevel@tonic-gate 		rp->r_freeb = rp;
27687c478bd9Sstevel@tonic-gate 		rpfreelist = rp;
27697c478bd9Sstevel@tonic-gate 	} else {
27707c478bd9Sstevel@tonic-gate 		rp->r_freef = rpfreelist;
27717c478bd9Sstevel@tonic-gate 		rp->r_freeb = rpfreelist->r_freeb;
27727c478bd9Sstevel@tonic-gate 		rpfreelist->r_freeb->r_freef = rp;
27737c478bd9Sstevel@tonic-gate 		rpfreelist->r_freeb = rp;
27747c478bd9Sstevel@tonic-gate 		if (!vn_has_cached_data(vp) &&
27757c478bd9Sstevel@tonic-gate 		    !HAVE_RDDIR_CACHE(rp) &&
27767c478bd9Sstevel@tonic-gate 		    rp->r_symlink.contents == NULL &&
27777c478bd9Sstevel@tonic-gate 		    rp->r_secattr == NULL &&
27787c478bd9Sstevel@tonic-gate 		    rp->r_pathconf == NULL)
27797c478bd9Sstevel@tonic-gate 			rpfreelist = rp;
27807c478bd9Sstevel@tonic-gate 	}
27817c478bd9Sstevel@tonic-gate 	mutex_exit(&rpfreelist_lock);
27827c478bd9Sstevel@tonic-gate 
27837c478bd9Sstevel@tonic-gate 	rw_exit(&rp->r_hashq->r_lock);
27847c478bd9Sstevel@tonic-gate }
27857c478bd9Sstevel@tonic-gate 
27867c478bd9Sstevel@tonic-gate /*
27877c478bd9Sstevel@tonic-gate  * Remove an rnode from the free list.
27887c478bd9Sstevel@tonic-gate  *
27897c478bd9Sstevel@tonic-gate  * The caller must be holding rpfreelist_lock and the rnode
27907c478bd9Sstevel@tonic-gate  * must be on the freelist.
27917c478bd9Sstevel@tonic-gate  */
27927c478bd9Sstevel@tonic-gate static void
27937c478bd9Sstevel@tonic-gate rp_rmfree(rnode_t *rp)
27947c478bd9Sstevel@tonic-gate {
27957c478bd9Sstevel@tonic-gate 
27967c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&rpfreelist_lock));
27977c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);
27987c478bd9Sstevel@tonic-gate 
27997c478bd9Sstevel@tonic-gate 	if (rp == rpfreelist) {
28007c478bd9Sstevel@tonic-gate 		rpfreelist = rp->r_freef;
28017c478bd9Sstevel@tonic-gate 		if (rp == rpfreelist)
28027c478bd9Sstevel@tonic-gate 			rpfreelist = NULL;
28037c478bd9Sstevel@tonic-gate 	}
28047c478bd9Sstevel@tonic-gate 
28057c478bd9Sstevel@tonic-gate 	rp->r_freeb->r_freef = rp->r_freef;
28067c478bd9Sstevel@tonic-gate 	rp->r_freef->r_freeb = rp->r_freeb;
28077c478bd9Sstevel@tonic-gate 
28087c478bd9Sstevel@tonic-gate 	rp->r_freef = rp->r_freeb = NULL;
28097c478bd9Sstevel@tonic-gate }
28107c478bd9Sstevel@tonic-gate 
28117c478bd9Sstevel@tonic-gate /*
28127c478bd9Sstevel@tonic-gate  * Put a rnode in the hash table.
28137c478bd9Sstevel@tonic-gate  *
28147c478bd9Sstevel@tonic-gate  * The caller must be holding the exclusive hash queue lock.
28157c478bd9Sstevel@tonic-gate  */
28167c478bd9Sstevel@tonic-gate static void
28177c478bd9Sstevel@tonic-gate rp_addhash(rnode_t *rp)
28187c478bd9Sstevel@tonic-gate {
28197c478bd9Sstevel@tonic-gate 
28207c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
28217c478bd9Sstevel@tonic-gate 	ASSERT(!(rp->r_flags & RHASHED));
28227c478bd9Sstevel@tonic-gate 
28237c478bd9Sstevel@tonic-gate 	rp->r_hashf = rp->r_hashq->r_hashf;
28247c478bd9Sstevel@tonic-gate 	rp->r_hashq->r_hashf = rp;
28257c478bd9Sstevel@tonic-gate 	rp->r_hashb = (rnode_t *)rp->r_hashq;
28267c478bd9Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp;
28277c478bd9Sstevel@tonic-gate 
28287c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
28297c478bd9Sstevel@tonic-gate 	rp->r_flags |= RHASHED;
28307c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
28317c478bd9Sstevel@tonic-gate }
28327c478bd9Sstevel@tonic-gate 
28337c478bd9Sstevel@tonic-gate /*
28347c478bd9Sstevel@tonic-gate  * Remove a rnode from the hash table.
28357c478bd9Sstevel@tonic-gate  *
28367c478bd9Sstevel@tonic-gate  * The caller must be holding the hash queue lock.
28377c478bd9Sstevel@tonic-gate  */
28387c478bd9Sstevel@tonic-gate static void
28397c478bd9Sstevel@tonic-gate rp_rmhash_locked(rnode_t *rp)
28407c478bd9Sstevel@tonic-gate {
28417c478bd9Sstevel@tonic-gate 
28427c478bd9Sstevel@tonic-gate 	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
28437c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_flags & RHASHED);
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 	rp->r_hashb->r_hashf = rp->r_hashf;
28467c478bd9Sstevel@tonic-gate 	rp->r_hashf->r_hashb = rp->r_hashb;
28477c478bd9Sstevel@tonic-gate 
28487c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
28497c478bd9Sstevel@tonic-gate 	rp->r_flags &= ~RHASHED;
28507c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
28517c478bd9Sstevel@tonic-gate }
28527c478bd9Sstevel@tonic-gate 
28537c478bd9Sstevel@tonic-gate /*
28547c478bd9Sstevel@tonic-gate  * Remove a rnode from the hash table.
28557c478bd9Sstevel@tonic-gate  *
28567c478bd9Sstevel@tonic-gate  * The caller must not be holding the hash queue lock.
28577c478bd9Sstevel@tonic-gate  */
28587c478bd9Sstevel@tonic-gate void
28597c478bd9Sstevel@tonic-gate rp_rmhash(rnode_t *rp)
28607c478bd9Sstevel@tonic-gate {
28617c478bd9Sstevel@tonic-gate 
28627c478bd9Sstevel@tonic-gate 	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
28637c478bd9Sstevel@tonic-gate 	rp_rmhash_locked(rp);
28647c478bd9Sstevel@tonic-gate 	rw_exit(&rp->r_hashq->r_lock);
28657c478bd9Sstevel@tonic-gate }
28667c478bd9Sstevel@tonic-gate 
28677c478bd9Sstevel@tonic-gate /*
28687c478bd9Sstevel@tonic-gate  * Lookup a rnode by fhandle.
28697c478bd9Sstevel@tonic-gate  *
28707c478bd9Sstevel@tonic-gate  * The caller must be holding the hash queue lock, either shared or exclusive.
28717c478bd9Sstevel@tonic-gate  */
28727c478bd9Sstevel@tonic-gate static rnode_t *
28737c478bd9Sstevel@tonic-gate rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp)
28747c478bd9Sstevel@tonic-gate {
28757c478bd9Sstevel@tonic-gate 	rnode_t *rp;
28767c478bd9Sstevel@tonic-gate 	vnode_t *vp;
28777c478bd9Sstevel@tonic-gate 
28787c478bd9Sstevel@tonic-gate 	ASSERT(RW_LOCK_HELD(&rhtp->r_lock));
28797c478bd9Sstevel@tonic-gate 
28807c478bd9Sstevel@tonic-gate 	for (rp = rhtp->r_hashf; rp != (rnode_t *)rhtp; rp = rp->r_hashf) {
28817c478bd9Sstevel@tonic-gate 		vp = RTOV(rp);
28827c478bd9Sstevel@tonic-gate 		if (vp->v_vfsp == vfsp &&
28837c478bd9Sstevel@tonic-gate 		    rp->r_fh.fh_len == fh->fh_len &&
28847c478bd9Sstevel@tonic-gate 		    bcmp(rp->r_fh.fh_buf, fh->fh_buf, fh->fh_len) == 0) {
28857c478bd9Sstevel@tonic-gate 			/*
28867c478bd9Sstevel@tonic-gate 			 * remove rnode from free list, if necessary.
28877c478bd9Sstevel@tonic-gate 			 */
28887c478bd9Sstevel@tonic-gate 			if (rp->r_freef != NULL) {
28897c478bd9Sstevel@tonic-gate 				mutex_enter(&rpfreelist_lock);
28907c478bd9Sstevel@tonic-gate 				/*
28917c478bd9Sstevel@tonic-gate 				 * If the rnode is on the freelist,
28927c478bd9Sstevel@tonic-gate 				 * then remove it and use that reference
28937c478bd9Sstevel@tonic-gate 				 * as the new reference.  Otherwise,
28947c478bd9Sstevel@tonic-gate 				 * need to increment the reference count.
28957c478bd9Sstevel@tonic-gate 				 */
28967c478bd9Sstevel@tonic-gate 				if (rp->r_freef != NULL) {
28977c478bd9Sstevel@tonic-gate 					rp_rmfree(rp);
28987c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
28997c478bd9Sstevel@tonic-gate 				} else {
29007c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
29017c478bd9Sstevel@tonic-gate 					VN_HOLD(vp);
29027c478bd9Sstevel@tonic-gate 				}
29037c478bd9Sstevel@tonic-gate 			} else
29047c478bd9Sstevel@tonic-gate 				VN_HOLD(vp);
29057c478bd9Sstevel@tonic-gate 			return (rp);
29067c478bd9Sstevel@tonic-gate 		}
29077c478bd9Sstevel@tonic-gate 	}
29087c478bd9Sstevel@tonic-gate 	return (NULL);
29097c478bd9Sstevel@tonic-gate }
29107c478bd9Sstevel@tonic-gate 
29117c478bd9Sstevel@tonic-gate /*
29127c478bd9Sstevel@tonic-gate  * Return 1 if there is a active vnode belonging to this vfs in the
29137c478bd9Sstevel@tonic-gate  * rtable cache.
29147c478bd9Sstevel@tonic-gate  *
29157c478bd9Sstevel@tonic-gate  * Several of these checks are done without holding the usual
29167c478bd9Sstevel@tonic-gate  * locks.  This is safe because destroy_rtable(), rp_addfree(),
29177c478bd9Sstevel@tonic-gate  * etc. will redo the necessary checks before actually destroying
29187c478bd9Sstevel@tonic-gate  * any rnodes.
29197c478bd9Sstevel@tonic-gate  */
29207c478bd9Sstevel@tonic-gate int
29217c478bd9Sstevel@tonic-gate check_rtable(struct vfs *vfsp)
29227c478bd9Sstevel@tonic-gate {
29237c478bd9Sstevel@tonic-gate 	int index;
29247c478bd9Sstevel@tonic-gate 	rnode_t *rp;
29257c478bd9Sstevel@tonic-gate 	vnode_t *vp;
29267c478bd9Sstevel@tonic-gate 
29277c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
29287c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
29297c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
29307c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
29317c478bd9Sstevel@tonic-gate 		    rp = rp->r_hashf) {
29327c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
29337c478bd9Sstevel@tonic-gate 			if (vp->v_vfsp == vfsp) {
29347c478bd9Sstevel@tonic-gate 				if (rp->r_freef == NULL ||
29357c478bd9Sstevel@tonic-gate 				    (vn_has_cached_data(vp) &&
29367c478bd9Sstevel@tonic-gate 				    (rp->r_flags & RDIRTY)) ||
29377c478bd9Sstevel@tonic-gate 				    rp->r_count > 0) {
29387c478bd9Sstevel@tonic-gate 					rw_exit(&rtable[index].r_lock);
29397c478bd9Sstevel@tonic-gate 					return (1);
29407c478bd9Sstevel@tonic-gate 				}
29417c478bd9Sstevel@tonic-gate 			}
29427c478bd9Sstevel@tonic-gate 		}
29437c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
29447c478bd9Sstevel@tonic-gate 	}
29457c478bd9Sstevel@tonic-gate 	return (0);
29467c478bd9Sstevel@tonic-gate }
29477c478bd9Sstevel@tonic-gate 
29487c478bd9Sstevel@tonic-gate /*
29497c478bd9Sstevel@tonic-gate  * Destroy inactive vnodes from the hash queues which belong to this
29507c478bd9Sstevel@tonic-gate  * vfs.  It is essential that we destroy all inactive vnodes during a
29517c478bd9Sstevel@tonic-gate  * forced unmount as well as during a normal unmount.
29527c478bd9Sstevel@tonic-gate  */
29537c478bd9Sstevel@tonic-gate void
29547c478bd9Sstevel@tonic-gate destroy_rtable(struct vfs *vfsp, cred_t *cr)
29557c478bd9Sstevel@tonic-gate {
29567c478bd9Sstevel@tonic-gate 	int index;
29577c478bd9Sstevel@tonic-gate 	rnode_t *rp;
29587c478bd9Sstevel@tonic-gate 	rnode_t *rlist;
29597c478bd9Sstevel@tonic-gate 	rnode_t *r_hashf;
29607c478bd9Sstevel@tonic-gate 	vnode_t *vp;
29617c478bd9Sstevel@tonic-gate 
29627c478bd9Sstevel@tonic-gate 	rlist = NULL;
29637c478bd9Sstevel@tonic-gate 
29647c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
29657c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_WRITER);
29667c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
29677c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
29687c478bd9Sstevel@tonic-gate 		    rp = r_hashf) {
29697c478bd9Sstevel@tonic-gate 			/* save the hash pointer before destroying */
29707c478bd9Sstevel@tonic-gate 			r_hashf = rp->r_hashf;
29717c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
29727c478bd9Sstevel@tonic-gate 			if (vp->v_vfsp == vfsp) {
29737c478bd9Sstevel@tonic-gate 				mutex_enter(&rpfreelist_lock);
29747c478bd9Sstevel@tonic-gate 				if (rp->r_freef != NULL) {
29757c478bd9Sstevel@tonic-gate 					rp_rmfree(rp);
29767c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
29777c478bd9Sstevel@tonic-gate 					rp_rmhash_locked(rp);
29787c478bd9Sstevel@tonic-gate 					rp->r_hashf = rlist;
29797c478bd9Sstevel@tonic-gate 					rlist = rp;
29807c478bd9Sstevel@tonic-gate 				} else
29817c478bd9Sstevel@tonic-gate 					mutex_exit(&rpfreelist_lock);
29827c478bd9Sstevel@tonic-gate 			}
29837c478bd9Sstevel@tonic-gate 		}
29847c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
29857c478bd9Sstevel@tonic-gate 	}
29867c478bd9Sstevel@tonic-gate 
29877c478bd9Sstevel@tonic-gate 	for (rp = rlist; rp != NULL; rp = rlist) {
29887c478bd9Sstevel@tonic-gate 		rlist = rp->r_hashf;
29897c478bd9Sstevel@tonic-gate 		/*
29907c478bd9Sstevel@tonic-gate 		 * This call to rp_addfree will end up destroying the
29917c478bd9Sstevel@tonic-gate 		 * rnode, but in a safe way with the appropriate set
29927c478bd9Sstevel@tonic-gate 		 * of checks done.
29937c478bd9Sstevel@tonic-gate 		 */
29947c478bd9Sstevel@tonic-gate 		rp_addfree(rp, cr);
29957c478bd9Sstevel@tonic-gate 	}
29967c478bd9Sstevel@tonic-gate 
29977c478bd9Sstevel@tonic-gate }
29987c478bd9Sstevel@tonic-gate 
29997c478bd9Sstevel@tonic-gate /*
30007c478bd9Sstevel@tonic-gate  * This routine destroys all the resources associated with the rnode
30017c478bd9Sstevel@tonic-gate  * and then the rnode itself.
30027c478bd9Sstevel@tonic-gate  */
30037c478bd9Sstevel@tonic-gate static void
30047c478bd9Sstevel@tonic-gate destroy_rnode(rnode_t *rp)
30057c478bd9Sstevel@tonic-gate {
30067c478bd9Sstevel@tonic-gate 	vnode_t *vp;
30077c478bd9Sstevel@tonic-gate 	vfs_t *vfsp;
30087c478bd9Sstevel@tonic-gate 
30097c478bd9Sstevel@tonic-gate 	vp = RTOV(rp);
30107c478bd9Sstevel@tonic-gate 	vfsp = vp->v_vfsp;
30117c478bd9Sstevel@tonic-gate 
30127c478bd9Sstevel@tonic-gate 	ASSERT(vp->v_count == 1);
30137c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_count == 0);
30147c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_lmpl == NULL);
30157c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_mapcnt == 0);
30167c478bd9Sstevel@tonic-gate 	ASSERT(!(rp->r_flags & RHASHED));
30177c478bd9Sstevel@tonic-gate 	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
30187c478bd9Sstevel@tonic-gate 	atomic_add_long((ulong_t *)&rnew, -1);
30197c478bd9Sstevel@tonic-gate #ifdef DEBUG
30207c478bd9Sstevel@tonic-gate 	clstat_debug.nrnode.value.ui64--;
30217c478bd9Sstevel@tonic-gate #endif
30227c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&rp->r_rwlock);
30237c478bd9Sstevel@tonic-gate 	nfs_rw_destroy(&rp->r_lkserlock);
30247c478bd9Sstevel@tonic-gate 	mutex_destroy(&rp->r_statelock);
30257c478bd9Sstevel@tonic-gate 	cv_destroy(&rp->r_cv);
30267c478bd9Sstevel@tonic-gate 	cv_destroy(&rp->r_commit.c_cv);
30277c478bd9Sstevel@tonic-gate 	if (rp->r_flags & RDELMAPLIST)
30287c478bd9Sstevel@tonic-gate 		list_destroy(&rp->r_indelmap);
30297c478bd9Sstevel@tonic-gate 	nfs_free_r_path(rp);
30307c478bd9Sstevel@tonic-gate 	avl_destroy(&rp->r_dir);
30317c478bd9Sstevel@tonic-gate 	vn_invalid(vp);
30327c478bd9Sstevel@tonic-gate 	vn_free(vp);
30337c478bd9Sstevel@tonic-gate 	kmem_cache_free(rnode_cache, rp);
30347c478bd9Sstevel@tonic-gate 	VFS_RELE(vfsp);
30357c478bd9Sstevel@tonic-gate }
30367c478bd9Sstevel@tonic-gate 
30377c478bd9Sstevel@tonic-gate /*
30387c478bd9Sstevel@tonic-gate  * Flush all vnodes in this (or every) vfs.
30397c478bd9Sstevel@tonic-gate  * Used by nfs_sync and by nfs_unmount.
30407c478bd9Sstevel@tonic-gate  */
30417c478bd9Sstevel@tonic-gate void
30427c478bd9Sstevel@tonic-gate rflush(struct vfs *vfsp, cred_t *cr)
30437c478bd9Sstevel@tonic-gate {
30447c478bd9Sstevel@tonic-gate 	int index;
30457c478bd9Sstevel@tonic-gate 	rnode_t *rp;
30467c478bd9Sstevel@tonic-gate 	vnode_t *vp, **vplist;
30477c478bd9Sstevel@tonic-gate 	long num, cnt;
30487c478bd9Sstevel@tonic-gate 
30497c478bd9Sstevel@tonic-gate 	/*
30507c478bd9Sstevel@tonic-gate 	 * Check to see whether there is anything to do.
30517c478bd9Sstevel@tonic-gate 	 */
30527c478bd9Sstevel@tonic-gate 	num = rnew;
30537c478bd9Sstevel@tonic-gate 	if (num == 0)
30547c478bd9Sstevel@tonic-gate 		return;
30557c478bd9Sstevel@tonic-gate 
30567c478bd9Sstevel@tonic-gate 	/*
30577c478bd9Sstevel@tonic-gate 	 * Allocate a slot for all currently active rnodes on the
30587c478bd9Sstevel@tonic-gate 	 * supposition that they all may need flushing.
30597c478bd9Sstevel@tonic-gate 	 */
30607c478bd9Sstevel@tonic-gate 	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
30617c478bd9Sstevel@tonic-gate 	cnt = 0;
30627c478bd9Sstevel@tonic-gate 
30637c478bd9Sstevel@tonic-gate 	/*
30647c478bd9Sstevel@tonic-gate 	 * Walk the hash queues looking for rnodes with page
30657c478bd9Sstevel@tonic-gate 	 * lists associated with them.  Make a list of these
30667c478bd9Sstevel@tonic-gate 	 * files.
30677c478bd9Sstevel@tonic-gate 	 */
30687c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
30697c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
30707c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
30717c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
30727c478bd9Sstevel@tonic-gate 		    rp = rp->r_hashf) {
30737c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
30747c478bd9Sstevel@tonic-gate 			/*
30757c478bd9Sstevel@tonic-gate 			 * Don't bother sync'ing a vp if it
30767c478bd9Sstevel@tonic-gate 			 * is part of virtual swap device or
30777c478bd9Sstevel@tonic-gate 			 * if VFS is read-only
30787c478bd9Sstevel@tonic-gate 			 */
30797c478bd9Sstevel@tonic-gate 			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
30807c478bd9Sstevel@tonic-gate 				continue;
30817c478bd9Sstevel@tonic-gate 			/*
30827c478bd9Sstevel@tonic-gate 			 * If flushing all mounted file systems or
30837c478bd9Sstevel@tonic-gate 			 * the vnode belongs to this vfs, has pages
30847c478bd9Sstevel@tonic-gate 			 * and is marked as either dirty or mmap'd,
30857c478bd9Sstevel@tonic-gate 			 * hold and add this vnode to the list of
30867c478bd9Sstevel@tonic-gate 			 * vnodes to flush.
30877c478bd9Sstevel@tonic-gate 			 */
30887c478bd9Sstevel@tonic-gate 			if ((vfsp == NULL || vp->v_vfsp == vfsp) &&
30897c478bd9Sstevel@tonic-gate 			    vn_has_cached_data(vp) &&
30907c478bd9Sstevel@tonic-gate 			    ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
30917c478bd9Sstevel@tonic-gate 				VN_HOLD(vp);
30927c478bd9Sstevel@tonic-gate 				vplist[cnt++] = vp;
30937c478bd9Sstevel@tonic-gate 				if (cnt == num) {
30947c478bd9Sstevel@tonic-gate 					rw_exit(&rtable[index].r_lock);
30957c478bd9Sstevel@tonic-gate 					goto toomany;
30967c478bd9Sstevel@tonic-gate 				}
30977c478bd9Sstevel@tonic-gate 			}
30987c478bd9Sstevel@tonic-gate 		}
30997c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
31007c478bd9Sstevel@tonic-gate 	}
31017c478bd9Sstevel@tonic-gate toomany:
31027c478bd9Sstevel@tonic-gate 
31037c478bd9Sstevel@tonic-gate 	/*
31047c478bd9Sstevel@tonic-gate 	 * Flush and release all of the files on the list.
31057c478bd9Sstevel@tonic-gate 	 */
31067c478bd9Sstevel@tonic-gate 	while (cnt-- > 0) {
31077c478bd9Sstevel@tonic-gate 		vp = vplist[cnt];
3108da6c28aaSamw 		(void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL);
31097c478bd9Sstevel@tonic-gate 		VN_RELE(vp);
31107c478bd9Sstevel@tonic-gate 	}
31117c478bd9Sstevel@tonic-gate 
31127c478bd9Sstevel@tonic-gate 	/*
31137c478bd9Sstevel@tonic-gate 	 * Free the space allocated to hold the list.
31147c478bd9Sstevel@tonic-gate 	 */
31157c478bd9Sstevel@tonic-gate 	kmem_free(vplist, num * sizeof (*vplist));
31167c478bd9Sstevel@tonic-gate }
31177c478bd9Sstevel@tonic-gate 
31187c478bd9Sstevel@tonic-gate /*
31197c478bd9Sstevel@tonic-gate  * This probably needs to be larger than or equal to
31207c478bd9Sstevel@tonic-gate  * log2(sizeof (struct rnode)) due to the way that rnodes are
31217c478bd9Sstevel@tonic-gate  * allocated.
31227c478bd9Sstevel@tonic-gate  */
31237c478bd9Sstevel@tonic-gate #define	ACACHE_SHIFT_BITS	9
31247c478bd9Sstevel@tonic-gate 
31257c478bd9Sstevel@tonic-gate static int
31267c478bd9Sstevel@tonic-gate acachehash(rnode_t *rp, cred_t *cr)
31277c478bd9Sstevel@tonic-gate {
31287c478bd9Sstevel@tonic-gate 
31297c478bd9Sstevel@tonic-gate 	return ((((intptr_t)rp >> ACACHE_SHIFT_BITS) + crgetuid(cr)) &
31307c478bd9Sstevel@tonic-gate 	    acachemask);
31317c478bd9Sstevel@tonic-gate }
31327c478bd9Sstevel@tonic-gate 
31337c478bd9Sstevel@tonic-gate #ifdef DEBUG
31347c478bd9Sstevel@tonic-gate static long nfs_access_cache_hits = 0;
31357c478bd9Sstevel@tonic-gate static long nfs_access_cache_misses = 0;
31367c478bd9Sstevel@tonic-gate #endif
31377c478bd9Sstevel@tonic-gate 
31387c478bd9Sstevel@tonic-gate nfs_access_type_t
31397c478bd9Sstevel@tonic-gate nfs_access_check(rnode_t *rp, uint32_t acc, cred_t *cr)
31407c478bd9Sstevel@tonic-gate {
31417c478bd9Sstevel@tonic-gate 	vnode_t *vp;
31427c478bd9Sstevel@tonic-gate 	acache_t *ap;
31437c478bd9Sstevel@tonic-gate 	acache_hash_t *hp;
31447c478bd9Sstevel@tonic-gate 	nfs_access_type_t all;
31457c478bd9Sstevel@tonic-gate 
31467c478bd9Sstevel@tonic-gate 	vp = RTOV(rp);
31477c478bd9Sstevel@tonic-gate 	if (!ATTRCACHE_VALID(vp) || nfs_waitfor_purge_complete(vp))
31487c478bd9Sstevel@tonic-gate 		return (NFS_ACCESS_UNKNOWN);
31497c478bd9Sstevel@tonic-gate 
31507c478bd9Sstevel@tonic-gate 	if (rp->r_acache != NULL) {
31517c478bd9Sstevel@tonic-gate 		hp = &acache[acachehash(rp, cr)];
31527c478bd9Sstevel@tonic-gate 		rw_enter(&hp->lock, RW_READER);
31537c478bd9Sstevel@tonic-gate 		ap = hp->next;
31547c478bd9Sstevel@tonic-gate 		while (ap != (acache_t *)hp) {
31557c478bd9Sstevel@tonic-gate 			if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
31567c478bd9Sstevel@tonic-gate 				if ((ap->known & acc) == acc) {
31577c478bd9Sstevel@tonic-gate #ifdef DEBUG
31587c478bd9Sstevel@tonic-gate 					nfs_access_cache_hits++;
31597c478bd9Sstevel@tonic-gate #endif
31607c478bd9Sstevel@tonic-gate 					if ((ap->allowed & acc) == acc)
31617c478bd9Sstevel@tonic-gate 						all = NFS_ACCESS_ALLOWED;
31627c478bd9Sstevel@tonic-gate 					else
31637c478bd9Sstevel@tonic-gate 						all = NFS_ACCESS_DENIED;
31647c478bd9Sstevel@tonic-gate 				} else {
31657c478bd9Sstevel@tonic-gate #ifdef DEBUG
31667c478bd9Sstevel@tonic-gate 					nfs_access_cache_misses++;
31677c478bd9Sstevel@tonic-gate #endif
31687c478bd9Sstevel@tonic-gate 					all = NFS_ACCESS_UNKNOWN;
31697c478bd9Sstevel@tonic-gate 				}
31707c478bd9Sstevel@tonic-gate 				rw_exit(&hp->lock);
31717c478bd9Sstevel@tonic-gate 				return (all);
31727c478bd9Sstevel@tonic-gate 			}
31737c478bd9Sstevel@tonic-gate 			ap = ap->next;
31747c478bd9Sstevel@tonic-gate 		}
31757c478bd9Sstevel@tonic-gate 		rw_exit(&hp->lock);
31767c478bd9Sstevel@tonic-gate 	}
31777c478bd9Sstevel@tonic-gate 
31787c478bd9Sstevel@tonic-gate #ifdef DEBUG
31797c478bd9Sstevel@tonic-gate 	nfs_access_cache_misses++;
31807c478bd9Sstevel@tonic-gate #endif
31817c478bd9Sstevel@tonic-gate 	return (NFS_ACCESS_UNKNOWN);
31827c478bd9Sstevel@tonic-gate }
31837c478bd9Sstevel@tonic-gate 
31847c478bd9Sstevel@tonic-gate void
31857c478bd9Sstevel@tonic-gate nfs_access_cache(rnode_t *rp, uint32_t acc, uint32_t resacc, cred_t *cr)
31867c478bd9Sstevel@tonic-gate {
31877c478bd9Sstevel@tonic-gate 	acache_t *ap;
31887c478bd9Sstevel@tonic-gate 	acache_t *nap;
31897c478bd9Sstevel@tonic-gate 	acache_hash_t *hp;
31907c478bd9Sstevel@tonic-gate 
31917c478bd9Sstevel@tonic-gate 	hp = &acache[acachehash(rp, cr)];
31927c478bd9Sstevel@tonic-gate 
31937c478bd9Sstevel@tonic-gate 	/*
31947c478bd9Sstevel@tonic-gate 	 * Allocate now assuming that mostly an allocation will be
31957c478bd9Sstevel@tonic-gate 	 * required.  This allows the allocation to happen without
31967c478bd9Sstevel@tonic-gate 	 * holding the hash bucket locked.
31977c478bd9Sstevel@tonic-gate 	 */
31987c478bd9Sstevel@tonic-gate 	nap = kmem_cache_alloc(acache_cache, KM_NOSLEEP);
31997c478bd9Sstevel@tonic-gate 	if (nap != NULL) {
32007c478bd9Sstevel@tonic-gate 		nap->known = acc;
32017c478bd9Sstevel@tonic-gate 		nap->allowed = resacc;
32027c478bd9Sstevel@tonic-gate 		nap->rnode = rp;
32037c478bd9Sstevel@tonic-gate 		crhold(cr);
32047c478bd9Sstevel@tonic-gate 		nap->cred = cr;
32057c478bd9Sstevel@tonic-gate 		nap->hashq = hp;
32067c478bd9Sstevel@tonic-gate 	}
32077c478bd9Sstevel@tonic-gate 
32087c478bd9Sstevel@tonic-gate 	rw_enter(&hp->lock, RW_WRITER);
32097c478bd9Sstevel@tonic-gate 
32107c478bd9Sstevel@tonic-gate 	if (rp->r_acache != NULL) {
32117c478bd9Sstevel@tonic-gate 		ap = hp->next;
32127c478bd9Sstevel@tonic-gate 		while (ap != (acache_t *)hp) {
32137c478bd9Sstevel@tonic-gate 			if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) {
32147c478bd9Sstevel@tonic-gate 				ap->known |= acc;
32157c478bd9Sstevel@tonic-gate 				ap->allowed &= ~acc;
32167c478bd9Sstevel@tonic-gate 				ap->allowed |= resacc;
32177c478bd9Sstevel@tonic-gate 				rw_exit(&hp->lock);
32187c478bd9Sstevel@tonic-gate 				if (nap != NULL) {
32197c478bd9Sstevel@tonic-gate 					crfree(nap->cred);
32207c478bd9Sstevel@tonic-gate 					kmem_cache_free(acache_cache, nap);
32217c478bd9Sstevel@tonic-gate 				}
32227c478bd9Sstevel@tonic-gate 				return;
32237c478bd9Sstevel@tonic-gate 			}
32247c478bd9Sstevel@tonic-gate 			ap = ap->next;
32257c478bd9Sstevel@tonic-gate 		}
32267c478bd9Sstevel@tonic-gate 	}
32277c478bd9Sstevel@tonic-gate 
32287c478bd9Sstevel@tonic-gate 	if (nap != NULL) {
32297c478bd9Sstevel@tonic-gate #ifdef DEBUG
32307c478bd9Sstevel@tonic-gate 		clstat_debug.access.value.ui64++;
32317c478bd9Sstevel@tonic-gate #endif
32327c478bd9Sstevel@tonic-gate 		nap->next = hp->next;
32337c478bd9Sstevel@tonic-gate 		hp->next = nap;
32347c478bd9Sstevel@tonic-gate 		nap->next->prev = nap;
32357c478bd9Sstevel@tonic-gate 		nap->prev = (acache_t *)hp;
32367c478bd9Sstevel@tonic-gate 
32377c478bd9Sstevel@tonic-gate 		mutex_enter(&rp->r_statelock);
32387c478bd9Sstevel@tonic-gate 		nap->list = rp->r_acache;
32397c478bd9Sstevel@tonic-gate 		rp->r_acache = nap;
32407c478bd9Sstevel@tonic-gate 		mutex_exit(&rp->r_statelock);
32417c478bd9Sstevel@tonic-gate 	}
32427c478bd9Sstevel@tonic-gate 
32437c478bd9Sstevel@tonic-gate 	rw_exit(&hp->lock);
32447c478bd9Sstevel@tonic-gate }
32457c478bd9Sstevel@tonic-gate 
32467c478bd9Sstevel@tonic-gate int
32477c478bd9Sstevel@tonic-gate nfs_access_purge_rp(rnode_t *rp)
32487c478bd9Sstevel@tonic-gate {
32497c478bd9Sstevel@tonic-gate 	acache_t *ap;
32507c478bd9Sstevel@tonic-gate 	acache_t *tmpap;
32517c478bd9Sstevel@tonic-gate 	acache_t *rplist;
32527c478bd9Sstevel@tonic-gate 
32537c478bd9Sstevel@tonic-gate 	/*
32547c478bd9Sstevel@tonic-gate 	 * If there aren't any cached entries, then there is nothing
32557c478bd9Sstevel@tonic-gate 	 * to free.
32567c478bd9Sstevel@tonic-gate 	 */
32577c478bd9Sstevel@tonic-gate 	if (rp->r_acache == NULL)
32587c478bd9Sstevel@tonic-gate 		return (0);
32597c478bd9Sstevel@tonic-gate 
32607c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
32617c478bd9Sstevel@tonic-gate 	rplist = rp->r_acache;
32627c478bd9Sstevel@tonic-gate 	rp->r_acache = NULL;
32637c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
32647c478bd9Sstevel@tonic-gate 
32657c478bd9Sstevel@tonic-gate 	/*
32667c478bd9Sstevel@tonic-gate 	 * Loop through each entry in the list pointed to in the
32677c478bd9Sstevel@tonic-gate 	 * rnode.  Remove each of these entries from the hash
32687c478bd9Sstevel@tonic-gate 	 * queue that it is on and remove it from the list in
32697c478bd9Sstevel@tonic-gate 	 * the rnode.
32707c478bd9Sstevel@tonic-gate 	 */
32717c478bd9Sstevel@tonic-gate 	for (ap = rplist; ap != NULL; ap = tmpap) {
32727c478bd9Sstevel@tonic-gate 		rw_enter(&ap->hashq->lock, RW_WRITER);
32737c478bd9Sstevel@tonic-gate 		ap->prev->next = ap->next;
32747c478bd9Sstevel@tonic-gate 		ap->next->prev = ap->prev;
32757c478bd9Sstevel@tonic-gate 		rw_exit(&ap->hashq->lock);
32767c478bd9Sstevel@tonic-gate 
32777c478bd9Sstevel@tonic-gate 		tmpap = ap->list;
32787c478bd9Sstevel@tonic-gate 		crfree(ap->cred);
32797c478bd9Sstevel@tonic-gate 		kmem_cache_free(acache_cache, ap);
32807c478bd9Sstevel@tonic-gate #ifdef DEBUG
32817c478bd9Sstevel@tonic-gate 		clstat_debug.access.value.ui64--;
32827c478bd9Sstevel@tonic-gate #endif
32837c478bd9Sstevel@tonic-gate 	}
32847c478bd9Sstevel@tonic-gate 
32857c478bd9Sstevel@tonic-gate 	return (1);
32867c478bd9Sstevel@tonic-gate }
32877c478bd9Sstevel@tonic-gate 
32887c478bd9Sstevel@tonic-gate static const char prefix[] = ".nfs";
32897c478bd9Sstevel@tonic-gate 
32907c478bd9Sstevel@tonic-gate static kmutex_t newnum_lock;
32917c478bd9Sstevel@tonic-gate 
32927c478bd9Sstevel@tonic-gate int
32937c478bd9Sstevel@tonic-gate newnum(void)
32947c478bd9Sstevel@tonic-gate {
32957c478bd9Sstevel@tonic-gate 	static uint_t newnum = 0;
32967c478bd9Sstevel@tonic-gate 	uint_t id;
32977c478bd9Sstevel@tonic-gate 
32987c478bd9Sstevel@tonic-gate 	mutex_enter(&newnum_lock);
32997c478bd9Sstevel@tonic-gate 	if (newnum == 0)
33007c478bd9Sstevel@tonic-gate 		newnum = gethrestime_sec() & 0xffff;
33017c478bd9Sstevel@tonic-gate 	id = newnum++;
33027c478bd9Sstevel@tonic-gate 	mutex_exit(&newnum_lock);
33037c478bd9Sstevel@tonic-gate 	return (id);
33047c478bd9Sstevel@tonic-gate }
33057c478bd9Sstevel@tonic-gate 
33067c478bd9Sstevel@tonic-gate char *
33077c478bd9Sstevel@tonic-gate newname(void)
33087c478bd9Sstevel@tonic-gate {
33097c478bd9Sstevel@tonic-gate 	char *news;
33107c478bd9Sstevel@tonic-gate 	char *s;
33117c478bd9Sstevel@tonic-gate 	const char *p;
33127c478bd9Sstevel@tonic-gate 	uint_t id;
33137c478bd9Sstevel@tonic-gate 
33147c478bd9Sstevel@tonic-gate 	id = newnum();
33157c478bd9Sstevel@tonic-gate 	news = kmem_alloc(MAXNAMELEN, KM_SLEEP);
33167c478bd9Sstevel@tonic-gate 	s = news;
33177c478bd9Sstevel@tonic-gate 	p = prefix;
33187c478bd9Sstevel@tonic-gate 	while (*p != '\0')
33197c478bd9Sstevel@tonic-gate 		*s++ = *p++;
33207c478bd9Sstevel@tonic-gate 	while (id != 0) {
33217c478bd9Sstevel@tonic-gate 		*s++ = "0123456789ABCDEF"[id & 0x0f];
33227c478bd9Sstevel@tonic-gate 		id >>= 4;
33237c478bd9Sstevel@tonic-gate 	}
33247c478bd9Sstevel@tonic-gate 	*s = '\0';
33257c478bd9Sstevel@tonic-gate 	return (news);
33267c478bd9Sstevel@tonic-gate }
33277c478bd9Sstevel@tonic-gate 
33287c478bd9Sstevel@tonic-gate /*
33297c478bd9Sstevel@tonic-gate  * Snapshot callback for nfs:0:nfs_client as registered with the kstat
33307c478bd9Sstevel@tonic-gate  * framework.
33317c478bd9Sstevel@tonic-gate  */
33327c478bd9Sstevel@tonic-gate static int
33337c478bd9Sstevel@tonic-gate cl_snapshot(kstat_t *ksp, void *buf, int rw)
33347c478bd9Sstevel@tonic-gate {
33357c478bd9Sstevel@tonic-gate 	ksp->ks_snaptime = gethrtime();
33367c478bd9Sstevel@tonic-gate 	if (rw == KSTAT_WRITE) {
33377c478bd9Sstevel@tonic-gate 		bcopy(buf, ksp->ks_private, sizeof (clstat_tmpl));
33387c478bd9Sstevel@tonic-gate #ifdef DEBUG
33397c478bd9Sstevel@tonic-gate 		/*
33407c478bd9Sstevel@tonic-gate 		 * Currently only the global zone can write to kstats, but we
33417c478bd9Sstevel@tonic-gate 		 * add the check just for paranoia.
33427c478bd9Sstevel@tonic-gate 		 */
33437c478bd9Sstevel@tonic-gate 		if (INGLOBALZONE(curproc))
33447c478bd9Sstevel@tonic-gate 			bcopy((char *)buf + sizeof (clstat_tmpl), &clstat_debug,
33457c478bd9Sstevel@tonic-gate 			    sizeof (clstat_debug));
33467c478bd9Sstevel@tonic-gate #endif
33477c478bd9Sstevel@tonic-gate 	} else {
33487c478bd9Sstevel@tonic-gate 		bcopy(ksp->ks_private, buf, sizeof (clstat_tmpl));
33497c478bd9Sstevel@tonic-gate #ifdef DEBUG
33507c478bd9Sstevel@tonic-gate 		/*
33517c478bd9Sstevel@tonic-gate 		 * If we're displaying the "global" debug kstat values, we
33527c478bd9Sstevel@tonic-gate 		 * display them as-is to all zones since in fact they apply to
33537c478bd9Sstevel@tonic-gate 		 * the system as a whole.
33547c478bd9Sstevel@tonic-gate 		 */
33557c478bd9Sstevel@tonic-gate 		bcopy(&clstat_debug, (char *)buf + sizeof (clstat_tmpl),
33567c478bd9Sstevel@tonic-gate 		    sizeof (clstat_debug));
33577c478bd9Sstevel@tonic-gate #endif
33587c478bd9Sstevel@tonic-gate 	}
33597c478bd9Sstevel@tonic-gate 	return (0);
33607c478bd9Sstevel@tonic-gate }
33617c478bd9Sstevel@tonic-gate 
33627c478bd9Sstevel@tonic-gate static void *
33637c478bd9Sstevel@tonic-gate clinit_zone(zoneid_t zoneid)
33647c478bd9Sstevel@tonic-gate {
33657c478bd9Sstevel@tonic-gate 	kstat_t *nfs_client_kstat;
33667c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
33677c478bd9Sstevel@tonic-gate 	uint_t ndata;
33687c478bd9Sstevel@tonic-gate 
33697c478bd9Sstevel@tonic-gate 	nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP);
33707c478bd9Sstevel@tonic-gate 	mutex_init(&nfscl->nfscl_chtable_lock, NULL, MUTEX_DEFAULT, NULL);
33717c478bd9Sstevel@tonic-gate 	nfscl->nfscl_chtable = NULL;
33727c478bd9Sstevel@tonic-gate 	nfscl->nfscl_zoneid = zoneid;
33737c478bd9Sstevel@tonic-gate 
33747c478bd9Sstevel@tonic-gate 	bcopy(&clstat_tmpl, &nfscl->nfscl_stat, sizeof (clstat_tmpl));
33757c478bd9Sstevel@tonic-gate 	ndata = sizeof (clstat_tmpl) / sizeof (kstat_named_t);
33767c478bd9Sstevel@tonic-gate #ifdef DEBUG
33777c478bd9Sstevel@tonic-gate 	ndata += sizeof (clstat_debug) / sizeof (kstat_named_t);
33787c478bd9Sstevel@tonic-gate #endif
33797c478bd9Sstevel@tonic-gate 	if ((nfs_client_kstat = kstat_create_zone("nfs", 0, "nfs_client",
33807c478bd9Sstevel@tonic-gate 	    "misc", KSTAT_TYPE_NAMED, ndata,
33817c478bd9Sstevel@tonic-gate 	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) {
33827c478bd9Sstevel@tonic-gate 		nfs_client_kstat->ks_private = &nfscl->nfscl_stat;
33837c478bd9Sstevel@tonic-gate 		nfs_client_kstat->ks_snapshot = cl_snapshot;
33847c478bd9Sstevel@tonic-gate 		kstat_install(nfs_client_kstat);
33857c478bd9Sstevel@tonic-gate 	}
33867c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
33877c478bd9Sstevel@tonic-gate 	list_insert_head(&nfs_clnt_list, nfscl);
33887c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
33897c478bd9Sstevel@tonic-gate 	return (nfscl);
33907c478bd9Sstevel@tonic-gate }
33917c478bd9Sstevel@tonic-gate 
33927c478bd9Sstevel@tonic-gate /*ARGSUSED*/
33937c478bd9Sstevel@tonic-gate static void
33947c478bd9Sstevel@tonic-gate clfini_zone(zoneid_t zoneid, void *arg)
33957c478bd9Sstevel@tonic-gate {
33967c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl = arg;
33977c478bd9Sstevel@tonic-gate 	chhead_t *chp, *next;
33987c478bd9Sstevel@tonic-gate 
33997c478bd9Sstevel@tonic-gate 	if (nfscl == NULL)
34007c478bd9Sstevel@tonic-gate 		return;
34017c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
34027c478bd9Sstevel@tonic-gate 	list_remove(&nfs_clnt_list, nfscl);
34037c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
34047c478bd9Sstevel@tonic-gate 	clreclaim_zone(nfscl, 0);
34057c478bd9Sstevel@tonic-gate 	for (chp = nfscl->nfscl_chtable; chp != NULL; chp = next) {
34067c478bd9Sstevel@tonic-gate 		ASSERT(chp->ch_list == NULL);
34077c478bd9Sstevel@tonic-gate 		kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1);
34087c478bd9Sstevel@tonic-gate 		next = chp->ch_next;
34097c478bd9Sstevel@tonic-gate 		kmem_free(chp, sizeof (*chp));
34107c478bd9Sstevel@tonic-gate 	}
34117c478bd9Sstevel@tonic-gate 	kstat_delete_byname_zone("nfs", 0, "nfs_client", zoneid);
34127c478bd9Sstevel@tonic-gate 	mutex_destroy(&nfscl->nfscl_chtable_lock);
34137c478bd9Sstevel@tonic-gate 	kmem_free(nfscl, sizeof (*nfscl));
34147c478bd9Sstevel@tonic-gate }
34157c478bd9Sstevel@tonic-gate 
34167c478bd9Sstevel@tonic-gate /*
34177c478bd9Sstevel@tonic-gate  * Called by endpnt_destructor to make sure the client handles are
34187c478bd9Sstevel@tonic-gate  * cleaned up before the RPC endpoints.  This becomes a no-op if
34197c478bd9Sstevel@tonic-gate  * clfini_zone (above) is called first.  This function is needed
34207c478bd9Sstevel@tonic-gate  * (rather than relying on clfini_zone to clean up) because the ZSD
34217c478bd9Sstevel@tonic-gate  * callbacks have no ordering mechanism, so we have no way to ensure
34227c478bd9Sstevel@tonic-gate  * that clfini_zone is called before endpnt_destructor.
34237c478bd9Sstevel@tonic-gate  */
34247c478bd9Sstevel@tonic-gate void
34257c478bd9Sstevel@tonic-gate clcleanup_zone(zoneid_t zoneid)
34267c478bd9Sstevel@tonic-gate {
34277c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
34287c478bd9Sstevel@tonic-gate 
34297c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
34307c478bd9Sstevel@tonic-gate 	nfscl = list_head(&nfs_clnt_list);
34317c478bd9Sstevel@tonic-gate 	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) {
34327c478bd9Sstevel@tonic-gate 		if (nfscl->nfscl_zoneid == zoneid) {
34337c478bd9Sstevel@tonic-gate 			clreclaim_zone(nfscl, 0);
34347c478bd9Sstevel@tonic-gate 			break;
34357c478bd9Sstevel@tonic-gate 		}
34367c478bd9Sstevel@tonic-gate 	}
34377c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
34387c478bd9Sstevel@tonic-gate }
34397c478bd9Sstevel@tonic-gate 
34407c478bd9Sstevel@tonic-gate int
34417c478bd9Sstevel@tonic-gate nfs_subrinit(void)
34427c478bd9Sstevel@tonic-gate {
34437c478bd9Sstevel@tonic-gate 	int i;
34447c478bd9Sstevel@tonic-gate 	ulong_t nrnode_max;
34457c478bd9Sstevel@tonic-gate 
34467c478bd9Sstevel@tonic-gate 	/*
34477c478bd9Sstevel@tonic-gate 	 * Allocate and initialize the rnode hash queues
34487c478bd9Sstevel@tonic-gate 	 */
34497c478bd9Sstevel@tonic-gate 	if (nrnode <= 0)
34507c478bd9Sstevel@tonic-gate 		nrnode = ncsize;
34517c478bd9Sstevel@tonic-gate 	nrnode_max = (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode));
34527c478bd9Sstevel@tonic-gate 	if (nrnode > nrnode_max || (nrnode == 0 && ncsize == 0)) {
34537c478bd9Sstevel@tonic-gate 		zcmn_err(GLOBAL_ZONEID, CE_NOTE,
34547c478bd9Sstevel@tonic-gate 		    "setting nrnode to max value of %ld", nrnode_max);
34557c478bd9Sstevel@tonic-gate 		nrnode = nrnode_max;
34567c478bd9Sstevel@tonic-gate 	}
34577c478bd9Sstevel@tonic-gate 
34587c478bd9Sstevel@tonic-gate 	rtablesize = 1 << highbit(nrnode / hashlen);
34597c478bd9Sstevel@tonic-gate 	rtablemask = rtablesize - 1;
34607c478bd9Sstevel@tonic-gate 	rtable = kmem_alloc(rtablesize * sizeof (*rtable), KM_SLEEP);
34617c478bd9Sstevel@tonic-gate 	for (i = 0; i < rtablesize; i++) {
34627c478bd9Sstevel@tonic-gate 		rtable[i].r_hashf = (rnode_t *)(&rtable[i]);
34637c478bd9Sstevel@tonic-gate 		rtable[i].r_hashb = (rnode_t *)(&rtable[i]);
34647c478bd9Sstevel@tonic-gate 		rw_init(&rtable[i].r_lock, NULL, RW_DEFAULT, NULL);
34657c478bd9Sstevel@tonic-gate 	}
34667c478bd9Sstevel@tonic-gate 	rnode_cache = kmem_cache_create("rnode_cache", sizeof (rnode_t),
34677c478bd9Sstevel@tonic-gate 	    0, NULL, NULL, nfs_reclaim, NULL, NULL, 0);
34687c478bd9Sstevel@tonic-gate 
34697c478bd9Sstevel@tonic-gate 	/*
34707c478bd9Sstevel@tonic-gate 	 * Allocate and initialize the access cache
34717c478bd9Sstevel@tonic-gate 	 */
34727c478bd9Sstevel@tonic-gate 
34737c478bd9Sstevel@tonic-gate 	/*
34747c478bd9Sstevel@tonic-gate 	 * Initial guess is one access cache entry per rnode unless
34757c478bd9Sstevel@tonic-gate 	 * nacache is set to a non-zero value and then it is used to
34767c478bd9Sstevel@tonic-gate 	 * indicate a guess at the number of access cache entries.
34777c478bd9Sstevel@tonic-gate 	 */
34787c478bd9Sstevel@tonic-gate 	if (nacache > 0)
34797c478bd9Sstevel@tonic-gate 		acachesize = 1 << highbit(nacache / hashlen);
34807c478bd9Sstevel@tonic-gate 	else
34817c478bd9Sstevel@tonic-gate 		acachesize = rtablesize;
34827c478bd9Sstevel@tonic-gate 	acachemask = acachesize - 1;
34837c478bd9Sstevel@tonic-gate 	acache = kmem_alloc(acachesize * sizeof (*acache), KM_SLEEP);
34847c478bd9Sstevel@tonic-gate 	for (i = 0; i < acachesize; i++) {
34857c478bd9Sstevel@tonic-gate 		acache[i].next = (acache_t *)&acache[i];
34867c478bd9Sstevel@tonic-gate 		acache[i].prev = (acache_t *)&acache[i];
34877c478bd9Sstevel@tonic-gate 		rw_init(&acache[i].lock, NULL, RW_DEFAULT, NULL);
34887c478bd9Sstevel@tonic-gate 	}
34897c478bd9Sstevel@tonic-gate 	acache_cache = kmem_cache_create("nfs_access_cache",
34907c478bd9Sstevel@tonic-gate 	    sizeof (acache_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
34917c478bd9Sstevel@tonic-gate 	/*
34927c478bd9Sstevel@tonic-gate 	 * Allocate and initialize the client handle cache
34937c478bd9Sstevel@tonic-gate 	 */
34947c478bd9Sstevel@tonic-gate 	chtab_cache = kmem_cache_create("client_handle_cache",
34957106075aSmarks 	    sizeof (struct chtab), 0, NULL, NULL, clreclaim, NULL, NULL, 0);
34967c478bd9Sstevel@tonic-gate 	/*
34977c478bd9Sstevel@tonic-gate 	 * Initialize the list of per-zone client handles (and associated data).
34987c478bd9Sstevel@tonic-gate 	 * This needs to be done before we call zone_key_create().
34997c478bd9Sstevel@tonic-gate 	 */
35007c478bd9Sstevel@tonic-gate 	list_create(&nfs_clnt_list, sizeof (struct nfs_clnt),
35017c478bd9Sstevel@tonic-gate 	    offsetof(struct nfs_clnt, nfscl_node));
35027c478bd9Sstevel@tonic-gate 	/*
35037c478bd9Sstevel@tonic-gate 	 * Initialize the zone_key for per-zone client handle lists.
35047c478bd9Sstevel@tonic-gate 	 */
35057c478bd9Sstevel@tonic-gate 	zone_key_create(&nfsclnt_zone_key, clinit_zone, NULL, clfini_zone);
35067c478bd9Sstevel@tonic-gate 	/*
35077c478bd9Sstevel@tonic-gate 	 * Initialize the various mutexes and reader/writer locks
35087c478bd9Sstevel@tonic-gate 	 */
35097c478bd9Sstevel@tonic-gate 	mutex_init(&rpfreelist_lock, NULL, MUTEX_DEFAULT, NULL);
35107c478bd9Sstevel@tonic-gate 	mutex_init(&newnum_lock, NULL, MUTEX_DEFAULT, NULL);
35117c478bd9Sstevel@tonic-gate 	mutex_init(&nfs_minor_lock, NULL, MUTEX_DEFAULT, NULL);
35127c478bd9Sstevel@tonic-gate 
35137c478bd9Sstevel@tonic-gate 	/*
35147c478bd9Sstevel@tonic-gate 	 * Assign unique major number for all nfs mounts
35157c478bd9Sstevel@tonic-gate 	 */
35167c478bd9Sstevel@tonic-gate 	if ((nfs_major = getudev()) == -1) {
35177c478bd9Sstevel@tonic-gate 		zcmn_err(GLOBAL_ZONEID, CE_WARN,
35187c478bd9Sstevel@tonic-gate 		    "nfs: init: can't get unique device number");
35197c478bd9Sstevel@tonic-gate 		nfs_major = 0;
35207c478bd9Sstevel@tonic-gate 	}
35217c478bd9Sstevel@tonic-gate 	nfs_minor = 0;
35227c478bd9Sstevel@tonic-gate 
35237c478bd9Sstevel@tonic-gate 	if (nfs3_jukebox_delay == 0)
35247c478bd9Sstevel@tonic-gate 		nfs3_jukebox_delay = NFS3_JUKEBOX_DELAY;
35257c478bd9Sstevel@tonic-gate 
35267c478bd9Sstevel@tonic-gate 	return (0);
35277c478bd9Sstevel@tonic-gate }
35287c478bd9Sstevel@tonic-gate 
35297c478bd9Sstevel@tonic-gate void
35307c478bd9Sstevel@tonic-gate nfs_subrfini(void)
35317c478bd9Sstevel@tonic-gate {
35327c478bd9Sstevel@tonic-gate 	int i;
35337c478bd9Sstevel@tonic-gate 
35347c478bd9Sstevel@tonic-gate 	/*
35357c478bd9Sstevel@tonic-gate 	 * Deallocate the rnode hash queues
35367c478bd9Sstevel@tonic-gate 	 */
35377c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(rnode_cache);
35387c478bd9Sstevel@tonic-gate 
35397c478bd9Sstevel@tonic-gate 	for (i = 0; i < rtablesize; i++)
35407c478bd9Sstevel@tonic-gate 		rw_destroy(&rtable[i].r_lock);
35417c478bd9Sstevel@tonic-gate 	kmem_free(rtable, rtablesize * sizeof (*rtable));
35427c478bd9Sstevel@tonic-gate 
35437c478bd9Sstevel@tonic-gate 	/*
35447c478bd9Sstevel@tonic-gate 	 * Deallocated the access cache
35457c478bd9Sstevel@tonic-gate 	 */
35467c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(acache_cache);
35477c478bd9Sstevel@tonic-gate 
35487c478bd9Sstevel@tonic-gate 	for (i = 0; i < acachesize; i++)
35497c478bd9Sstevel@tonic-gate 		rw_destroy(&acache[i].lock);
35507c478bd9Sstevel@tonic-gate 	kmem_free(acache, acachesize * sizeof (*acache));
35517c478bd9Sstevel@tonic-gate 
35527c478bd9Sstevel@tonic-gate 	/*
35537c478bd9Sstevel@tonic-gate 	 * Deallocate the client handle cache
35547c478bd9Sstevel@tonic-gate 	 */
35557c478bd9Sstevel@tonic-gate 	kmem_cache_destroy(chtab_cache);
35567c478bd9Sstevel@tonic-gate 
35577c478bd9Sstevel@tonic-gate 	/*
35587c478bd9Sstevel@tonic-gate 	 * Destroy the various mutexes and reader/writer locks
35597c478bd9Sstevel@tonic-gate 	 */
35607c478bd9Sstevel@tonic-gate 	mutex_destroy(&rpfreelist_lock);
35617c478bd9Sstevel@tonic-gate 	mutex_destroy(&newnum_lock);
35627c478bd9Sstevel@tonic-gate 	mutex_destroy(&nfs_minor_lock);
35637c478bd9Sstevel@tonic-gate 	(void) zone_key_delete(nfsclnt_zone_key);
35647c478bd9Sstevel@tonic-gate }
35657c478bd9Sstevel@tonic-gate 
35667c478bd9Sstevel@tonic-gate enum nfsstat
35677c478bd9Sstevel@tonic-gate puterrno(int error)
35687c478bd9Sstevel@tonic-gate {
35697c478bd9Sstevel@tonic-gate 
35707c478bd9Sstevel@tonic-gate 	switch (error) {
35717c478bd9Sstevel@tonic-gate 	case EOPNOTSUPP:
35727c478bd9Sstevel@tonic-gate 		return (NFSERR_OPNOTSUPP);
35737c478bd9Sstevel@tonic-gate 	case ENAMETOOLONG:
35747c478bd9Sstevel@tonic-gate 		return (NFSERR_NAMETOOLONG);
35757c478bd9Sstevel@tonic-gate 	case ENOTEMPTY:
35767c478bd9Sstevel@tonic-gate 		return (NFSERR_NOTEMPTY);
35777c478bd9Sstevel@tonic-gate 	case EDQUOT:
35787c478bd9Sstevel@tonic-gate 		return (NFSERR_DQUOT);
35797c478bd9Sstevel@tonic-gate 	case ESTALE:
35807c478bd9Sstevel@tonic-gate 		return (NFSERR_STALE);
35817c478bd9Sstevel@tonic-gate 	case EREMOTE:
35827c478bd9Sstevel@tonic-gate 		return (NFSERR_REMOTE);
35837c478bd9Sstevel@tonic-gate 	case ENOSYS:
35847c478bd9Sstevel@tonic-gate 		return (NFSERR_OPNOTSUPP);
35857c478bd9Sstevel@tonic-gate 	case EOVERFLOW:
35867c478bd9Sstevel@tonic-gate 		return (NFSERR_INVAL);
35877c478bd9Sstevel@tonic-gate 	default:
35887c478bd9Sstevel@tonic-gate 		return ((enum nfsstat)error);
35897c478bd9Sstevel@tonic-gate 	}
35907c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
35917c478bd9Sstevel@tonic-gate }
35927c478bd9Sstevel@tonic-gate 
35937c478bd9Sstevel@tonic-gate int
35947c478bd9Sstevel@tonic-gate geterrno(enum nfsstat status)
35957c478bd9Sstevel@tonic-gate {
35967c478bd9Sstevel@tonic-gate 
35977c478bd9Sstevel@tonic-gate 	switch (status) {
35987c478bd9Sstevel@tonic-gate 	case NFSERR_OPNOTSUPP:
35997c478bd9Sstevel@tonic-gate 		return (EOPNOTSUPP);
36007c478bd9Sstevel@tonic-gate 	case NFSERR_NAMETOOLONG:
36017c478bd9Sstevel@tonic-gate 		return (ENAMETOOLONG);
36027c478bd9Sstevel@tonic-gate 	case NFSERR_NOTEMPTY:
36037c478bd9Sstevel@tonic-gate 		return (ENOTEMPTY);
36047c478bd9Sstevel@tonic-gate 	case NFSERR_DQUOT:
36057c478bd9Sstevel@tonic-gate 		return (EDQUOT);
36067c478bd9Sstevel@tonic-gate 	case NFSERR_STALE:
36077c478bd9Sstevel@tonic-gate 		return (ESTALE);
36087c478bd9Sstevel@tonic-gate 	case NFSERR_REMOTE:
36097c478bd9Sstevel@tonic-gate 		return (EREMOTE);
36107c478bd9Sstevel@tonic-gate 	case NFSERR_WFLUSH:
36117c478bd9Sstevel@tonic-gate 		return (EIO);
36127c478bd9Sstevel@tonic-gate 	default:
36137c478bd9Sstevel@tonic-gate 		return ((int)status);
36147c478bd9Sstevel@tonic-gate 	}
36157c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
36167c478bd9Sstevel@tonic-gate }
36177c478bd9Sstevel@tonic-gate 
36187c478bd9Sstevel@tonic-gate enum nfsstat3
36197c478bd9Sstevel@tonic-gate puterrno3(int error)
36207c478bd9Sstevel@tonic-gate {
36217c478bd9Sstevel@tonic-gate 
36227c478bd9Sstevel@tonic-gate #ifdef DEBUG
36237c478bd9Sstevel@tonic-gate 	switch (error) {
36247c478bd9Sstevel@tonic-gate 	case 0:
36257c478bd9Sstevel@tonic-gate 		return (NFS3_OK);
36267c478bd9Sstevel@tonic-gate 	case EPERM:
36277c478bd9Sstevel@tonic-gate 		return (NFS3ERR_PERM);
36287c478bd9Sstevel@tonic-gate 	case ENOENT:
36297c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NOENT);
36307c478bd9Sstevel@tonic-gate 	case EIO:
36317c478bd9Sstevel@tonic-gate 		return (NFS3ERR_IO);
36327c478bd9Sstevel@tonic-gate 	case ENXIO:
36337c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NXIO);
36347c478bd9Sstevel@tonic-gate 	case EACCES:
36357c478bd9Sstevel@tonic-gate 		return (NFS3ERR_ACCES);
36367c478bd9Sstevel@tonic-gate 	case EEXIST:
36377c478bd9Sstevel@tonic-gate 		return (NFS3ERR_EXIST);
36387c478bd9Sstevel@tonic-gate 	case EXDEV:
36397c478bd9Sstevel@tonic-gate 		return (NFS3ERR_XDEV);
36407c478bd9Sstevel@tonic-gate 	case ENODEV:
36417c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NODEV);
36427c478bd9Sstevel@tonic-gate 	case ENOTDIR:
36437c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NOTDIR);
36447c478bd9Sstevel@tonic-gate 	case EISDIR:
36457c478bd9Sstevel@tonic-gate 		return (NFS3ERR_ISDIR);
36467c478bd9Sstevel@tonic-gate 	case EINVAL:
36477c478bd9Sstevel@tonic-gate 		return (NFS3ERR_INVAL);
36487c478bd9Sstevel@tonic-gate 	case EFBIG:
36497c478bd9Sstevel@tonic-gate 		return (NFS3ERR_FBIG);
36507c478bd9Sstevel@tonic-gate 	case ENOSPC:
36517c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NOSPC);
36527c478bd9Sstevel@tonic-gate 	case EROFS:
36537c478bd9Sstevel@tonic-gate 		return (NFS3ERR_ROFS);
36547c478bd9Sstevel@tonic-gate 	case EMLINK:
36557c478bd9Sstevel@tonic-gate 		return (NFS3ERR_MLINK);
36567c478bd9Sstevel@tonic-gate 	case ENAMETOOLONG:
36577c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NAMETOOLONG);
36587c478bd9Sstevel@tonic-gate 	case ENOTEMPTY:
36597c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NOTEMPTY);
36607c478bd9Sstevel@tonic-gate 	case EDQUOT:
36617c478bd9Sstevel@tonic-gate 		return (NFS3ERR_DQUOT);
36627c478bd9Sstevel@tonic-gate 	case ESTALE:
36637c478bd9Sstevel@tonic-gate 		return (NFS3ERR_STALE);
36647c478bd9Sstevel@tonic-gate 	case EREMOTE:
36657c478bd9Sstevel@tonic-gate 		return (NFS3ERR_REMOTE);
36667106075aSmarks 	case ENOSYS:
36677c478bd9Sstevel@tonic-gate 	case EOPNOTSUPP:
36687c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NOTSUPP);
36697c478bd9Sstevel@tonic-gate 	case EOVERFLOW:
36707c478bd9Sstevel@tonic-gate 		return (NFS3ERR_INVAL);
36717c478bd9Sstevel@tonic-gate 	default:
36727c478bd9Sstevel@tonic-gate 		zcmn_err(getzoneid(), CE_WARN,
36737c478bd9Sstevel@tonic-gate 		    "puterrno3: got error %d", error);
36747c478bd9Sstevel@tonic-gate 		return ((enum nfsstat3)error);
36757c478bd9Sstevel@tonic-gate 	}
36767c478bd9Sstevel@tonic-gate #else
36777c478bd9Sstevel@tonic-gate 	switch (error) {
36787c478bd9Sstevel@tonic-gate 	case ENAMETOOLONG:
36797c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NAMETOOLONG);
36807c478bd9Sstevel@tonic-gate 	case ENOTEMPTY:
36817c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NOTEMPTY);
36827c478bd9Sstevel@tonic-gate 	case EDQUOT:
36837c478bd9Sstevel@tonic-gate 		return (NFS3ERR_DQUOT);
36847c478bd9Sstevel@tonic-gate 	case ESTALE:
36857c478bd9Sstevel@tonic-gate 		return (NFS3ERR_STALE);
36867106075aSmarks 	case ENOSYS:
36877c478bd9Sstevel@tonic-gate 	case EOPNOTSUPP:
36887c478bd9Sstevel@tonic-gate 		return (NFS3ERR_NOTSUPP);
36897c478bd9Sstevel@tonic-gate 	case EREMOTE:
36907c478bd9Sstevel@tonic-gate 		return (NFS3ERR_REMOTE);
36917c478bd9Sstevel@tonic-gate 	case EOVERFLOW:
36927c478bd9Sstevel@tonic-gate 		return (NFS3ERR_INVAL);
36937c478bd9Sstevel@tonic-gate 	default:
36947c478bd9Sstevel@tonic-gate 		return ((enum nfsstat3)error);
36957c478bd9Sstevel@tonic-gate 	}
36967c478bd9Sstevel@tonic-gate #endif
36977c478bd9Sstevel@tonic-gate }
36987c478bd9Sstevel@tonic-gate 
36997c478bd9Sstevel@tonic-gate int
37007c478bd9Sstevel@tonic-gate geterrno3(enum nfsstat3 status)
37017c478bd9Sstevel@tonic-gate {
37027c478bd9Sstevel@tonic-gate 
37037c478bd9Sstevel@tonic-gate #ifdef DEBUG
37047c478bd9Sstevel@tonic-gate 	switch (status) {
37057c478bd9Sstevel@tonic-gate 	case NFS3_OK:
37067c478bd9Sstevel@tonic-gate 		return (0);
37077c478bd9Sstevel@tonic-gate 	case NFS3ERR_PERM:
37087c478bd9Sstevel@tonic-gate 		return (EPERM);
37097c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOENT:
37107c478bd9Sstevel@tonic-gate 		return (ENOENT);
37117c478bd9Sstevel@tonic-gate 	case NFS3ERR_IO:
37127c478bd9Sstevel@tonic-gate 		return (EIO);
37137c478bd9Sstevel@tonic-gate 	case NFS3ERR_NXIO:
37147c478bd9Sstevel@tonic-gate 		return (ENXIO);
37157c478bd9Sstevel@tonic-gate 	case NFS3ERR_ACCES:
37167c478bd9Sstevel@tonic-gate 		return (EACCES);
37177c478bd9Sstevel@tonic-gate 	case NFS3ERR_EXIST:
37187c478bd9Sstevel@tonic-gate 		return (EEXIST);
37197c478bd9Sstevel@tonic-gate 	case NFS3ERR_XDEV:
37207c478bd9Sstevel@tonic-gate 		return (EXDEV);
37217c478bd9Sstevel@tonic-gate 	case NFS3ERR_NODEV:
37227c478bd9Sstevel@tonic-gate 		return (ENODEV);
37237c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOTDIR:
37247c478bd9Sstevel@tonic-gate 		return (ENOTDIR);
37257c478bd9Sstevel@tonic-gate 	case NFS3ERR_ISDIR:
37267c478bd9Sstevel@tonic-gate 		return (EISDIR);
37277c478bd9Sstevel@tonic-gate 	case NFS3ERR_INVAL:
37287c478bd9Sstevel@tonic-gate 		return (EINVAL);
37297c478bd9Sstevel@tonic-gate 	case NFS3ERR_FBIG:
37307c478bd9Sstevel@tonic-gate 		return (EFBIG);
37317c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOSPC:
37327c478bd9Sstevel@tonic-gate 		return (ENOSPC);
37337c478bd9Sstevel@tonic-gate 	case NFS3ERR_ROFS:
37347c478bd9Sstevel@tonic-gate 		return (EROFS);
37357c478bd9Sstevel@tonic-gate 	case NFS3ERR_MLINK:
37367c478bd9Sstevel@tonic-gate 		return (EMLINK);
37377c478bd9Sstevel@tonic-gate 	case NFS3ERR_NAMETOOLONG:
37387c478bd9Sstevel@tonic-gate 		return (ENAMETOOLONG);
37397c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOTEMPTY:
37407c478bd9Sstevel@tonic-gate 		return (ENOTEMPTY);
37417c478bd9Sstevel@tonic-gate 	case NFS3ERR_DQUOT:
37427c478bd9Sstevel@tonic-gate 		return (EDQUOT);
37437c478bd9Sstevel@tonic-gate 	case NFS3ERR_STALE:
37447c478bd9Sstevel@tonic-gate 		return (ESTALE);
37457c478bd9Sstevel@tonic-gate 	case NFS3ERR_REMOTE:
37467c478bd9Sstevel@tonic-gate 		return (EREMOTE);
37477c478bd9Sstevel@tonic-gate 	case NFS3ERR_BADHANDLE:
37487c478bd9Sstevel@tonic-gate 		return (ESTALE);
37497c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOT_SYNC:
37507c478bd9Sstevel@tonic-gate 		return (EINVAL);
37517c478bd9Sstevel@tonic-gate 	case NFS3ERR_BAD_COOKIE:
37527c478bd9Sstevel@tonic-gate 		return (ENOENT);
37537c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOTSUPP:
37547c478bd9Sstevel@tonic-gate 		return (EOPNOTSUPP);
37557c478bd9Sstevel@tonic-gate 	case NFS3ERR_TOOSMALL:
37567c478bd9Sstevel@tonic-gate 		return (EINVAL);
37577c478bd9Sstevel@tonic-gate 	case NFS3ERR_SERVERFAULT:
37587c478bd9Sstevel@tonic-gate 		return (EIO);
37597c478bd9Sstevel@tonic-gate 	case NFS3ERR_BADTYPE:
37607c478bd9Sstevel@tonic-gate 		return (EINVAL);
37617c478bd9Sstevel@tonic-gate 	case NFS3ERR_JUKEBOX:
37627c478bd9Sstevel@tonic-gate 		return (ENXIO);
37637c478bd9Sstevel@tonic-gate 	default:
37647c478bd9Sstevel@tonic-gate 		zcmn_err(getzoneid(), CE_WARN,
37657c478bd9Sstevel@tonic-gate 		    "geterrno3: got status %d", status);
37667c478bd9Sstevel@tonic-gate 		return ((int)status);
37677c478bd9Sstevel@tonic-gate 	}
37687c478bd9Sstevel@tonic-gate #else
37697c478bd9Sstevel@tonic-gate 	switch (status) {
37707c478bd9Sstevel@tonic-gate 	case NFS3ERR_NAMETOOLONG:
37717c478bd9Sstevel@tonic-gate 		return (ENAMETOOLONG);
37727c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOTEMPTY:
37737c478bd9Sstevel@tonic-gate 		return (ENOTEMPTY);
37747c478bd9Sstevel@tonic-gate 	case NFS3ERR_DQUOT:
37757c478bd9Sstevel@tonic-gate 		return (EDQUOT);
37767c478bd9Sstevel@tonic-gate 	case NFS3ERR_STALE:
37777c478bd9Sstevel@tonic-gate 	case NFS3ERR_BADHANDLE:
37787c478bd9Sstevel@tonic-gate 		return (ESTALE);
37797c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOTSUPP:
37807c478bd9Sstevel@tonic-gate 		return (EOPNOTSUPP);
37817c478bd9Sstevel@tonic-gate 	case NFS3ERR_REMOTE:
37827c478bd9Sstevel@tonic-gate 		return (EREMOTE);
37837c478bd9Sstevel@tonic-gate 	case NFS3ERR_NOT_SYNC:
37847c478bd9Sstevel@tonic-gate 	case NFS3ERR_TOOSMALL:
37857c478bd9Sstevel@tonic-gate 	case NFS3ERR_BADTYPE:
37867c478bd9Sstevel@tonic-gate 		return (EINVAL);
37877c478bd9Sstevel@tonic-gate 	case NFS3ERR_BAD_COOKIE:
37887c478bd9Sstevel@tonic-gate 		return (ENOENT);
37897c478bd9Sstevel@tonic-gate 	case NFS3ERR_SERVERFAULT:
37907c478bd9Sstevel@tonic-gate 		return (EIO);
37917c478bd9Sstevel@tonic-gate 	case NFS3ERR_JUKEBOX:
37927c478bd9Sstevel@tonic-gate 		return (ENXIO);
37937c478bd9Sstevel@tonic-gate 	default:
37947c478bd9Sstevel@tonic-gate 		return ((int)status);
37957c478bd9Sstevel@tonic-gate 	}
37967c478bd9Sstevel@tonic-gate #endif
37977c478bd9Sstevel@tonic-gate }
37987c478bd9Sstevel@tonic-gate 
37997c478bd9Sstevel@tonic-gate rddir_cache *
38007c478bd9Sstevel@tonic-gate rddir_cache_alloc(int flags)
38017c478bd9Sstevel@tonic-gate {
38027c478bd9Sstevel@tonic-gate 	rddir_cache *rc;
38037c478bd9Sstevel@tonic-gate 
38047c478bd9Sstevel@tonic-gate 	rc = kmem_alloc(sizeof (*rc), flags);
38057c478bd9Sstevel@tonic-gate 	if (rc != NULL) {
38067c478bd9Sstevel@tonic-gate 		rc->entries = NULL;
38077c478bd9Sstevel@tonic-gate 		rc->flags = RDDIR;
38087c478bd9Sstevel@tonic-gate 		cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
38097c478bd9Sstevel@tonic-gate 		mutex_init(&rc->lock, NULL, MUTEX_DEFAULT, NULL);
38107c478bd9Sstevel@tonic-gate 		rc->count = 1;
38117c478bd9Sstevel@tonic-gate #ifdef DEBUG
38127c478bd9Sstevel@tonic-gate 		atomic_add_64(&clstat_debug.dirent.value.ui64, 1);
38137c478bd9Sstevel@tonic-gate #endif
38147c478bd9Sstevel@tonic-gate 	}
38157c478bd9Sstevel@tonic-gate 	return (rc);
38167c478bd9Sstevel@tonic-gate }
38177c478bd9Sstevel@tonic-gate 
38187c478bd9Sstevel@tonic-gate static void
38197c478bd9Sstevel@tonic-gate rddir_cache_free(rddir_cache *rc)
38207c478bd9Sstevel@tonic-gate {
38217c478bd9Sstevel@tonic-gate 
38227c478bd9Sstevel@tonic-gate #ifdef DEBUG
38237c478bd9Sstevel@tonic-gate 	atomic_add_64(&clstat_debug.dirent.value.ui64, -1);
38247c478bd9Sstevel@tonic-gate #endif
38257c478bd9Sstevel@tonic-gate 	if (rc->entries != NULL) {
38267c478bd9Sstevel@tonic-gate #ifdef DEBUG
38277c478bd9Sstevel@tonic-gate 		rddir_cache_buf_free(rc->entries, rc->buflen);
38287c478bd9Sstevel@tonic-gate #else
38297c478bd9Sstevel@tonic-gate 		kmem_free(rc->entries, rc->buflen);
38307c478bd9Sstevel@tonic-gate #endif
38317c478bd9Sstevel@tonic-gate 	}
38327c478bd9Sstevel@tonic-gate 	cv_destroy(&rc->cv);
38337c478bd9Sstevel@tonic-gate 	mutex_destroy(&rc->lock);
38347c478bd9Sstevel@tonic-gate 	kmem_free(rc, sizeof (*rc));
38357c478bd9Sstevel@tonic-gate }
38367c478bd9Sstevel@tonic-gate 
38377c478bd9Sstevel@tonic-gate void
38387c478bd9Sstevel@tonic-gate rddir_cache_hold(rddir_cache *rc)
38397c478bd9Sstevel@tonic-gate {
38407c478bd9Sstevel@tonic-gate 
38417c478bd9Sstevel@tonic-gate 	mutex_enter(&rc->lock);
38427c478bd9Sstevel@tonic-gate 	rc->count++;
38437c478bd9Sstevel@tonic-gate 	mutex_exit(&rc->lock);
38447c478bd9Sstevel@tonic-gate }
38457c478bd9Sstevel@tonic-gate 
38467c478bd9Sstevel@tonic-gate void
38477c478bd9Sstevel@tonic-gate rddir_cache_rele(rddir_cache *rc)
38487c478bd9Sstevel@tonic-gate {
38497c478bd9Sstevel@tonic-gate 
38507c478bd9Sstevel@tonic-gate 	mutex_enter(&rc->lock);
38517c478bd9Sstevel@tonic-gate 	ASSERT(rc->count > 0);
38527c478bd9Sstevel@tonic-gate 	if (--rc->count == 0) {
38537c478bd9Sstevel@tonic-gate 		mutex_exit(&rc->lock);
38547c478bd9Sstevel@tonic-gate 		rddir_cache_free(rc);
38557c478bd9Sstevel@tonic-gate 	} else
38567c478bd9Sstevel@tonic-gate 		mutex_exit(&rc->lock);
38577c478bd9Sstevel@tonic-gate }
38587c478bd9Sstevel@tonic-gate 
38597c478bd9Sstevel@tonic-gate #ifdef DEBUG
38607c478bd9Sstevel@tonic-gate char *
38617c478bd9Sstevel@tonic-gate rddir_cache_buf_alloc(size_t size, int flags)
38627c478bd9Sstevel@tonic-gate {
38637c478bd9Sstevel@tonic-gate 	char *rc;
38647c478bd9Sstevel@tonic-gate 
38657c478bd9Sstevel@tonic-gate 	rc = kmem_alloc(size, flags);
38667c478bd9Sstevel@tonic-gate 	if (rc != NULL)
38677c478bd9Sstevel@tonic-gate 		atomic_add_64(&clstat_debug.dirents.value.ui64, size);
38687c478bd9Sstevel@tonic-gate 	return (rc);
38697c478bd9Sstevel@tonic-gate }
38707c478bd9Sstevel@tonic-gate 
38717c478bd9Sstevel@tonic-gate void
38727c478bd9Sstevel@tonic-gate rddir_cache_buf_free(void *addr, size_t size)
38737c478bd9Sstevel@tonic-gate {
38747c478bd9Sstevel@tonic-gate 
38757c478bd9Sstevel@tonic-gate 	atomic_add_64(&clstat_debug.dirents.value.ui64, -(int64_t)size);
38767c478bd9Sstevel@tonic-gate 	kmem_free(addr, size);
38777c478bd9Sstevel@tonic-gate }
38787c478bd9Sstevel@tonic-gate #endif
38797c478bd9Sstevel@tonic-gate 
38807c478bd9Sstevel@tonic-gate static int
38817c478bd9Sstevel@tonic-gate nfs_free_data_reclaim(rnode_t *rp)
38827c478bd9Sstevel@tonic-gate {
38837c478bd9Sstevel@tonic-gate 	char *contents;
38847c478bd9Sstevel@tonic-gate 	int size;
38857c478bd9Sstevel@tonic-gate 	vsecattr_t *vsp;
38867c478bd9Sstevel@tonic-gate 	nfs3_pathconf_info *info;
38877c478bd9Sstevel@tonic-gate 	int freed;
38887c478bd9Sstevel@tonic-gate 	cred_t *cred;
38897c478bd9Sstevel@tonic-gate 
38907c478bd9Sstevel@tonic-gate 	/*
38917c478bd9Sstevel@tonic-gate 	 * Free any held credentials and caches which
38927c478bd9Sstevel@tonic-gate 	 * may be associated with this rnode.
38937c478bd9Sstevel@tonic-gate 	 */
38947c478bd9Sstevel@tonic-gate 	mutex_enter(&rp->r_statelock);
38957c478bd9Sstevel@tonic-gate 	cred = rp->r_cred;
38967c478bd9Sstevel@tonic-gate 	rp->r_cred = NULL;
38977c478bd9Sstevel@tonic-gate 	contents = rp->r_symlink.contents;
38987c478bd9Sstevel@tonic-gate 	size = rp->r_symlink.size;
38997c478bd9Sstevel@tonic-gate 	rp->r_symlink.contents = NULL;
39007c478bd9Sstevel@tonic-gate 	vsp = rp->r_secattr;
39017c478bd9Sstevel@tonic-gate 	rp->r_secattr = NULL;
39027c478bd9Sstevel@tonic-gate 	info = rp->r_pathconf;
39037c478bd9Sstevel@tonic-gate 	rp->r_pathconf = NULL;
39047c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
39057c478bd9Sstevel@tonic-gate 
39067c478bd9Sstevel@tonic-gate 	if (cred != NULL)
39077c478bd9Sstevel@tonic-gate 		crfree(cred);
39087c478bd9Sstevel@tonic-gate 
39097c478bd9Sstevel@tonic-gate 	/*
39107c478bd9Sstevel@tonic-gate 	 * Free the access cache entries.
39117c478bd9Sstevel@tonic-gate 	 */
39127c478bd9Sstevel@tonic-gate 	freed = nfs_access_purge_rp(rp);
39137c478bd9Sstevel@tonic-gate 
39147c478bd9Sstevel@tonic-gate 	if (!HAVE_RDDIR_CACHE(rp) &&
39157c478bd9Sstevel@tonic-gate 	    contents == NULL &&
39167c478bd9Sstevel@tonic-gate 	    vsp == NULL &&
39177c478bd9Sstevel@tonic-gate 	    info == NULL)
39187c478bd9Sstevel@tonic-gate 		return (freed);
39197c478bd9Sstevel@tonic-gate 
39207c478bd9Sstevel@tonic-gate 	/*
39217c478bd9Sstevel@tonic-gate 	 * Free the readdir cache entries
39227c478bd9Sstevel@tonic-gate 	 */
39237c478bd9Sstevel@tonic-gate 	if (HAVE_RDDIR_CACHE(rp))
39247c478bd9Sstevel@tonic-gate 		nfs_purge_rddir_cache(RTOV(rp));
39257c478bd9Sstevel@tonic-gate 
39267c478bd9Sstevel@tonic-gate 	/*
39277c478bd9Sstevel@tonic-gate 	 * Free the symbolic link cache.
39287c478bd9Sstevel@tonic-gate 	 */
39297c478bd9Sstevel@tonic-gate 	if (contents != NULL) {
39307c478bd9Sstevel@tonic-gate 
39317c478bd9Sstevel@tonic-gate 		kmem_free((void *)contents, size);
39327c478bd9Sstevel@tonic-gate 	}
39337c478bd9Sstevel@tonic-gate 
39347c478bd9Sstevel@tonic-gate 	/*
39357c478bd9Sstevel@tonic-gate 	 * Free any cached ACL.
39367c478bd9Sstevel@tonic-gate 	 */
39377c478bd9Sstevel@tonic-gate 	if (vsp != NULL)
39387c478bd9Sstevel@tonic-gate 		nfs_acl_free(vsp);
39397c478bd9Sstevel@tonic-gate 
39407c478bd9Sstevel@tonic-gate 	/*
39417c478bd9Sstevel@tonic-gate 	 * Free any cached pathconf information.
39427c478bd9Sstevel@tonic-gate 	 */
39437c478bd9Sstevel@tonic-gate 	if (info != NULL)
39447c478bd9Sstevel@tonic-gate 		kmem_free(info, sizeof (*info));
39457c478bd9Sstevel@tonic-gate 
39467c478bd9Sstevel@tonic-gate 	return (1);
39477c478bd9Sstevel@tonic-gate }
39487c478bd9Sstevel@tonic-gate 
39497c478bd9Sstevel@tonic-gate static int
39507c478bd9Sstevel@tonic-gate nfs_active_data_reclaim(rnode_t *rp)
39517c478bd9Sstevel@tonic-gate {
39527c478bd9Sstevel@tonic-gate 	char *contents;
39537c478bd9Sstevel@tonic-gate 	int size;
39547c478bd9Sstevel@tonic-gate 	vsecattr_t *vsp;
39557c478bd9Sstevel@tonic-gate 	nfs3_pathconf_info *info;
39567c478bd9Sstevel@tonic-gate 	int freed;
39577c478bd9Sstevel@tonic-gate 
39587c478bd9Sstevel@tonic-gate 	/*
39597c478bd9Sstevel@tonic-gate 	 * Free any held credentials and caches which
39607c478bd9Sstevel@tonic-gate 	 * may be associated with this rnode.
39617c478bd9Sstevel@tonic-gate 	 */
39627c478bd9Sstevel@tonic-gate 	if (!mutex_tryenter(&rp->r_statelock))
39637c478bd9Sstevel@tonic-gate 		return (0);
39647c478bd9Sstevel@tonic-gate 	contents = rp->r_symlink.contents;
39657c478bd9Sstevel@tonic-gate 	size = rp->r_symlink.size;
39667c478bd9Sstevel@tonic-gate 	rp->r_symlink.contents = NULL;
39677c478bd9Sstevel@tonic-gate 	vsp = rp->r_secattr;
39687c478bd9Sstevel@tonic-gate 	rp->r_secattr = NULL;
39697c478bd9Sstevel@tonic-gate 	info = rp->r_pathconf;
39707c478bd9Sstevel@tonic-gate 	rp->r_pathconf = NULL;
39717c478bd9Sstevel@tonic-gate 	mutex_exit(&rp->r_statelock);
39727c478bd9Sstevel@tonic-gate 
39737c478bd9Sstevel@tonic-gate 	/*
39747c478bd9Sstevel@tonic-gate 	 * Free the access cache entries.
39757c478bd9Sstevel@tonic-gate 	 */
39767c478bd9Sstevel@tonic-gate 	freed = nfs_access_purge_rp(rp);
39777c478bd9Sstevel@tonic-gate 
39787c478bd9Sstevel@tonic-gate 	if (!HAVE_RDDIR_CACHE(rp) &&
39797c478bd9Sstevel@tonic-gate 	    contents == NULL &&
39807c478bd9Sstevel@tonic-gate 	    vsp == NULL &&
39817c478bd9Sstevel@tonic-gate 	    info == NULL)
39827c478bd9Sstevel@tonic-gate 		return (freed);
39837c478bd9Sstevel@tonic-gate 
39847c478bd9Sstevel@tonic-gate 	/*
39857c478bd9Sstevel@tonic-gate 	 * Free the readdir cache entries
39867c478bd9Sstevel@tonic-gate 	 */
39877c478bd9Sstevel@tonic-gate 	if (HAVE_RDDIR_CACHE(rp))
39887c478bd9Sstevel@tonic-gate 		nfs_purge_rddir_cache(RTOV(rp));
39897c478bd9Sstevel@tonic-gate 
39907c478bd9Sstevel@tonic-gate 	/*
39917c478bd9Sstevel@tonic-gate 	 * Free the symbolic link cache.
39927c478bd9Sstevel@tonic-gate 	 */
39937c478bd9Sstevel@tonic-gate 	if (contents != NULL) {
39947c478bd9Sstevel@tonic-gate 
39957c478bd9Sstevel@tonic-gate 		kmem_free((void *)contents, size);
39967c478bd9Sstevel@tonic-gate 	}
39977c478bd9Sstevel@tonic-gate 
39987c478bd9Sstevel@tonic-gate 	/*
39997c478bd9Sstevel@tonic-gate 	 * Free any cached ACL.
40007c478bd9Sstevel@tonic-gate 	 */
40017c478bd9Sstevel@tonic-gate 	if (vsp != NULL)
40027c478bd9Sstevel@tonic-gate 		nfs_acl_free(vsp);
40037c478bd9Sstevel@tonic-gate 
40047c478bd9Sstevel@tonic-gate 	/*
40057c478bd9Sstevel@tonic-gate 	 * Free any cached pathconf information.
40067c478bd9Sstevel@tonic-gate 	 */
40077c478bd9Sstevel@tonic-gate 	if (info != NULL)
40087c478bd9Sstevel@tonic-gate 		kmem_free(info, sizeof (*info));
40097c478bd9Sstevel@tonic-gate 
40107c478bd9Sstevel@tonic-gate 	return (1);
40117c478bd9Sstevel@tonic-gate }
40127c478bd9Sstevel@tonic-gate 
40137c478bd9Sstevel@tonic-gate static int
40147c478bd9Sstevel@tonic-gate nfs_free_reclaim(void)
40157c478bd9Sstevel@tonic-gate {
40167c478bd9Sstevel@tonic-gate 	int freed;
40177c478bd9Sstevel@tonic-gate 	rnode_t *rp;
40187c478bd9Sstevel@tonic-gate 
40197c478bd9Sstevel@tonic-gate #ifdef DEBUG
40207c478bd9Sstevel@tonic-gate 	clstat_debug.f_reclaim.value.ui64++;
40217c478bd9Sstevel@tonic-gate #endif
40227c478bd9Sstevel@tonic-gate 	freed = 0;
40237c478bd9Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
40247c478bd9Sstevel@tonic-gate 	rp = rpfreelist;
40257c478bd9Sstevel@tonic-gate 	if (rp != NULL) {
40267c478bd9Sstevel@tonic-gate 		do {
40277c478bd9Sstevel@tonic-gate 			if (nfs_free_data_reclaim(rp))
40287c478bd9Sstevel@tonic-gate 				freed = 1;
40297c478bd9Sstevel@tonic-gate 		} while ((rp = rp->r_freef) != rpfreelist);
40307c478bd9Sstevel@tonic-gate 	}
40317c478bd9Sstevel@tonic-gate 	mutex_exit(&rpfreelist_lock);
40327c478bd9Sstevel@tonic-gate 	return (freed);
40337c478bd9Sstevel@tonic-gate }
40347c478bd9Sstevel@tonic-gate 
40357c478bd9Sstevel@tonic-gate static int
40367c478bd9Sstevel@tonic-gate nfs_active_reclaim(void)
40377c478bd9Sstevel@tonic-gate {
40387c478bd9Sstevel@tonic-gate 	int freed;
40397c478bd9Sstevel@tonic-gate 	int index;
40407c478bd9Sstevel@tonic-gate 	rnode_t *rp;
40417c478bd9Sstevel@tonic-gate 
40427c478bd9Sstevel@tonic-gate #ifdef DEBUG
40437c478bd9Sstevel@tonic-gate 	clstat_debug.a_reclaim.value.ui64++;
40447c478bd9Sstevel@tonic-gate #endif
40457c478bd9Sstevel@tonic-gate 	freed = 0;
40467c478bd9Sstevel@tonic-gate 	for (index = 0; index < rtablesize; index++) {
40477c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_READER);
40487c478bd9Sstevel@tonic-gate 		for (rp = rtable[index].r_hashf;
40497c478bd9Sstevel@tonic-gate 		    rp != (rnode_t *)(&rtable[index]);
40507c478bd9Sstevel@tonic-gate 		    rp = rp->r_hashf) {
40517c478bd9Sstevel@tonic-gate 			if (nfs_active_data_reclaim(rp))
40527c478bd9Sstevel@tonic-gate 				freed = 1;
40537c478bd9Sstevel@tonic-gate 		}
40547c478bd9Sstevel@tonic-gate 		rw_exit(&rtable[index].r_lock);
40557c478bd9Sstevel@tonic-gate 	}
40567c478bd9Sstevel@tonic-gate 	return (freed);
40577c478bd9Sstevel@tonic-gate }
40587c478bd9Sstevel@tonic-gate 
40597c478bd9Sstevel@tonic-gate static int
40607c478bd9Sstevel@tonic-gate nfs_rnode_reclaim(void)
40617c478bd9Sstevel@tonic-gate {
40627c478bd9Sstevel@tonic-gate 	int freed;
40637c478bd9Sstevel@tonic-gate 	rnode_t *rp;
40647c478bd9Sstevel@tonic-gate 	vnode_t *vp;
40657c478bd9Sstevel@tonic-gate 
40667c478bd9Sstevel@tonic-gate #ifdef DEBUG
40677c478bd9Sstevel@tonic-gate 	clstat_debug.r_reclaim.value.ui64++;
40687c478bd9Sstevel@tonic-gate #endif
40697c478bd9Sstevel@tonic-gate 	freed = 0;
40707c478bd9Sstevel@tonic-gate 	mutex_enter(&rpfreelist_lock);
40717c478bd9Sstevel@tonic-gate 	while ((rp = rpfreelist) != NULL) {
40727c478bd9Sstevel@tonic-gate 		rp_rmfree(rp);
40737c478bd9Sstevel@tonic-gate 		mutex_exit(&rpfreelist_lock);
40747c478bd9Sstevel@tonic-gate 		if (rp->r_flags & RHASHED) {
40757c478bd9Sstevel@tonic-gate 			vp = RTOV(rp);
40767c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
40777c478bd9Sstevel@tonic-gate 			mutex_enter(&vp->v_lock);
40787c478bd9Sstevel@tonic-gate 			if (vp->v_count > 1) {
40797c478bd9Sstevel@tonic-gate 				vp->v_count--;
40807c478bd9Sstevel@tonic-gate 				mutex_exit(&vp->v_lock);
40817c478bd9Sstevel@tonic-gate 				rw_exit(&rp->r_hashq->r_lock);
40827c478bd9Sstevel@tonic-gate 				mutex_enter(&rpfreelist_lock);
40837c478bd9Sstevel@tonic-gate 				continue;
40847c478bd9Sstevel@tonic-gate 			}
40857c478bd9Sstevel@tonic-gate 			mutex_exit(&vp->v_lock);
40867c478bd9Sstevel@tonic-gate 			rp_rmhash_locked(rp);
40877c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
40887c478bd9Sstevel@tonic-gate 		}
40897c478bd9Sstevel@tonic-gate 		/*
40907c478bd9Sstevel@tonic-gate 		 * This call to rp_addfree will end up destroying the
40917c478bd9Sstevel@tonic-gate 		 * rnode, but in a safe way with the appropriate set
40927c478bd9Sstevel@tonic-gate 		 * of checks done.
40937c478bd9Sstevel@tonic-gate 		 */
40947c478bd9Sstevel@tonic-gate 		rp_addfree(rp, CRED());
40957c478bd9Sstevel@tonic-gate 		mutex_enter(&rpfreelist_lock);
40967c478bd9Sstevel@tonic-gate 	}
40977c478bd9Sstevel@tonic-gate 	mutex_exit(&rpfreelist_lock);
40987c478bd9Sstevel@tonic-gate 	return (freed);
40997c478bd9Sstevel@tonic-gate }
41007c478bd9Sstevel@tonic-gate 
/*
 * Reclaim entry point.  Tries nfs_free_reclaim() first, then
 * nfs_active_reclaim(), and stops as soon as one of them returns
 * non-zero.  Only if both return zero is nfs_rnode_reclaim() run; its
 * return value is ignored.  The cdrarg argument is unused.
 */
/*ARGSUSED*/
static void
nfs_reclaim(void *cdrarg)
{
#ifdef DEBUG
	clstat_debug.reclaim.value.ui64++;
#endif
	if (nfs_free_reclaim() || nfs_active_reclaim())
		return;

	(void) nfs_rnode_reclaim();
}
41177c478bd9Sstevel@tonic-gate 
41187c478bd9Sstevel@tonic-gate /*
41197c478bd9Sstevel@tonic-gate  * NFS client failover support
41207c478bd9Sstevel@tonic-gate  *
41217c478bd9Sstevel@tonic-gate  * Routines to copy filehandles
41227c478bd9Sstevel@tonic-gate  */
41237c478bd9Sstevel@tonic-gate void
41247c478bd9Sstevel@tonic-gate nfscopyfh(caddr_t fhp, vnode_t *vp)
41257c478bd9Sstevel@tonic-gate {
41267c478bd9Sstevel@tonic-gate 	fhandle_t *dest = (fhandle_t *)fhp;
41277c478bd9Sstevel@tonic-gate 
41287c478bd9Sstevel@tonic-gate 	if (dest != NULL)
41297c478bd9Sstevel@tonic-gate 		*dest = *VTOFH(vp);
41307c478bd9Sstevel@tonic-gate }
41317c478bd9Sstevel@tonic-gate 
41327c478bd9Sstevel@tonic-gate void
41337c478bd9Sstevel@tonic-gate nfs3copyfh(caddr_t fhp, vnode_t *vp)
41347c478bd9Sstevel@tonic-gate {
41357c478bd9Sstevel@tonic-gate 	nfs_fh3 *dest = (nfs_fh3 *)fhp;
41367c478bd9Sstevel@tonic-gate 
41377c478bd9Sstevel@tonic-gate 	if (dest != NULL)
41387c478bd9Sstevel@tonic-gate 		*dest = *VTOFH3(vp);
41397c478bd9Sstevel@tonic-gate }
41407c478bd9Sstevel@tonic-gate 
41417c478bd9Sstevel@tonic-gate /*
41427c478bd9Sstevel@tonic-gate  * NFS client failover support
41437c478bd9Sstevel@tonic-gate  *
41447c478bd9Sstevel@tonic-gate  * failover_safe() will test various conditions to ensure that
41457c478bd9Sstevel@tonic-gate  * failover is permitted for this vnode.  It will be denied
41467c478bd9Sstevel@tonic-gate  * if:
41477c478bd9Sstevel@tonic-gate  *	1) the operation in progress does not support failover (NULL fi)
41487c478bd9Sstevel@tonic-gate  *	2) there are no available replicas (NULL mi_servers->sv_next)
41497c478bd9Sstevel@tonic-gate  *	3) any locks are outstanding on this file
41507c478bd9Sstevel@tonic-gate  */
41517c478bd9Sstevel@tonic-gate static int
41527c478bd9Sstevel@tonic-gate failover_safe(failinfo_t *fi)
41537c478bd9Sstevel@tonic-gate {
41547c478bd9Sstevel@tonic-gate 
41557c478bd9Sstevel@tonic-gate 	/*
41567c478bd9Sstevel@tonic-gate 	 * Does this op permit failover?
41577c478bd9Sstevel@tonic-gate 	 */
41587c478bd9Sstevel@tonic-gate 	if (fi == NULL || fi->vp == NULL)
41597c478bd9Sstevel@tonic-gate 		return (0);
41607c478bd9Sstevel@tonic-gate 
41617c478bd9Sstevel@tonic-gate 	/*
41627c478bd9Sstevel@tonic-gate 	 * Are there any alternates to failover to?
41637c478bd9Sstevel@tonic-gate 	 */
41647c478bd9Sstevel@tonic-gate 	if (VTOMI(fi->vp)->mi_servers->sv_next == NULL)
41657c478bd9Sstevel@tonic-gate 		return (0);
41667c478bd9Sstevel@tonic-gate 
41677c478bd9Sstevel@tonic-gate 	/*
41687c478bd9Sstevel@tonic-gate 	 * Disable check; we've forced local locking
41697c478bd9Sstevel@tonic-gate 	 *
41707c478bd9Sstevel@tonic-gate 	 * if (flk_has_remote_locks(fi->vp))
41717c478bd9Sstevel@tonic-gate 	 *	return (0);
41727c478bd9Sstevel@tonic-gate 	 */
41737c478bd9Sstevel@tonic-gate 
41747c478bd9Sstevel@tonic-gate 	/*
41757c478bd9Sstevel@tonic-gate 	 * If we have no partial path, we can't do anything
41767c478bd9Sstevel@tonic-gate 	 */
41777c478bd9Sstevel@tonic-gate 	if (VTOR(fi->vp)->r_path == NULL)
41787c478bd9Sstevel@tonic-gate 		return (0);
41797c478bd9Sstevel@tonic-gate 
41807c478bd9Sstevel@tonic-gate 	return (1);
41817c478bd9Sstevel@tonic-gate }
41827c478bd9Sstevel@tonic-gate 
41837c478bd9Sstevel@tonic-gate #include <sys/thread.h>
41847c478bd9Sstevel@tonic-gate 
41857c478bd9Sstevel@tonic-gate /*
41867c478bd9Sstevel@tonic-gate  * NFS client failover support
41877c478bd9Sstevel@tonic-gate  *
41887c478bd9Sstevel@tonic-gate  * failover_newserver() will start a search for a new server,
41897c478bd9Sstevel@tonic-gate  * preferably by starting an async thread to do the work.  If
41907c478bd9Sstevel@tonic-gate  * someone is already doing this (recognizable by MI_BINDINPROG
41917c478bd9Sstevel@tonic-gate  * being set), it will simply return and the calling thread
41927c478bd9Sstevel@tonic-gate  * will queue on the mi_failover_cv condition variable.
41937c478bd9Sstevel@tonic-gate  */
41947c478bd9Sstevel@tonic-gate static void
41957c478bd9Sstevel@tonic-gate failover_newserver(mntinfo_t *mi)
41967c478bd9Sstevel@tonic-gate {
41977c478bd9Sstevel@tonic-gate 	/*
41987c478bd9Sstevel@tonic-gate 	 * Check if someone else is doing this already
41997c478bd9Sstevel@tonic-gate 	 */
42007c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
42017c478bd9Sstevel@tonic-gate 	if (mi->mi_flags & MI_BINDINPROG) {
42027c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
42037c478bd9Sstevel@tonic-gate 		return;
42047c478bd9Sstevel@tonic-gate 	}
42057c478bd9Sstevel@tonic-gate 	mi->mi_flags |= MI_BINDINPROG;
42067c478bd9Sstevel@tonic-gate 
42077c478bd9Sstevel@tonic-gate 	/*
42087c478bd9Sstevel@tonic-gate 	 * Need to hold the vfs struct so that it can't be released
42097c478bd9Sstevel@tonic-gate 	 * while the failover thread is selecting a new server.
42107c478bd9Sstevel@tonic-gate 	 */
42117c478bd9Sstevel@tonic-gate 	VFS_HOLD(mi->mi_vfsp);
42127c478bd9Sstevel@tonic-gate 
42137c478bd9Sstevel@tonic-gate 	/*
42147c478bd9Sstevel@tonic-gate 	 * Start a thread to do the real searching.
42157c478bd9Sstevel@tonic-gate 	 */
42167c478bd9Sstevel@tonic-gate 	(void) zthread_create(NULL, 0, failover_thread, mi, 0, minclsyspri);
42177c478bd9Sstevel@tonic-gate 
42187c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
42197c478bd9Sstevel@tonic-gate }
42207c478bd9Sstevel@tonic-gate 
42217c478bd9Sstevel@tonic-gate /*
42227c478bd9Sstevel@tonic-gate  * NFS client failover support
42237c478bd9Sstevel@tonic-gate  *
42247c478bd9Sstevel@tonic-gate  * failover_thread() will find a new server to replace the one
42257c478bd9Sstevel@tonic-gate  * currently in use, wake up other threads waiting on this mount
42267c478bd9Sstevel@tonic-gate  * point, and die.  It will start at the head of the server list
42277c478bd9Sstevel@tonic-gate  * and poll servers until it finds one with an NFS server which is
42287c478bd9Sstevel@tonic-gate  * registered and responds to a NULL procedure ping.
42297c478bd9Sstevel@tonic-gate  *
42307c478bd9Sstevel@tonic-gate  * XXX failover_thread is unsafe within the scope of the
42317c478bd9Sstevel@tonic-gate  * present model defined for cpr to suspend the system.
42327c478bd9Sstevel@tonic-gate  * Specifically, over-the-wire calls made by the thread
42337c478bd9Sstevel@tonic-gate  * are unsafe. The thread needs to be reevaluated in case of
42347c478bd9Sstevel@tonic-gate  * future updates to the cpr suspend model.
42357c478bd9Sstevel@tonic-gate  */
42367c478bd9Sstevel@tonic-gate static void
42377c478bd9Sstevel@tonic-gate failover_thread(mntinfo_t *mi)
42387c478bd9Sstevel@tonic-gate {
42397c478bd9Sstevel@tonic-gate 	servinfo_t *svp = NULL;
42407c478bd9Sstevel@tonic-gate 	CLIENT *cl;
42417c478bd9Sstevel@tonic-gate 	enum clnt_stat status;
42427c478bd9Sstevel@tonic-gate 	struct timeval tv;
42437c478bd9Sstevel@tonic-gate 	int error;
42447c478bd9Sstevel@tonic-gate 	int oncethru = 0;
42457c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
42467c478bd9Sstevel@tonic-gate 	rnode_t *rp;
42477c478bd9Sstevel@tonic-gate 	int index;
42487c478bd9Sstevel@tonic-gate 	char *srvnames;
42497c478bd9Sstevel@tonic-gate 	size_t srvnames_len;
42507c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl = NULL;
42517c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
42527c478bd9Sstevel@tonic-gate 
42537c478bd9Sstevel@tonic-gate #ifdef DEBUG
42547c478bd9Sstevel@tonic-gate 	/*
42557c478bd9Sstevel@tonic-gate 	 * This is currently only needed to access counters which exist on
42567c478bd9Sstevel@tonic-gate 	 * DEBUG kernels, hence we don't want to pay the penalty of the lookup
42577c478bd9Sstevel@tonic-gate 	 * on non-DEBUG kernels.
42587c478bd9Sstevel@tonic-gate 	 */
4259108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
42607c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
42617c478bd9Sstevel@tonic-gate #endif
42627c478bd9Sstevel@tonic-gate 
42637c478bd9Sstevel@tonic-gate 	/*
42647c478bd9Sstevel@tonic-gate 	 * Its safe to piggyback on the mi_lock since failover_newserver()
42657c478bd9Sstevel@tonic-gate 	 * code guarantees that there will be only one failover thread
42667c478bd9Sstevel@tonic-gate 	 * per mountinfo at any instance.
42677c478bd9Sstevel@tonic-gate 	 */
42687c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mi->mi_lock, callb_generic_cpr,
42697c478bd9Sstevel@tonic-gate 	    "failover_thread");
42707c478bd9Sstevel@tonic-gate 
42717c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
42727c478bd9Sstevel@tonic-gate 	while (mi->mi_readers) {
42737c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
42747c478bd9Sstevel@tonic-gate 		cv_wait(&mi->mi_failover_cv, &mi->mi_lock);
42757c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
42767c478bd9Sstevel@tonic-gate 	}
42777c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
42787c478bd9Sstevel@tonic-gate 
42797c478bd9Sstevel@tonic-gate 	tv.tv_sec = 2;
42807c478bd9Sstevel@tonic-gate 	tv.tv_usec = 0;
42817c478bd9Sstevel@tonic-gate 
42827c478bd9Sstevel@tonic-gate 	/*
42837c478bd9Sstevel@tonic-gate 	 * Ping the null NFS procedure of every server in
42847c478bd9Sstevel@tonic-gate 	 * the list until one responds.  We always start
42857c478bd9Sstevel@tonic-gate 	 * at the head of the list and always skip the one
42867c478bd9Sstevel@tonic-gate 	 * that is current, since it's caused us a problem.
42877c478bd9Sstevel@tonic-gate 	 */
42887c478bd9Sstevel@tonic-gate 	while (svp == NULL) {
42897c478bd9Sstevel@tonic-gate 		for (svp = mi->mi_servers; svp; svp = svp->sv_next) {
42907c478bd9Sstevel@tonic-gate 			if (!oncethru && svp == mi->mi_curr_serv)
42917c478bd9Sstevel@tonic-gate 				continue;
42927c478bd9Sstevel@tonic-gate 
42937c478bd9Sstevel@tonic-gate 			/*
42947c478bd9Sstevel@tonic-gate 			 * If the file system was forcibly umounted
42957c478bd9Sstevel@tonic-gate 			 * while trying to do a failover, then just
42967c478bd9Sstevel@tonic-gate 			 * give up on the failover.  It won't matter
42977c478bd9Sstevel@tonic-gate 			 * what the server is.
42987c478bd9Sstevel@tonic-gate 			 */
42997c478bd9Sstevel@tonic-gate 			if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
43007c478bd9Sstevel@tonic-gate 				svp = NULL;
43017c478bd9Sstevel@tonic-gate 				goto done;
43027c478bd9Sstevel@tonic-gate 			}
43037c478bd9Sstevel@tonic-gate 
43047c478bd9Sstevel@tonic-gate 			error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr,
43057c478bd9Sstevel@tonic-gate 			    NFS_PROGRAM, NFS_VERSION, 0, 1, CRED(), &cl);
43067c478bd9Sstevel@tonic-gate 			if (error)
43077c478bd9Sstevel@tonic-gate 				continue;
43087c478bd9Sstevel@tonic-gate 
43097c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_INT))
43107c478bd9Sstevel@tonic-gate 				cl->cl_nosignal = TRUE;
43117c478bd9Sstevel@tonic-gate 			status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL,
43127c478bd9Sstevel@tonic-gate 			    xdr_void, NULL, tv);
43137c478bd9Sstevel@tonic-gate 			if (!(mi->mi_flags & MI_INT))
43147c478bd9Sstevel@tonic-gate 				cl->cl_nosignal = FALSE;
43157c478bd9Sstevel@tonic-gate 			AUTH_DESTROY(cl->cl_auth);
43167c478bd9Sstevel@tonic-gate 			CLNT_DESTROY(cl);
43177c478bd9Sstevel@tonic-gate 			if (status == RPC_SUCCESS) {
43187c478bd9Sstevel@tonic-gate 				if (svp == mi->mi_curr_serv) {
43197c478bd9Sstevel@tonic-gate #ifdef DEBUG
43207c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
43217c478bd9Sstevel@tonic-gate 			"NFS%d: failing over: selecting original server %s",
43227c478bd9Sstevel@tonic-gate 					    mi->mi_vers, svp->sv_hostname);
43237c478bd9Sstevel@tonic-gate #else
43247c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
43257c478bd9Sstevel@tonic-gate 			"NFS: failing over: selecting original server %s",
43267c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
43277c478bd9Sstevel@tonic-gate #endif
43287c478bd9Sstevel@tonic-gate 				} else {
43297c478bd9Sstevel@tonic-gate #ifdef DEBUG
43307c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
43317c478bd9Sstevel@tonic-gate 				    "NFS%d: failing over from %s to %s",
43327c478bd9Sstevel@tonic-gate 					    mi->mi_vers,
43337c478bd9Sstevel@tonic-gate 					    mi->mi_curr_serv->sv_hostname,
43347c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
43357c478bd9Sstevel@tonic-gate #else
43367c478bd9Sstevel@tonic-gate 					zcmn_err(zoneid, CE_NOTE,
43377c478bd9Sstevel@tonic-gate 				    "NFS: failing over from %s to %s",
43387c478bd9Sstevel@tonic-gate 					    mi->mi_curr_serv->sv_hostname,
43397c478bd9Sstevel@tonic-gate 					    svp->sv_hostname);
43407c478bd9Sstevel@tonic-gate #endif
43417c478bd9Sstevel@tonic-gate 				}
43427c478bd9Sstevel@tonic-gate 				break;
43437c478bd9Sstevel@tonic-gate 			}
43447c478bd9Sstevel@tonic-gate 		}
43457c478bd9Sstevel@tonic-gate 
43467c478bd9Sstevel@tonic-gate 		if (svp == NULL) {
43477c478bd9Sstevel@tonic-gate 			if (!oncethru) {
43487c478bd9Sstevel@tonic-gate 				srvnames = nfs_getsrvnames(mi, &srvnames_len);
43497c478bd9Sstevel@tonic-gate #ifdef DEBUG
43507c478bd9Sstevel@tonic-gate 				zprintf(zoneid,
43517c478bd9Sstevel@tonic-gate 				    "NFS%d servers %s not responding "
43527c478bd9Sstevel@tonic-gate 				    "still trying\n", mi->mi_vers, srvnames);
43537c478bd9Sstevel@tonic-gate #else
43547c478bd9Sstevel@tonic-gate 				zprintf(zoneid, "NFS servers %s not responding "
43557c478bd9Sstevel@tonic-gate 				    "still trying\n", srvnames);
43567c478bd9Sstevel@tonic-gate #endif
43577c478bd9Sstevel@tonic-gate 				oncethru = 1;
43587c478bd9Sstevel@tonic-gate 			}
43597c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
43607c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
43617c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
43627c478bd9Sstevel@tonic-gate 			delay(hz);
43637c478bd9Sstevel@tonic-gate 			mutex_enter(&mi->mi_lock);
43647c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock);
43657c478bd9Sstevel@tonic-gate 			mutex_exit(&mi->mi_lock);
43667c478bd9Sstevel@tonic-gate 		}
43677c478bd9Sstevel@tonic-gate 	}
43687c478bd9Sstevel@tonic-gate 
43697c478bd9Sstevel@tonic-gate 	if (oncethru) {
43707c478bd9Sstevel@tonic-gate #ifdef DEBUG
43717c478bd9Sstevel@tonic-gate 		zprintf(zoneid, "NFS%d servers %s ok\n", mi->mi_vers, srvnames);
43727c478bd9Sstevel@tonic-gate #else
43737c478bd9Sstevel@tonic-gate 		zprintf(zoneid, "NFS servers %s ok\n", srvnames);
43747c478bd9Sstevel@tonic-gate #endif
43757c478bd9Sstevel@tonic-gate 	}
43767c478bd9Sstevel@tonic-gate 
43777c478bd9Sstevel@tonic-gate 	if (svp != mi->mi_curr_serv) {
43787c478bd9Sstevel@tonic-gate 		(void) dnlc_purge_vfsp(mi->mi_vfsp, 0);
43797c478bd9Sstevel@tonic-gate 		index = rtablehash(&mi->mi_curr_serv->sv_fhandle);
43807c478bd9Sstevel@tonic-gate 		rw_enter(&rtable[index].r_lock, RW_WRITER);
43817c478bd9Sstevel@tonic-gate 		rp = rfind(&rtable[index], &mi->mi_curr_serv->sv_fhandle,
43827c478bd9Sstevel@tonic-gate 		    mi->mi_vfsp);
43837c478bd9Sstevel@tonic-gate 		if (rp != NULL) {
43847c478bd9Sstevel@tonic-gate 			if (rp->r_flags & RHASHED)
43857c478bd9Sstevel@tonic-gate 				rp_rmhash_locked(rp);
43867c478bd9Sstevel@tonic-gate 			rw_exit(&rtable[index].r_lock);
43877c478bd9Sstevel@tonic-gate 			rp->r_server = svp;
43887c478bd9Sstevel@tonic-gate 			rp->r_fh = svp->sv_fhandle;
43897c478bd9Sstevel@tonic-gate 			(void) nfs_free_data_reclaim(rp);
43907c478bd9Sstevel@tonic-gate 			index = rtablehash(&rp->r_fh);
43917c478bd9Sstevel@tonic-gate 			rp->r_hashq = &rtable[index];
43927c478bd9Sstevel@tonic-gate 			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
43937c478bd9Sstevel@tonic-gate 			vn_exists(RTOV(rp));
43947c478bd9Sstevel@tonic-gate 			rp_addhash(rp);
43957c478bd9Sstevel@tonic-gate 			rw_exit(&rp->r_hashq->r_lock);
43967c478bd9Sstevel@tonic-gate 			VN_RELE(RTOV(rp));
43977c478bd9Sstevel@tonic-gate 		} else
43987c478bd9Sstevel@tonic-gate 			rw_exit(&rtable[index].r_lock);
43997c478bd9Sstevel@tonic-gate 	}
44007c478bd9Sstevel@tonic-gate 
44017c478bd9Sstevel@tonic-gate done:
44027c478bd9Sstevel@tonic-gate 	if (oncethru)
44037c478bd9Sstevel@tonic-gate 		kmem_free(srvnames, srvnames_len);
44047c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
44057c478bd9Sstevel@tonic-gate 	mi->mi_flags &= ~MI_BINDINPROG;
44067c478bd9Sstevel@tonic-gate 	if (svp != NULL) {
44077c478bd9Sstevel@tonic-gate 		mi->mi_curr_serv = svp;
44087c478bd9Sstevel@tonic-gate 		mi->mi_failover++;
44097c478bd9Sstevel@tonic-gate #ifdef DEBUG
44107c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.failover.value.ui64++;
44117c478bd9Sstevel@tonic-gate #endif
44127c478bd9Sstevel@tonic-gate 	}
44137c478bd9Sstevel@tonic-gate 	cv_broadcast(&mi->mi_failover_cv);
44147c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
44157c478bd9Sstevel@tonic-gate 	VFS_RELE(mi->mi_vfsp);
44167c478bd9Sstevel@tonic-gate 	zthread_exit();
44177c478bd9Sstevel@tonic-gate 	/* NOTREACHED */
44187c478bd9Sstevel@tonic-gate }
44197c478bd9Sstevel@tonic-gate 
44207c478bd9Sstevel@tonic-gate /*
44217c478bd9Sstevel@tonic-gate  * NFS client failover support
44227c478bd9Sstevel@tonic-gate  *
44237c478bd9Sstevel@tonic-gate  * failover_wait() will put the thread to sleep until MI_BINDINPROG
44247c478bd9Sstevel@tonic-gate  * is cleared, meaning that failover is complete.  Called with
44257c478bd9Sstevel@tonic-gate  * mi_lock mutex held.
44267c478bd9Sstevel@tonic-gate  */
44277c478bd9Sstevel@tonic-gate static int
44287c478bd9Sstevel@tonic-gate failover_wait(mntinfo_t *mi)
44297c478bd9Sstevel@tonic-gate {
44307c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
44317c478bd9Sstevel@tonic-gate 
44327c478bd9Sstevel@tonic-gate 	/*
44337c478bd9Sstevel@tonic-gate 	 * If someone else is hunting for a living server,
44347c478bd9Sstevel@tonic-gate 	 * sleep until it's done.  After our sleep, we may
44357c478bd9Sstevel@tonic-gate 	 * be bound to the right server and get off cheaply.
44367c478bd9Sstevel@tonic-gate 	 */
44377c478bd9Sstevel@tonic-gate 	while (mi->mi_flags & MI_BINDINPROG) {
44387c478bd9Sstevel@tonic-gate 		/*
44397c478bd9Sstevel@tonic-gate 		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
44407c478bd9Sstevel@tonic-gate 		 * and SIGTERM. (Preserving the existing masks).
44417c478bd9Sstevel@tonic-gate 		 * Mask out SIGINT if mount option nointr is specified.
44427c478bd9Sstevel@tonic-gate 		 */
44437c478bd9Sstevel@tonic-gate 		sigintr(&smask, (int)mi->mi_flags & MI_INT);
44447c478bd9Sstevel@tonic-gate 		if (!cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock)) {
44457c478bd9Sstevel@tonic-gate 			/*
44467c478bd9Sstevel@tonic-gate 			 * restore original signal mask
44477c478bd9Sstevel@tonic-gate 			 */
44487c478bd9Sstevel@tonic-gate 			sigunintr(&smask);
44497c478bd9Sstevel@tonic-gate 			return (EINTR);
44507c478bd9Sstevel@tonic-gate 		}
44517c478bd9Sstevel@tonic-gate 		/*
44527c478bd9Sstevel@tonic-gate 		 * restore original signal mask
44537c478bd9Sstevel@tonic-gate 		 */
44547c478bd9Sstevel@tonic-gate 		sigunintr(&smask);
44557c478bd9Sstevel@tonic-gate 	}
44567c478bd9Sstevel@tonic-gate 	return (0);
44577c478bd9Sstevel@tonic-gate }
44587c478bd9Sstevel@tonic-gate 
44597c478bd9Sstevel@tonic-gate /*
44607c478bd9Sstevel@tonic-gate  * NFS client failover support
44617c478bd9Sstevel@tonic-gate  *
44627c478bd9Sstevel@tonic-gate  * failover_remap() will do a partial pathname lookup and find the
44637c478bd9Sstevel@tonic-gate  * desired vnode on the current server.  The interim vnode will be
44647c478bd9Sstevel@tonic-gate  * discarded after we pilfer the new filehandle.
44657c478bd9Sstevel@tonic-gate  *
44667c478bd9Sstevel@tonic-gate  * Side effects:
44677c478bd9Sstevel@tonic-gate  * - This routine will also update the filehandle in the args structure
44687c478bd9Sstevel@tonic-gate  *    pointed to by the fi->fhp pointer if it is non-NULL.
44697c478bd9Sstevel@tonic-gate  */
44707c478bd9Sstevel@tonic-gate 
44717c478bd9Sstevel@tonic-gate static int
44727c478bd9Sstevel@tonic-gate failover_remap(failinfo_t *fi)
44737c478bd9Sstevel@tonic-gate {
44747c478bd9Sstevel@tonic-gate 	vnode_t *vp, *nvp, *rootvp;
44757c478bd9Sstevel@tonic-gate 	rnode_t *rp, *nrp;
44767c478bd9Sstevel@tonic-gate 	mntinfo_t *mi;
44777c478bd9Sstevel@tonic-gate 	int error;
44787c478bd9Sstevel@tonic-gate #ifdef DEBUG
44797c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
44807c478bd9Sstevel@tonic-gate 
4481108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
44827c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
44837c478bd9Sstevel@tonic-gate #endif
44847c478bd9Sstevel@tonic-gate 	/*
44857c478bd9Sstevel@tonic-gate 	 * Sanity check
44867c478bd9Sstevel@tonic-gate 	 */
44877c478bd9Sstevel@tonic-gate 	if (fi == NULL || fi->vp == NULL || fi->lookupproc == NULL)
44887c478bd9Sstevel@tonic-gate 		return (EINVAL);
44897c478bd9Sstevel@tonic-gate 	vp = fi->vp;
44907c478bd9Sstevel@tonic-gate 	rp = VTOR(vp);
44917c478bd9Sstevel@tonic-gate 	mi = VTOMI(vp);
44927c478bd9Sstevel@tonic-gate 
44937c478bd9Sstevel@tonic-gate 	if (!(vp->v_flag & VROOT)) {
44947c478bd9Sstevel@tonic-gate 		/*
44957c478bd9Sstevel@tonic-gate 		 * Given the root fh, use the path stored in
44967c478bd9Sstevel@tonic-gate 		 * the rnode to find the fh for the new server.
44977c478bd9Sstevel@tonic-gate 		 */
44987c478bd9Sstevel@tonic-gate 		error = VFS_ROOT(mi->mi_vfsp, &rootvp);
44997c478bd9Sstevel@tonic-gate 		if (error)
45007c478bd9Sstevel@tonic-gate 			return (error);
45017c478bd9Sstevel@tonic-gate 
45027c478bd9Sstevel@tonic-gate 		error = failover_lookup(rp->r_path, rootvp,
45037c478bd9Sstevel@tonic-gate 		    fi->lookupproc, fi->xattrdirproc, &nvp);
45047c478bd9Sstevel@tonic-gate 
45057c478bd9Sstevel@tonic-gate 		VN_RELE(rootvp);
45067c478bd9Sstevel@tonic-gate 
45077c478bd9Sstevel@tonic-gate 		if (error)
45087c478bd9Sstevel@tonic-gate 			return (error);
45097c478bd9Sstevel@tonic-gate 
45107c478bd9Sstevel@tonic-gate 		/*
45117c478bd9Sstevel@tonic-gate 		 * If we found the same rnode, we're done now
45127c478bd9Sstevel@tonic-gate 		 */
45137c478bd9Sstevel@tonic-gate 		if (nvp == vp) {
45147c478bd9Sstevel@tonic-gate 			/*
45157c478bd9Sstevel@tonic-gate 			 * Failed and the new server may physically be same
45167c478bd9Sstevel@tonic-gate 			 * OR may share a same disk subsystem. In this case
45177c478bd9Sstevel@tonic-gate 			 * file handle for a particular file path is not going
45187c478bd9Sstevel@tonic-gate 			 * to change, given the same filehandle lookup will
45197c478bd9Sstevel@tonic-gate 			 * always locate the same rnode as the existing one.
45207c478bd9Sstevel@tonic-gate 			 * All we might need to do is to update the r_server
45217c478bd9Sstevel@tonic-gate 			 * with the current servinfo.
45227c478bd9Sstevel@tonic-gate 			 */
45237c478bd9Sstevel@tonic-gate 			if (!VALID_FH(fi)) {
45247c478bd9Sstevel@tonic-gate 				rp->r_server = mi->mi_curr_serv;
45257c478bd9Sstevel@tonic-gate 			}
45267c478bd9Sstevel@tonic-gate 			VN_RELE(nvp);
45277c478bd9Sstevel@tonic-gate 			return (0);
45287c478bd9Sstevel@tonic-gate 		}
45297c478bd9Sstevel@tonic-gate 
45307c478bd9Sstevel@tonic-gate 		/*
45317c478bd9Sstevel@tonic-gate 		 * Try to make it so that no one else will find this
45327c478bd9Sstevel@tonic-gate 		 * vnode because it is just a temporary to hold the
45337c478bd9Sstevel@tonic-gate 		 * new file handle until that file handle can be
45347c478bd9Sstevel@tonic-gate 		 * copied to the original vnode/rnode.
45357c478bd9Sstevel@tonic-gate 		 */
45367c478bd9Sstevel@tonic-gate 		nrp = VTOR(nvp);
4537e8dc3b7dSvv 		mutex_enter(&mi->mi_remap_lock);
4538e8dc3b7dSvv 		/*
4539e8dc3b7dSvv 		 * Some other thread could have raced in here and could
4540e8dc3b7dSvv 		 * have done the remap for this particular rnode before
4541e8dc3b7dSvv 		 * this thread here. Check for rp->r_server and
4542e8dc3b7dSvv 		 * mi->mi_curr_serv and return if they are same.
4543e8dc3b7dSvv 		 */
4544e8dc3b7dSvv 		if (VALID_FH(fi)) {
4545e8dc3b7dSvv 			mutex_exit(&mi->mi_remap_lock);
4546e8dc3b7dSvv 			VN_RELE(nvp);
4547e8dc3b7dSvv 			return (0);
4548e8dc3b7dSvv 		}
4549e8dc3b7dSvv 
45507c478bd9Sstevel@tonic-gate 		if (nrp->r_flags & RHASHED)
45517c478bd9Sstevel@tonic-gate 			rp_rmhash(nrp);
45527c478bd9Sstevel@tonic-gate 
45537c478bd9Sstevel@tonic-gate 		/*
45547c478bd9Sstevel@tonic-gate 		 * As a heuristic check on the validity of the new
45557c478bd9Sstevel@tonic-gate 		 * file, check that the size and type match against
45567c478bd9Sstevel@tonic-gate 		 * that we remember from the old version.
45577c478bd9Sstevel@tonic-gate 		 */
45587c478bd9Sstevel@tonic-gate 		if (rp->r_size != nrp->r_size || vp->v_type != nvp->v_type) {
4559e8dc3b7dSvv 			mutex_exit(&mi->mi_remap_lock);
45607c478bd9Sstevel@tonic-gate 			zcmn_err(mi->mi_zone->zone_id, CE_WARN,
45617c478bd9Sstevel@tonic-gate 			    "NFS replicas %s and %s: file %s not same.",
45627c478bd9Sstevel@tonic-gate 			    rp->r_server->sv_hostname,
45637c478bd9Sstevel@tonic-gate 			    nrp->r_server->sv_hostname, rp->r_path);
45647c478bd9Sstevel@tonic-gate 			VN_RELE(nvp);
45657c478bd9Sstevel@tonic-gate 			return (EINVAL);
45667c478bd9Sstevel@tonic-gate 		}
45677c478bd9Sstevel@tonic-gate 
45687c478bd9Sstevel@tonic-gate 		/*
45697c478bd9Sstevel@tonic-gate 		 * snarf the filehandle from the new rnode
45707c478bd9Sstevel@tonic-gate 		 * then release it, again while updating the
45717c478bd9Sstevel@tonic-gate 		 * hash queues for the rnode.
45727c478bd9Sstevel@tonic-gate 		 */
45737c478bd9Sstevel@tonic-gate 		if (rp->r_flags & RHASHED)
45747c478bd9Sstevel@tonic-gate 			rp_rmhash(rp);
45757c478bd9Sstevel@tonic-gate 		rp->r_server = mi->mi_curr_serv;
45767c478bd9Sstevel@tonic-gate 		rp->r_fh = nrp->r_fh;
4577e8dc3b7dSvv 		rp->r_hashq = nrp->r_hashq;
45787c478bd9Sstevel@tonic-gate 		/*
45797c478bd9Sstevel@tonic-gate 		 * Copy the attributes from the new rnode to the old
45807c478bd9Sstevel@tonic-gate 		 * rnode.  This will help to reduce unnecessary page
45817c478bd9Sstevel@tonic-gate 		 * cache flushes.
45827c478bd9Sstevel@tonic-gate 		 */
45837c478bd9Sstevel@tonic-gate 		rp->r_attr = nrp->r_attr;
45847c478bd9Sstevel@tonic-gate 		rp->r_attrtime = nrp->r_attrtime;
45857c478bd9Sstevel@tonic-gate 		rp->r_mtime = nrp->r_mtime;
45867c478bd9Sstevel@tonic-gate 		(void) nfs_free_data_reclaim(rp);
45877c478bd9Sstevel@tonic-gate 		nfs_setswaplike(vp, &rp->r_attr);
45887c478bd9Sstevel@tonic-gate 		rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
45897c478bd9Sstevel@tonic-gate 		rp_addhash(rp);
45907c478bd9Sstevel@tonic-gate 		rw_exit(&rp->r_hashq->r_lock);
4591e8dc3b7dSvv 		mutex_exit(&mi->mi_remap_lock);
45927c478bd9Sstevel@tonic-gate 		VN_RELE(nvp);
45937c478bd9Sstevel@tonic-gate 	}
45947c478bd9Sstevel@tonic-gate 
45957c478bd9Sstevel@tonic-gate 	/*
45967c478bd9Sstevel@tonic-gate 	 * Update successful failover remap count
45977c478bd9Sstevel@tonic-gate 	 */
45987c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
45997c478bd9Sstevel@tonic-gate 	mi->mi_remap++;
46007c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
46017c478bd9Sstevel@tonic-gate #ifdef DEBUG
46027c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.remap.value.ui64++;
46037c478bd9Sstevel@tonic-gate #endif
46047c478bd9Sstevel@tonic-gate 
46057c478bd9Sstevel@tonic-gate 	/*
46067c478bd9Sstevel@tonic-gate 	 * If we have a copied filehandle to update, do it now.
46077c478bd9Sstevel@tonic-gate 	 */
46087c478bd9Sstevel@tonic-gate 	if (fi->fhp != NULL && fi->copyproc != NULL)
46097c478bd9Sstevel@tonic-gate 		(*fi->copyproc)(fi->fhp, vp);
46107c478bd9Sstevel@tonic-gate 
46117c478bd9Sstevel@tonic-gate 	return (0);
46127c478bd9Sstevel@tonic-gate }
46137c478bd9Sstevel@tonic-gate 
46147c478bd9Sstevel@tonic-gate /*
46157c478bd9Sstevel@tonic-gate  * NFS client failover support
46167c478bd9Sstevel@tonic-gate  *
46177c478bd9Sstevel@tonic-gate  * We want a simple pathname lookup routine to parse the pieces
46187c478bd9Sstevel@tonic-gate  * of path in rp->r_path.  We know that the path was a created
46197c478bd9Sstevel@tonic-gate  * as rnodes were made, so we know we have only to deal with
46207c478bd9Sstevel@tonic-gate  * paths that look like:
46217c478bd9Sstevel@tonic-gate  *	dir1/dir2/dir3/file
46227c478bd9Sstevel@tonic-gate  * Any evidence of anything like .., symlinks, and ENOTDIR
46237c478bd9Sstevel@tonic-gate  * are hard errors, because they mean something in this filesystem
46247c478bd9Sstevel@tonic-gate  * is different from the one we came from, or has changed under
46257c478bd9Sstevel@tonic-gate  * us in some way.  If this is true, we want the failure.
46267c478bd9Sstevel@tonic-gate  *
46277c478bd9Sstevel@tonic-gate  * Extended attributes: if the filesystem is mounted with extended
46287c478bd9Sstevel@tonic-gate  * attributes enabled (-o xattr), the attribute directory will be
46297c478bd9Sstevel@tonic-gate  * represented in the r_path as the magic name XATTR_RPATH. So if
46307c478bd9Sstevel@tonic-gate  * we see that name in the pathname, is must be because this node
46317c478bd9Sstevel@tonic-gate  * is an extended attribute.  Therefore, look it up that way.
46327c478bd9Sstevel@tonic-gate  */
46337c478bd9Sstevel@tonic-gate static int
46347c478bd9Sstevel@tonic-gate failover_lookup(char *path, vnode_t *root,
46357c478bd9Sstevel@tonic-gate     int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *, int,
46367c478bd9Sstevel@tonic-gate 	vnode_t *, cred_t *, int),
46377c478bd9Sstevel@tonic-gate     int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
46387c478bd9Sstevel@tonic-gate     vnode_t **new)
46397c478bd9Sstevel@tonic-gate {
46407c478bd9Sstevel@tonic-gate 	vnode_t *dvp, *nvp;
46417c478bd9Sstevel@tonic-gate 	int error = EINVAL;
46427c478bd9Sstevel@tonic-gate 	char *s, *p, *tmppath;
46437c478bd9Sstevel@tonic-gate 	size_t len;
46447c478bd9Sstevel@tonic-gate 	mntinfo_t *mi;
46457c478bd9Sstevel@tonic-gate 	bool_t xattr;
46467c478bd9Sstevel@tonic-gate 
46477c478bd9Sstevel@tonic-gate 	/* Make local copy of path */
46487c478bd9Sstevel@tonic-gate 	len = strlen(path) + 1;
46497c478bd9Sstevel@tonic-gate 	tmppath = kmem_alloc(len, KM_SLEEP);
46507c478bd9Sstevel@tonic-gate 	(void) strcpy(tmppath, path);
46517c478bd9Sstevel@tonic-gate 	s = tmppath;
46527c478bd9Sstevel@tonic-gate 
46537c478bd9Sstevel@tonic-gate 	dvp = root;
46547c478bd9Sstevel@tonic-gate 	VN_HOLD(dvp);
46557c478bd9Sstevel@tonic-gate 	mi = VTOMI(root);
46567c478bd9Sstevel@tonic-gate 	xattr = mi->mi_flags & MI_EXTATTR;
46577c478bd9Sstevel@tonic-gate 
46587c478bd9Sstevel@tonic-gate 	do {
46597c478bd9Sstevel@tonic-gate 		p = strchr(s, '/');
46607c478bd9Sstevel@tonic-gate 		if (p != NULL)
46617c478bd9Sstevel@tonic-gate 			*p = '\0';
46627c478bd9Sstevel@tonic-gate 		if (xattr && strcmp(s, XATTR_RPATH) == 0) {
46637c478bd9Sstevel@tonic-gate 			error = (*xattrdirproc)(dvp, &nvp, FALSE, CRED(),
46647c478bd9Sstevel@tonic-gate 			    RFSCALL_SOFT);
46657c478bd9Sstevel@tonic-gate 		} else {
46667c478bd9Sstevel@tonic-gate 			error = (*lookupproc)(dvp, s, &nvp, NULL, 0, NULL,
46677c478bd9Sstevel@tonic-gate 			    CRED(), RFSCALL_SOFT);
46687c478bd9Sstevel@tonic-gate 		}
46697c478bd9Sstevel@tonic-gate 		if (p != NULL)
46707c478bd9Sstevel@tonic-gate 			*p++ = '/';
46717c478bd9Sstevel@tonic-gate 		if (error) {
46727c478bd9Sstevel@tonic-gate 			VN_RELE(dvp);
46737c478bd9Sstevel@tonic-gate 			kmem_free(tmppath, len);
46747c478bd9Sstevel@tonic-gate 			return (error);
46757c478bd9Sstevel@tonic-gate 		}
46767c478bd9Sstevel@tonic-gate 		s = p;
46777c478bd9Sstevel@tonic-gate 		VN_RELE(dvp);
46787c478bd9Sstevel@tonic-gate 		dvp = nvp;
46797c478bd9Sstevel@tonic-gate 	} while (p != NULL);
46807c478bd9Sstevel@tonic-gate 
46817c478bd9Sstevel@tonic-gate 	if (nvp != NULL && new != NULL)
46827c478bd9Sstevel@tonic-gate 		*new = nvp;
46837c478bd9Sstevel@tonic-gate 	kmem_free(tmppath, len);
46847c478bd9Sstevel@tonic-gate 	return (0);
46857c478bd9Sstevel@tonic-gate }
46867c478bd9Sstevel@tonic-gate 
46877c478bd9Sstevel@tonic-gate /*
46887c478bd9Sstevel@tonic-gate  * NFS client failover support
46897c478bd9Sstevel@tonic-gate  *
46907c478bd9Sstevel@tonic-gate  * sv_free() frees the malloc'd portion of a "servinfo_t".
46917c478bd9Sstevel@tonic-gate  */
46927c478bd9Sstevel@tonic-gate void
46937c478bd9Sstevel@tonic-gate sv_free(servinfo_t *svp)
46947c478bd9Sstevel@tonic-gate {
46957c478bd9Sstevel@tonic-gate 	servinfo_t *next;
46967c478bd9Sstevel@tonic-gate 	struct knetconfig *knconf;
46977c478bd9Sstevel@tonic-gate 
46987c478bd9Sstevel@tonic-gate 	while (svp != NULL) {
46997c478bd9Sstevel@tonic-gate 		next = svp->sv_next;
47007c478bd9Sstevel@tonic-gate 		if (svp->sv_secdata)
47017c478bd9Sstevel@tonic-gate 			sec_clnt_freeinfo(svp->sv_secdata);
47027c478bd9Sstevel@tonic-gate 		if (svp->sv_hostname && svp->sv_hostnamelen > 0)
47037c478bd9Sstevel@tonic-gate 			kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
47047c478bd9Sstevel@tonic-gate 		knconf = svp->sv_knconf;
47057c478bd9Sstevel@tonic-gate 		if (knconf != NULL) {
47067c478bd9Sstevel@tonic-gate 			if (knconf->knc_protofmly != NULL)
47077c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
47087c478bd9Sstevel@tonic-gate 			if (knconf->knc_proto != NULL)
47097c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
47107c478bd9Sstevel@tonic-gate 			kmem_free(knconf, sizeof (*knconf));
47117c478bd9Sstevel@tonic-gate 		}
47127c478bd9Sstevel@tonic-gate 		knconf = svp->sv_origknconf;
47137c478bd9Sstevel@tonic-gate 		if (knconf != NULL) {
47147c478bd9Sstevel@tonic-gate 			if (knconf->knc_protofmly != NULL)
47157c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
47167c478bd9Sstevel@tonic-gate 			if (knconf->knc_proto != NULL)
47177c478bd9Sstevel@tonic-gate 				kmem_free(knconf->knc_proto, KNC_STRSIZE);
47187c478bd9Sstevel@tonic-gate 			kmem_free(knconf, sizeof (*knconf));
47197c478bd9Sstevel@tonic-gate 		}
47207c478bd9Sstevel@tonic-gate 		if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
47217c478bd9Sstevel@tonic-gate 			kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
47227c478bd9Sstevel@tonic-gate 		mutex_destroy(&svp->sv_lock);
47237c478bd9Sstevel@tonic-gate 		kmem_free(svp, sizeof (*svp));
47247c478bd9Sstevel@tonic-gate 		svp = next;
47257c478bd9Sstevel@tonic-gate 	}
47267c478bd9Sstevel@tonic-gate }
47277c478bd9Sstevel@tonic-gate 
47287c478bd9Sstevel@tonic-gate /*
47297c478bd9Sstevel@tonic-gate  * Only can return non-zero if intr != 0.
47307c478bd9Sstevel@tonic-gate  */
47317c478bd9Sstevel@tonic-gate int
47327c478bd9Sstevel@tonic-gate nfs_rw_enter_sig(nfs_rwlock_t *l, krw_t rw, int intr)
47337c478bd9Sstevel@tonic-gate {
47347c478bd9Sstevel@tonic-gate 
47357c478bd9Sstevel@tonic-gate 	mutex_enter(&l->lock);
47367c478bd9Sstevel@tonic-gate 
47377c478bd9Sstevel@tonic-gate 	/*
47387c478bd9Sstevel@tonic-gate 	 * If this is a nested enter, then allow it.  There
47397c478bd9Sstevel@tonic-gate 	 * must be as many exits as enters through.
47407c478bd9Sstevel@tonic-gate 	 */
47417c478bd9Sstevel@tonic-gate 	if (l->owner == curthread) {
47427c478bd9Sstevel@tonic-gate 		/* lock is held for writing by current thread */
47437c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_READER || rw == RW_WRITER);
47447c478bd9Sstevel@tonic-gate 		l->count--;
47457c478bd9Sstevel@tonic-gate 	} else if (rw == RW_READER) {
47467c478bd9Sstevel@tonic-gate 		/*
47477c478bd9Sstevel@tonic-gate 		 * While there is a writer active or writers waiting,
47487c478bd9Sstevel@tonic-gate 		 * then wait for them to finish up and move on.  Then,
47497c478bd9Sstevel@tonic-gate 		 * increment the count to indicate that a reader is
47507c478bd9Sstevel@tonic-gate 		 * active.
47517c478bd9Sstevel@tonic-gate 		 */
47527c478bd9Sstevel@tonic-gate 		while (l->count < 0 || l->waiters > 0) {
47537c478bd9Sstevel@tonic-gate 			if (intr) {
47547c478bd9Sstevel@tonic-gate 				klwp_t *lwp = ttolwp(curthread);
47557c478bd9Sstevel@tonic-gate 
47567c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
47577c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop++;
47587c478bd9Sstevel@tonic-gate 				if (!cv_wait_sig(&l->cv, &l->lock)) {
47597c478bd9Sstevel@tonic-gate 					if (lwp != NULL)
47607c478bd9Sstevel@tonic-gate 						lwp->lwp_nostop--;
47617c478bd9Sstevel@tonic-gate 					mutex_exit(&l->lock);
47627c478bd9Sstevel@tonic-gate 					return (EINTR);
47637c478bd9Sstevel@tonic-gate 				}
47647c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
47657c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop--;
47667c478bd9Sstevel@tonic-gate 			} else
47677c478bd9Sstevel@tonic-gate 				cv_wait(&l->cv, &l->lock);
47687c478bd9Sstevel@tonic-gate 		}
47697c478bd9Sstevel@tonic-gate 		ASSERT(l->count < INT_MAX);
47707c478bd9Sstevel@tonic-gate #ifdef	DEBUG
47717c478bd9Sstevel@tonic-gate 		if ((l->count % 10000) == 9999)
47727c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "nfs_rw_enter_sig: count %d on"
47737106075aSmarks 			    "rwlock @ %p\n", l->count, (void *)&l);
47747c478bd9Sstevel@tonic-gate #endif
47757c478bd9Sstevel@tonic-gate 		l->count++;
47767c478bd9Sstevel@tonic-gate 	} else {
47777c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_WRITER);
47787c478bd9Sstevel@tonic-gate 		/*
47797c478bd9Sstevel@tonic-gate 		 * While there are readers active or a writer
47807c478bd9Sstevel@tonic-gate 		 * active, then wait for all of the readers
47817c478bd9Sstevel@tonic-gate 		 * to finish or for the writer to finish.
47827c478bd9Sstevel@tonic-gate 		 * Then, set the owner field to curthread and
47837c478bd9Sstevel@tonic-gate 		 * decrement count to indicate that a writer
47847c478bd9Sstevel@tonic-gate 		 * is active.
47857c478bd9Sstevel@tonic-gate 		 */
47867c478bd9Sstevel@tonic-gate 		while (l->count > 0 || l->owner != NULL) {
47877c478bd9Sstevel@tonic-gate 			l->waiters++;
47887c478bd9Sstevel@tonic-gate 			if (intr) {
47897c478bd9Sstevel@tonic-gate 				klwp_t *lwp = ttolwp(curthread);
47907c478bd9Sstevel@tonic-gate 
47917c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
47927c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop++;
47937c478bd9Sstevel@tonic-gate 				if (!cv_wait_sig(&l->cv, &l->lock)) {
47947c478bd9Sstevel@tonic-gate 					if (lwp != NULL)
47957c478bd9Sstevel@tonic-gate 						lwp->lwp_nostop--;
47967c478bd9Sstevel@tonic-gate 					l->waiters--;
47977c478bd9Sstevel@tonic-gate 					cv_broadcast(&l->cv);
47987c478bd9Sstevel@tonic-gate 					mutex_exit(&l->lock);
47997c478bd9Sstevel@tonic-gate 					return (EINTR);
48007c478bd9Sstevel@tonic-gate 				}
48017c478bd9Sstevel@tonic-gate 				if (lwp != NULL)
48027c478bd9Sstevel@tonic-gate 					lwp->lwp_nostop--;
48037c478bd9Sstevel@tonic-gate 			} else
48047c478bd9Sstevel@tonic-gate 				cv_wait(&l->cv, &l->lock);
48057c478bd9Sstevel@tonic-gate 			l->waiters--;
48067c478bd9Sstevel@tonic-gate 		}
48077c478bd9Sstevel@tonic-gate 		l->owner = curthread;
48087c478bd9Sstevel@tonic-gate 		l->count--;
48097c478bd9Sstevel@tonic-gate 	}
48107c478bd9Sstevel@tonic-gate 
48117c478bd9Sstevel@tonic-gate 	mutex_exit(&l->lock);
48127c478bd9Sstevel@tonic-gate 
48137c478bd9Sstevel@tonic-gate 	return (0);
48147c478bd9Sstevel@tonic-gate }
48157c478bd9Sstevel@tonic-gate 
48167c478bd9Sstevel@tonic-gate /*
48177c478bd9Sstevel@tonic-gate  * If the lock is available, obtain it and return non-zero.  If there is
48187c478bd9Sstevel@tonic-gate  * already a conflicting lock, return 0 immediately.
48197c478bd9Sstevel@tonic-gate  */
48207c478bd9Sstevel@tonic-gate 
48217c478bd9Sstevel@tonic-gate int
48227c478bd9Sstevel@tonic-gate nfs_rw_tryenter(nfs_rwlock_t *l, krw_t rw)
48237c478bd9Sstevel@tonic-gate {
48247c478bd9Sstevel@tonic-gate 	mutex_enter(&l->lock);
48257c478bd9Sstevel@tonic-gate 
48267c478bd9Sstevel@tonic-gate 	/*
48277c478bd9Sstevel@tonic-gate 	 * If this is a nested enter, then allow it.  There
48287c478bd9Sstevel@tonic-gate 	 * must be as many exits as enters through.
48297c478bd9Sstevel@tonic-gate 	 */
48307c478bd9Sstevel@tonic-gate 	if (l->owner == curthread) {
48317c478bd9Sstevel@tonic-gate 		/* lock is held for writing by current thread */
48327c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_READER || rw == RW_WRITER);
48337c478bd9Sstevel@tonic-gate 		l->count--;
48347c478bd9Sstevel@tonic-gate 	} else if (rw == RW_READER) {
48357c478bd9Sstevel@tonic-gate 		/*
48367c478bd9Sstevel@tonic-gate 		 * If there is a writer active or writers waiting, deny the
48377c478bd9Sstevel@tonic-gate 		 * lock.  Otherwise, bump the count of readers.
48387c478bd9Sstevel@tonic-gate 		 */
48397c478bd9Sstevel@tonic-gate 		if (l->count < 0 || l->waiters > 0) {
48407c478bd9Sstevel@tonic-gate 			mutex_exit(&l->lock);
48417c478bd9Sstevel@tonic-gate 			return (0);
48427c478bd9Sstevel@tonic-gate 		}
48437c478bd9Sstevel@tonic-gate 		l->count++;
48447c478bd9Sstevel@tonic-gate 	} else {
48457c478bd9Sstevel@tonic-gate 		ASSERT(rw == RW_WRITER);
48467c478bd9Sstevel@tonic-gate 		/*
48477c478bd9Sstevel@tonic-gate 		 * If there are readers active or a writer active, deny the
48487c478bd9Sstevel@tonic-gate 		 * lock.  Otherwise, set the owner field to curthread and
48497c478bd9Sstevel@tonic-gate 		 * decrement count to indicate that a writer is active.
48507c478bd9Sstevel@tonic-gate 		 */
48517c478bd9Sstevel@tonic-gate 		if (l->count > 0 || l->owner != NULL) {
48527c478bd9Sstevel@tonic-gate 			mutex_exit(&l->lock);
48537c478bd9Sstevel@tonic-gate 			return (0);
48547c478bd9Sstevel@tonic-gate 		}
48557c478bd9Sstevel@tonic-gate 		l->owner = curthread;
48567c478bd9Sstevel@tonic-gate 		l->count--;
48577c478bd9Sstevel@tonic-gate 	}
48587c478bd9Sstevel@tonic-gate 
48597c478bd9Sstevel@tonic-gate 	mutex_exit(&l->lock);
48607c478bd9Sstevel@tonic-gate 
48617c478bd9Sstevel@tonic-gate 	return (1);
48627c478bd9Sstevel@tonic-gate }
48637c478bd9Sstevel@tonic-gate 
48647c478bd9Sstevel@tonic-gate void
48657c478bd9Sstevel@tonic-gate nfs_rw_exit(nfs_rwlock_t *l)
48667c478bd9Sstevel@tonic-gate {
48677c478bd9Sstevel@tonic-gate 
48687c478bd9Sstevel@tonic-gate 	mutex_enter(&l->lock);
48697c478bd9Sstevel@tonic-gate 	/*
48707c478bd9Sstevel@tonic-gate 	 * If this is releasing a writer lock, then increment count to
48717c478bd9Sstevel@tonic-gate 	 * indicate that there is one less writer active.  If this was
48727c478bd9Sstevel@tonic-gate 	 * the last of possibly nested writer locks, then clear the owner
48737c478bd9Sstevel@tonic-gate 	 * field as well to indicate that there is no writer active
48747c478bd9Sstevel@tonic-gate 	 * and wakeup any possible waiting writers or readers.
48757c478bd9Sstevel@tonic-gate 	 *
48767c478bd9Sstevel@tonic-gate 	 * If releasing a reader lock, then just decrement count to
48777c478bd9Sstevel@tonic-gate 	 * indicate that there is one less reader active.  If this was
48787c478bd9Sstevel@tonic-gate 	 * the last active reader and there are writer(s) waiting,
48797c478bd9Sstevel@tonic-gate 	 * then wake up the first.
48807c478bd9Sstevel@tonic-gate 	 */
48817c478bd9Sstevel@tonic-gate 	if (l->owner != NULL) {
48827c478bd9Sstevel@tonic-gate 		ASSERT(l->owner == curthread);
48837c478bd9Sstevel@tonic-gate 		l->count++;
48847c478bd9Sstevel@tonic-gate 		if (l->count == 0) {
48857c478bd9Sstevel@tonic-gate 			l->owner = NULL;
48867c478bd9Sstevel@tonic-gate 			cv_broadcast(&l->cv);
48877c478bd9Sstevel@tonic-gate 		}
48887c478bd9Sstevel@tonic-gate 	} else {
48897c478bd9Sstevel@tonic-gate 		ASSERT(l->count > 0);
48907c478bd9Sstevel@tonic-gate 		l->count--;
48917c478bd9Sstevel@tonic-gate 		if (l->count == 0 && l->waiters > 0)
48927c478bd9Sstevel@tonic-gate 			cv_broadcast(&l->cv);
48937c478bd9Sstevel@tonic-gate 	}
48947c478bd9Sstevel@tonic-gate 	mutex_exit(&l->lock);
48957c478bd9Sstevel@tonic-gate }
48967c478bd9Sstevel@tonic-gate 
48977c478bd9Sstevel@tonic-gate int
48987c478bd9Sstevel@tonic-gate nfs_rw_lock_held(nfs_rwlock_t *l, krw_t rw)
48997c478bd9Sstevel@tonic-gate {
49007c478bd9Sstevel@tonic-gate 
49017c478bd9Sstevel@tonic-gate 	if (rw == RW_READER)
49027c478bd9Sstevel@tonic-gate 		return (l->count > 0);
49037c478bd9Sstevel@tonic-gate 	ASSERT(rw == RW_WRITER);
49047c478bd9Sstevel@tonic-gate 	return (l->count < 0);
49057c478bd9Sstevel@tonic-gate }
49067c478bd9Sstevel@tonic-gate 
49077c478bd9Sstevel@tonic-gate /* ARGSUSED */
49087c478bd9Sstevel@tonic-gate void
49097c478bd9Sstevel@tonic-gate nfs_rw_init(nfs_rwlock_t *l, char *name, krw_type_t type, void *arg)
49107c478bd9Sstevel@tonic-gate {
49117c478bd9Sstevel@tonic-gate 
49127c478bd9Sstevel@tonic-gate 	l->count = 0;
49137c478bd9Sstevel@tonic-gate 	l->waiters = 0;
49147c478bd9Sstevel@tonic-gate 	l->owner = NULL;
49157c478bd9Sstevel@tonic-gate 	mutex_init(&l->lock, NULL, MUTEX_DEFAULT, NULL);
49167c478bd9Sstevel@tonic-gate 	cv_init(&l->cv, NULL, CV_DEFAULT, NULL);
49177c478bd9Sstevel@tonic-gate }
49187c478bd9Sstevel@tonic-gate 
49197c478bd9Sstevel@tonic-gate void
49207c478bd9Sstevel@tonic-gate nfs_rw_destroy(nfs_rwlock_t *l)
49217c478bd9Sstevel@tonic-gate {
49227c478bd9Sstevel@tonic-gate 
49237c478bd9Sstevel@tonic-gate 	mutex_destroy(&l->lock);
49247c478bd9Sstevel@tonic-gate 	cv_destroy(&l->cv);
49257c478bd9Sstevel@tonic-gate }
49267c478bd9Sstevel@tonic-gate 
49277c478bd9Sstevel@tonic-gate int
49287c478bd9Sstevel@tonic-gate nfs3_rddir_compar(const void *x, const void *y)
49297c478bd9Sstevel@tonic-gate {
49307c478bd9Sstevel@tonic-gate 	rddir_cache *a = (rddir_cache *)x;
49317c478bd9Sstevel@tonic-gate 	rddir_cache *b = (rddir_cache *)y;
49327c478bd9Sstevel@tonic-gate 
49337c478bd9Sstevel@tonic-gate 	if (a->nfs3_cookie == b->nfs3_cookie) {
49347c478bd9Sstevel@tonic-gate 		if (a->buflen == b->buflen)
49357c478bd9Sstevel@tonic-gate 			return (0);
49367c478bd9Sstevel@tonic-gate 		if (a->buflen < b->buflen)
49377c478bd9Sstevel@tonic-gate 			return (-1);
49387c478bd9Sstevel@tonic-gate 		return (1);
49397c478bd9Sstevel@tonic-gate 	}
49407c478bd9Sstevel@tonic-gate 
49417c478bd9Sstevel@tonic-gate 	if (a->nfs3_cookie < b->nfs3_cookie)
49427c478bd9Sstevel@tonic-gate 		return (-1);
49437c478bd9Sstevel@tonic-gate 
49447c478bd9Sstevel@tonic-gate 	return (1);
49457c478bd9Sstevel@tonic-gate }
49467c478bd9Sstevel@tonic-gate 
49477c478bd9Sstevel@tonic-gate int
49487c478bd9Sstevel@tonic-gate nfs_rddir_compar(const void *x, const void *y)
49497c478bd9Sstevel@tonic-gate {
49507c478bd9Sstevel@tonic-gate 	rddir_cache *a = (rddir_cache *)x;
49517c478bd9Sstevel@tonic-gate 	rddir_cache *b = (rddir_cache *)y;
49527c478bd9Sstevel@tonic-gate 
49537c478bd9Sstevel@tonic-gate 	if (a->nfs_cookie == b->nfs_cookie) {
49547c478bd9Sstevel@tonic-gate 		if (a->buflen == b->buflen)
49557c478bd9Sstevel@tonic-gate 			return (0);
49567c478bd9Sstevel@tonic-gate 		if (a->buflen < b->buflen)
49577c478bd9Sstevel@tonic-gate 			return (-1);
49587c478bd9Sstevel@tonic-gate 		return (1);
49597c478bd9Sstevel@tonic-gate 	}
49607c478bd9Sstevel@tonic-gate 
49617c478bd9Sstevel@tonic-gate 	if (a->nfs_cookie < b->nfs_cookie)
49627c478bd9Sstevel@tonic-gate 		return (-1);
49637c478bd9Sstevel@tonic-gate 
49647c478bd9Sstevel@tonic-gate 	return (1);
49657c478bd9Sstevel@tonic-gate }
49667c478bd9Sstevel@tonic-gate 
49677c478bd9Sstevel@tonic-gate static char *
49687c478bd9Sstevel@tonic-gate nfs_getsrvnames(mntinfo_t *mi, size_t *len)
49697c478bd9Sstevel@tonic-gate {
49707c478bd9Sstevel@tonic-gate 	servinfo_t *s;
49717c478bd9Sstevel@tonic-gate 	char *srvnames;
49727c478bd9Sstevel@tonic-gate 	char *namep;
49737c478bd9Sstevel@tonic-gate 	size_t length;
49747c478bd9Sstevel@tonic-gate 
49757c478bd9Sstevel@tonic-gate 	/*
49767c478bd9Sstevel@tonic-gate 	 * Calculate the length of the string required to hold all
49777c478bd9Sstevel@tonic-gate 	 * of the server names plus either a comma or a null
49787c478bd9Sstevel@tonic-gate 	 * character following each individual one.
49797c478bd9Sstevel@tonic-gate 	 */
49807c478bd9Sstevel@tonic-gate 	length = 0;
49817c478bd9Sstevel@tonic-gate 	for (s = mi->mi_servers; s != NULL; s = s->sv_next)
49827c478bd9Sstevel@tonic-gate 		length += s->sv_hostnamelen;
49837c478bd9Sstevel@tonic-gate 
49847c478bd9Sstevel@tonic-gate 	srvnames = kmem_alloc(length, KM_SLEEP);
49857c478bd9Sstevel@tonic-gate 
49867c478bd9Sstevel@tonic-gate 	namep = srvnames;
49877c478bd9Sstevel@tonic-gate 	for (s = mi->mi_servers; s != NULL; s = s->sv_next) {
49887c478bd9Sstevel@tonic-gate 		(void) strcpy(namep, s->sv_hostname);
49897c478bd9Sstevel@tonic-gate 		namep += s->sv_hostnamelen - 1;
49907c478bd9Sstevel@tonic-gate 		*namep++ = ',';
49917c478bd9Sstevel@tonic-gate 	}
49927c478bd9Sstevel@tonic-gate 	*--namep = '\0';
49937c478bd9Sstevel@tonic-gate 
49947c478bd9Sstevel@tonic-gate 	*len = length;
49957c478bd9Sstevel@tonic-gate 
49967c478bd9Sstevel@tonic-gate 	return (srvnames);
49977c478bd9Sstevel@tonic-gate }
4998108322fbScarlsonj 
4999108322fbScarlsonj /*
5000108322fbScarlsonj  * These two functions are temporary and designed for the upgrade-workaround
5001108322fbScarlsonj  * only.  They cannot be used for general zone-crossing NFS client support, and
5002108322fbScarlsonj  * will be removed shortly.
5003108322fbScarlsonj  *
5004108322fbScarlsonj  * When the workaround is enabled, all NFS traffic is forced into the global
5005108322fbScarlsonj  * zone.  These functions are called when the code needs to refer to the state
5006108322fbScarlsonj  * of the underlying network connection.  They're not called when the function
5007108322fbScarlsonj  * needs to refer to the state of the process that invoked the system call.
5008108322fbScarlsonj  * (E.g., when checking whether the zone is shutting down during the mount()
5009108322fbScarlsonj  * call.)
5010108322fbScarlsonj  */
5011108322fbScarlsonj 
5012108322fbScarlsonj struct zone *
5013108322fbScarlsonj nfs_zone(void)
5014108322fbScarlsonj {
5015108322fbScarlsonj 	return (nfs_global_client_only != 0 ? global_zone : curproc->p_zone);
5016108322fbScarlsonj }
5017108322fbScarlsonj 
5018108322fbScarlsonj zoneid_t
5019108322fbScarlsonj nfs_zoneid(void)
5020108322fbScarlsonj {
5021108322fbScarlsonj 	return (nfs_global_client_only != 0 ? GLOBAL_ZONEID : getzoneid());
5022108322fbScarlsonj }
502345916cd2Sjpk 
502445916cd2Sjpk /*
502545916cd2Sjpk  * nfs_mount_label_policy:
502645916cd2Sjpk  *	Determine whether the mount is allowed according to MAC check,
502745916cd2Sjpk  *	by comparing (where appropriate) label of the remote server
502845916cd2Sjpk  *	against the label of the zone being mounted into.
502945916cd2Sjpk  *
503045916cd2Sjpk  *	Returns:
503145916cd2Sjpk  *		 0 :	access allowed
503245916cd2Sjpk  *		-1 :	read-only access allowed (i.e., read-down)
503345916cd2Sjpk  *		>0 :	error code, such as EACCES
503445916cd2Sjpk  */
503545916cd2Sjpk int
503645916cd2Sjpk nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr,
503745916cd2Sjpk     struct knetconfig *knconf, cred_t *cr)
503845916cd2Sjpk {
503945916cd2Sjpk 	int		addr_type;
504045916cd2Sjpk 	void		*ipaddr;
504145916cd2Sjpk 	bslabel_t	*server_sl, *mntlabel;
504245916cd2Sjpk 	zone_t		*mntzone = NULL;
504345916cd2Sjpk 	ts_label_t	*zlabel;
504445916cd2Sjpk 	tsol_tpc_t	*tp;
504545916cd2Sjpk 	ts_label_t	*tsl = NULL;
504645916cd2Sjpk 	int		retv;
504745916cd2Sjpk 
504845916cd2Sjpk 	/*
504945916cd2Sjpk 	 * Get the zone's label.  Each zone on a labeled system has a label.
505045916cd2Sjpk 	 */
505145916cd2Sjpk 	mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE);
505245916cd2Sjpk 	zlabel = mntzone->zone_slabel;
505345916cd2Sjpk 	ASSERT(zlabel != NULL);
505445916cd2Sjpk 	label_hold(zlabel);
505545916cd2Sjpk 
505645916cd2Sjpk 	if (strcmp(knconf->knc_protofmly, NC_INET) == 0) {
505745916cd2Sjpk 		addr_type = IPV4_VERSION;
505845916cd2Sjpk 		ipaddr = &((struct sockaddr_in *)addr->buf)->sin_addr;
505945916cd2Sjpk 	} else if (strcmp(knconf->knc_protofmly, NC_INET6) == 0) {
506045916cd2Sjpk 		addr_type = IPV6_VERSION;
506145916cd2Sjpk 		ipaddr = &((struct sockaddr_in6 *)addr->buf)->sin6_addr;
506245916cd2Sjpk 	} else {
506345916cd2Sjpk 		retv = 0;
506445916cd2Sjpk 		goto out;
506545916cd2Sjpk 	}
506645916cd2Sjpk 
506745916cd2Sjpk 	retv = EACCES;				/* assume the worst */
506845916cd2Sjpk 
506945916cd2Sjpk 	/*
507045916cd2Sjpk 	 * Next, get the assigned label of the remote server.
507145916cd2Sjpk 	 */
507245916cd2Sjpk 	tp = find_tpc(ipaddr, addr_type, B_FALSE);
507345916cd2Sjpk 	if (tp == NULL)
507445916cd2Sjpk 		goto out;			/* error getting host entry */
507545916cd2Sjpk 
507645916cd2Sjpk 	if (tp->tpc_tp.tp_doi != zlabel->tsl_doi)
507745916cd2Sjpk 		goto rel_tpc;			/* invalid domain */
507845916cd2Sjpk 	if ((tp->tpc_tp.host_type != SUN_CIPSO) &&
507945916cd2Sjpk 	    (tp->tpc_tp.host_type != UNLABELED))
508045916cd2Sjpk 		goto rel_tpc;			/* invalid hosttype */
508145916cd2Sjpk 
508245916cd2Sjpk 	if (tp->tpc_tp.host_type == SUN_CIPSO) {
508345916cd2Sjpk 		tsl = getflabel_cipso(vfsp);
508445916cd2Sjpk 		if (tsl == NULL)
508545916cd2Sjpk 			goto rel_tpc;		/* error getting server lbl */
508645916cd2Sjpk 
508745916cd2Sjpk 		server_sl = label2bslabel(tsl);
508845916cd2Sjpk 	} else {	/* UNLABELED */
508945916cd2Sjpk 		server_sl = &tp->tpc_tp.tp_def_label;
509045916cd2Sjpk 	}
509145916cd2Sjpk 
509245916cd2Sjpk 	mntlabel = label2bslabel(zlabel);
509345916cd2Sjpk 
509445916cd2Sjpk 	/*
509545916cd2Sjpk 	 * Now compare labels to complete the MAC check.  If the labels
509645916cd2Sjpk 	 * are equal or if the requestor is in the global zone and has
509745916cd2Sjpk 	 * NET_MAC_AWARE, then allow read-write access.   (Except for
509845916cd2Sjpk 	 * mounts into the global zone itself; restrict these to
509945916cd2Sjpk 	 * read-only.)
510045916cd2Sjpk 	 *
510145916cd2Sjpk 	 * If the requestor is in some other zone, but his label
510245916cd2Sjpk 	 * dominates the server, then allow read-down.
510345916cd2Sjpk 	 *
510445916cd2Sjpk 	 * Otherwise, access is denied.
510545916cd2Sjpk 	 */
510645916cd2Sjpk 	if (blequal(mntlabel, server_sl) ||
510745916cd2Sjpk 	    (crgetzoneid(cr) == GLOBAL_ZONEID &&
510845916cd2Sjpk 	    getpflags(NET_MAC_AWARE, cr) != 0)) {
510945916cd2Sjpk 		if ((mntzone == global_zone) ||
511045916cd2Sjpk 		    !blequal(mntlabel, server_sl))
511145916cd2Sjpk 			retv = -1;		/* read-only */
511245916cd2Sjpk 		else
511345916cd2Sjpk 			retv = 0;		/* access OK */
511445916cd2Sjpk 	} else if (bldominates(mntlabel, server_sl)) {
511545916cd2Sjpk 		retv = -1;			/* read-only */
511645916cd2Sjpk 	} else {
511745916cd2Sjpk 		retv = EACCES;
511845916cd2Sjpk 	}
511945916cd2Sjpk 
512045916cd2Sjpk 	if (tsl != NULL)
512145916cd2Sjpk 		label_rele(tsl);
512245916cd2Sjpk 
512345916cd2Sjpk rel_tpc:
512445916cd2Sjpk 	TPC_RELE(tp);
512545916cd2Sjpk out:
512645916cd2Sjpk 	if (mntzone)
512745916cd2Sjpk 		zone_rele(mntzone);
512845916cd2Sjpk 	label_rele(zlabel);
512945916cd2Sjpk 	return (retv);
513045916cd2Sjpk }
51319acbbeafSnn 
51329acbbeafSnn boolean_t
51339acbbeafSnn nfs_has_ctty(void)
51349acbbeafSnn {
51359acbbeafSnn 	boolean_t rv;
51369acbbeafSnn 	mutex_enter(&curproc->p_splock);
51379acbbeafSnn 	rv = (curproc->p_sessp->s_vp != NULL);
51389acbbeafSnn 	mutex_exit(&curproc->p_splock);
51399acbbeafSnn 	return (rv);
51409acbbeafSnn }
514103986916Sjarrett 
514293aeed83Smarks /*
514393aeed83Smarks  * See if xattr directory to see if it has any generic user attributes
514493aeed83Smarks  */
514593aeed83Smarks int
514693aeed83Smarks do_xattr_exists_check(vnode_t *vp, ulong_t *valp, cred_t *cr)
514793aeed83Smarks {
514893aeed83Smarks 	struct uio uio;
514993aeed83Smarks 	struct iovec iov;
515093aeed83Smarks 	char *dbuf;
515193aeed83Smarks 	struct dirent64 *dp;
515293aeed83Smarks 	size_t dlen = 8 * 1024;
515393aeed83Smarks 	size_t dbuflen;
515493aeed83Smarks 	int eof = 0;
515593aeed83Smarks 	int error;
515693aeed83Smarks 
515793aeed83Smarks 	*valp = 0;
515893aeed83Smarks 	dbuf = kmem_alloc(dlen, KM_SLEEP);
515993aeed83Smarks 	uio.uio_iov = &iov;
516093aeed83Smarks 	uio.uio_iovcnt = 1;
516193aeed83Smarks 	uio.uio_segflg = UIO_SYSSPACE;
516293aeed83Smarks 	uio.uio_fmode = 0;
516393aeed83Smarks 	uio.uio_extflg = UIO_COPY_CACHED;
516493aeed83Smarks 	uio.uio_loffset = 0;
516593aeed83Smarks 	uio.uio_resid = dlen;
516693aeed83Smarks 	iov.iov_base = dbuf;
516793aeed83Smarks 	iov.iov_len = dlen;
516893aeed83Smarks 	(void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL);
516993aeed83Smarks 	error = VOP_READDIR(vp, &uio, cr, &eof, NULL, 0);
517093aeed83Smarks 	VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL);
517193aeed83Smarks 
517293aeed83Smarks 	dbuflen = dlen - uio.uio_resid;
517393aeed83Smarks 
517493aeed83Smarks 	if (error || dbuflen == 0) {
517593aeed83Smarks 		kmem_free(dbuf, dlen);
517693aeed83Smarks 		return (error);
517793aeed83Smarks 	}
517893aeed83Smarks 
517993aeed83Smarks 	dp = (dirent64_t *)dbuf;
518093aeed83Smarks 
518193aeed83Smarks 	while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) {
518293aeed83Smarks 		if (strcmp(dp->d_name, ".") == 0 ||
518393aeed83Smarks 		    strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name,
518493aeed83Smarks 		    VIEW_READWRITE) == 0 || strcmp(dp->d_name,
518593aeed83Smarks 		    VIEW_READONLY) == 0) {
518693aeed83Smarks 			dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen);
518793aeed83Smarks 			continue;
518893aeed83Smarks 		}
518993aeed83Smarks 
519093aeed83Smarks 		*valp = 1;
519193aeed83Smarks 		break;
519293aeed83Smarks 	}
519393aeed83Smarks 	kmem_free(dbuf, dlen);
519493aeed83Smarks 	return (0);
519593aeed83Smarks }
5196