xref: /illumos-gate/usr/src/uts/common/fs/nfs/nfs_subr.c (revision e010bda9)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
545916cd2Sjpk  * Common Development and Distribution License (the "License").
645916cd2Sjpk  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22f8bbc571SPavel Filipensky  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
26f5654033SAlexander Eremin /*
27f5654033SAlexander Eremin  * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
28ade42b55SSebastien Roy  * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
29f5654033SAlexander Eremin  */
30f5654033SAlexander Eremin 
317c478bd9Sstevel@tonic-gate #include <sys/param.h>
327c478bd9Sstevel@tonic-gate #include <sys/types.h>
337c478bd9Sstevel@tonic-gate #include <sys/systm.h>
3467dbe2beSCasper H.S. Dik #include <sys/cred.h>
357c478bd9Sstevel@tonic-gate #include <sys/proc.h>
367c478bd9Sstevel@tonic-gate #include <sys/user.h>
377c478bd9Sstevel@tonic-gate #include <sys/time.h>
387c478bd9Sstevel@tonic-gate #include <sys/buf.h>
397c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
407c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
417c478bd9Sstevel@tonic-gate #include <sys/socket.h>
427c478bd9Sstevel@tonic-gate #include <sys/uio.h>
437c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
447c478bd9Sstevel@tonic-gate #include <sys/swap.h>
457c478bd9Sstevel@tonic-gate #include <sys/errno.h>
467c478bd9Sstevel@tonic-gate #include <sys/debug.h>
477c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
487c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
497c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
507c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
517c478bd9Sstevel@tonic-gate #include <sys/session.h>
527c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
537c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
547c478bd9Sstevel@tonic-gate #include <sys/acl.h>
557c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
567c478bd9Sstevel@tonic-gate #include <sys/pathname.h>
577c478bd9Sstevel@tonic-gate #include <sys/flock.h>
587c478bd9Sstevel@tonic-gate #include <sys/dirent.h>
597c478bd9Sstevel@tonic-gate #include <sys/flock.h>
607c478bd9Sstevel@tonic-gate #include <sys/callb.h>
617c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
627c478bd9Sstevel@tonic-gate #include <sys/list.h>
6345916cd2Sjpk #include <sys/tsol/tnet.h>
6445916cd2Sjpk #include <sys/priv.h>
6503986916Sjarrett #include <sys/sdt.h>
6693aeed83Smarks #include <sys/attr.h>
6745916cd2Sjpk 
6845916cd2Sjpk #include <inet/ip6.h>
697c478bd9Sstevel@tonic-gate 
707c478bd9Sstevel@tonic-gate #include <rpc/types.h>
717c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
727c478bd9Sstevel@tonic-gate #include <rpc/auth.h>
737c478bd9Sstevel@tonic-gate #include <rpc/clnt.h>
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate #include <nfs/nfs.h>
767c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
777c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h>
787c478bd9Sstevel@tonic-gate #include <nfs/rnode.h>
797c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h>
807c478bd9Sstevel@tonic-gate 
8103986916Sjarrett #include <sys/tsol/label.h>
8203986916Sjarrett 
837c478bd9Sstevel@tonic-gate /*
847c478bd9Sstevel@tonic-gate  * The hash queues for the access to active and cached rnodes
857c478bd9Sstevel@tonic-gate  * are organized as doubly linked lists.  A reader/writer lock
867c478bd9Sstevel@tonic-gate  * for each hash bucket is used to control access and to synchronize
877c478bd9Sstevel@tonic-gate  * lookups, additions, and deletions from the hash queue.
887c478bd9Sstevel@tonic-gate  *
897c478bd9Sstevel@tonic-gate  * The rnode freelist is organized as a doubly linked list with
907c478bd9Sstevel@tonic-gate  * a head pointer.  Additions and deletions are synchronized via
917c478bd9Sstevel@tonic-gate  * a single mutex.
927c478bd9Sstevel@tonic-gate  *
937c478bd9Sstevel@tonic-gate  * In order to add an rnode to the free list, it must be hashed into
947c478bd9Sstevel@tonic-gate  * a hash queue and the exclusive lock to the hash queue be held.
957c478bd9Sstevel@tonic-gate  * If an rnode is not hashed into a hash queue, then it is destroyed
967c478bd9Sstevel@tonic-gate  * because it represents no valuable information that can be reused
977c478bd9Sstevel@tonic-gate  * about the file.  The exclusive lock to the hash queue must be
987c478bd9Sstevel@tonic-gate  * held in order to prevent a lookup in the hash queue from finding
997c478bd9Sstevel@tonic-gate  * the rnode and using it and assuming that the rnode is not on the
1007c478bd9Sstevel@tonic-gate  * freelist.  The lookup in the hash queue will have the hash queue
1017c478bd9Sstevel@tonic-gate  * locked, either exclusive or shared.
1027c478bd9Sstevel@tonic-gate  *
1037c478bd9Sstevel@tonic-gate  * The vnode reference count for each rnode is not allowed to drop
1047c478bd9Sstevel@tonic-gate  * below 1.  This prevents external entities, such as the VM
1057c478bd9Sstevel@tonic-gate  * subsystem, from acquiring references to vnodes already on the
1067c478bd9Sstevel@tonic-gate  * freelist and then trying to place them back on the freelist
 * when their reference is released.  This means that when an
1087c478bd9Sstevel@tonic-gate  * rnode is looked up in the hash queues, then either the rnode
109da6c28aaSamw  * is removed from the freelist and that reference is transferred to
1107c478bd9Sstevel@tonic-gate  * the new reference or the vnode reference count must be incremented
1117c478bd9Sstevel@tonic-gate  * accordingly.  The mutex for the freelist must be held in order to
1127c478bd9Sstevel@tonic-gate  * accurately test to see if the rnode is on the freelist or not.
1137c478bd9Sstevel@tonic-gate  * The hash queue lock might be held shared and it is possible that
1147c478bd9Sstevel@tonic-gate  * two different threads may race to remove the rnode from the
1157c478bd9Sstevel@tonic-gate  * freelist.  This race can be resolved by holding the mutex for the
1167c478bd9Sstevel@tonic-gate  * freelist.  Please note that the mutex for the freelist does not
 * need to be held if the rnode is not on the freelist.  It can not be
1187c478bd9Sstevel@tonic-gate  * placed on the freelist due to the requirement that the thread
1197c478bd9Sstevel@tonic-gate  * putting the rnode on the freelist must hold the exclusive lock
1207c478bd9Sstevel@tonic-gate  * to the hash queue and the thread doing the lookup in the hash
1217c478bd9Sstevel@tonic-gate  * queue is holding either a shared or exclusive lock to the hash
1227c478bd9Sstevel@tonic-gate  * queue.
1237c478bd9Sstevel@tonic-gate  *
1247c478bd9Sstevel@tonic-gate  * The lock ordering is:
1257c478bd9Sstevel@tonic-gate  *
1267c478bd9Sstevel@tonic-gate  *	hash bucket lock -> vnode lock
1277c478bd9Sstevel@tonic-gate  *	hash bucket lock -> freelist lock
1287c478bd9Sstevel@tonic-gate  */
1297c478bd9Sstevel@tonic-gate static rhashq_t *rtable;
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate static kmutex_t rpfreelist_lock;
1327c478bd9Sstevel@tonic-gate static rnode_t *rpfreelist = NULL;
1337c478bd9Sstevel@tonic-gate static long rnew = 0;
1347c478bd9Sstevel@tonic-gate long nrnode = 0;
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate static int rtablesize;
1377c478bd9Sstevel@tonic-gate static int rtablemask;
1387c478bd9Sstevel@tonic-gate 
1397c478bd9Sstevel@tonic-gate static int hashlen = 4;
1407c478bd9Sstevel@tonic-gate 
1417c478bd9Sstevel@tonic-gate static struct kmem_cache *rnode_cache;
1427c478bd9Sstevel@tonic-gate 
1437c478bd9Sstevel@tonic-gate /*
1447c478bd9Sstevel@tonic-gate  * Mutex to protect the following variables:
1457c478bd9Sstevel@tonic-gate  *	nfs_major
1467c478bd9Sstevel@tonic-gate  *	nfs_minor
1477c478bd9Sstevel@tonic-gate  */
1487c478bd9Sstevel@tonic-gate kmutex_t nfs_minor_lock;
1497c478bd9Sstevel@tonic-gate int nfs_major;
1507c478bd9Sstevel@tonic-gate int nfs_minor;
1517c478bd9Sstevel@tonic-gate 
1527c478bd9Sstevel@tonic-gate /* Do we allow preepoch (negative) time values otw? */
1537c478bd9Sstevel@tonic-gate bool_t nfs_allow_preepoch_time = FALSE;	/* default: do not allow preepoch */
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate /*
1567c478bd9Sstevel@tonic-gate  * Access cache
1577c478bd9Sstevel@tonic-gate  */
1587c478bd9Sstevel@tonic-gate static acache_hash_t *acache;
1597c478bd9Sstevel@tonic-gate static long nacache;	/* used strictly to size the number of hash queues */
1607c478bd9Sstevel@tonic-gate 
1617c478bd9Sstevel@tonic-gate static int acachesize;
1627c478bd9Sstevel@tonic-gate static int acachemask;
1637c478bd9Sstevel@tonic-gate static struct kmem_cache *acache_cache;
1647c478bd9Sstevel@tonic-gate 
1657c478bd9Sstevel@tonic-gate /*
1667c478bd9Sstevel@tonic-gate  * Client side utilities
1677c478bd9Sstevel@tonic-gate  */
1687c478bd9Sstevel@tonic-gate 
1697c478bd9Sstevel@tonic-gate /*
1707c478bd9Sstevel@tonic-gate  * client side statistics
1717c478bd9Sstevel@tonic-gate  */
1727c478bd9Sstevel@tonic-gate static const struct clstat clstat_tmpl = {
1737c478bd9Sstevel@tonic-gate 	{ "calls",	KSTAT_DATA_UINT64 },
1747c478bd9Sstevel@tonic-gate 	{ "badcalls",	KSTAT_DATA_UINT64 },
1757c478bd9Sstevel@tonic-gate 	{ "clgets",	KSTAT_DATA_UINT64 },
1767c478bd9Sstevel@tonic-gate 	{ "cltoomany",	KSTAT_DATA_UINT64 },
1777c478bd9Sstevel@tonic-gate #ifdef DEBUG
1787c478bd9Sstevel@tonic-gate 	{ "clalloc",	KSTAT_DATA_UINT64 },
1797c478bd9Sstevel@tonic-gate 	{ "noresponse",	KSTAT_DATA_UINT64 },
1807c478bd9Sstevel@tonic-gate 	{ "failover",	KSTAT_DATA_UINT64 },
1817c478bd9Sstevel@tonic-gate 	{ "remap",	KSTAT_DATA_UINT64 },
1827c478bd9Sstevel@tonic-gate #endif
1837c478bd9Sstevel@tonic-gate };
1847c478bd9Sstevel@tonic-gate 
1857c478bd9Sstevel@tonic-gate /*
1867c478bd9Sstevel@tonic-gate  * The following are statistics that describe behavior of the system as a whole
 * and don't correspond to any one particular zone.
1887c478bd9Sstevel@tonic-gate  */
1897c478bd9Sstevel@tonic-gate #ifdef DEBUG
1907c478bd9Sstevel@tonic-gate static struct clstat_debug {
1917c478bd9Sstevel@tonic-gate 	kstat_named_t	nrnode;			/* number of allocated rnodes */
1927c478bd9Sstevel@tonic-gate 	kstat_named_t	access;			/* size of access cache */
1937c478bd9Sstevel@tonic-gate 	kstat_named_t	dirent;			/* size of readdir cache */
1947c478bd9Sstevel@tonic-gate 	kstat_named_t	dirents;		/* size of readdir buf cache */
1957c478bd9Sstevel@tonic-gate 	kstat_named_t	reclaim;		/* number of reclaims */
1967c478bd9Sstevel@tonic-gate 	kstat_named_t	clreclaim;		/* number of cl reclaims */
1977c478bd9Sstevel@tonic-gate 	kstat_named_t	f_reclaim;		/* number of free reclaims */
1987c478bd9Sstevel@tonic-gate 	kstat_named_t	a_reclaim;		/* number of active reclaims */
1997c478bd9Sstevel@tonic-gate 	kstat_named_t	r_reclaim;		/* number of rnode reclaims */
2007c478bd9Sstevel@tonic-gate 	kstat_named_t	rpath;			/* bytes used to store rpaths */
2017c478bd9Sstevel@tonic-gate } clstat_debug = {
2027c478bd9Sstevel@tonic-gate 	{ "nrnode",	KSTAT_DATA_UINT64 },
2037c478bd9Sstevel@tonic-gate 	{ "access",	KSTAT_DATA_UINT64 },
2047c478bd9Sstevel@tonic-gate 	{ "dirent",	KSTAT_DATA_UINT64 },
2057c478bd9Sstevel@tonic-gate 	{ "dirents",	KSTAT_DATA_UINT64 },
2067c478bd9Sstevel@tonic-gate 	{ "reclaim",	KSTAT_DATA_UINT64 },
2077c478bd9Sstevel@tonic-gate 	{ "clreclaim",	KSTAT_DATA_UINT64 },
2087c478bd9Sstevel@tonic-gate 	{ "f_reclaim",	KSTAT_DATA_UINT64 },
2097c478bd9Sstevel@tonic-gate 	{ "a_reclaim",	KSTAT_DATA_UINT64 },
2107c478bd9Sstevel@tonic-gate 	{ "r_reclaim",	KSTAT_DATA_UINT64 },
2117c478bd9Sstevel@tonic-gate 	{ "r_path",	KSTAT_DATA_UINT64 },
2127c478bd9Sstevel@tonic-gate };
2137c478bd9Sstevel@tonic-gate #endif	/* DEBUG */
2147c478bd9Sstevel@tonic-gate 
2157c478bd9Sstevel@tonic-gate /*
2167c478bd9Sstevel@tonic-gate  * We keep a global list of per-zone client data, so we can clean up all zones
2177c478bd9Sstevel@tonic-gate  * if we get low on memory.
2187c478bd9Sstevel@tonic-gate  */
2197c478bd9Sstevel@tonic-gate static list_t nfs_clnt_list;
2207c478bd9Sstevel@tonic-gate static kmutex_t nfs_clnt_list_lock;
2217c478bd9Sstevel@tonic-gate static zone_key_t nfsclnt_zone_key;
2227c478bd9Sstevel@tonic-gate 
2237c478bd9Sstevel@tonic-gate static struct kmem_cache *chtab_cache;
2247c478bd9Sstevel@tonic-gate 
2257c478bd9Sstevel@tonic-gate /*
2267c478bd9Sstevel@tonic-gate  * Some servers do not properly update the attributes of the
2277c478bd9Sstevel@tonic-gate  * directory when changes are made.  To allow interoperability
2287c478bd9Sstevel@tonic-gate  * with these broken servers, the nfs_disable_rddir_cache
2297c478bd9Sstevel@tonic-gate  * parameter must be set in /etc/system
2307c478bd9Sstevel@tonic-gate  */
2317c478bd9Sstevel@tonic-gate int nfs_disable_rddir_cache = 0;
2327c478bd9Sstevel@tonic-gate 
2337c478bd9Sstevel@tonic-gate int		clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
2347c478bd9Sstevel@tonic-gate 		    struct chtab **);
2357c478bd9Sstevel@tonic-gate void		clfree(CLIENT *, struct chtab *);
2367c478bd9Sstevel@tonic-gate static int	acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
2377c478bd9Sstevel@tonic-gate 		    struct chtab **, struct nfs_clnt *);
2387c478bd9Sstevel@tonic-gate static int	nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
2397c478bd9Sstevel@tonic-gate 		    struct chtab **, struct nfs_clnt *);
2407c478bd9Sstevel@tonic-gate static void	clreclaim(void *);
2417c478bd9Sstevel@tonic-gate static int	nfs_feedback(int, int, mntinfo_t *);
2427c478bd9Sstevel@tonic-gate static int	rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
2437c478bd9Sstevel@tonic-gate 		    caddr_t, cred_t *, int *, enum clnt_stat *, int,
2447c478bd9Sstevel@tonic-gate 		    failinfo_t *);
2457c478bd9Sstevel@tonic-gate static int	aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
2467c478bd9Sstevel@tonic-gate 		    caddr_t, cred_t *, int *, int, failinfo_t *);
2477c478bd9Sstevel@tonic-gate static void	rinactive(rnode_t *, cred_t *);
2487c478bd9Sstevel@tonic-gate static int	rtablehash(nfs_fhandle *);
2497c478bd9Sstevel@tonic-gate static vnode_t	*make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
2507c478bd9Sstevel@tonic-gate 		    struct vnodeops *,
2517c478bd9Sstevel@tonic-gate 		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
2527c478bd9Sstevel@tonic-gate 			cred_t *),
2537c478bd9Sstevel@tonic-gate 		    int (*)(const void *, const void *), int *, cred_t *,
2547c478bd9Sstevel@tonic-gate 		    char *, char *);
2557c478bd9Sstevel@tonic-gate static void	rp_rmfree(rnode_t *);
2567c478bd9Sstevel@tonic-gate static void	rp_addhash(rnode_t *);
2577c478bd9Sstevel@tonic-gate static void	rp_rmhash_locked(rnode_t *);
2587c478bd9Sstevel@tonic-gate static rnode_t	*rfind(rhashq_t *, nfs_fhandle *, struct vfs *);
2597c478bd9Sstevel@tonic-gate static void	destroy_rnode(rnode_t *);
2607c478bd9Sstevel@tonic-gate static void	rddir_cache_free(rddir_cache *);
2617c478bd9Sstevel@tonic-gate static int	nfs_free_data_reclaim(rnode_t *);
2627c478bd9Sstevel@tonic-gate static int	nfs_active_data_reclaim(rnode_t *);
2637c478bd9Sstevel@tonic-gate static int	nfs_free_reclaim(void);
2647c478bd9Sstevel@tonic-gate static int	nfs_active_reclaim(void);
2657c478bd9Sstevel@tonic-gate static int	nfs_rnode_reclaim(void);
2667c478bd9Sstevel@tonic-gate static void	nfs_reclaim(void *);
2677c478bd9Sstevel@tonic-gate static int	failover_safe(failinfo_t *);
2687c478bd9Sstevel@tonic-gate static void	failover_newserver(mntinfo_t *mi);
2697c478bd9Sstevel@tonic-gate static void	failover_thread(mntinfo_t *mi);
2707c478bd9Sstevel@tonic-gate static int	failover_wait(mntinfo_t *);
2717c478bd9Sstevel@tonic-gate static int	failover_remap(failinfo_t *);
2727c478bd9Sstevel@tonic-gate static int	failover_lookup(char *, vnode_t *,
2737c478bd9Sstevel@tonic-gate 		    int (*)(vnode_t *, char *, vnode_t **,
2747c478bd9Sstevel@tonic-gate 			struct pathname *, int, vnode_t *, cred_t *, int),
2757c478bd9Sstevel@tonic-gate 		    int (*)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
2767c478bd9Sstevel@tonic-gate 		    vnode_t **);
2777c478bd9Sstevel@tonic-gate static void	nfs_free_r_path(rnode_t *);
2787c478bd9Sstevel@tonic-gate static void	nfs_set_vroot(vnode_t *);
2797c478bd9Sstevel@tonic-gate static char	*nfs_getsrvnames(mntinfo_t *, size_t *);
2807c478bd9Sstevel@tonic-gate 
2817c478bd9Sstevel@tonic-gate /*
2827c478bd9Sstevel@tonic-gate  * from rpcsec module (common/rpcsec)
2837c478bd9Sstevel@tonic-gate  */
2847c478bd9Sstevel@tonic-gate extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
2857c478bd9Sstevel@tonic-gate extern void sec_clnt_freeh(AUTH *);
2867c478bd9Sstevel@tonic-gate extern void sec_clnt_freeinfo(struct sec_data *);
2877c478bd9Sstevel@tonic-gate 
28845916cd2Sjpk /*
28945916cd2Sjpk  * used in mount policy
29045916cd2Sjpk  */
29145916cd2Sjpk extern ts_label_t *getflabel_cipso(vfs_t *);
29245916cd2Sjpk 
/*
 * EIO or EINTR are not recoverable errors.
 *
 * Evaluates to non-zero when error is neither EINTR nor EIO.  The argument
 * and the whole expansion are parenthesized so that the macro behaves like
 * a function call for any expression argument (e.g. a conditional or a
 * bitwise expression) and inside any surrounding expression.  Note that
 * the argument is still evaluated up to twice.
 */
#define	IS_RECOVERABLE_ERROR(error)	\
	(!(((error) == EINTR) || ((error) == EIO)))
2977c478bd9Sstevel@tonic-gate 
298e280ed37SDai Ngo #ifdef DEBUG
299e280ed37SDai Ngo #define	SRV_QFULL_MSG	"send queue to NFS%d server %s is full; still trying\n"
300e280ed37SDai Ngo #define	SRV_NOTRESP_MSG	"NFS%d server %s not responding still trying\n"
301e280ed37SDai Ngo #else
302e280ed37SDai Ngo #define	SRV_QFULL_MSG	"send queue to NFS server %s is full still trying\n"
303e280ed37SDai Ngo #define	SRV_NOTRESP_MSG	"NFS server %s not responding still trying\n"
304e280ed37SDai Ngo #endif
3057c478bd9Sstevel@tonic-gate /*
3067c478bd9Sstevel@tonic-gate  * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
3077c478bd9Sstevel@tonic-gate  */
3087c478bd9Sstevel@tonic-gate static int
clget_impl(clinfo_t * ci,servinfo_t * svp,cred_t * cr,CLIENT ** newcl,struct chtab ** chp,struct nfs_clnt * nfscl)3097c478bd9Sstevel@tonic-gate clget_impl(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
3107c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
3117c478bd9Sstevel@tonic-gate {
3127c478bd9Sstevel@tonic-gate 	struct chhead *ch, *newch;
3137c478bd9Sstevel@tonic-gate 	struct chhead **plistp;
3147c478bd9Sstevel@tonic-gate 	struct chtab *cp;
3157c478bd9Sstevel@tonic-gate 	int error;
3167c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
3177c478bd9Sstevel@tonic-gate 
3187c478bd9Sstevel@tonic-gate 	if (newcl == NULL || chp == NULL || ci == NULL)
3197c478bd9Sstevel@tonic-gate 		return (EINVAL);
3207c478bd9Sstevel@tonic-gate 
3217c478bd9Sstevel@tonic-gate 	*newcl = NULL;
3227c478bd9Sstevel@tonic-gate 	*chp = NULL;
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	/*
3257c478bd9Sstevel@tonic-gate 	 * Find an unused handle or create one
3267c478bd9Sstevel@tonic-gate 	 */
3277c478bd9Sstevel@tonic-gate 	newch = NULL;
3287c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.clgets.value.ui64++;
3297c478bd9Sstevel@tonic-gate top:
3307c478bd9Sstevel@tonic-gate 	/*
3317c478bd9Sstevel@tonic-gate 	 * Find the correct entry in the cache to check for free
3327c478bd9Sstevel@tonic-gate 	 * client handles.  The search is based on the RPC program
3337c478bd9Sstevel@tonic-gate 	 * number, program version number, dev_t for the transport
3347c478bd9Sstevel@tonic-gate 	 * device, and the protocol family.
3357c478bd9Sstevel@tonic-gate 	 */
3367c478bd9Sstevel@tonic-gate 	mutex_enter(&nfscl->nfscl_chtable_lock);
3377c478bd9Sstevel@tonic-gate 	plistp = &nfscl->nfscl_chtable;
3387c478bd9Sstevel@tonic-gate 	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
3397c478bd9Sstevel@tonic-gate 		if (ch->ch_prog == ci->cl_prog &&
3407c478bd9Sstevel@tonic-gate 		    ch->ch_vers == ci->cl_vers &&
3417c478bd9Sstevel@tonic-gate 		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
3427c478bd9Sstevel@tonic-gate 		    (strcmp(ch->ch_protofmly,
3437106075aSmarks 		    svp->sv_knconf->knc_protofmly) == 0))
3447c478bd9Sstevel@tonic-gate 			break;
3457c478bd9Sstevel@tonic-gate 		plistp = &ch->ch_next;
3467c478bd9Sstevel@tonic-gate 	}
3477c478bd9Sstevel@tonic-gate 
3487c478bd9Sstevel@tonic-gate 	/*
3497c478bd9Sstevel@tonic-gate 	 * If we didn't find a cache entry for this quadruple, then
3507c478bd9Sstevel@tonic-gate 	 * create one.  If we don't have one already preallocated,
3517c478bd9Sstevel@tonic-gate 	 * then drop the cache lock, create one, and then start over.
3527c478bd9Sstevel@tonic-gate 	 * If we did have a preallocated entry, then just add it to
3537c478bd9Sstevel@tonic-gate 	 * the front of the list.
3547c478bd9Sstevel@tonic-gate 	 */
3557c478bd9Sstevel@tonic-gate 	if (ch == NULL) {
3567c478bd9Sstevel@tonic-gate 		if (newch == NULL) {
3577c478bd9Sstevel@tonic-gate 			mutex_exit(&nfscl->nfscl_chtable_lock);
3587c478bd9Sstevel@tonic-gate 			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
3597c478bd9Sstevel@tonic-gate 			newch->ch_timesused = 0;
3607c478bd9Sstevel@tonic-gate 			newch->ch_prog = ci->cl_prog;
3617c478bd9Sstevel@tonic-gate 			newch->ch_vers = ci->cl_vers;
3627c478bd9Sstevel@tonic-gate 			newch->ch_dev = svp->sv_knconf->knc_rdev;
3637c478bd9Sstevel@tonic-gate 			newch->ch_protofmly = kmem_alloc(
3647c478bd9Sstevel@tonic-gate 			    strlen(svp->sv_knconf->knc_protofmly) + 1,
3657c478bd9Sstevel@tonic-gate 			    KM_SLEEP);
3667c478bd9Sstevel@tonic-gate 			(void) strcpy(newch->ch_protofmly,
3677c478bd9Sstevel@tonic-gate 			    svp->sv_knconf->knc_protofmly);
3687c478bd9Sstevel@tonic-gate 			newch->ch_list = NULL;
3697c478bd9Sstevel@tonic-gate 			goto top;
3707c478bd9Sstevel@tonic-gate 		}
3717c478bd9Sstevel@tonic-gate 		ch = newch;
3727c478bd9Sstevel@tonic-gate 		newch = NULL;
3737c478bd9Sstevel@tonic-gate 		ch->ch_next = nfscl->nfscl_chtable;
3747c478bd9Sstevel@tonic-gate 		nfscl->nfscl_chtable = ch;
3757c478bd9Sstevel@tonic-gate 	/*
3767c478bd9Sstevel@tonic-gate 	 * We found a cache entry, but if it isn't on the front of the
3777c478bd9Sstevel@tonic-gate 	 * list, then move it to the front of the list to try to take
3787c478bd9Sstevel@tonic-gate 	 * advantage of locality of operations.
3797c478bd9Sstevel@tonic-gate 	 */
3807c478bd9Sstevel@tonic-gate 	} else if (ch != nfscl->nfscl_chtable) {
3817c478bd9Sstevel@tonic-gate 		*plistp = ch->ch_next;
3827c478bd9Sstevel@tonic-gate 		ch->ch_next = nfscl->nfscl_chtable;
3837c478bd9Sstevel@tonic-gate 		nfscl->nfscl_chtable = ch;
3847c478bd9Sstevel@tonic-gate 	}
3857c478bd9Sstevel@tonic-gate 
3867c478bd9Sstevel@tonic-gate 	/*
3877c478bd9Sstevel@tonic-gate 	 * If there was a free client handle cached, then remove it
3887c478bd9Sstevel@tonic-gate 	 * from the list, init it, and use it.
3897c478bd9Sstevel@tonic-gate 	 */
3907c478bd9Sstevel@tonic-gate 	if (ch->ch_list != NULL) {
3917c478bd9Sstevel@tonic-gate 		cp = ch->ch_list;
3927c478bd9Sstevel@tonic-gate 		ch->ch_list = cp->ch_list;
3937c478bd9Sstevel@tonic-gate 		mutex_exit(&nfscl->nfscl_chtable_lock);
3947c478bd9Sstevel@tonic-gate 		if (newch != NULL) {
3957c478bd9Sstevel@tonic-gate 			kmem_free(newch->ch_protofmly,
3967c478bd9Sstevel@tonic-gate 			    strlen(newch->ch_protofmly) + 1);
3977c478bd9Sstevel@tonic-gate 			kmem_free(newch, sizeof (*newch));
3987c478bd9Sstevel@tonic-gate 		}
3997c478bd9Sstevel@tonic-gate 		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
4007c478bd9Sstevel@tonic-gate 		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
4017c478bd9Sstevel@tonic-gate 		error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
4027c478bd9Sstevel@tonic-gate 		    &cp->ch_client->cl_auth);
4037c478bd9Sstevel@tonic-gate 		if (error || cp->ch_client->cl_auth == NULL) {
4047c478bd9Sstevel@tonic-gate 			CLNT_DESTROY(cp->ch_client);
4057c478bd9Sstevel@tonic-gate 			kmem_cache_free(chtab_cache, cp);
4067c478bd9Sstevel@tonic-gate 			return ((error != 0) ? error : EINTR);
4077c478bd9Sstevel@tonic-gate 		}
4087c478bd9Sstevel@tonic-gate 		ch->ch_timesused++;
4097c478bd9Sstevel@tonic-gate 		*newcl = cp->ch_client;
4107c478bd9Sstevel@tonic-gate 		*chp = cp;
4117c478bd9Sstevel@tonic-gate 		return (0);
4127c478bd9Sstevel@tonic-gate 	}
4137c478bd9Sstevel@tonic-gate 
4147c478bd9Sstevel@tonic-gate 	/*
4157c478bd9Sstevel@tonic-gate 	 * There weren't any free client handles which fit, so allocate
4167c478bd9Sstevel@tonic-gate 	 * a new one and use that.
4177c478bd9Sstevel@tonic-gate 	 */
4187c478bd9Sstevel@tonic-gate #ifdef DEBUG
4191a5e258fSJosef 'Jeff' Sipek 	atomic_inc_64(&nfscl->nfscl_stat.clalloc.value.ui64);
4207c478bd9Sstevel@tonic-gate #endif
4217c478bd9Sstevel@tonic-gate 	mutex_exit(&nfscl->nfscl_chtable_lock);
4227c478bd9Sstevel@tonic-gate 
4237c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.cltoomany.value.ui64++;
4247c478bd9Sstevel@tonic-gate 	if (newch != NULL) {
4257c478bd9Sstevel@tonic-gate 		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
4267c478bd9Sstevel@tonic-gate 		kmem_free(newch, sizeof (*newch));
4277c478bd9Sstevel@tonic-gate 	}
4287c478bd9Sstevel@tonic-gate 
4297c478bd9Sstevel@tonic-gate 	cp = kmem_cache_alloc(chtab_cache, KM_SLEEP);
4307c478bd9Sstevel@tonic-gate 	cp->ch_head = ch;
4317c478bd9Sstevel@tonic-gate 
4327c478bd9Sstevel@tonic-gate 	sigintr(&smask, (int)ci->cl_flags & MI_INT);
4337c478bd9Sstevel@tonic-gate 	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
4347c478bd9Sstevel@tonic-gate 	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
4357c478bd9Sstevel@tonic-gate 	sigunintr(&smask);
4367c478bd9Sstevel@tonic-gate 
4377c478bd9Sstevel@tonic-gate 	if (error != 0) {
4387c478bd9Sstevel@tonic-gate 		kmem_cache_free(chtab_cache, cp);
4397c478bd9Sstevel@tonic-gate #ifdef DEBUG
4401a5e258fSJosef 'Jeff' Sipek 		atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
4417c478bd9Sstevel@tonic-gate #endif
4427c478bd9Sstevel@tonic-gate 		/*
4437c478bd9Sstevel@tonic-gate 		 * Warning is unnecessary if error is EINTR.
4447c478bd9Sstevel@tonic-gate 		 */
4457c478bd9Sstevel@tonic-gate 		if (error != EINTR) {
4467c478bd9Sstevel@tonic-gate 			nfs_cmn_err(error, CE_WARN,
4477c478bd9Sstevel@tonic-gate 			    "clget: couldn't create handle: %m\n");
4487c478bd9Sstevel@tonic-gate 		}
4497c478bd9Sstevel@tonic-gate 		return (error);
4507c478bd9Sstevel@tonic-gate 	}
4517c478bd9Sstevel@tonic-gate 	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
4527c478bd9Sstevel@tonic-gate 	auth_destroy(cp->ch_client->cl_auth);
4537c478bd9Sstevel@tonic-gate 	error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
4547c478bd9Sstevel@tonic-gate 	    &cp->ch_client->cl_auth);
4557c478bd9Sstevel@tonic-gate 	if (error || cp->ch_client->cl_auth == NULL) {
4567c478bd9Sstevel@tonic-gate 		CLNT_DESTROY(cp->ch_client);
4577c478bd9Sstevel@tonic-gate 		kmem_cache_free(chtab_cache, cp);
4587c478bd9Sstevel@tonic-gate #ifdef DEBUG
4591a5e258fSJosef 'Jeff' Sipek 		atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
4607c478bd9Sstevel@tonic-gate #endif
4617c478bd9Sstevel@tonic-gate 		return ((error != 0) ? error : EINTR);
4627c478bd9Sstevel@tonic-gate 	}
4637c478bd9Sstevel@tonic-gate 	ch->ch_timesused++;
4647c478bd9Sstevel@tonic-gate 	*newcl = cp->ch_client;
4657c478bd9Sstevel@tonic-gate 	ASSERT(cp->ch_client->cl_nosignal == FALSE);
4667c478bd9Sstevel@tonic-gate 	*chp = cp;
4677c478bd9Sstevel@tonic-gate 	return (0);
4687c478bd9Sstevel@tonic-gate }
4697c478bd9Sstevel@tonic-gate 
4707c478bd9Sstevel@tonic-gate int
clget(clinfo_t * ci,servinfo_t * svp,cred_t * cr,CLIENT ** newcl,struct chtab ** chp)4717c478bd9Sstevel@tonic-gate clget(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
4727c478bd9Sstevel@tonic-gate     struct chtab **chp)
4737c478bd9Sstevel@tonic-gate {
4747c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
4757c478bd9Sstevel@tonic-gate 
476108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
4777c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
4787c478bd9Sstevel@tonic-gate 
4797c478bd9Sstevel@tonic-gate 	return (clget_impl(ci, svp, cr, newcl, chp, nfscl));
4807c478bd9Sstevel@tonic-gate }
4817c478bd9Sstevel@tonic-gate 
4827c478bd9Sstevel@tonic-gate static int
acl_clget(mntinfo_t * mi,servinfo_t * svp,cred_t * cr,CLIENT ** newcl,struct chtab ** chp,struct nfs_clnt * nfscl)4837c478bd9Sstevel@tonic-gate acl_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
4847c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
4857c478bd9Sstevel@tonic-gate {
4867c478bd9Sstevel@tonic-gate 	clinfo_t ci;
4877c478bd9Sstevel@tonic-gate 	int error;
4887c478bd9Sstevel@tonic-gate 
4897c478bd9Sstevel@tonic-gate 	/*
4907c478bd9Sstevel@tonic-gate 	 * Set read buffer size to rsize
4917c478bd9Sstevel@tonic-gate 	 * and add room for RPC headers.
4927c478bd9Sstevel@tonic-gate 	 */
4937c478bd9Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
4947c478bd9Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
4957c478bd9Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
4967c478bd9Sstevel@tonic-gate 
4977c478bd9Sstevel@tonic-gate 	/*
4987c478bd9Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
4997c478bd9Sstevel@tonic-gate 	 * meaning: do not retransmit.
5007c478bd9Sstevel@tonic-gate 	 */
5017c478bd9Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
5027c478bd9Sstevel@tonic-gate 		ci.cl_retrans = 0;
5037c478bd9Sstevel@tonic-gate 	else
5047c478bd9Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
5057c478bd9Sstevel@tonic-gate 
5067c478bd9Sstevel@tonic-gate 	ci.cl_prog = NFS_ACL_PROGRAM;
5077c478bd9Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
5087c478bd9Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
5097c478bd9Sstevel@tonic-gate 
5107c478bd9Sstevel@tonic-gate 	/*
5117c478bd9Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
5127c478bd9Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
5137c478bd9Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
5147c478bd9Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
5157c478bd9Sstevel@tonic-gate 	 */
5167c478bd9Sstevel@tonic-gate 	do {
5177c478bd9Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
5187c478bd9Sstevel@tonic-gate 
5197c478bd9Sstevel@tonic-gate 		if (error == 0)
5207c478bd9Sstevel@tonic-gate 			break;
5217c478bd9Sstevel@tonic-gate 
5227c478bd9Sstevel@tonic-gate 		/*
5237c478bd9Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
5247c478bd9Sstevel@tonic-gate 		 */
5257c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
5267c478bd9Sstevel@tonic-gate 			error = EIO;
5277c478bd9Sstevel@tonic-gate 			break;
5287c478bd9Sstevel@tonic-gate 		}
5297c478bd9Sstevel@tonic-gate 
5307c478bd9Sstevel@tonic-gate 		/* do not retry for softmount */
5317c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
5327c478bd9Sstevel@tonic-gate 			break;
5337c478bd9Sstevel@tonic-gate 
5347c478bd9Sstevel@tonic-gate 		/* let the caller deal with the failover case */
5357c478bd9Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
5367c478bd9Sstevel@tonic-gate 			break;
5377c478bd9Sstevel@tonic-gate 
5387c478bd9Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
5397c478bd9Sstevel@tonic-gate 
5407c478bd9Sstevel@tonic-gate 	return (error);
5417c478bd9Sstevel@tonic-gate }
5427c478bd9Sstevel@tonic-gate 
5437c478bd9Sstevel@tonic-gate static int
nfs_clget(mntinfo_t * mi,servinfo_t * svp,cred_t * cr,CLIENT ** newcl,struct chtab ** chp,struct nfs_clnt * nfscl)5447c478bd9Sstevel@tonic-gate nfs_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
5457c478bd9Sstevel@tonic-gate     struct chtab **chp, struct nfs_clnt *nfscl)
5467c478bd9Sstevel@tonic-gate {
5477c478bd9Sstevel@tonic-gate 	clinfo_t ci;
5487c478bd9Sstevel@tonic-gate 	int error;
5497c478bd9Sstevel@tonic-gate 
5507c478bd9Sstevel@tonic-gate 	/*
5517c478bd9Sstevel@tonic-gate 	 * Set read buffer size to rsize
5527c478bd9Sstevel@tonic-gate 	 * and add room for RPC headers.
5537c478bd9Sstevel@tonic-gate 	 */
5547c478bd9Sstevel@tonic-gate 	ci.cl_readsize = mi->mi_tsize;
5557c478bd9Sstevel@tonic-gate 	if (ci.cl_readsize != 0)
5567c478bd9Sstevel@tonic-gate 		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);
5577c478bd9Sstevel@tonic-gate 
5587c478bd9Sstevel@tonic-gate 	/*
5597c478bd9Sstevel@tonic-gate 	 * If soft mount and server is down just try once.
5607c478bd9Sstevel@tonic-gate 	 * meaning: do not retransmit.
5617c478bd9Sstevel@tonic-gate 	 */
5627c478bd9Sstevel@tonic-gate 	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
5637c478bd9Sstevel@tonic-gate 		ci.cl_retrans = 0;
5647c478bd9Sstevel@tonic-gate 	else
5657c478bd9Sstevel@tonic-gate 		ci.cl_retrans = mi->mi_retrans;
5667c478bd9Sstevel@tonic-gate 
5677c478bd9Sstevel@tonic-gate 	ci.cl_prog = mi->mi_prog;
5687c478bd9Sstevel@tonic-gate 	ci.cl_vers = mi->mi_vers;
5697c478bd9Sstevel@tonic-gate 	ci.cl_flags = mi->mi_flags;
5707c478bd9Sstevel@tonic-gate 
5717c478bd9Sstevel@tonic-gate 	/*
5727c478bd9Sstevel@tonic-gate 	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
5737c478bd9Sstevel@tonic-gate 	 * security flavor, the client tries to establish a security context
5747c478bd9Sstevel@tonic-gate 	 * by contacting the server. If the connection is timed out or reset,
5757c478bd9Sstevel@tonic-gate 	 * e.g. server reboot, we will try again.
5767c478bd9Sstevel@tonic-gate 	 */
5777c478bd9Sstevel@tonic-gate 	do {
5787c478bd9Sstevel@tonic-gate 		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);
5797c478bd9Sstevel@tonic-gate 
5807c478bd9Sstevel@tonic-gate 		if (error == 0)
5817c478bd9Sstevel@tonic-gate 			break;
5827c478bd9Sstevel@tonic-gate 
5837c478bd9Sstevel@tonic-gate 		/*
5847c478bd9Sstevel@tonic-gate 		 * For forced unmount or zone shutdown, bail out, no retry.
5857c478bd9Sstevel@tonic-gate 		 */
5867c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
5877c478bd9Sstevel@tonic-gate 			error = EIO;
5887c478bd9Sstevel@tonic-gate 			break;
5897c478bd9Sstevel@tonic-gate 		}
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate 		/* do not retry for softmount */
5927c478bd9Sstevel@tonic-gate 		if (!(mi->mi_flags & MI_HARD))
5937c478bd9Sstevel@tonic-gate 			break;
5947c478bd9Sstevel@tonic-gate 
5957c478bd9Sstevel@tonic-gate 		/* let the caller deal with the failover case */
5967c478bd9Sstevel@tonic-gate 		if (FAILOVER_MOUNT(mi))
5977c478bd9Sstevel@tonic-gate 			break;
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 	} while (error == ETIMEDOUT || error == ECONNRESET);
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate 	return (error);
6027c478bd9Sstevel@tonic-gate }
6037c478bd9Sstevel@tonic-gate 
6047c478bd9Sstevel@tonic-gate static void
clfree_impl(CLIENT * cl,struct chtab * cp,struct nfs_clnt * nfscl)6057c478bd9Sstevel@tonic-gate clfree_impl(CLIENT *cl, struct chtab *cp, struct nfs_clnt *nfscl)
6067c478bd9Sstevel@tonic-gate {
6077c478bd9Sstevel@tonic-gate 	if (cl->cl_auth != NULL) {
6087c478bd9Sstevel@tonic-gate 		sec_clnt_freeh(cl->cl_auth);
6097c478bd9Sstevel@tonic-gate 		cl->cl_auth = NULL;
6107c478bd9Sstevel@tonic-gate 	}
6117c478bd9Sstevel@tonic-gate 
6127c478bd9Sstevel@tonic-gate 	/*
6137c478bd9Sstevel@tonic-gate 	 * Timestamp this cache entry so that we know when it was last
6147c478bd9Sstevel@tonic-gate 	 * used.
6157c478bd9Sstevel@tonic-gate 	 */
6167c478bd9Sstevel@tonic-gate 	cp->ch_freed = gethrestime_sec();
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate 	/*
6197c478bd9Sstevel@tonic-gate 	 * Add the free client handle to the front of the list.
6207c478bd9Sstevel@tonic-gate 	 * This way, the list will be sorted in youngest to oldest
6217c478bd9Sstevel@tonic-gate 	 * order.
6227c478bd9Sstevel@tonic-gate 	 */
6237c478bd9Sstevel@tonic-gate 	mutex_enter(&nfscl->nfscl_chtable_lock);
6247c478bd9Sstevel@tonic-gate 	cp->ch_list = cp->ch_head->ch_list;
6257c478bd9Sstevel@tonic-gate 	cp->ch_head->ch_list = cp;
6267c478bd9Sstevel@tonic-gate 	mutex_exit(&nfscl->nfscl_chtable_lock);
6277c478bd9Sstevel@tonic-gate }
6287c478bd9Sstevel@tonic-gate 
6297c478bd9Sstevel@tonic-gate void
clfree(CLIENT * cl,struct chtab * cp)6307c478bd9Sstevel@tonic-gate clfree(CLIENT *cl, struct chtab *cp)
6317c478bd9Sstevel@tonic-gate {
6327c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
6337c478bd9Sstevel@tonic-gate 
634108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
6357c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
6367c478bd9Sstevel@tonic-gate 
6377c478bd9Sstevel@tonic-gate 	clfree_impl(cl, cp, nfscl);
6387c478bd9Sstevel@tonic-gate }
6397c478bd9Sstevel@tonic-gate 
6407c478bd9Sstevel@tonic-gate #define	CL_HOLDTIME	60	/* time to hold client handles */
6417c478bd9Sstevel@tonic-gate 
6427c478bd9Sstevel@tonic-gate static void
clreclaim_zone(struct nfs_clnt * nfscl,uint_t cl_holdtime)6437c478bd9Sstevel@tonic-gate clreclaim_zone(struct nfs_clnt *nfscl, uint_t cl_holdtime)
6447c478bd9Sstevel@tonic-gate {
6457c478bd9Sstevel@tonic-gate 	struct chhead *ch;
6467c478bd9Sstevel@tonic-gate 	struct chtab *cp;	/* list of objects that can be reclaimed */
6477c478bd9Sstevel@tonic-gate 	struct chtab *cpe;
6487c478bd9Sstevel@tonic-gate 	struct chtab *cpl;
6497c478bd9Sstevel@tonic-gate 	struct chtab **cpp;
6507c478bd9Sstevel@tonic-gate #ifdef DEBUG
6517c478bd9Sstevel@tonic-gate 	int n = 0;
6527c478bd9Sstevel@tonic-gate #endif
6537c478bd9Sstevel@tonic-gate 
6547c478bd9Sstevel@tonic-gate 	/*
6557c478bd9Sstevel@tonic-gate 	 * Need to reclaim some memory, so step through the cache
6567c478bd9Sstevel@tonic-gate 	 * looking through the lists for entries which can be freed.
6577c478bd9Sstevel@tonic-gate 	 */
6587c478bd9Sstevel@tonic-gate 	cp = NULL;
6597c478bd9Sstevel@tonic-gate 
6607c478bd9Sstevel@tonic-gate 	mutex_enter(&nfscl->nfscl_chtable_lock);
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 	/*
6637c478bd9Sstevel@tonic-gate 	 * Here we step through each non-NULL quadruple and start to
6647c478bd9Sstevel@tonic-gate 	 * construct the reclaim list pointed to by cp.  Note that
6657c478bd9Sstevel@tonic-gate 	 * cp will contain all eligible chtab entries.  When this traversal
6667c478bd9Sstevel@tonic-gate 	 * completes, chtab entries from the last quadruple will be at the
6677c478bd9Sstevel@tonic-gate 	 * front of cp and entries from previously inspected quadruples have
6687c478bd9Sstevel@tonic-gate 	 * been appended to the rear of cp.
6697c478bd9Sstevel@tonic-gate 	 */
6707c478bd9Sstevel@tonic-gate 	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
6717c478bd9Sstevel@tonic-gate 		if (ch->ch_list == NULL)
6727c478bd9Sstevel@tonic-gate 			continue;
6737c478bd9Sstevel@tonic-gate 		/*
6747c478bd9Sstevel@tonic-gate 		 * Search each list for entries older then
6757c478bd9Sstevel@tonic-gate 		 * cl_holdtime seconds.  The lists are maintained
6767c478bd9Sstevel@tonic-gate 		 * in youngest to oldest order so that when the
6777c478bd9Sstevel@tonic-gate 		 * first entry is found which is old enough, then
6787c478bd9Sstevel@tonic-gate 		 * all of the rest of the entries on the list will
6797c478bd9Sstevel@tonic-gate 		 * be old enough as well.
6807c478bd9Sstevel@tonic-gate 		 */
6817c478bd9Sstevel@tonic-gate 		cpl = ch->ch_list;
6827c478bd9Sstevel@tonic-gate 		cpp = &ch->ch_list;
6837c478bd9Sstevel@tonic-gate 		while (cpl != NULL &&
6847106075aSmarks 		    cpl->ch_freed + cl_holdtime > gethrestime_sec()) {
6857c478bd9Sstevel@tonic-gate 			cpp = &cpl->ch_list;
6867c478bd9Sstevel@tonic-gate 			cpl = cpl->ch_list;
6877c478bd9Sstevel@tonic-gate 		}
6887c478bd9Sstevel@tonic-gate 		if (cpl != NULL) {
6897c478bd9Sstevel@tonic-gate 			*cpp = NULL;
6907c478bd9Sstevel@tonic-gate 			if (cp != NULL) {
6917c478bd9Sstevel@tonic-gate 				cpe = cpl;
6927c478bd9Sstevel@tonic-gate 				while (cpe->ch_list != NULL)
6937c478bd9Sstevel@tonic-gate 					cpe = cpe->ch_list;
6947c478bd9Sstevel@tonic-gate 				cpe->ch_list = cp;
6957c478bd9Sstevel@tonic-gate 			}
6967c478bd9Sstevel@tonic-gate 			cp = cpl;
6977c478bd9Sstevel@tonic-gate 		}
6987c478bd9Sstevel@tonic-gate 	}
6997c478bd9Sstevel@tonic-gate 
7007c478bd9Sstevel@tonic-gate 	mutex_exit(&nfscl->nfscl_chtable_lock);
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 	/*
7037c478bd9Sstevel@tonic-gate 	 * If cp is empty, then there is nothing to reclaim here.
7047c478bd9Sstevel@tonic-gate 	 */
7057c478bd9Sstevel@tonic-gate 	if (cp == NULL)
7067c478bd9Sstevel@tonic-gate 		return;
7077c478bd9Sstevel@tonic-gate 
7087c478bd9Sstevel@tonic-gate 	/*
7097c478bd9Sstevel@tonic-gate 	 * Step through the list of entries to free, destroying each client
7107c478bd9Sstevel@tonic-gate 	 * handle and kmem_free'ing the memory for each entry.
7117c478bd9Sstevel@tonic-gate 	 */
7127c478bd9Sstevel@tonic-gate 	while (cp != NULL) {
7137c478bd9Sstevel@tonic-gate #ifdef DEBUG
7147c478bd9Sstevel@tonic-gate 		n++;
7157c478bd9Sstevel@tonic-gate #endif
7167c478bd9Sstevel@tonic-gate 		CLNT_DESTROY(cp->ch_client);
7177c478bd9Sstevel@tonic-gate 		cpl = cp->ch_list;
7187c478bd9Sstevel@tonic-gate 		kmem_cache_free(chtab_cache, cp);
7197c478bd9Sstevel@tonic-gate 		cp = cpl;
7207c478bd9Sstevel@tonic-gate 	}
7217c478bd9Sstevel@tonic-gate 
7227c478bd9Sstevel@tonic-gate #ifdef DEBUG
7237c478bd9Sstevel@tonic-gate 	/*
7247c478bd9Sstevel@tonic-gate 	 * Update clalloc so that nfsstat shows the current number
7257c478bd9Sstevel@tonic-gate 	 * of allocated client handles.
7267c478bd9Sstevel@tonic-gate 	 */
7277c478bd9Sstevel@tonic-gate 	atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n);
7287c478bd9Sstevel@tonic-gate #endif
7297c478bd9Sstevel@tonic-gate }
7307c478bd9Sstevel@tonic-gate 
7317c478bd9Sstevel@tonic-gate /* ARGSUSED */
7327c478bd9Sstevel@tonic-gate static void
clreclaim(void * all)7337c478bd9Sstevel@tonic-gate clreclaim(void *all)
7347c478bd9Sstevel@tonic-gate {
7357c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
7367c478bd9Sstevel@tonic-gate 
7377c478bd9Sstevel@tonic-gate #ifdef DEBUG
7387c478bd9Sstevel@tonic-gate 	clstat_debug.clreclaim.value.ui64++;
7397c478bd9Sstevel@tonic-gate #endif
7407c478bd9Sstevel@tonic-gate 	/*
7417c478bd9Sstevel@tonic-gate 	 * The system is low on memory; go through and try to reclaim some from
7427c478bd9Sstevel@tonic-gate 	 * every zone on the system.
7437c478bd9Sstevel@tonic-gate 	 */
7447c478bd9Sstevel@tonic-gate 	mutex_enter(&nfs_clnt_list_lock);
7457c478bd9Sstevel@tonic-gate 	nfscl = list_head(&nfs_clnt_list);
7467c478bd9Sstevel@tonic-gate 	for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl))
7477c478bd9Sstevel@tonic-gate 		clreclaim_zone(nfscl, CL_HOLDTIME);
7487c478bd9Sstevel@tonic-gate 	mutex_exit(&nfs_clnt_list_lock);
7497c478bd9Sstevel@tonic-gate }
7507c478bd9Sstevel@tonic-gate 
/*
 * Smallest time-out allowed for each call type.  The values are kept
 * in eighths of a second to avoid multiplies.
 */
static unsigned int minimum_timeo[] = { 6, 7, 10 };

/*
 * Retransmission back-off: the time-out is doubled on each step and
 * is capped at MAXTIMO, which is 20 seconds in units of hz.
 */
#define	MAXTIMO	(20*hz)
#define	backoff(tim)	(((tim) < MAXTIMO) ? dobackoff(tim) : (tim))
#define	dobackoff(tim)	((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1))

/* Limits consulted when the transfer sizes are adjusted. */
#define	MIN_NFS_TSIZE 512	/* smallest "chunk" of NFS I/O */
#define	REDUCE_NFS_TIME (hz/2)	/* rtxcur should stay below this */
#define	INCREASE_NFS_TIME (hz/3*8) /* srtt should stay below this (scaled*8) */
7697c478bd9Sstevel@tonic-gate 
7707c478bd9Sstevel@tonic-gate /*
7717c478bd9Sstevel@tonic-gate  * Function called when rfscall notices that we have been
7727c478bd9Sstevel@tonic-gate  * re-transmitting, or when we get a response without retransmissions.
7737c478bd9Sstevel@tonic-gate  * Return 1 if the transfer size was adjusted down - 0 if no change.
7747c478bd9Sstevel@tonic-gate  */
7757c478bd9Sstevel@tonic-gate static int
nfs_feedback(int flag,int which,mntinfo_t * mi)7767c478bd9Sstevel@tonic-gate nfs_feedback(int flag, int which, mntinfo_t *mi)
7777c478bd9Sstevel@tonic-gate {
7787c478bd9Sstevel@tonic-gate 	int kind;
7797c478bd9Sstevel@tonic-gate 	int r = 0;
7807c478bd9Sstevel@tonic-gate 
7817c478bd9Sstevel@tonic-gate 	mutex_enter(&mi->mi_lock);
7827c478bd9Sstevel@tonic-gate 	if (flag == FEEDBACK_REXMIT1) {
7837c478bd9Sstevel@tonic-gate 		if (mi->mi_timers[NFS_CALLTYPES].rt_rtxcur != 0 &&
7847c478bd9Sstevel@tonic-gate 		    mi->mi_timers[NFS_CALLTYPES].rt_rtxcur < REDUCE_NFS_TIME)
7857c478bd9Sstevel@tonic-gate 			goto done;
7867c478bd9Sstevel@tonic-gate 		if (mi->mi_curread > MIN_NFS_TSIZE) {
7877c478bd9Sstevel@tonic-gate 			mi->mi_curread /= 2;
7887c478bd9Sstevel@tonic-gate 			if (mi->mi_curread < MIN_NFS_TSIZE)
7897c478bd9Sstevel@tonic-gate 				mi->mi_curread = MIN_NFS_TSIZE;
7907c478bd9Sstevel@tonic-gate 			r = 1;
7917c478bd9Sstevel@tonic-gate 		}
7927c478bd9Sstevel@tonic-gate 
7937c478bd9Sstevel@tonic-gate 		if (mi->mi_curwrite > MIN_NFS_TSIZE) {
7947c478bd9Sstevel@tonic-gate 			mi->mi_curwrite /= 2;
7957c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite < MIN_NFS_TSIZE)
7967c478bd9Sstevel@tonic-gate 				mi->mi_curwrite = MIN_NFS_TSIZE;
7977c478bd9Sstevel@tonic-gate 			r = 1;
7987c478bd9Sstevel@tonic-gate 		}
7997c478bd9Sstevel@tonic-gate 	} else if (flag == FEEDBACK_OK) {
8007c478bd9Sstevel@tonic-gate 		kind = mi->mi_timer_type[which];
8017c478bd9Sstevel@tonic-gate 		if (kind == 0 ||
8027c478bd9Sstevel@tonic-gate 		    mi->mi_timers[kind].rt_srtt >= INCREASE_NFS_TIME)
8037c478bd9Sstevel@tonic-gate 			goto done;
8047c478bd9Sstevel@tonic-gate 		if (kind == 1) {
8057c478bd9Sstevel@tonic-gate 			if (mi->mi_curread >= mi->mi_tsize)
8067c478bd9Sstevel@tonic-gate 				goto done;
8077c478bd9Sstevel@tonic-gate 			mi->mi_curread +=  MIN_NFS_TSIZE;
8087c478bd9Sstevel@tonic-gate 			if (mi->mi_curread > mi->mi_tsize/2)
8097c478bd9Sstevel@tonic-gate 				mi->mi_curread = mi->mi_tsize;
8107c478bd9Sstevel@tonic-gate 		} else if (kind == 2) {
8117c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite >= mi->mi_stsize)
8127c478bd9Sstevel@tonic-gate 				goto done;
8137c478bd9Sstevel@tonic-gate 			mi->mi_curwrite += MIN_NFS_TSIZE;
8147c478bd9Sstevel@tonic-gate 			if (mi->mi_curwrite > mi->mi_stsize/2)
8157c478bd9Sstevel@tonic-gate 				mi->mi_curwrite = mi->mi_stsize;
8167c478bd9Sstevel@tonic-gate 		}
8177c478bd9Sstevel@tonic-gate 	}
8187c478bd9Sstevel@tonic-gate done:
8197c478bd9Sstevel@tonic-gate 	mutex_exit(&mi->mi_lock);
8207c478bd9Sstevel@tonic-gate 	return (r);
8217c478bd9Sstevel@tonic-gate }
8227c478bd9Sstevel@tonic-gate 
8237c478bd9Sstevel@tonic-gate #ifdef DEBUG
8247c478bd9Sstevel@tonic-gate static int rfs2call_hits = 0;
8257c478bd9Sstevel@tonic-gate static int rfs2call_misses = 0;
8267c478bd9Sstevel@tonic-gate #endif
8277c478bd9Sstevel@tonic-gate 
8287c478bd9Sstevel@tonic-gate int
rfs2call(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * cr,int * douprintf,enum nfsstat * statusp,int flags,failinfo_t * fi)8297c478bd9Sstevel@tonic-gate rfs2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
8307c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
8317c478bd9Sstevel@tonic-gate     enum nfsstat *statusp, int flags, failinfo_t *fi)
8327c478bd9Sstevel@tonic-gate {
8337c478bd9Sstevel@tonic-gate 	int rpcerror;
8347c478bd9Sstevel@tonic-gate 	enum clnt_stat rpc_status;
8357c478bd9Sstevel@tonic-gate 
8367c478bd9Sstevel@tonic-gate 	ASSERT(statusp != NULL);
8377c478bd9Sstevel@tonic-gate 
8387c478bd9Sstevel@tonic-gate 	rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
8397c478bd9Sstevel@tonic-gate 	    cr, douprintf, &rpc_status, flags, fi);
8407c478bd9Sstevel@tonic-gate 	if (!rpcerror) {
8417c478bd9Sstevel@tonic-gate 		/*
8427c478bd9Sstevel@tonic-gate 		 * See crnetadjust() for comments.
8437c478bd9Sstevel@tonic-gate 		 */
8447c478bd9Sstevel@tonic-gate 		if (*statusp == NFSERR_ACCES &&
8457c478bd9Sstevel@tonic-gate 		    (cr = crnetadjust(cr)) != NULL) {
8467c478bd9Sstevel@tonic-gate #ifdef DEBUG
8477c478bd9Sstevel@tonic-gate 			rfs2call_hits++;
8487c478bd9Sstevel@tonic-gate #endif
8497c478bd9Sstevel@tonic-gate 			rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres,
8507c478bd9Sstevel@tonic-gate 			    resp, cr, douprintf, NULL, flags, fi);
8517c478bd9Sstevel@tonic-gate 			crfree(cr);
8527c478bd9Sstevel@tonic-gate #ifdef DEBUG
8537c478bd9Sstevel@tonic-gate 			if (*statusp == NFSERR_ACCES)
8547c478bd9Sstevel@tonic-gate 				rfs2call_misses++;
8557c478bd9Sstevel@tonic-gate #endif
8567c478bd9Sstevel@tonic-gate 		}
8577c478bd9Sstevel@tonic-gate 	} else if (rpc_status == RPC_PROCUNAVAIL) {
8587c478bd9Sstevel@tonic-gate 		*statusp = NFSERR_OPNOTSUPP;
8597c478bd9Sstevel@tonic-gate 		rpcerror = 0;
8607c478bd9Sstevel@tonic-gate 	}
8617c478bd9Sstevel@tonic-gate 
8627c478bd9Sstevel@tonic-gate 	return (rpcerror);
8637c478bd9Sstevel@tonic-gate }
8647c478bd9Sstevel@tonic-gate 
8657c478bd9Sstevel@tonic-gate #define	NFS3_JUKEBOX_DELAY	10 * hz
8667c478bd9Sstevel@tonic-gate 
8677c478bd9Sstevel@tonic-gate static clock_t nfs3_jukebox_delay = 0;
8687c478bd9Sstevel@tonic-gate 
8697c478bd9Sstevel@tonic-gate #ifdef DEBUG
8707c478bd9Sstevel@tonic-gate static int rfs3call_hits = 0;
8717c478bd9Sstevel@tonic-gate static int rfs3call_misses = 0;
8727c478bd9Sstevel@tonic-gate #endif
8737c478bd9Sstevel@tonic-gate 
8747c478bd9Sstevel@tonic-gate int
rfs3call(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * cr,int * douprintf,nfsstat3 * statusp,int flags,failinfo_t * fi)8757c478bd9Sstevel@tonic-gate rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
8767c478bd9Sstevel@tonic-gate     xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
8777c478bd9Sstevel@tonic-gate     nfsstat3 *statusp, int flags, failinfo_t *fi)
8787c478bd9Sstevel@tonic-gate {
8797c478bd9Sstevel@tonic-gate 	int rpcerror;
8807c478bd9Sstevel@tonic-gate 	int user_informed;
8817c478bd9Sstevel@tonic-gate 
8827c478bd9Sstevel@tonic-gate 	user_informed = 0;
8837c478bd9Sstevel@tonic-gate 	do {
8847c478bd9Sstevel@tonic-gate 		rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp,
8857c478bd9Sstevel@tonic-gate 		    cr, douprintf, NULL, flags, fi);
8867c478bd9Sstevel@tonic-gate 		if (!rpcerror) {
8877c478bd9Sstevel@tonic-gate 			cred_t *crr;
8887c478bd9Sstevel@tonic-gate 			if (*statusp == NFS3ERR_JUKEBOX) {
8897c478bd9Sstevel@tonic-gate 				if (ttoproc(curthread) == &p0) {
8907c478bd9Sstevel@tonic-gate 					rpcerror = EAGAIN;
8917c478bd9Sstevel@tonic-gate 					break;
8927c478bd9Sstevel@tonic-gate 				}
8937c478bd9Sstevel@tonic-gate 				if (!user_informed) {
8947c478bd9Sstevel@tonic-gate 					user_informed = 1;
8957c478bd9Sstevel@tonic-gate 					uprintf(
8967c478bd9Sstevel@tonic-gate 		"file temporarily unavailable on the server, retrying...\n");
8977c478bd9Sstevel@tonic-gate 				}
8987c478bd9Sstevel@tonic-gate 				delay(nfs3_jukebox_delay);
8997c478bd9Sstevel@tonic-gate 			}
9007c478bd9Sstevel@tonic-gate 			/*
9017c478bd9Sstevel@tonic-gate 			 * See crnetadjust() for comments.
9027c478bd9Sstevel@tonic-gate 			 */
9037c478bd9Sstevel@tonic-gate 			else if (*statusp == NFS3ERR_ACCES &&
9047c478bd9Sstevel@tonic-gate 			    (crr = crnetadjust(cr)) != NULL) {
9057c478bd9Sstevel@tonic-gate #ifdef DEBUG
9067c478bd9Sstevel@tonic-gate 				rfs3call_hits++;
9077c478bd9Sstevel@tonic-gate #endif
9087c478bd9Sstevel@tonic-gate 				rpcerror = rfscall(mi, which, xdrargs, argsp,
9097c478bd9Sstevel@tonic-gate 				    xdrres, resp, crr, douprintf,
9107c478bd9Sstevel@tonic-gate 				    NULL, flags, fi);
9117c478bd9Sstevel@tonic-gate 
9127c478bd9Sstevel@tonic-gate 				crfree(crr);
9137c478bd9Sstevel@tonic-gate #ifdef DEBUG
9147c478bd9Sstevel@tonic-gate 				if (*statusp == NFS3ERR_ACCES)
9157c478bd9Sstevel@tonic-gate 					rfs3call_misses++;
9167c478bd9Sstevel@tonic-gate #endif
9177c478bd9Sstevel@tonic-gate 			}
9187c478bd9Sstevel@tonic-gate 		}
9197c478bd9Sstevel@tonic-gate 	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);
9207c478bd9Sstevel@tonic-gate 
9217c478bd9Sstevel@tonic-gate 	return (rpcerror);
9227c478bd9Sstevel@tonic-gate }
9237c478bd9Sstevel@tonic-gate 
/*
 * VALID_FH(fi) is true when the server recorded in the rnode of fi->vp
 * is the mount's current server.
 *
 * INC_READERS(mi) and DEC_READERS(mi) maintain mi_readers; the last
 * reader to leave wakes every thread waiting on mi_failover_cv.  The
 * callers in this file use them with mi_lock held.
 *
 * Macro parameters are parenthesized so that any pointer expression
 * may be passed, not just a plain identifier.
 */
#define	VALID_FH(fi)	\
	(VTOR((fi)->vp)->r_server == VTOMI((fi)->vp)->mi_curr_serv)
#define	INC_READERS(mi)		{ \
	(mi)->mi_readers++; \
}
#define	DEC_READERS(mi)		{ \
	(mi)->mi_readers--; \
	if ((mi)->mi_readers == 0) \
		cv_broadcast(&(mi)->mi_failover_cv); \
}
9337c478bd9Sstevel@tonic-gate 
9347c478bd9Sstevel@tonic-gate static int
rfscall(mntinfo_t * mi,rpcproc_t which,xdrproc_t xdrargs,caddr_t argsp,xdrproc_t xdrres,caddr_t resp,cred_t * icr,int * douprintf,enum clnt_stat * rpc_status,int flags,failinfo_t * fi)9357c478bd9Sstevel@tonic-gate rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
93645916cd2Sjpk     xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
9377c478bd9Sstevel@tonic-gate     enum clnt_stat *rpc_status, int flags, failinfo_t *fi)
9387c478bd9Sstevel@tonic-gate {
9397c478bd9Sstevel@tonic-gate 	CLIENT *client;
9407c478bd9Sstevel@tonic-gate 	struct chtab *ch;
94145916cd2Sjpk 	cred_t *cr = icr;
9427c478bd9Sstevel@tonic-gate 	enum clnt_stat status;
943e280ed37SDai Ngo 	struct rpc_err rpcerr, rpcerr_tmp;
9447c478bd9Sstevel@tonic-gate 	struct timeval wait;
9457c478bd9Sstevel@tonic-gate 	int timeo;		/* in units of hz */
9467c478bd9Sstevel@tonic-gate 	int my_rsize, my_wsize;
9477c478bd9Sstevel@tonic-gate 	bool_t tryagain;
94845916cd2Sjpk 	bool_t cred_cloned = FALSE;
9497c478bd9Sstevel@tonic-gate 	k_sigset_t smask;
9507c478bd9Sstevel@tonic-gate 	servinfo_t *svp;
9517c478bd9Sstevel@tonic-gate 	struct nfs_clnt *nfscl;
9527c478bd9Sstevel@tonic-gate 	zoneid_t zoneid = getzoneid();
953e280ed37SDai Ngo 	char *msg;
9547c478bd9Sstevel@tonic-gate #ifdef DEBUG
9557c478bd9Sstevel@tonic-gate 	char *bufp;
9567c478bd9Sstevel@tonic-gate #endif
9577c478bd9Sstevel@tonic-gate 
9587c478bd9Sstevel@tonic-gate 
9597c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_NFS, TR_RFSCALL_START,
9607106075aSmarks 	    "rfscall_start:which %d mi %p", which, mi);
9617c478bd9Sstevel@tonic-gate 
962108322fbScarlsonj 	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
9637c478bd9Sstevel@tonic-gate 	ASSERT(nfscl != NULL);
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate 	nfscl->nfscl_stat.calls.value.ui64++;
9667c478bd9Sstevel@tonic-gate 	mi->mi_reqs[which].value.ui64++;
9677c478bd9Sstevel@tonic-gate 
9687c478bd9Sstevel@tonic-gate 	rpcerr.re_status = RPC_SUCCESS;
9697c478bd9Sstevel@tonic-gate 
9707c478bd9Sstevel@tonic-gate 	/*
9717c478bd9Sstevel@tonic-gate 	 * In case of forced unmount or zone shutdown, return EIO.
9727c478bd9Sstevel@tonic-gate 	 */
9737c478bd9Sstevel@tonic-gate 
9747c478bd9Sstevel@tonic-gate 	if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
9757c478bd9Sstevel@tonic-gate 		rpcerr.re_status = RPC_FAILED;
9767c478bd9Sstevel@tonic-gate 		rpcerr.re_errno = EIO;
9777c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
9787c478bd9Sstevel@tonic-gate 	}
9797c478bd9Sstevel@tonic-gate 
9807c478bd9Sstevel@tonic-gate 	/*
9817c478bd9Sstevel@tonic-gate 	 * Remember the transfer sizes in case
9827c478bd9Sstevel@tonic-gate 	 * nfs_feedback changes them underneath us.
9837c478bd9Sstevel@tonic-gate 	 */
9847c478bd9Sstevel@tonic-gate 	my_rsize = mi->mi_curread;
9857c478bd9Sstevel@tonic-gate 	my_wsize = mi->mi_curwrite;
9867c478bd9Sstevel@tonic-gate 
9877c478bd9Sstevel@tonic-gate 	/*
9887c478bd9Sstevel@tonic-gate 	 * NFS client failover support
9897c478bd9Sstevel@tonic-gate 	 *
9907c478bd9Sstevel@tonic-gate 	 * If this rnode is not in sync with the current server (VALID_FH),
9917c478bd9Sstevel@tonic-gate 	 * we'd like to do a remap to get in sync.  We can be interrupted
9927c478bd9Sstevel@tonic-gate 	 * in failover_remap(), and if so we'll bail.  Otherwise, we'll
9937c478bd9Sstevel@tonic-gate 	 * use the best info we have to try the RPC.  Part of that is
9947c478bd9Sstevel@tonic-gate 	 * unconditionally updating the filehandle copy kept for V3.
9957c478bd9Sstevel@tonic-gate 	 *
	 * Locking: INC_READERS/DEC_READERS is a poor man's interruptible
	 * rw_enter(); we're trying to keep the current server from being
	 * changed on us until we're done with the remapping and have a
	 * matching client handle.  We don't want to send a filehandle
	 * to the wrong host.
10017c478bd9Sstevel@tonic-gate 	 */
10027c478bd9Sstevel@tonic-gate failoverretry:
10037c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
10047c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10057c478bd9Sstevel@tonic-gate 		if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10067c478bd9Sstevel@tonic-gate 			if (failover_wait(mi)) {
10077c478bd9Sstevel@tonic-gate 				mutex_exit(&mi->mi_lock);
10087c478bd9Sstevel@tonic-gate 				return (EINTR);
10097c478bd9Sstevel@tonic-gate 			}
10107c478bd9Sstevel@tonic-gate 		}
10117c478bd9Sstevel@tonic-gate 		INC_READERS(mi);
10127c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10137c478bd9Sstevel@tonic-gate 		if (fi) {
10147c478bd9Sstevel@tonic-gate 			if (!VALID_FH(fi) &&
10157c478bd9Sstevel@tonic-gate 			    !(flags & RFSCALL_SOFT) && failover_safe(fi)) {
10167c478bd9Sstevel@tonic-gate 				int remaperr;
10177c478bd9Sstevel@tonic-gate 
10187c478bd9Sstevel@tonic-gate 				svp = mi->mi_curr_serv;
10197c478bd9Sstevel@tonic-gate 				remaperr = failover_remap(fi);
10207c478bd9Sstevel@tonic-gate 				if (remaperr != 0) {
10217c478bd9Sstevel@tonic-gate #ifdef DEBUG
10227c478bd9Sstevel@tonic-gate 					if (remaperr != EINTR)
10237c478bd9Sstevel@tonic-gate 						nfs_cmn_err(remaperr, CE_WARN,
10247c478bd9Sstevel@tonic-gate 					    "rfscall couldn't failover: %m");
10257c478bd9Sstevel@tonic-gate #endif
10267c478bd9Sstevel@tonic-gate 					mutex_enter(&mi->mi_lock);
10277c478bd9Sstevel@tonic-gate 					DEC_READERS(mi);
10287c478bd9Sstevel@tonic-gate 					mutex_exit(&mi->mi_lock);
10297c478bd9Sstevel@tonic-gate 					/*
10307c478bd9Sstevel@tonic-gate 					 * If failover_remap returns ETIMEDOUT
10317c478bd9Sstevel@tonic-gate 					 * and the filesystem is hard mounted
10327c478bd9Sstevel@tonic-gate 					 * we have to retry the call with a new
10337c478bd9Sstevel@tonic-gate 					 * server.
10347c478bd9Sstevel@tonic-gate 					 */
10357c478bd9Sstevel@tonic-gate 					if ((mi->mi_flags & MI_HARD) &&
10367c478bd9Sstevel@tonic-gate 					    IS_RECOVERABLE_ERROR(remaperr)) {
10377c478bd9Sstevel@tonic-gate 						if (svp == mi->mi_curr_serv)
10387c478bd9Sstevel@tonic-gate 							failover_newserver(mi);
10397c478bd9Sstevel@tonic-gate 						rpcerr.re_status = RPC_SUCCESS;
10407c478bd9Sstevel@tonic-gate 						goto failoverretry;
10417c478bd9Sstevel@tonic-gate 					}
10427c478bd9Sstevel@tonic-gate 					rpcerr.re_errno = remaperr;
10437c478bd9Sstevel@tonic-gate 					return (remaperr);
10447c478bd9Sstevel@tonic-gate 				}
10457c478bd9Sstevel@tonic-gate 			}
10467c478bd9Sstevel@tonic-gate 			if (fi->fhp && fi->copyproc)
10477c478bd9Sstevel@tonic-gate 				(*fi->copyproc)(fi->fhp, fi->vp);
10487c478bd9Sstevel@tonic-gate 		}
10497c478bd9Sstevel@tonic-gate 	}
10507c478bd9Sstevel@tonic-gate 
105145916cd2Sjpk 	/* For TSOL, use a new cred which has net_mac_aware flag */
105245916cd2Sjpk 	if (!cred_cloned && is_system_labeled()) {
105345916cd2Sjpk 		cred_cloned = TRUE;
105445916cd2Sjpk 		cr = crdup(icr);
105545916cd2Sjpk 		(void) setpflags(NET_MAC_AWARE, 1, cr);
105645916cd2Sjpk 	}
105745916cd2Sjpk 
10587c478bd9Sstevel@tonic-gate 	/*
10597c478bd9Sstevel@tonic-gate 	 * clget() calls clnt_tli_kinit() which clears the xid, so we
10607c478bd9Sstevel@tonic-gate 	 * are guaranteed to reprocess the retry as a new request.
10617c478bd9Sstevel@tonic-gate 	 */
10627c478bd9Sstevel@tonic-gate 	svp = mi->mi_curr_serv;
10637c478bd9Sstevel@tonic-gate 	rpcerr.re_errno = nfs_clget(mi, svp, cr, &client, &ch, nfscl);
10647c478bd9Sstevel@tonic-gate 
10657c478bd9Sstevel@tonic-gate 	if (FAILOVER_MOUNT(mi)) {
10667c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10677c478bd9Sstevel@tonic-gate 		DEC_READERS(mi);
10687c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10697c478bd9Sstevel@tonic-gate 
10707c478bd9Sstevel@tonic-gate 		if ((rpcerr.re_errno == ETIMEDOUT ||
10717106075aSmarks 		    rpcerr.re_errno == ECONNRESET) &&
10727106075aSmarks 		    failover_safe(fi)) {
10737c478bd9Sstevel@tonic-gate 			if (svp == mi->mi_curr_serv)
10747c478bd9Sstevel@tonic-gate 				failover_newserver(mi);
10757c478bd9Sstevel@tonic-gate 			goto failoverretry;
10767c478bd9Sstevel@tonic-gate 		}
10777c478bd9Sstevel@tonic-gate 	}
10787c478bd9Sstevel@tonic-gate 	if (rpcerr.re_errno != 0)
10797c478bd9Sstevel@tonic-gate 		return (rpcerr.re_errno);
10807c478bd9Sstevel@tonic-gate 
10817c478bd9Sstevel@tonic-gate 	if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD ||
10827c478bd9Sstevel@tonic-gate 	    svp->sv_knconf->knc_semantics == NC_TPI_COTS) {
10837c478bd9Sstevel@tonic-gate 		timeo = (mi->mi_timeo * hz) / 10;
10847c478bd9Sstevel@tonic-gate 	} else {
10857c478bd9Sstevel@tonic-gate 		mutex_enter(&mi->mi_lock);
10867c478bd9Sstevel@tonic-gate 		timeo = CLNT_SETTIMERS(client,
10877c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[mi->mi_timer_type[which]]),
10887c478bd9Sstevel@tonic-gate 		    &(mi->mi_timers[NFS_CALLTYPES]),
10897c478bd9Sstevel@tonic-gate 		    (minimum_timeo[mi->mi_call_type[which]]*hz)>>3,
10907c478bd9Sstevel@tonic-gate 		    (void (*)())NULL, (caddr_t)mi, 0);
10917c478bd9Sstevel@tonic-gate 		mutex_exit(&mi->mi_lock);
10927c478bd9Sstevel@tonic-gate 	}
10937c478bd9Sstevel@tonic-gate 
10947c478bd9Sstevel@tonic-gate 	/*
10957c478bd9Sstevel@tonic-gate 	 * If hard mounted fs, retry call forever unless hard error occurs.
10967c478bd9Sstevel@tonic-gate 	 */
10977c478bd9Sstevel@tonic-gate 	do {
10987c478bd9Sstevel@tonic-gate 		tryagain = FALSE;
10997c478bd9Sstevel@tonic-gate 
11007c478bd9Sstevel@tonic-gate 		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
11017c478bd9Sstevel@tonic-gate 			status = RPC_FAILED;
11027c478bd9Sstevel@tonic-gate 			rpcerr.re_status = RPC_FAILED;
11037c478bd9Sstevel@tonic-gate 			rpcerr.re_errno = EIO;
11047c478bd9Sstevel@tonic-gate 			break;
11057c478bd9Sstevel@tonic-gate 		}
11067c478bd9Sstevel@tonic-gate 
11077c478bd9Sstevel@tonic-gate 		TICK_TO_TIMEVAL(timeo, &wait);
11087c478bd9