17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 545916cd2Sjpk * Common Development and Distribution License (the "License"). 645916cd2Sjpk * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22f8bbc571SPavel Filipensky * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 26f5654033SAlexander Eremin /* 27f5654033SAlexander Eremin * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 28ade42b55SSebastien Roy * Copyright (c) 2016, 2017 by Delphix. All rights reserved. 
29f5654033SAlexander Eremin */ 30f5654033SAlexander Eremin 317c478bd9Sstevel@tonic-gate #include <sys/param.h> 327c478bd9Sstevel@tonic-gate #include <sys/types.h> 337c478bd9Sstevel@tonic-gate #include <sys/systm.h> 3467dbe2beSCasper H.S. Dik #include <sys/cred.h> 357c478bd9Sstevel@tonic-gate #include <sys/proc.h> 367c478bd9Sstevel@tonic-gate #include <sys/user.h> 377c478bd9Sstevel@tonic-gate #include <sys/time.h> 387c478bd9Sstevel@tonic-gate #include <sys/buf.h> 397c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 407c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 417c478bd9Sstevel@tonic-gate #include <sys/socket.h> 427c478bd9Sstevel@tonic-gate #include <sys/uio.h> 437c478bd9Sstevel@tonic-gate #include <sys/tiuser.h> 447c478bd9Sstevel@tonic-gate #include <sys/swap.h> 457c478bd9Sstevel@tonic-gate #include <sys/errno.h> 467c478bd9Sstevel@tonic-gate #include <sys/debug.h> 477c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 487c478bd9Sstevel@tonic-gate #include <sys/kstat.h> 497c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 507c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 517c478bd9Sstevel@tonic-gate #include <sys/session.h> 527c478bd9Sstevel@tonic-gate #include <sys/dnlc.h> 537c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 547c478bd9Sstevel@tonic-gate #include <sys/acl.h> 557c478bd9Sstevel@tonic-gate #include <sys/ddi.h> 567c478bd9Sstevel@tonic-gate #include <sys/pathname.h> 577c478bd9Sstevel@tonic-gate #include <sys/flock.h> 587c478bd9Sstevel@tonic-gate #include <sys/dirent.h> 597c478bd9Sstevel@tonic-gate #include <sys/flock.h> 607c478bd9Sstevel@tonic-gate #include <sys/callb.h> 617c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 627c478bd9Sstevel@tonic-gate #include <sys/list.h> 6345916cd2Sjpk #include <sys/tsol/tnet.h> 6445916cd2Sjpk #include <sys/priv.h> 6503986916Sjarrett #include <sys/sdt.h> 6693aeed83Smarks #include <sys/attr.h> 6745916cd2Sjpk 6845916cd2Sjpk #include <inet/ip6.h> 697c478bd9Sstevel@tonic-gate 707c478bd9Sstevel@tonic-gate #include 
<rpc/types.h> 717c478bd9Sstevel@tonic-gate #include <rpc/xdr.h> 727c478bd9Sstevel@tonic-gate #include <rpc/auth.h> 737c478bd9Sstevel@tonic-gate #include <rpc/clnt.h> 747c478bd9Sstevel@tonic-gate 757c478bd9Sstevel@tonic-gate #include <nfs/nfs.h> 767c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h> 777c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h> 787c478bd9Sstevel@tonic-gate #include <nfs/rnode.h> 797c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h> 807c478bd9Sstevel@tonic-gate 8103986916Sjarrett #include <sys/tsol/label.h> 8203986916Sjarrett 837c478bd9Sstevel@tonic-gate /* 847c478bd9Sstevel@tonic-gate * The hash queues for the access to active and cached rnodes 857c478bd9Sstevel@tonic-gate * are organized as doubly linked lists. A reader/writer lock 867c478bd9Sstevel@tonic-gate * for each hash bucket is used to control access and to synchronize 877c478bd9Sstevel@tonic-gate * lookups, additions, and deletions from the hash queue. 887c478bd9Sstevel@tonic-gate * 897c478bd9Sstevel@tonic-gate * The rnode freelist is organized as a doubly linked list with 907c478bd9Sstevel@tonic-gate * a head pointer. Additions and deletions are synchronized via 917c478bd9Sstevel@tonic-gate * a single mutex. 927c478bd9Sstevel@tonic-gate * 937c478bd9Sstevel@tonic-gate * In order to add an rnode to the free list, it must be hashed into 947c478bd9Sstevel@tonic-gate * a hash queue and the exclusive lock to the hash queue be held. 957c478bd9Sstevel@tonic-gate * If an rnode is not hashed into a hash queue, then it is destroyed 967c478bd9Sstevel@tonic-gate * because it represents no valuable information that can be reused 977c478bd9Sstevel@tonic-gate * about the file. The exclusive lock to the hash queue must be 987c478bd9Sstevel@tonic-gate * held in order to prevent a lookup in the hash queue from finding 997c478bd9Sstevel@tonic-gate * the rnode and using it and assuming that the rnode is not on the 1007c478bd9Sstevel@tonic-gate * freelist. 
The lookup in the hash queue will have the hash queue 1017c478bd9Sstevel@tonic-gate * locked, either exclusive or shared. 1027c478bd9Sstevel@tonic-gate * 1037c478bd9Sstevel@tonic-gate * The vnode reference count for each rnode is not allowed to drop 1047c478bd9Sstevel@tonic-gate * below 1. This prevents external entities, such as the VM 1057c478bd9Sstevel@tonic-gate * subsystem, from acquiring references to vnodes already on the 1067c478bd9Sstevel@tonic-gate * freelist and then trying to place them back on the freelist 1077c478bd9Sstevel@tonic-gate * when their reference is released. This means that the when an 1087c478bd9Sstevel@tonic-gate * rnode is looked up in the hash queues, then either the rnode 109da6c28aaSamw * is removed from the freelist and that reference is transferred to 1107c478bd9Sstevel@tonic-gate * the new reference or the vnode reference count must be incremented 1117c478bd9Sstevel@tonic-gate * accordingly. The mutex for the freelist must be held in order to 1127c478bd9Sstevel@tonic-gate * accurately test to see if the rnode is on the freelist or not. 1137c478bd9Sstevel@tonic-gate * The hash queue lock might be held shared and it is possible that 1147c478bd9Sstevel@tonic-gate * two different threads may race to remove the rnode from the 1157c478bd9Sstevel@tonic-gate * freelist. This race can be resolved by holding the mutex for the 1167c478bd9Sstevel@tonic-gate * freelist. Please note that the mutex for the freelist does not 1177c478bd9Sstevel@tonic-gate * need to held if the rnode is not on the freelist. It can not be 1187c478bd9Sstevel@tonic-gate * placed on the freelist due to the requirement that the thread 1197c478bd9Sstevel@tonic-gate * putting the rnode on the freelist must hold the exclusive lock 1207c478bd9Sstevel@tonic-gate * to the hash queue and the thread doing the lookup in the hash 1217c478bd9Sstevel@tonic-gate * queue is holding either a shared or exclusive lock to the hash 1227c478bd9Sstevel@tonic-gate * queue. 
 *
 * The lock ordering is:
 *
 *	hash bucket lock -> vnode lock
 *	hash bucket lock -> freelist lock
 */
static rhashq_t *rtable;		/* hash buckets for active/cached rnodes */

static kmutex_t rpfreelist_lock;	/* protects the rnode freelist */
static rnode_t *rpfreelist = NULL;	/* head of the doubly linked freelist */
static long rnew = 0;
long nrnode = 0;

/* size/mask used to index rtable; presumably sized at init — confirm there */
static int rtablesize;
static int rtablemask;

static int hashlen = 4;			/* NOTE(review): looks like a target hash-chain length — confirm */

static struct kmem_cache *rnode_cache;

/*
 * Mutex to protect the following variables:
 *	nfs_major
 *	nfs_minor
 */
kmutex_t nfs_minor_lock;
int nfs_major;
int nfs_minor;

/* Do we allow preepoch (negative) time values otw?
 */
bool_t nfs_allow_preepoch_time = FALSE;	/* default: do not allow preepoch */

/*
 * Access cache
 */
static acache_hash_t *acache;
static long nacache;	/* used strictly to size the number of hash queues */

static int acachesize;
static int acachemask;
static struct kmem_cache *acache_cache;

/*
 * Client side utilities
 */

/*
 * client side statistics (kstat template; copied per zone — see
 * nfs_clnt_list below)
 */
static const struct clstat clstat_tmpl = {
	{ "calls",	KSTAT_DATA_UINT64 },
	{ "badcalls",	KSTAT_DATA_UINT64 },
	{ "clgets",	KSTAT_DATA_UINT64 },
	{ "cltoomany",	KSTAT_DATA_UINT64 },
#ifdef DEBUG
	{ "clalloc",	KSTAT_DATA_UINT64 },
	{ "noresponse",	KSTAT_DATA_UINT64 },
	{ "failover",	KSTAT_DATA_UINT64 },
	{ "remap",	KSTAT_DATA_UINT64 },
#endif
};

/*
 * The following are statistics that describe the behavior of the system
 * as a whole and don't correspond to any one particular zone.
 */
#ifdef DEBUG
static struct clstat_debug {
	kstat_named_t	nrnode;		/* number of allocated rnodes */
	kstat_named_t	access;		/* size of access cache */
	kstat_named_t	dirent;		/* size of readdir cache */
	kstat_named_t	dirents;	/* size of readdir buf cache */
	kstat_named_t	reclaim;	/* number of reclaims */
	kstat_named_t	clreclaim;	/* number of cl reclaims */
	kstat_named_t	f_reclaim;	/* number of free reclaims */
	kstat_named_t	a_reclaim;	/* number of active reclaims */
	kstat_named_t	r_reclaim;	/* number of rnode reclaims */
	kstat_named_t	rpath;		/* bytes used to store rpaths */
} clstat_debug = {
	{ "nrnode",	KSTAT_DATA_UINT64 },
	{ "access",	KSTAT_DATA_UINT64 },
	{ "dirent",	KSTAT_DATA_UINT64 },
	{ "dirents",	KSTAT_DATA_UINT64 },
	{ "reclaim",	KSTAT_DATA_UINT64 },
	{ "clreclaim",	KSTAT_DATA_UINT64 },
	{ "f_reclaim",	KSTAT_DATA_UINT64 },
	{ "a_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_reclaim",	KSTAT_DATA_UINT64 },
	{ "r_path",	KSTAT_DATA_UINT64 },
};
#endif /* DEBUG */

/*
 * We keep a global list of per-zone client data, so we can clean up all zones
 * if we get low on memory.
 */
static list_t nfs_clnt_list;
static kmutex_t nfs_clnt_list_lock;
static zone_key_t nfsclnt_zone_key;

static struct kmem_cache *chtab_cache;	/* cache of free client-handle entries */

/*
 * Some servers do not properly update the attributes of the
 * directory when changes are made.  To allow interoperability
 * with these broken servers, the nfs_disable_rddir_cache
 * parameter must be set in /etc/system
 */
int nfs_disable_rddir_cache = 0;

/*
 * Forward declarations of file-local helpers (and the two exported
 * client-handle routines, clget/clfree).
 */
int		clget(clinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **);
void		clfree(CLIENT *, struct chtab *);
static int	acl_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static int	nfs_clget(mntinfo_t *, servinfo_t *, cred_t *, CLIENT **,
		    struct chtab **, struct nfs_clnt *);
static void	clreclaim(void *);
static int	nfs_feedback(int, int, mntinfo_t *);
static int	rfscall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, enum clnt_stat *, int,
		    failinfo_t *);
static int	aclcall(mntinfo_t *, rpcproc_t, xdrproc_t, caddr_t, xdrproc_t,
		    caddr_t, cred_t *, int *, int, failinfo_t *);
static void	rinactive(rnode_t *, cred_t *);
static int	rtablehash(nfs_fhandle *);
static vnode_t	*make_rnode(nfs_fhandle *, rhashq_t *, struct vfs *,
		    struct vnodeops *,
		    int (*)(vnode_t *, page_t *, u_offset_t *, size_t *, int,
			cred_t *),
		    int (*)(const void *, const void *), int *, cred_t *,
		    char *, char *);
static void	rp_rmfree(rnode_t *);
static void	rp_addhash(rnode_t *);
static void	rp_rmhash_locked(rnode_t *);
static rnode_t	*rfind(rhashq_t *, nfs_fhandle *, struct vfs *);
static void	destroy_rnode(rnode_t *);
static void	rddir_cache_free(rddir_cache *);
static int	nfs_free_data_reclaim(rnode_t *);
static int	nfs_active_data_reclaim(rnode_t *);
static int	nfs_free_reclaim(void);
static int	nfs_active_reclaim(void);
static int	nfs_rnode_reclaim(void);
static void	nfs_reclaim(void *);
static int	failover_safe(failinfo_t *);
static void	failover_newserver(mntinfo_t *mi);
static void	failover_thread(mntinfo_t *mi);
static int	failover_wait(mntinfo_t *);
static int	failover_remap(failinfo_t *);
static int	failover_lookup(char *, vnode_t *,
		    int (*)(vnode_t *, char *, vnode_t **,
			struct pathname *, int, vnode_t *, cred_t *, int),
		    int (*)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
		    vnode_t **);
static void	nfs_free_r_path(rnode_t *);
static void	nfs_set_vroot(vnode_t *);
static char	*nfs_getsrvnames(mntinfo_t *, size_t *);

/*
 * from rpcsec module (common/rpcsec)
 */
extern int sec_clnt_geth(CLIENT *, struct sec_data *, cred_t *, AUTH **);
extern void sec_clnt_freeh(AUTH *);
extern void sec_clnt_freeinfo(struct sec_data *);

/*
 * used in mount policy
 */
extern ts_label_t *getflabel_cipso(vfs_t *);

/*
 * EIO or EINTR are not recoverable errors.
 */
#define	IS_RECOVERABLE_ERROR(error)	!((error == EINTR) || (error == EIO))

#ifdef DEBUG
#define	SRV_QFULL_MSG	"send queue to NFS%d server %s is full; still trying\n"
#define	SRV_NOTRESP_MSG	"NFS%d server %s not responding still trying\n"
#else
#define	SRV_QFULL_MSG	"send queue to NFS server %s is full still trying\n"
#define	SRV_NOTRESP_MSG	"NFS server %s not responding still trying\n"
#endif
/*
 * Common handle get program for NFS, NFS ACL, and NFS AUTH client.
 */
static int
clget_impl(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp, struct nfs_clnt *nfscl)
{
	struct chhead *ch, *newch;
	struct chhead **plistp;
	struct chtab *cp;
	int error;
	k_sigset_t smask;

	if (newcl == NULL || chp == NULL || ci == NULL)
		return (EINVAL);

	*newcl = NULL;
	*chp = NULL;

	/*
	 * Find an unused handle or create one
	 */
	newch = NULL;
	nfscl->nfscl_stat.clgets.value.ui64++;
top:
	/*
	 * Find the correct entry in the cache to check for free
	 * client handles.  The search is based on the RPC program
	 * number, program version number, dev_t for the transport
	 * device, and the protocol family.
	 */
	mutex_enter(&nfscl->nfscl_chtable_lock);
	plistp = &nfscl->nfscl_chtable;
	for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) {
		if (ch->ch_prog == ci->cl_prog &&
		    ch->ch_vers == ci->cl_vers &&
		    ch->ch_dev == svp->sv_knconf->knc_rdev &&
		    (strcmp(ch->ch_protofmly,
		    svp->sv_knconf->knc_protofmly) == 0))
			break;
		plistp = &ch->ch_next;	/* remember predecessor link */
	}

	/*
	 * If we didn't find a cache entry for this quadruple, then
	 * create one.  If we don't have one already preallocated,
	 * then drop the cache lock, create one, and then start over.
	 * If we did have a preallocated entry, then just add it to
	 * the front of the list.
	 */
	if (ch == NULL) {
		if (newch == NULL) {
			/*
			 * Must drop the lock before the KM_SLEEP
			 * allocations; retry the lookup from "top"
			 * since the table may have changed meanwhile.
			 */
			mutex_exit(&nfscl->nfscl_chtable_lock);
			newch = kmem_alloc(sizeof (*newch), KM_SLEEP);
			newch->ch_timesused = 0;
			newch->ch_prog = ci->cl_prog;
			newch->ch_vers = ci->cl_vers;
			newch->ch_dev = svp->sv_knconf->knc_rdev;
			newch->ch_protofmly = kmem_alloc(
			    strlen(svp->sv_knconf->knc_protofmly) + 1,
			    KM_SLEEP);
			(void) strcpy(newch->ch_protofmly,
			    svp->sv_knconf->knc_protofmly);
			newch->ch_list = NULL;
			goto top;
		}
		ch = newch;
		newch = NULL;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	/*
	 * We found a cache entry, but if it isn't on the front of the
	 * list, then move it to the front of the list to try to take
	 * advantage of locality of operations.
	 */
	} else if (ch != nfscl->nfscl_chtable) {
		*plistp = ch->ch_next;
		ch->ch_next = nfscl->nfscl_chtable;
		nfscl->nfscl_chtable = ch;
	}

	/*
	 * If there was a free client handle cached, then remove it
	 * from the list, init it, and use it.
	 */
	if (ch->ch_list != NULL) {
		cp = ch->ch_list;
		ch->ch_list = cp->ch_list;
		mutex_exit(&nfscl->nfscl_chtable_lock);
		/* the preallocated entry wasn't needed after all */
		if (newch != NULL) {
			kmem_free(newch->ch_protofmly,
			    strlen(newch->ch_protofmly) + 1);
			kmem_free(newch, sizeof (*newch));
		}
		(void) clnt_tli_kinit(cp->ch_client, svp->sv_knconf,
		    &svp->sv_addr, ci->cl_readsize, ci->cl_retrans, cr);
		error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
		    &cp->ch_client->cl_auth);
		if (error || cp->ch_client->cl_auth == NULL) {
			CLNT_DESTROY(cp->ch_client);
			kmem_cache_free(chtab_cache, cp);
			return ((error != 0) ? error : EINTR);
		}
		ch->ch_timesused++;
		*newcl = cp->ch_client;
		*chp = cp;
		return (0);
	}

	/*
	 * There weren't any free client handles which fit, so allocate
	 * a new one and use that.
	 */
#ifdef DEBUG
	atomic_inc_64(&nfscl->nfscl_stat.clalloc.value.ui64);
#endif
	mutex_exit(&nfscl->nfscl_chtable_lock);

	nfscl->nfscl_stat.cltoomany.value.ui64++;
	if (newch != NULL) {
		kmem_free(newch->ch_protofmly, strlen(newch->ch_protofmly) + 1);
		kmem_free(newch, sizeof (*newch));
	}

	cp = kmem_cache_alloc(chtab_cache, KM_SLEEP);
	cp->ch_head = ch;

	/* allow signals around the create when MI_INT is set on the mount */
	sigintr(&smask, (int)ci->cl_flags & MI_INT);
	error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, ci->cl_prog,
	    ci->cl_vers, ci->cl_readsize, ci->cl_retrans, cr, &cp->ch_client);
	sigunintr(&smask);

	if (error != 0) {
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
#endif
		/*
		 * Warning is unnecessary if error is EINTR.
		 */
		if (error != EINTR) {
			nfs_cmn_err(error, CE_WARN,
			    "clget: couldn't create handle: %m\n");
		}
		return (error);
	}
	(void) CLNT_CONTROL(cp->ch_client, CLSET_PROGRESS, NULL);
	/* replace the auth handle clnt_tli_kcreate installed with ours */
	auth_destroy(cp->ch_client->cl_auth);
	error = sec_clnt_geth(cp->ch_client, svp->sv_secdata, cr,
	    &cp->ch_client->cl_auth);
	if (error || cp->ch_client->cl_auth == NULL) {
		CLNT_DESTROY(cp->ch_client);
		kmem_cache_free(chtab_cache, cp);
#ifdef DEBUG
		atomic_dec_64(&nfscl->nfscl_stat.clalloc.value.ui64);
#endif
		return ((error != 0) ? error : EINTR);
	}
	ch->ch_timesused++;
	*newcl = cp->ch_client;
	ASSERT(cp->ch_client->cl_nosignal == FALSE);
	*chp = cp;
	return (0);
}

/*
 * Exported wrapper: look up the calling zone's NFS client state and
 * hand off to clget_impl().
 */
int
clget(clinfo_t *ci, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp)
{
	struct nfs_clnt *nfscl;

	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
	ASSERT(nfscl != NULL);

	return (clget_impl(ci, svp, cr, newcl, chp, nfscl));
}

/*
 * Get a client handle for the NFS ACL program (NFS_ACL_PROGRAM),
 * retrying on connection timeout/reset for hard mounts.
 */
static int
acl_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp, struct nfs_clnt *nfscl)
{
	clinfo_t ci;
	int error;

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	ci.cl_readsize = mi->mi_tsize;
	if (ci.cl_readsize != 0)
		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);

	/*
	 * If soft mount and server is down just try once.
	 * meaning: do not retransmit.
	 */
	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
		ci.cl_retrans = 0;
	else
		ci.cl_retrans = mi->mi_retrans;

	ci.cl_prog = NFS_ACL_PROGRAM;
	ci.cl_vers = mi->mi_vers;
	ci.cl_flags = mi->mi_flags;

	/*
	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
	 * security flavor, the client tries to establish a security context
	 * by contacting the server. If the connection is timed out or reset,
	 * e.g. server reboot, we will try again.
	 */
	do {
		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);

		if (error == 0)
			break;

		/*
		 * For forced unmount or zone shutdown, bail out, no retry.
		 */
		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
			error = EIO;
			break;
		}

		/* do not retry for softmount */
		if (!(mi->mi_flags & MI_HARD))
			break;

		/* let the caller deal with the failover case */
		if (FAILOVER_MOUNT(mi))
			break;

	} while (error == ETIMEDOUT || error == ECONNRESET);

	return (error);
}

/*
 * Get a client handle for the mount's NFS program (mi_prog); otherwise
 * identical in structure to acl_clget() above.
 */
static int
nfs_clget(mntinfo_t *mi, servinfo_t *svp, cred_t *cr, CLIENT **newcl,
	struct chtab **chp, struct nfs_clnt *nfscl)
{
	clinfo_t ci;
	int error;

	/*
	 * Set read buffer size to rsize
	 * and add room for RPC headers.
	 */
	ci.cl_readsize = mi->mi_tsize;
	if (ci.cl_readsize != 0)
		ci.cl_readsize += (RPC_MAXDATASIZE - NFS_MAXDATA);

	/*
	 * If soft mount and server is down just try once.
	 * meaning: do not retransmit.
	 */
	if (!(mi->mi_flags & MI_HARD) && (mi->mi_flags & MI_DOWN))
		ci.cl_retrans = 0;
	else
		ci.cl_retrans = mi->mi_retrans;

	ci.cl_prog = mi->mi_prog;
	ci.cl_vers = mi->mi_vers;
	ci.cl_flags = mi->mi_flags;

	/*
	 * clget calls sec_clnt_geth() to get an auth handle. For RPCSEC_GSS
	 * security flavor, the client tries to establish a security context
	 * by contacting the server. If the connection is timed out or reset,
	 * e.g. server reboot, we will try again.
	 */
	do {
		error = clget_impl(&ci, svp, cr, newcl, chp, nfscl);

		if (error == 0)
			break;

		/*
		 * For forced unmount or zone shutdown, bail out, no retry.
		 */
		if (FS_OR_ZONE_GONE(mi->mi_vfsp)) {
			error = EIO;
			break;
		}

		/* do not retry for softmount */
		if (!(mi->mi_flags & MI_HARD))
			break;

		/* let the caller deal with the failover case */
		if (FAILOVER_MOUNT(mi))
			break;

	} while (error == ETIMEDOUT || error == ECONNRESET);

	return (error);
}

/*
 * Release a client handle obtained via clget_impl(): free its auth
 * handle and return the entry to the free list of its chhead.
 */
static void
clfree_impl(CLIENT *cl, struct chtab *cp, struct nfs_clnt *nfscl)
{
	if (cl->cl_auth != NULL) {
		sec_clnt_freeh(cl->cl_auth);
		cl->cl_auth = NULL;
	}

	/*
	 * Timestamp this cache entry so that we know when it was last
	 * used.
	 */
	cp->ch_freed = gethrestime_sec();

	/*
	 * Add the free client handle to the front of the list.
	 * This way, the list will be sorted in youngest to oldest
	 * order.
6227c478bd9Sstevel@tonic-gate */ 6237c478bd9Sstevel@tonic-gate mutex_enter(&nfscl->nfscl_chtable_lock); 6247c478bd9Sstevel@tonic-gate cp->ch_list = cp->ch_head->ch_list; 6257c478bd9Sstevel@tonic-gate cp->ch_head->ch_list = cp; 6267c478bd9Sstevel@tonic-gate mutex_exit(&nfscl->nfscl_chtable_lock); 6277c478bd9Sstevel@tonic-gate } 6287c478bd9Sstevel@tonic-gate 6297c478bd9Sstevel@tonic-gate void 6307c478bd9Sstevel@tonic-gate clfree(CLIENT *cl, struct chtab *cp) 6317c478bd9Sstevel@tonic-gate { 6327c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 6337c478bd9Sstevel@tonic-gate 634108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone()); 6357c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 6367c478bd9Sstevel@tonic-gate 6377c478bd9Sstevel@tonic-gate clfree_impl(cl, cp, nfscl); 6387c478bd9Sstevel@tonic-gate } 6397c478bd9Sstevel@tonic-gate 6407c478bd9Sstevel@tonic-gate #define CL_HOLDTIME 60 /* time to hold client handles */ 6417c478bd9Sstevel@tonic-gate 6427c478bd9Sstevel@tonic-gate static void 6437c478bd9Sstevel@tonic-gate clreclaim_zone(struct nfs_clnt *nfscl, uint_t cl_holdtime) 6447c478bd9Sstevel@tonic-gate { 6457c478bd9Sstevel@tonic-gate struct chhead *ch; 6467c478bd9Sstevel@tonic-gate struct chtab *cp; /* list of objects that can be reclaimed */ 6477c478bd9Sstevel@tonic-gate struct chtab *cpe; 6487c478bd9Sstevel@tonic-gate struct chtab *cpl; 6497c478bd9Sstevel@tonic-gate struct chtab **cpp; 6507c478bd9Sstevel@tonic-gate #ifdef DEBUG 6517c478bd9Sstevel@tonic-gate int n = 0; 6527c478bd9Sstevel@tonic-gate #endif 6537c478bd9Sstevel@tonic-gate 6547c478bd9Sstevel@tonic-gate /* 6557c478bd9Sstevel@tonic-gate * Need to reclaim some memory, so step through the cache 6567c478bd9Sstevel@tonic-gate * looking through the lists for entries which can be freed. 
6577c478bd9Sstevel@tonic-gate */ 6587c478bd9Sstevel@tonic-gate cp = NULL; 6597c478bd9Sstevel@tonic-gate 6607c478bd9Sstevel@tonic-gate mutex_enter(&nfscl->nfscl_chtable_lock); 6617c478bd9Sstevel@tonic-gate 6627c478bd9Sstevel@tonic-gate /* 6637c478bd9Sstevel@tonic-gate * Here we step through each non-NULL quadruple and start to 6647c478bd9Sstevel@tonic-gate * construct the reclaim list pointed to by cp. Note that 6657c478bd9Sstevel@tonic-gate * cp will contain all eligible chtab entries. When this traversal 6667c478bd9Sstevel@tonic-gate * completes, chtab entries from the last quadruple will be at the 6677c478bd9Sstevel@tonic-gate * front of cp and entries from previously inspected quadruples have 6687c478bd9Sstevel@tonic-gate * been appended to the rear of cp. 6697c478bd9Sstevel@tonic-gate */ 6707c478bd9Sstevel@tonic-gate for (ch = nfscl->nfscl_chtable; ch != NULL; ch = ch->ch_next) { 6717c478bd9Sstevel@tonic-gate if (ch->ch_list == NULL) 6727c478bd9Sstevel@tonic-gate continue; 6737c478bd9Sstevel@tonic-gate /* 6747c478bd9Sstevel@tonic-gate * Search each list for entries older then 6757c478bd9Sstevel@tonic-gate * cl_holdtime seconds. The lists are maintained 6767c478bd9Sstevel@tonic-gate * in youngest to oldest order so that when the 6777c478bd9Sstevel@tonic-gate * first entry is found which is old enough, then 6787c478bd9Sstevel@tonic-gate * all of the rest of the entries on the list will 6797c478bd9Sstevel@tonic-gate * be old enough as well. 
6807c478bd9Sstevel@tonic-gate */ 6817c478bd9Sstevel@tonic-gate cpl = ch->ch_list; 6827c478bd9Sstevel@tonic-gate cpp = &ch->ch_list; 6837c478bd9Sstevel@tonic-gate while (cpl != NULL && 6847106075aSmarks cpl->ch_freed + cl_holdtime > gethrestime_sec()) { 6857c478bd9Sstevel@tonic-gate cpp = &cpl->ch_list; 6867c478bd9Sstevel@tonic-gate cpl = cpl->ch_list; 6877c478bd9Sstevel@tonic-gate } 6887c478bd9Sstevel@tonic-gate if (cpl != NULL) { 6897c478bd9Sstevel@tonic-gate *cpp = NULL; 6907c478bd9Sstevel@tonic-gate if (cp != NULL) { 6917c478bd9Sstevel@tonic-gate cpe = cpl; 6927c478bd9Sstevel@tonic-gate while (cpe->ch_list != NULL) 6937c478bd9Sstevel@tonic-gate cpe = cpe->ch_list; 6947c478bd9Sstevel@tonic-gate cpe->ch_list = cp; 6957c478bd9Sstevel@tonic-gate } 6967c478bd9Sstevel@tonic-gate cp = cpl; 6977c478bd9Sstevel@tonic-gate } 6987c478bd9Sstevel@tonic-gate } 6997c478bd9Sstevel@tonic-gate 7007c478bd9Sstevel@tonic-gate mutex_exit(&nfscl->nfscl_chtable_lock); 7017c478bd9Sstevel@tonic-gate 7027c478bd9Sstevel@tonic-gate /* 7037c478bd9Sstevel@tonic-gate * If cp is empty, then there is nothing to reclaim here. 7047c478bd9Sstevel@tonic-gate */ 7057c478bd9Sstevel@tonic-gate if (cp == NULL) 7067c478bd9Sstevel@tonic-gate return; 7077c478bd9Sstevel@tonic-gate 7087c478bd9Sstevel@tonic-gate /* 7097c478bd9Sstevel@tonic-gate * Step through the list of entries to free, destroying each client 7107c478bd9Sstevel@tonic-gate * handle and kmem_free'ing the memory for each entry. 
7117c478bd9Sstevel@tonic-gate */ 7127c478bd9Sstevel@tonic-gate while (cp != NULL) { 7137c478bd9Sstevel@tonic-gate #ifdef DEBUG 7147c478bd9Sstevel@tonic-gate n++; 7157c478bd9Sstevel@tonic-gate #endif 7167c478bd9Sstevel@tonic-gate CLNT_DESTROY(cp->ch_client); 7177c478bd9Sstevel@tonic-gate cpl = cp->ch_list; 7187c478bd9Sstevel@tonic-gate kmem_cache_free(chtab_cache, cp); 7197c478bd9Sstevel@tonic-gate cp = cpl; 7207c478bd9Sstevel@tonic-gate } 7217c478bd9Sstevel@tonic-gate 7227c478bd9Sstevel@tonic-gate #ifdef DEBUG 7237c478bd9Sstevel@tonic-gate /* 7247c478bd9Sstevel@tonic-gate * Update clalloc so that nfsstat shows the current number 7257c478bd9Sstevel@tonic-gate * of allocated client handles. 7267c478bd9Sstevel@tonic-gate */ 7277c478bd9Sstevel@tonic-gate atomic_add_64(&nfscl->nfscl_stat.clalloc.value.ui64, -n); 7287c478bd9Sstevel@tonic-gate #endif 7297c478bd9Sstevel@tonic-gate } 7307c478bd9Sstevel@tonic-gate 7317c478bd9Sstevel@tonic-gate /* ARGSUSED */ 7327c478bd9Sstevel@tonic-gate static void 7337c478bd9Sstevel@tonic-gate clreclaim(void *all) 7347c478bd9Sstevel@tonic-gate { 7357c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 7367c478bd9Sstevel@tonic-gate 7377c478bd9Sstevel@tonic-gate #ifdef DEBUG 7387c478bd9Sstevel@tonic-gate clstat_debug.clreclaim.value.ui64++; 7397c478bd9Sstevel@tonic-gate #endif 7407c478bd9Sstevel@tonic-gate /* 7417c478bd9Sstevel@tonic-gate * The system is low on memory; go through and try to reclaim some from 7427c478bd9Sstevel@tonic-gate * every zone on the system. 
7437c478bd9Sstevel@tonic-gate */ 7447c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 7457c478bd9Sstevel@tonic-gate nfscl = list_head(&nfs_clnt_list); 7467c478bd9Sstevel@tonic-gate for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) 7477c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, CL_HOLDTIME); 7487c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 7497c478bd9Sstevel@tonic-gate } 7507c478bd9Sstevel@tonic-gate 7517c478bd9Sstevel@tonic-gate /* 7527c478bd9Sstevel@tonic-gate * Minimum time-out values indexed by call type 7537c478bd9Sstevel@tonic-gate * These units are in "eights" of a second to avoid multiplies 7547c478bd9Sstevel@tonic-gate */ 7557c478bd9Sstevel@tonic-gate static unsigned int minimum_timeo[] = { 7567c478bd9Sstevel@tonic-gate 6, 7, 10 7577c478bd9Sstevel@tonic-gate }; 7587c478bd9Sstevel@tonic-gate 7597c478bd9Sstevel@tonic-gate /* 7607c478bd9Sstevel@tonic-gate * Back off for retransmission timeout, MAXTIMO is in hz of a sec 7617c478bd9Sstevel@tonic-gate */ 7627c478bd9Sstevel@tonic-gate #define MAXTIMO (20*hz) 7637c478bd9Sstevel@tonic-gate #define backoff(tim) (((tim) < MAXTIMO) ? dobackoff(tim) : (tim)) 7647c478bd9Sstevel@tonic-gate #define dobackoff(tim) ((((tim) << 1) > MAXTIMO) ? MAXTIMO : ((tim) << 1)) 7657c478bd9Sstevel@tonic-gate 7667c478bd9Sstevel@tonic-gate #define MIN_NFS_TSIZE 512 /* minimum "chunk" of NFS IO */ 7677c478bd9Sstevel@tonic-gate #define REDUCE_NFS_TIME (hz/2) /* rtxcur we try to keep under */ 7687c478bd9Sstevel@tonic-gate #define INCREASE_NFS_TIME (hz/3*8) /* srtt we try to keep under (scaled*8) */ 7697c478bd9Sstevel@tonic-gate 7707c478bd9Sstevel@tonic-gate /* 7717c478bd9Sstevel@tonic-gate * Function called when rfscall notices that we have been 7727c478bd9Sstevel@tonic-gate * re-transmitting, or when we get a response without retransmissions. 7737c478bd9Sstevel@tonic-gate * Return 1 if the transfer size was adjusted down - 0 if no change. 
7747c478bd9Sstevel@tonic-gate */ 7757c478bd9Sstevel@tonic-gate static int 7767c478bd9Sstevel@tonic-gate nfs_feedback(int flag, int which, mntinfo_t *mi) 7777c478bd9Sstevel@tonic-gate { 7787c478bd9Sstevel@tonic-gate int kind; 7797c478bd9Sstevel@tonic-gate int r = 0; 7807c478bd9Sstevel@tonic-gate 7817c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 7827c478bd9Sstevel@tonic-gate if (flag == FEEDBACK_REXMIT1) { 7837c478bd9Sstevel@tonic-gate if (mi->mi_timers[NFS_CALLTYPES].rt_rtxcur != 0 && 7847c478bd9Sstevel@tonic-gate mi->mi_timers[NFS_CALLTYPES].rt_rtxcur < REDUCE_NFS_TIME) 7857c478bd9Sstevel@tonic-gate goto done; 7867c478bd9Sstevel@tonic-gate if (mi->mi_curread > MIN_NFS_TSIZE) { 7877c478bd9Sstevel@tonic-gate mi->mi_curread /= 2; 7887c478bd9Sstevel@tonic-gate if (mi->mi_curread < MIN_NFS_TSIZE) 7897c478bd9Sstevel@tonic-gate mi->mi_curread = MIN_NFS_TSIZE; 7907c478bd9Sstevel@tonic-gate r = 1; 7917c478bd9Sstevel@tonic-gate } 7927c478bd9Sstevel@tonic-gate 7937c478bd9Sstevel@tonic-gate if (mi->mi_curwrite > MIN_NFS_TSIZE) { 7947c478bd9Sstevel@tonic-gate mi->mi_curwrite /= 2; 7957c478bd9Sstevel@tonic-gate if (mi->mi_curwrite < MIN_NFS_TSIZE) 7967c478bd9Sstevel@tonic-gate mi->mi_curwrite = MIN_NFS_TSIZE; 7977c478bd9Sstevel@tonic-gate r = 1; 7987c478bd9Sstevel@tonic-gate } 7997c478bd9Sstevel@tonic-gate } else if (flag == FEEDBACK_OK) { 8007c478bd9Sstevel@tonic-gate kind = mi->mi_timer_type[which]; 8017c478bd9Sstevel@tonic-gate if (kind == 0 || 8027c478bd9Sstevel@tonic-gate mi->mi_timers[kind].rt_srtt >= INCREASE_NFS_TIME) 8037c478bd9Sstevel@tonic-gate goto done; 8047c478bd9Sstevel@tonic-gate if (kind == 1) { 8057c478bd9Sstevel@tonic-gate if (mi->mi_curread >= mi->mi_tsize) 8067c478bd9Sstevel@tonic-gate goto done; 8077c478bd9Sstevel@tonic-gate mi->mi_curread += MIN_NFS_TSIZE; 8087c478bd9Sstevel@tonic-gate if (mi->mi_curread > mi->mi_tsize/2) 8097c478bd9Sstevel@tonic-gate mi->mi_curread = mi->mi_tsize; 8107c478bd9Sstevel@tonic-gate } else if (kind == 2) { 
8117c478bd9Sstevel@tonic-gate if (mi->mi_curwrite >= mi->mi_stsize) 8127c478bd9Sstevel@tonic-gate goto done; 8137c478bd9Sstevel@tonic-gate mi->mi_curwrite += MIN_NFS_TSIZE; 8147c478bd9Sstevel@tonic-gate if (mi->mi_curwrite > mi->mi_stsize/2) 8157c478bd9Sstevel@tonic-gate mi->mi_curwrite = mi->mi_stsize; 8167c478bd9Sstevel@tonic-gate } 8177c478bd9Sstevel@tonic-gate } 8187c478bd9Sstevel@tonic-gate done: 8197c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 8207c478bd9Sstevel@tonic-gate return (r); 8217c478bd9Sstevel@tonic-gate } 8227c478bd9Sstevel@tonic-gate 8237c478bd9Sstevel@tonic-gate #ifdef DEBUG 8247c478bd9Sstevel@tonic-gate static int rfs2call_hits = 0; 8257c478bd9Sstevel@tonic-gate static int rfs2call_misses = 0; 8267c478bd9Sstevel@tonic-gate #endif 8277c478bd9Sstevel@tonic-gate 8287c478bd9Sstevel@tonic-gate int 8297c478bd9Sstevel@tonic-gate rfs2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 8307c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf, 8317c478bd9Sstevel@tonic-gate enum nfsstat *statusp, int flags, failinfo_t *fi) 8327c478bd9Sstevel@tonic-gate { 8337c478bd9Sstevel@tonic-gate int rpcerror; 8347c478bd9Sstevel@tonic-gate enum clnt_stat rpc_status; 8357c478bd9Sstevel@tonic-gate 8367c478bd9Sstevel@tonic-gate ASSERT(statusp != NULL); 8377c478bd9Sstevel@tonic-gate 8387c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp, 8397c478bd9Sstevel@tonic-gate cr, douprintf, &rpc_status, flags, fi); 8407c478bd9Sstevel@tonic-gate if (!rpcerror) { 8417c478bd9Sstevel@tonic-gate /* 8427c478bd9Sstevel@tonic-gate * See crnetadjust() for comments. 
8437c478bd9Sstevel@tonic-gate */ 8447c478bd9Sstevel@tonic-gate if (*statusp == NFSERR_ACCES && 8457c478bd9Sstevel@tonic-gate (cr = crnetadjust(cr)) != NULL) { 8467c478bd9Sstevel@tonic-gate #ifdef DEBUG 8477c478bd9Sstevel@tonic-gate rfs2call_hits++; 8487c478bd9Sstevel@tonic-gate #endif 8497c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, 8507c478bd9Sstevel@tonic-gate resp, cr, douprintf, NULL, flags, fi); 8517c478bd9Sstevel@tonic-gate crfree(cr); 8527c478bd9Sstevel@tonic-gate #ifdef DEBUG 8537c478bd9Sstevel@tonic-gate if (*statusp == NFSERR_ACCES) 8547c478bd9Sstevel@tonic-gate rfs2call_misses++; 8557c478bd9Sstevel@tonic-gate #endif 8567c478bd9Sstevel@tonic-gate } 8577c478bd9Sstevel@tonic-gate } else if (rpc_status == RPC_PROCUNAVAIL) { 8587c478bd9Sstevel@tonic-gate *statusp = NFSERR_OPNOTSUPP; 8597c478bd9Sstevel@tonic-gate rpcerror = 0; 8607c478bd9Sstevel@tonic-gate } 8617c478bd9Sstevel@tonic-gate 8627c478bd9Sstevel@tonic-gate return (rpcerror); 8637c478bd9Sstevel@tonic-gate } 8647c478bd9Sstevel@tonic-gate 8657c478bd9Sstevel@tonic-gate #define NFS3_JUKEBOX_DELAY 10 * hz 8667c478bd9Sstevel@tonic-gate 8677c478bd9Sstevel@tonic-gate static clock_t nfs3_jukebox_delay = 0; 8687c478bd9Sstevel@tonic-gate 8697c478bd9Sstevel@tonic-gate #ifdef DEBUG 8707c478bd9Sstevel@tonic-gate static int rfs3call_hits = 0; 8717c478bd9Sstevel@tonic-gate static int rfs3call_misses = 0; 8727c478bd9Sstevel@tonic-gate #endif 8737c478bd9Sstevel@tonic-gate 8747c478bd9Sstevel@tonic-gate int 8757c478bd9Sstevel@tonic-gate rfs3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 8767c478bd9Sstevel@tonic-gate xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf, 8777c478bd9Sstevel@tonic-gate nfsstat3 *statusp, int flags, failinfo_t *fi) 8787c478bd9Sstevel@tonic-gate { 8797c478bd9Sstevel@tonic-gate int rpcerror; 8807c478bd9Sstevel@tonic-gate int user_informed; 8817c478bd9Sstevel@tonic-gate 8827c478bd9Sstevel@tonic-gate user_informed = 0; 
8837c478bd9Sstevel@tonic-gate do { 8847c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, xdrres, resp, 8857c478bd9Sstevel@tonic-gate cr, douprintf, NULL, flags, fi); 8867c478bd9Sstevel@tonic-gate if (!rpcerror) { 8877c478bd9Sstevel@tonic-gate cred_t *crr; 8887c478bd9Sstevel@tonic-gate if (*statusp == NFS3ERR_JUKEBOX) { 8897c478bd9Sstevel@tonic-gate if (ttoproc(curthread) == &p0) { 8907c478bd9Sstevel@tonic-gate rpcerror = EAGAIN; 8917c478bd9Sstevel@tonic-gate break; 8927c478bd9Sstevel@tonic-gate } 8937c478bd9Sstevel@tonic-gate if (!user_informed) { 8947c478bd9Sstevel@tonic-gate user_informed = 1; 8957c478bd9Sstevel@tonic-gate uprintf( 8967c478bd9Sstevel@tonic-gate "file temporarily unavailable on the server, retrying...\n"); 8977c478bd9Sstevel@tonic-gate } 8987c478bd9Sstevel@tonic-gate delay(nfs3_jukebox_delay); 8997c478bd9Sstevel@tonic-gate } 9007c478bd9Sstevel@tonic-gate /* 9017c478bd9Sstevel@tonic-gate * See crnetadjust() for comments. 9027c478bd9Sstevel@tonic-gate */ 9037c478bd9Sstevel@tonic-gate else if (*statusp == NFS3ERR_ACCES && 9047c478bd9Sstevel@tonic-gate (crr = crnetadjust(cr)) != NULL) { 9057c478bd9Sstevel@tonic-gate #ifdef DEBUG 9067c478bd9Sstevel@tonic-gate rfs3call_hits++; 9077c478bd9Sstevel@tonic-gate #endif 9087c478bd9Sstevel@tonic-gate rpcerror = rfscall(mi, which, xdrargs, argsp, 9097c478bd9Sstevel@tonic-gate xdrres, resp, crr, douprintf, 9107c478bd9Sstevel@tonic-gate NULL, flags, fi); 9117c478bd9Sstevel@tonic-gate 9127c478bd9Sstevel@tonic-gate crfree(crr); 9137c478bd9Sstevel@tonic-gate #ifdef DEBUG 9147c478bd9Sstevel@tonic-gate if (*statusp == NFS3ERR_ACCES) 9157c478bd9Sstevel@tonic-gate rfs3call_misses++; 9167c478bd9Sstevel@tonic-gate #endif 9177c478bd9Sstevel@tonic-gate } 9187c478bd9Sstevel@tonic-gate } 9197c478bd9Sstevel@tonic-gate } while (!rpcerror && *statusp == NFS3ERR_JUKEBOX); 9207c478bd9Sstevel@tonic-gate 9217c478bd9Sstevel@tonic-gate return (rpcerror); 9227c478bd9Sstevel@tonic-gate } 
9237c478bd9Sstevel@tonic-gate 9247c478bd9Sstevel@tonic-gate #define VALID_FH(fi) (VTOR(fi->vp)->r_server == VTOMI(fi->vp)->mi_curr_serv) 9257c478bd9Sstevel@tonic-gate #define INC_READERS(mi) { \ 9267c478bd9Sstevel@tonic-gate mi->mi_readers++; \ 9277c478bd9Sstevel@tonic-gate } 9287c478bd9Sstevel@tonic-gate #define DEC_READERS(mi) { \ 9297c478bd9Sstevel@tonic-gate mi->mi_readers--; \ 9307c478bd9Sstevel@tonic-gate if (mi->mi_readers == 0) \ 9317c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_failover_cv); \ 9327c478bd9Sstevel@tonic-gate } 9337c478bd9Sstevel@tonic-gate 9347c478bd9Sstevel@tonic-gate static int 9357c478bd9Sstevel@tonic-gate rfscall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp, 93645916cd2Sjpk xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf, 9377c478bd9Sstevel@tonic-gate enum clnt_stat *rpc_status, int flags, failinfo_t *fi) 9387c478bd9Sstevel@tonic-gate { 9397c478bd9Sstevel@tonic-gate CLIENT *client; 9407c478bd9Sstevel@tonic-gate struct chtab *ch; 94145916cd2Sjpk cred_t *cr = icr; 9427c478bd9Sstevel@tonic-gate enum clnt_stat status; 943e280ed37SDai Ngo struct rpc_err rpcerr, rpcerr_tmp; 9447c478bd9Sstevel@tonic-gate struct timeval wait; 9457c478bd9Sstevel@tonic-gate int timeo; /* in units of hz */ 9467c478bd9Sstevel@tonic-gate int my_rsize, my_wsize; 9477c478bd9Sstevel@tonic-gate bool_t tryagain; 94845916cd2Sjpk bool_t cred_cloned = FALSE; 9497c478bd9Sstevel@tonic-gate k_sigset_t smask; 9507c478bd9Sstevel@tonic-gate servinfo_t *svp; 9517c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 9527c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 953e280ed37SDai Ngo char *msg; 9547c478bd9Sstevel@tonic-gate #ifdef DEBUG 9557c478bd9Sstevel@tonic-gate char *bufp; 9567c478bd9Sstevel@tonic-gate #endif 9577c478bd9Sstevel@tonic-gate 9587c478bd9Sstevel@tonic-gate 9597c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_NFS, TR_RFSCALL_START, 9607106075aSmarks "rfscall_start:which %d mi %p", which, mi); 9617c478bd9Sstevel@tonic-gate 
962108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone()); 9637c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 9647c478bd9Sstevel@tonic-gate 9657c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.calls.value.ui64++; 9667c478bd9Sstevel@tonic-gate mi->mi_reqs[which].value.ui64++; 9677c478bd9Sstevel@tonic-gate 9687c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 9697c478bd9Sstevel@tonic-gate 9707c478bd9Sstevel@tonic-gate /* 9717c478bd9Sstevel@tonic-gate * In case of forced unmount or zone shutdown, return EIO. 9727c478bd9Sstevel@tonic-gate */ 9737c478bd9Sstevel@tonic-gate 9747c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 9757c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 9767c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 9777c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 9787c478bd9Sstevel@tonic-gate } 9797c478bd9Sstevel@tonic-gate 9807c478bd9Sstevel@tonic-gate /* 9817c478bd9Sstevel@tonic-gate * Remember the transfer sizes in case 9827c478bd9Sstevel@tonic-gate * nfs_feedback changes them underneath us. 9837c478bd9Sstevel@tonic-gate */ 9847c478bd9Sstevel@tonic-gate my_rsize = mi->mi_curread; 9857c478bd9Sstevel@tonic-gate my_wsize = mi->mi_curwrite; 9867c478bd9Sstevel@tonic-gate 9877c478bd9Sstevel@tonic-gate /* 9887c478bd9Sstevel@tonic-gate * NFS client failover support 9897c478bd9Sstevel@tonic-gate * 9907c478bd9Sstevel@tonic-gate * If this rnode is not in sync with the current server (VALID_FH), 9917c478bd9Sstevel@tonic-gate * we'd like to do a remap to get in sync. We can be interrupted 9927c478bd9Sstevel@tonic-gate * in failover_remap(), and if so we'll bail. Otherwise, we'll 9937c478bd9Sstevel@tonic-gate * use the best info we have to try the RPC. Part of that is 9947c478bd9Sstevel@tonic-gate * unconditionally updating the filehandle copy kept for V3. 
9957c478bd9Sstevel@tonic-gate * 9967c478bd9Sstevel@tonic-gate * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible 9977c478bd9Sstevel@tonic-gate * rw_enter(); we're trying to keep the current server from being 9987c478bd9Sstevel@tonic-gate * changed on us until we're done with the remapping and have a 9997c478bd9Sstevel@tonic-gate * matching client handle. We don't want to sending a filehandle 10007c478bd9Sstevel@tonic-gate * to the wrong host. 10017c478bd9Sstevel@tonic-gate */ 10027c478bd9Sstevel@tonic-gate failoverretry: 10037c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 10047c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10057c478bd9Sstevel@tonic-gate if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) { 10067c478bd9Sstevel@tonic-gate if (failover_wait(mi)) { 10077c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10087c478bd9Sstevel@tonic-gate return (EINTR); 10097c478bd9Sstevel@tonic-gate } 10107c478bd9Sstevel@tonic-gate } 10117c478bd9Sstevel@tonic-gate INC_READERS(mi); 10127c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10137c478bd9Sstevel@tonic-gate if (fi) { 10147c478bd9Sstevel@tonic-gate if (!VALID_FH(fi) && 10157c478bd9Sstevel@tonic-gate !(flags & RFSCALL_SOFT) && failover_safe(fi)) { 10167c478bd9Sstevel@tonic-gate int remaperr; 10177c478bd9Sstevel@tonic-gate 10187c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 10197c478bd9Sstevel@tonic-gate remaperr = failover_remap(fi); 10207c478bd9Sstevel@tonic-gate if (remaperr != 0) { 10217c478bd9Sstevel@tonic-gate #ifdef DEBUG 10227c478bd9Sstevel@tonic-gate if (remaperr != EINTR) 10237c478bd9Sstevel@tonic-gate nfs_cmn_err(remaperr, CE_WARN, 10247c478bd9Sstevel@tonic-gate "rfscall couldn't failover: %m"); 10257c478bd9Sstevel@tonic-gate #endif 10267c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10277c478bd9Sstevel@tonic-gate DEC_READERS(mi); 10287c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10297c478bd9Sstevel@tonic-gate /* 10307c478bd9Sstevel@tonic-gate * If failover_remap 
returns ETIMEDOUT 10317c478bd9Sstevel@tonic-gate * and the filesystem is hard mounted 10327c478bd9Sstevel@tonic-gate * we have to retry the call with a new 10337c478bd9Sstevel@tonic-gate * server. 10347c478bd9Sstevel@tonic-gate */ 10357c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_HARD) && 10367c478bd9Sstevel@tonic-gate IS_RECOVERABLE_ERROR(remaperr)) { 10377c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 10387c478bd9Sstevel@tonic-gate failover_newserver(mi); 10397c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 10407c478bd9Sstevel@tonic-gate goto failoverretry; 10417c478bd9Sstevel@tonic-gate } 10427c478bd9Sstevel@tonic-gate rpcerr.re_errno = remaperr; 10437c478bd9Sstevel@tonic-gate return (remaperr); 10447c478bd9Sstevel@tonic-gate } 10457c478bd9Sstevel@tonic-gate } 10467c478bd9Sstevel@tonic-gate if (fi->fhp && fi->copyproc) 10477c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, fi->vp); 10487c478bd9Sstevel@tonic-gate } 10497c478bd9Sstevel@tonic-gate } 10507c478bd9Sstevel@tonic-gate 105145916cd2Sjpk /* For TSOL, use a new cred which has net_mac_aware flag */ 105245916cd2Sjpk if (!cred_cloned && is_system_labeled()) { 105345916cd2Sjpk cred_cloned = TRUE; 105445916cd2Sjpk cr = crdup(icr); 105545916cd2Sjpk (void) setpflags(NET_MAC_AWARE, 1, cr); 105645916cd2Sjpk } 105745916cd2Sjpk 10587c478bd9Sstevel@tonic-gate /* 10597c478bd9Sstevel@tonic-gate * clget() calls clnt_tli_kinit() which clears the xid, so we 10607c478bd9Sstevel@tonic-gate * are guaranteed to reprocess the retry as a new request. 
10617c478bd9Sstevel@tonic-gate */ 10627c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 10637c478bd9Sstevel@tonic-gate rpcerr.re_errno = nfs_clget(mi, svp, cr, &client, &ch, nfscl); 10647c478bd9Sstevel@tonic-gate 10657c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 10667c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10677c478bd9Sstevel@tonic-gate DEC_READERS(mi); 10687c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10697c478bd9Sstevel@tonic-gate 10707c478bd9Sstevel@tonic-gate if ((rpcerr.re_errno == ETIMEDOUT || 10717106075aSmarks rpcerr.re_errno == ECONNRESET) && 10727106075aSmarks failover_safe(fi)) { 10737c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 10747c478bd9Sstevel@tonic-gate failover_newserver(mi); 10757c478bd9Sstevel@tonic-gate goto failoverretry; 10767c478bd9Sstevel@tonic-gate } 10777c478bd9Sstevel@tonic-gate } 10787c478bd9Sstevel@tonic-gate if (rpcerr.re_errno != 0) 10797c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 10807c478bd9Sstevel@tonic-gate 10817c478bd9Sstevel@tonic-gate if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 10827c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_semantics == NC_TPI_COTS) { 10837c478bd9Sstevel@tonic-gate timeo = (mi->mi_timeo * hz) / 10; 10847c478bd9Sstevel@tonic-gate } else { 10857c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 10867c478bd9Sstevel@tonic-gate timeo = CLNT_SETTIMERS(client, 10877c478bd9Sstevel@tonic-gate &(mi->mi_timers[mi->mi_timer_type[which]]), 10887c478bd9Sstevel@tonic-gate &(mi->mi_timers[NFS_CALLTYPES]), 10897c478bd9Sstevel@tonic-gate (minimum_timeo[mi->mi_call_type[which]]*hz)>>3, 10907c478bd9Sstevel@tonic-gate (void (*)())NULL, (caddr_t)mi, 0); 10917c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 10927c478bd9Sstevel@tonic-gate } 10937c478bd9Sstevel@tonic-gate 10947c478bd9Sstevel@tonic-gate /* 10957c478bd9Sstevel@tonic-gate * If hard mounted fs, retry call forever unless hard error occurs. 
10967c478bd9Sstevel@tonic-gate */ 10977c478bd9Sstevel@tonic-gate do { 10987c478bd9Sstevel@tonic-gate tryagain = FALSE; 10997c478bd9Sstevel@tonic-gate 11007c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 11017c478bd9Sstevel@tonic-gate status = RPC_FAILED; 11027c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 11037c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 11047c478bd9Sstevel@tonic-gate break; 11057c478bd9Sstevel@tonic-gate } 11067c478bd9Sstevel@tonic-gate 11077c478bd9Sstevel@tonic-gate TICK_TO_TIMEVAL(timeo, &wait); 11087c478bd9Sstevel@tonic-gate 11097c478bd9Sstevel@tonic-gate /* 11107c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 11117c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks). 11127c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified. 11137c478bd9Sstevel@tonic-gate */ 11147c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT); 11157c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 11167c478bd9Sstevel@tonic-gate client->cl_nosignal = TRUE; 11177c478bd9Sstevel@tonic-gate 11187c478bd9Sstevel@tonic-gate /* 11197c478bd9Sstevel@tonic-gate * If there is a current signal, then don't bother 11207c478bd9Sstevel@tonic-gate * even trying to send out the request because we 11217c478bd9Sstevel@tonic-gate * won't be able to block waiting for the response. 11227c478bd9Sstevel@tonic-gate * Simply assume RPC_INTR and get on with it. 
11237c478bd9Sstevel@tonic-gate */ 11247c478bd9Sstevel@tonic-gate if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) 11257c478bd9Sstevel@tonic-gate status = RPC_INTR; 11267c478bd9Sstevel@tonic-gate else { 11277c478bd9Sstevel@tonic-gate status = CLNT_CALL(client, which, xdrargs, argsp, 11287c478bd9Sstevel@tonic-gate xdrres, resp, wait); 11297c478bd9Sstevel@tonic-gate } 11307c478bd9Sstevel@tonic-gate 11317c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 11327c478bd9Sstevel@tonic-gate client->cl_nosignal = FALSE; 11337c478bd9Sstevel@tonic-gate /* 11347c478bd9Sstevel@tonic-gate * restore original signal mask 11357c478bd9Sstevel@tonic-gate */ 11367c478bd9Sstevel@tonic-gate sigunintr(&smask); 11377c478bd9Sstevel@tonic-gate 11387c478bd9Sstevel@tonic-gate switch (status) { 11397c478bd9Sstevel@tonic-gate case RPC_SUCCESS: 11407c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 11417c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 && 11427c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 11437c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize)) 11447c478bd9Sstevel@tonic-gate (void) nfs_feedback(FEEDBACK_OK, which, mi); 11457c478bd9Sstevel@tonic-gate break; 11467c478bd9Sstevel@tonic-gate 11477c478bd9Sstevel@tonic-gate case RPC_INTR: 11487c478bd9Sstevel@tonic-gate /* 11497c478bd9Sstevel@tonic-gate * There is no way to recover from this error, 11507c478bd9Sstevel@tonic-gate * even if mount option nointr is specified. 11517c478bd9Sstevel@tonic-gate * SIGKILL, for example, cannot be blocked. 11527c478bd9Sstevel@tonic-gate */ 11537c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_INTR; 11547c478bd9Sstevel@tonic-gate rpcerr.re_errno = EINTR; 11557c478bd9Sstevel@tonic-gate break; 11567c478bd9Sstevel@tonic-gate 11577c478bd9Sstevel@tonic-gate case RPC_UDERROR: 11587c478bd9Sstevel@tonic-gate /* 11597c478bd9Sstevel@tonic-gate * If the NFS server is local (vold) and 11607c478bd9Sstevel@tonic-gate * it goes away then we get RPC_UDERROR. 
11617c478bd9Sstevel@tonic-gate * This is a retryable error, so we would 11627c478bd9Sstevel@tonic-gate * loop, so check to see if the specific 11637c478bd9Sstevel@tonic-gate * error was ECONNRESET, indicating that 11647c478bd9Sstevel@tonic-gate * target did not exist at all. If so, 11657c478bd9Sstevel@tonic-gate * return with RPC_PROGUNAVAIL and 11667c478bd9Sstevel@tonic-gate * ECONNRESET to indicate why. 11677c478bd9Sstevel@tonic-gate */ 11687c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 11697c478bd9Sstevel@tonic-gate if (rpcerr.re_errno == ECONNRESET) { 11707c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_PROGUNAVAIL; 11717c478bd9Sstevel@tonic-gate rpcerr.re_errno = ECONNRESET; 11727c478bd9Sstevel@tonic-gate break; 11737c478bd9Sstevel@tonic-gate } 11747c478bd9Sstevel@tonic-gate /*FALLTHROUGH*/ 11757c478bd9Sstevel@tonic-gate 11767c478bd9Sstevel@tonic-gate default: /* probably RPC_TIMEDOUT */ 11777c478bd9Sstevel@tonic-gate if (IS_UNRECOVERABLE_RPC(status)) 11787c478bd9Sstevel@tonic-gate break; 11797c478bd9Sstevel@tonic-gate 11807c478bd9Sstevel@tonic-gate /* 11817c478bd9Sstevel@tonic-gate * increment server not responding count 11827c478bd9Sstevel@tonic-gate */ 11837c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 11847c478bd9Sstevel@tonic-gate mi->mi_noresponse++; 11857c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 11867c478bd9Sstevel@tonic-gate #ifdef DEBUG 11877c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.noresponse.value.ui64++; 11887c478bd9Sstevel@tonic-gate #endif 11897c478bd9Sstevel@tonic-gate 11907c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) { 11917c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_SEMISOFT) || 11927c478bd9Sstevel@tonic-gate (mi->mi_ss_call_type[which] == 0)) 11937c478bd9Sstevel@tonic-gate break; 11947c478bd9Sstevel@tonic-gate } 11957c478bd9Sstevel@tonic-gate 11967c478bd9Sstevel@tonic-gate /* 11977c478bd9Sstevel@tonic-gate * The call is in progress (over COTS). 
11987c478bd9Sstevel@tonic-gate * Try the CLNT_CALL again, but don't 11997c478bd9Sstevel@tonic-gate * print a noisy error message. 12007c478bd9Sstevel@tonic-gate */ 12017c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) { 12027c478bd9Sstevel@tonic-gate tryagain = TRUE; 12037c478bd9Sstevel@tonic-gate break; 12047c478bd9Sstevel@tonic-gate } 12057c478bd9Sstevel@tonic-gate 12067c478bd9Sstevel@tonic-gate if (flags & RFSCALL_SOFT) 12077c478bd9Sstevel@tonic-gate break; 12087c478bd9Sstevel@tonic-gate 12097c478bd9Sstevel@tonic-gate /* 12107c478bd9Sstevel@tonic-gate * On zone shutdown, just move on. 12117c478bd9Sstevel@tonic-gate */ 12127c478bd9Sstevel@tonic-gate if (zone_status_get(curproc->p_zone) >= 12137c478bd9Sstevel@tonic-gate ZONE_IS_SHUTTING_DOWN) { 12147c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 12157c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 12167c478bd9Sstevel@tonic-gate break; 12177c478bd9Sstevel@tonic-gate } 12187c478bd9Sstevel@tonic-gate 12197c478bd9Sstevel@tonic-gate /* 12207c478bd9Sstevel@tonic-gate * NFS client failover support 12217c478bd9Sstevel@tonic-gate * 12227c478bd9Sstevel@tonic-gate * If the current server just failed us, we'll 12237c478bd9Sstevel@tonic-gate * start the process of finding a new server. 12247c478bd9Sstevel@tonic-gate * After that, we can just retry. 
12257c478bd9Sstevel@tonic-gate */ 12267c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi) && failover_safe(fi)) { 12277c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 12287c478bd9Sstevel@tonic-gate failover_newserver(mi); 12297c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 12307c478bd9Sstevel@tonic-gate goto failoverretry; 12317c478bd9Sstevel@tonic-gate } 12327c478bd9Sstevel@tonic-gate 12337c478bd9Sstevel@tonic-gate tryagain = TRUE; 12347c478bd9Sstevel@tonic-gate timeo = backoff(timeo); 1235e280ed37SDai Ngo 1236e280ed37SDai Ngo CLNT_GETERR(client, &rpcerr_tmp); 1237e280ed37SDai Ngo if ((status == RPC_CANTSEND) && 1238e280ed37SDai Ngo (rpcerr_tmp.re_errno == ENOBUFS)) 1239e280ed37SDai Ngo msg = SRV_QFULL_MSG; 1240e280ed37SDai Ngo else 1241e280ed37SDai Ngo msg = SRV_NOTRESP_MSG; 1242e280ed37SDai Ngo 12437c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 12447c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_PRINTED)) { 12457c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_PRINTED; 12467c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 12477c478bd9Sstevel@tonic-gate #ifdef DEBUG 1248e280ed37SDai Ngo zprintf(zoneid, msg, mi->mi_vers, 12497c478bd9Sstevel@tonic-gate svp->sv_hostname); 1250e280ed37SDai Ngo #else 1251e280ed37SDai Ngo zprintf(zoneid, msg, svp->sv_hostname); 12527c478bd9Sstevel@tonic-gate #endif 12537c478bd9Sstevel@tonic-gate } else 12547c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 12559acbbeafSnn if (*douprintf && nfs_has_ctty()) { 12567c478bd9Sstevel@tonic-gate *douprintf = 0; 12577c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 12587c478bd9Sstevel@tonic-gate #ifdef DEBUG 1259e280ed37SDai Ngo uprintf(msg, mi->mi_vers, 12607c478bd9Sstevel@tonic-gate svp->sv_hostname); 1261e280ed37SDai Ngo #else 1262e280ed37SDai Ngo uprintf(msg, svp->sv_hostname); 12637c478bd9Sstevel@tonic-gate #endif 12647c478bd9Sstevel@tonic-gate } 12657c478bd9Sstevel@tonic-gate 12667c478bd9Sstevel@tonic-gate /* 12677c478bd9Sstevel@tonic-gate * If doing 
dynamic adjustment of transfer 12687c478bd9Sstevel@tonic-gate * size and if it's a read or write call 12697c478bd9Sstevel@tonic-gate * and if the transfer size changed while 12707c478bd9Sstevel@tonic-gate * retransmitting or if the feedback routine 12717c478bd9Sstevel@tonic-gate * changed the transfer size, 12727c478bd9Sstevel@tonic-gate * then exit rfscall so that the transfer 12737c478bd9Sstevel@tonic-gate * size can be adjusted at the vnops level. 12747c478bd9Sstevel@tonic-gate */ 12757c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 12767c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 && 12777c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 12787c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize || 12797c478bd9Sstevel@tonic-gate nfs_feedback(FEEDBACK_REXMIT1, which, mi))) { 12807c478bd9Sstevel@tonic-gate /* 12817c478bd9Sstevel@tonic-gate * On read or write calls, return 12827c478bd9Sstevel@tonic-gate * back to the vnode ops level if 12837c478bd9Sstevel@tonic-gate * the transfer size changed. 12847c478bd9Sstevel@tonic-gate */ 12857c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 128645916cd2Sjpk if (cred_cloned) 128745916cd2Sjpk crfree(cr); 12887c478bd9Sstevel@tonic-gate return (ENFS_TRYAGAIN); 12897c478bd9Sstevel@tonic-gate } 12907c478bd9Sstevel@tonic-gate } 12917c478bd9Sstevel@tonic-gate } while (tryagain); 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate if (status != RPC_SUCCESS) { 12947c478bd9Sstevel@tonic-gate /* 12957c478bd9Sstevel@tonic-gate * Let soft mounts use the timed out message. 
12967c478bd9Sstevel@tonic-gate */ 12977c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) 12987c478bd9Sstevel@tonic-gate status = RPC_TIMEDOUT; 12997c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.badcalls.value.ui64++; 13007c478bd9Sstevel@tonic-gate if (status != RPC_INTR) { 13017c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13027c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_DOWN; 13037c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13047c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 13057c478bd9Sstevel@tonic-gate #ifdef DEBUG 13067c478bd9Sstevel@tonic-gate bufp = clnt_sperror(client, svp->sv_hostname); 13077c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d %s failed for %s\n", 13087c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_rfsnames[which], bufp); 13099acbbeafSnn if (nfs_has_ctty()) { 13107c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) { 13117c478bd9Sstevel@tonic-gate uprintf("NFS%d %s failed for %s\n", 13127c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_rfsnames[which], 13137c478bd9Sstevel@tonic-gate bufp); 13147c478bd9Sstevel@tonic-gate } 13157c478bd9Sstevel@tonic-gate } 13167c478bd9Sstevel@tonic-gate kmem_free(bufp, MAXPATHLEN); 13177c478bd9Sstevel@tonic-gate #else 13187c478bd9Sstevel@tonic-gate zprintf(zoneid, 13197c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 13207c478bd9Sstevel@tonic-gate mi->mi_rfsnames[which], svp->sv_hostname, 13217c478bd9Sstevel@tonic-gate status, clnt_sperrno(status)); 13229acbbeafSnn if (nfs_has_ctty()) { 13237c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) { 13247c478bd9Sstevel@tonic-gate uprintf( 13257c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 13267c478bd9Sstevel@tonic-gate mi->mi_rfsnames[which], 13277c478bd9Sstevel@tonic-gate svp->sv_hostname, status, 13287c478bd9Sstevel@tonic-gate clnt_sperrno(status)); 13297c478bd9Sstevel@tonic-gate } 13307c478bd9Sstevel@tonic-gate } 13317c478bd9Sstevel@tonic-gate #endif 
13327c478bd9Sstevel@tonic-gate /* 13337c478bd9Sstevel@tonic-gate * when CLNT_CALL() fails with RPC_AUTHERROR, 13347c478bd9Sstevel@tonic-gate * re_errno is set appropriately depending on 13357c478bd9Sstevel@tonic-gate * the authentication error 13367c478bd9Sstevel@tonic-gate */ 13377c478bd9Sstevel@tonic-gate if (status == RPC_VERSMISMATCH || 13387c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH) 13397c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 13407c478bd9Sstevel@tonic-gate } 13417c478bd9Sstevel@tonic-gate } else { 13427c478bd9Sstevel@tonic-gate /* 13437c478bd9Sstevel@tonic-gate * Test the value of mi_down and mi_printed without 13447c478bd9Sstevel@tonic-gate * holding the mi_lock mutex. If they are both zero, 13457c478bd9Sstevel@tonic-gate * then it is okay to skip the down and printed 13467c478bd9Sstevel@tonic-gate * processing. This saves on a mutex_enter and 13477c478bd9Sstevel@tonic-gate * mutex_exit pair for a normal, successful RPC. 13487c478bd9Sstevel@tonic-gate * This was just complete overhead. 
13497c478bd9Sstevel@tonic-gate */ 13507c478bd9Sstevel@tonic-gate if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) { 13517c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 13527c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_DOWN; 13537c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_PRINTED) { 13547c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_PRINTED; 13557c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13567c478bd9Sstevel@tonic-gate #ifdef DEBUG 13577c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 13587c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d server %s ok\n", 13597c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 13607c478bd9Sstevel@tonic-gate #else 13617c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 13627c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS server %s ok\n", 13637c478bd9Sstevel@tonic-gate svp->sv_hostname); 13647c478bd9Sstevel@tonic-gate #endif 13657c478bd9Sstevel@tonic-gate } else 13667c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 13677c478bd9Sstevel@tonic-gate } 13687c478bd9Sstevel@tonic-gate 13697c478bd9Sstevel@tonic-gate if (*douprintf == 0) { 13707c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 13717c478bd9Sstevel@tonic-gate #ifdef DEBUG 13727c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 13737c478bd9Sstevel@tonic-gate uprintf("NFS%d server %s ok\n", 13747c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 13757c478bd9Sstevel@tonic-gate #else 13767c478bd9Sstevel@tonic-gate if (!(mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)) 13777c478bd9Sstevel@tonic-gate uprintf("NFS server %s ok\n", svp->sv_hostname); 13787c478bd9Sstevel@tonic-gate #endif 13797c478bd9Sstevel@tonic-gate *douprintf = 1; 13807c478bd9Sstevel@tonic-gate } 13817c478bd9Sstevel@tonic-gate } 13827c478bd9Sstevel@tonic-gate 13837c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 138445916cd2Sjpk if (cred_cloned) 138545916cd2Sjpk crfree(cr); 13867c478bd9Sstevel@tonic-gate 
	ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0);

	if (rpc_status != NULL)
		*rpc_status = rpcerr.re_status;

	TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d",
	    rpcerr.re_errno);

	return (rpcerr.re_errno);
}

#ifdef DEBUG
/* DEBUG-only counters: retries attempted / retries that still got EACCES */
static int acl2call_hits = 0;
static int acl2call_misses = 0;
#endif

/*
 * acl2call - issue an NFS_ACL version 2 request through aclcall().
 *
 * If the RPC itself succeeds but the server answers NFSERR_ACCES, retry
 * the call once with a credential adjusted by crnetadjust() (see the
 * comments with crnetadjust() for why), freeing the adjusted credential
 * afterwards.
 *
 * Returns the RPC-level errno from aclcall() (0 on RPC success); the
 * NFS-level status is returned to the caller through *statusp.
 */
int
acl2call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
    xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
    enum nfsstat *statusp, int flags, failinfo_t *fi)
{
	int rpcerror;

	rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
	    cr, douprintf, flags, fi);
	if (!rpcerror) {
		/*
		 * See comments with crnetadjust().
		 */
		if (*statusp == NFSERR_ACCES &&
		    (cr = crnetadjust(cr)) != NULL) {
#ifdef DEBUG
			acl2call_hits++;
#endif
			rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres,
			    resp, cr, douprintf, flags, fi);
			/* crnetadjust() returned a new cred; drop our hold */
			crfree(cr);
#ifdef DEBUG
			if (*statusp == NFSERR_ACCES)
				acl2call_misses++;
#endif
		}
	}

	return (rpcerror);
}

#ifdef DEBUG
/* DEBUG-only counters: retries attempted / retries that still got EACCES */
static int acl3call_hits = 0;
static int acl3call_misses = 0;
#endif

/*
 * acl3call - issue an NFS_ACL version 3 request through aclcall().
 *
 * Loops for as long as the RPC succeeds but the server answers
 * NFS3ERR_JUKEBOX (file temporarily unavailable, e.g. being staged),
 * telling the user once via uprintf() and pausing nfs3_jukebox_delay
 * ticks between attempts.  On NFS3ERR_ACCES, retry once with a
 * credential adjusted by crnetadjust() (see its comments), freeing the
 * adjusted credential afterwards.
 *
 * Returns the RPC-level errno from aclcall() (0 on RPC success); the
 * NFS-level status is returned to the caller through *statusp.
 */
int
acl3call(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
    xdrproc_t xdrres, caddr_t resp, cred_t *cr, int *douprintf,
    nfsstat3 *statusp, int flags, failinfo_t *fi)
{
	int rpcerror;
	int user_informed;

	user_informed = 0;

	do {
		rpcerror = aclcall(mi, which, xdrargs, argsp, xdrres, resp,
		    cr, douprintf, flags, fi);
		if (!rpcerror) {
			cred_t *crr;
			if (*statusp == NFS3ERR_JUKEBOX) {
				if (!user_informed) {
					user_informed = 1;
					uprintf(
"file temporarily unavailable on the server, retrying...\n");
				}
				delay(nfs3_jukebox_delay);
			}
			/*
			 * See crnetadjust() for comments.
			 */
			else if (*statusp == NFS3ERR_ACCES &&
			    (crr = crnetadjust(cr)) != NULL) {
#ifdef DEBUG
				acl3call_hits++;
#endif
				rpcerror = aclcall(mi, which, xdrargs, argsp,
				    xdrres, resp, crr, douprintf, flags, fi);

				crfree(crr);
#ifdef DEBUG
				if (*statusp == NFS3ERR_ACCES)
					acl3call_misses++;
#endif
			}
		}
	} while (!rpcerror && *statusp == NFS3ERR_JUKEBOX);

	return (rpcerror);
}

/*
 * aclcall - common RPC engine for the NFS_ACL program (analogous to
 * rfscall() for the NFS program): handles client handle setup, failover,
 * timeouts, signal masking and retries.  Definition continues below.
 */
static int
aclcall(mntinfo_t *mi, rpcproc_t which, xdrproc_t xdrargs, caddr_t argsp,
    xdrproc_t xdrres, caddr_t resp, cred_t *icr, int *douprintf,
    int flags, failinfo_t *fi)
{
	CLIENT *client;
	struct chtab *ch;
	/* For TSOL, cr may be replaced by a NET_MAC_AWARE clone of icr */
	cred_t *cr = icr;
	bool_t cred_cloned = FALSE;
14947c478bd9Sstevel@tonic-gate enum clnt_stat status; 14957c478bd9Sstevel@tonic-gate struct rpc_err rpcerr; 14967c478bd9Sstevel@tonic-gate struct timeval wait; 14977c478bd9Sstevel@tonic-gate int timeo; /* in units of hz */ 14987c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 14997c478bd9Sstevel@tonic-gate int my_rsize, my_wsize; 15007c478bd9Sstevel@tonic-gate #endif 15017c478bd9Sstevel@tonic-gate bool_t tryagain; 15027c478bd9Sstevel@tonic-gate k_sigset_t smask; 15037c478bd9Sstevel@tonic-gate servinfo_t *svp; 15047c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 15057c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 15067c478bd9Sstevel@tonic-gate #ifdef DEBUG 15077c478bd9Sstevel@tonic-gate char *bufp; 15087c478bd9Sstevel@tonic-gate #endif 15097c478bd9Sstevel@tonic-gate 15107c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 15117c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_NFS, TR_RFSCALL_START, 15127106075aSmarks "rfscall_start:which %d mi %p", which, mi); 15137c478bd9Sstevel@tonic-gate #endif 15147c478bd9Sstevel@tonic-gate 1515108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone()); 15167c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 15177c478bd9Sstevel@tonic-gate 15187c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.calls.value.ui64++; 15197c478bd9Sstevel@tonic-gate mi->mi_aclreqs[which].value.ui64++; 15207c478bd9Sstevel@tonic-gate 15217c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 15227c478bd9Sstevel@tonic-gate 15237c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 15247c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 15257c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 15267c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 15277c478bd9Sstevel@tonic-gate } 15287c478bd9Sstevel@tonic-gate 15297c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 15307c478bd9Sstevel@tonic-gate /* 15317c478bd9Sstevel@tonic-gate * Remember the transfer sizes in case 15327c478bd9Sstevel@tonic-gate * nfs_feedback changes them underneath us. 
15337c478bd9Sstevel@tonic-gate */ 15347c478bd9Sstevel@tonic-gate my_rsize = mi->mi_curread; 15357c478bd9Sstevel@tonic-gate my_wsize = mi->mi_curwrite; 15367c478bd9Sstevel@tonic-gate #endif 15377c478bd9Sstevel@tonic-gate 15387c478bd9Sstevel@tonic-gate /* 15397c478bd9Sstevel@tonic-gate * NFS client failover support 15407c478bd9Sstevel@tonic-gate * 15417c478bd9Sstevel@tonic-gate * If this rnode is not in sync with the current server (VALID_FH), 15427c478bd9Sstevel@tonic-gate * we'd like to do a remap to get in sync. We can be interrupted 15437c478bd9Sstevel@tonic-gate * in failover_remap(), and if so we'll bail. Otherwise, we'll 15447c478bd9Sstevel@tonic-gate * use the best info we have to try the RPC. Part of that is 15457c478bd9Sstevel@tonic-gate * unconditionally updating the filehandle copy kept for V3. 15467c478bd9Sstevel@tonic-gate * 15477c478bd9Sstevel@tonic-gate * Locking: INC_READERS/DEC_READERS is a poor man's interrruptible 15487c478bd9Sstevel@tonic-gate * rw_enter(); we're trying to keep the current server from being 15497c478bd9Sstevel@tonic-gate * changed on us until we're done with the remapping and have a 15507c478bd9Sstevel@tonic-gate * matching client handle. We don't want to sending a filehandle 15517c478bd9Sstevel@tonic-gate * to the wrong host. 
15527c478bd9Sstevel@tonic-gate */ 15537c478bd9Sstevel@tonic-gate failoverretry: 15547c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 15557c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 15567c478bd9Sstevel@tonic-gate if (!(flags & RFSCALL_SOFT) && failover_safe(fi)) { 15577c478bd9Sstevel@tonic-gate if (failover_wait(mi)) { 15587c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 15597c478bd9Sstevel@tonic-gate return (EINTR); 15607c478bd9Sstevel@tonic-gate } 15617c478bd9Sstevel@tonic-gate } 15627c478bd9Sstevel@tonic-gate INC_READERS(mi); 15637c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 15647c478bd9Sstevel@tonic-gate if (fi) { 15657c478bd9Sstevel@tonic-gate if (!VALID_FH(fi) && 15667c478bd9Sstevel@tonic-gate !(flags & RFSCALL_SOFT) && failover_safe(fi)) { 15677c478bd9Sstevel@tonic-gate int remaperr; 15687c478bd9Sstevel@tonic-gate 15697c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 15707c478bd9Sstevel@tonic-gate remaperr = failover_remap(fi); 15717c478bd9Sstevel@tonic-gate if (remaperr != 0) { 15727c478bd9Sstevel@tonic-gate #ifdef DEBUG 15737c478bd9Sstevel@tonic-gate if (remaperr != EINTR) 15747c478bd9Sstevel@tonic-gate nfs_cmn_err(remaperr, CE_WARN, 15757c478bd9Sstevel@tonic-gate "aclcall couldn't failover: %m"); 15767c478bd9Sstevel@tonic-gate #endif 15777c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 15787c478bd9Sstevel@tonic-gate DEC_READERS(mi); 15797c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 15807c478bd9Sstevel@tonic-gate 15817c478bd9Sstevel@tonic-gate /* 15827c478bd9Sstevel@tonic-gate * If failover_remap returns ETIMEDOUT 15837c478bd9Sstevel@tonic-gate * and the filesystem is hard mounted 15847c478bd9Sstevel@tonic-gate * we have to retry the call with a new 15857c478bd9Sstevel@tonic-gate * server. 
15867c478bd9Sstevel@tonic-gate */ 15877c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_HARD) && 15887c478bd9Sstevel@tonic-gate IS_RECOVERABLE_ERROR(remaperr)) { 15897c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 15907c478bd9Sstevel@tonic-gate failover_newserver(mi); 15917c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_SUCCESS; 15927c478bd9Sstevel@tonic-gate goto failoverretry; 15937c478bd9Sstevel@tonic-gate } 15947c478bd9Sstevel@tonic-gate return (remaperr); 15957c478bd9Sstevel@tonic-gate } 15967c478bd9Sstevel@tonic-gate } 15977c478bd9Sstevel@tonic-gate if (fi->fhp && fi->copyproc) 15987c478bd9Sstevel@tonic-gate (*fi->copyproc)(fi->fhp, fi->vp); 15997c478bd9Sstevel@tonic-gate } 16007c478bd9Sstevel@tonic-gate } 16017c478bd9Sstevel@tonic-gate 160245916cd2Sjpk /* For TSOL, use a new cred which has net_mac_aware flag */ 160345916cd2Sjpk if (!cred_cloned && is_system_labeled()) { 160445916cd2Sjpk cred_cloned = TRUE; 160545916cd2Sjpk cr = crdup(icr); 160645916cd2Sjpk (void) setpflags(NET_MAC_AWARE, 1, cr); 160745916cd2Sjpk } 160845916cd2Sjpk 16097c478bd9Sstevel@tonic-gate /* 16107c478bd9Sstevel@tonic-gate * acl_clget() calls clnt_tli_kinit() which clears the xid, so we 16117c478bd9Sstevel@tonic-gate * are guaranteed to reprocess the retry as a new request. 
16127c478bd9Sstevel@tonic-gate */ 16137c478bd9Sstevel@tonic-gate svp = mi->mi_curr_serv; 16147c478bd9Sstevel@tonic-gate rpcerr.re_errno = acl_clget(mi, svp, cr, &client, &ch, nfscl); 16157c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi)) { 16167c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16177c478bd9Sstevel@tonic-gate DEC_READERS(mi); 16187c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16197c478bd9Sstevel@tonic-gate 16207c478bd9Sstevel@tonic-gate if ((rpcerr.re_errno == ETIMEDOUT || 16217106075aSmarks rpcerr.re_errno == ECONNRESET) && 16227106075aSmarks failover_safe(fi)) { 16237c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 16247c478bd9Sstevel@tonic-gate failover_newserver(mi); 16257c478bd9Sstevel@tonic-gate goto failoverretry; 16267c478bd9Sstevel@tonic-gate } 16277c478bd9Sstevel@tonic-gate } 162845916cd2Sjpk if (rpcerr.re_errno != 0) { 162945916cd2Sjpk if (cred_cloned) 163045916cd2Sjpk crfree(cr); 16317c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 163245916cd2Sjpk } 16337c478bd9Sstevel@tonic-gate 16347c478bd9Sstevel@tonic-gate if (svp->sv_knconf->knc_semantics == NC_TPI_COTS_ORD || 16357c478bd9Sstevel@tonic-gate svp->sv_knconf->knc_semantics == NC_TPI_COTS) { 16367c478bd9Sstevel@tonic-gate timeo = (mi->mi_timeo * hz) / 10; 16377c478bd9Sstevel@tonic-gate } else { 16387c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 16397c478bd9Sstevel@tonic-gate timeo = CLNT_SETTIMERS(client, 16407c478bd9Sstevel@tonic-gate &(mi->mi_timers[mi->mi_acl_timer_type[which]]), 16417c478bd9Sstevel@tonic-gate &(mi->mi_timers[NFS_CALLTYPES]), 16427c478bd9Sstevel@tonic-gate (minimum_timeo[mi->mi_acl_call_type[which]]*hz)>>3, 16437c478bd9Sstevel@tonic-gate (void (*)()) 0, (caddr_t)mi, 0); 16447c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 16457c478bd9Sstevel@tonic-gate } 16467c478bd9Sstevel@tonic-gate 16477c478bd9Sstevel@tonic-gate /* 16487c478bd9Sstevel@tonic-gate * If hard mounted fs, retry call forever unless hard error occurs. 
16497c478bd9Sstevel@tonic-gate */ 16507c478bd9Sstevel@tonic-gate do { 16517c478bd9Sstevel@tonic-gate tryagain = FALSE; 16527c478bd9Sstevel@tonic-gate 16537c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 16547c478bd9Sstevel@tonic-gate status = RPC_FAILED; 16557c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 16567c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 16577c478bd9Sstevel@tonic-gate break; 16587c478bd9Sstevel@tonic-gate } 16597c478bd9Sstevel@tonic-gate 16607c478bd9Sstevel@tonic-gate TICK_TO_TIMEVAL(timeo, &wait); 16617c478bd9Sstevel@tonic-gate 16627c478bd9Sstevel@tonic-gate /* 16637c478bd9Sstevel@tonic-gate * Mask out all signals except SIGHUP, SIGINT, SIGQUIT 16647c478bd9Sstevel@tonic-gate * and SIGTERM. (Preserving the existing masks). 16657c478bd9Sstevel@tonic-gate * Mask out SIGINT if mount option nointr is specified. 16667c478bd9Sstevel@tonic-gate */ 16677c478bd9Sstevel@tonic-gate sigintr(&smask, (int)mi->mi_flags & MI_INT); 16687c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 16697c478bd9Sstevel@tonic-gate client->cl_nosignal = TRUE; 16707c478bd9Sstevel@tonic-gate 16717c478bd9Sstevel@tonic-gate /* 16727c478bd9Sstevel@tonic-gate * If there is a current signal, then don't bother 16737c478bd9Sstevel@tonic-gate * even trying to send out the request because we 16747c478bd9Sstevel@tonic-gate * won't be able to block waiting for the response. 16757c478bd9Sstevel@tonic-gate * Simply assume RPC_INTR and get on with it. 
16767c478bd9Sstevel@tonic-gate */ 16777c478bd9Sstevel@tonic-gate if (ttolwp(curthread) != NULL && ISSIG(curthread, JUSTLOOKING)) 16787c478bd9Sstevel@tonic-gate status = RPC_INTR; 16797c478bd9Sstevel@tonic-gate else { 16807c478bd9Sstevel@tonic-gate status = CLNT_CALL(client, which, xdrargs, argsp, 16817c478bd9Sstevel@tonic-gate xdrres, resp, wait); 16827c478bd9Sstevel@tonic-gate } 16837c478bd9Sstevel@tonic-gate 16847c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 16857c478bd9Sstevel@tonic-gate client->cl_nosignal = FALSE; 16867c478bd9Sstevel@tonic-gate /* 16877c478bd9Sstevel@tonic-gate * restore original signal mask 16887c478bd9Sstevel@tonic-gate */ 16897c478bd9Sstevel@tonic-gate sigunintr(&smask); 16907c478bd9Sstevel@tonic-gate 16917c478bd9Sstevel@tonic-gate switch (status) { 16927c478bd9Sstevel@tonic-gate case RPC_SUCCESS: 16937c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 16947c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 16957c478bd9Sstevel@tonic-gate mi->mi_timer_type[which] != 0 && 16967c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 16977c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize)) 16987c478bd9Sstevel@tonic-gate (void) nfs_feedback(FEEDBACK_OK, which, mi); 16997c478bd9Sstevel@tonic-gate #endif 17007c478bd9Sstevel@tonic-gate break; 17017c478bd9Sstevel@tonic-gate 17027c478bd9Sstevel@tonic-gate /* 17037c478bd9Sstevel@tonic-gate * Unfortunately, there are servers in the world which 17047c478bd9Sstevel@tonic-gate * are not coded correctly. They are not prepared to 17057c478bd9Sstevel@tonic-gate * handle RPC requests to the NFS port which are not 17067c478bd9Sstevel@tonic-gate * NFS requests. Thus, they may try to process the 17077c478bd9Sstevel@tonic-gate * NFS_ACL request as if it were an NFS request. This 17087c478bd9Sstevel@tonic-gate * does not work. 
Generally, an error will be generated 17097c478bd9Sstevel@tonic-gate * on the client because it will not be able to decode 17107c478bd9Sstevel@tonic-gate * the response from the server. However, it seems 17117c478bd9Sstevel@tonic-gate * possible that the server may not be able to decode 17127c478bd9Sstevel@tonic-gate * the arguments. Thus, the criteria for deciding 17137c478bd9Sstevel@tonic-gate * whether the server supports NFS_ACL or not is whether 17147c478bd9Sstevel@tonic-gate * the following RPC errors are returned from CLNT_CALL. 17157c478bd9Sstevel@tonic-gate */ 17167c478bd9Sstevel@tonic-gate case RPC_CANTDECODERES: 17177c478bd9Sstevel@tonic-gate case RPC_PROGUNAVAIL: 17187c478bd9Sstevel@tonic-gate case RPC_CANTDECODEARGS: 17197c478bd9Sstevel@tonic-gate case RPC_PROGVERSMISMATCH: 17207c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17217c478bd9Sstevel@tonic-gate mi->mi_flags &= ~(MI_ACL | MI_EXTATTR); 17227c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17237c478bd9Sstevel@tonic-gate break; 17247c478bd9Sstevel@tonic-gate 17257c478bd9Sstevel@tonic-gate /* 17267c478bd9Sstevel@tonic-gate * If the server supports NFS_ACL but not the new ops 17277c478bd9Sstevel@tonic-gate * for extended attributes, make sure we don't retry. 17287c478bd9Sstevel@tonic-gate */ 17297c478bd9Sstevel@tonic-gate case RPC_PROCUNAVAIL: 17307c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17317c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_EXTATTR; 17327c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17337c478bd9Sstevel@tonic-gate break; 17347c478bd9Sstevel@tonic-gate 17357c478bd9Sstevel@tonic-gate case RPC_INTR: 17367c478bd9Sstevel@tonic-gate /* 17377c478bd9Sstevel@tonic-gate * There is no way to recover from this error, 17387c478bd9Sstevel@tonic-gate * even if mount option nointr is specified. 17397c478bd9Sstevel@tonic-gate * SIGKILL, for example, cannot be blocked. 
17407c478bd9Sstevel@tonic-gate */ 17417c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_INTR; 17427c478bd9Sstevel@tonic-gate rpcerr.re_errno = EINTR; 17437c478bd9Sstevel@tonic-gate break; 17447c478bd9Sstevel@tonic-gate 17457c478bd9Sstevel@tonic-gate case RPC_UDERROR: 17467c478bd9Sstevel@tonic-gate /* 17477c478bd9Sstevel@tonic-gate * If the NFS server is local (vold) and 17487c478bd9Sstevel@tonic-gate * it goes away then we get RPC_UDERROR. 17497c478bd9Sstevel@tonic-gate * This is a retryable error, so we would 17507c478bd9Sstevel@tonic-gate * loop, so check to see if the specific 17517c478bd9Sstevel@tonic-gate * error was ECONNRESET, indicating that 17527c478bd9Sstevel@tonic-gate * target did not exist at all. If so, 17537c478bd9Sstevel@tonic-gate * return with RPC_PROGUNAVAIL and 17547c478bd9Sstevel@tonic-gate * ECONNRESET to indicate why. 17557c478bd9Sstevel@tonic-gate */ 17567c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 17577c478bd9Sstevel@tonic-gate if (rpcerr.re_errno == ECONNRESET) { 17587c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_PROGUNAVAIL; 17597c478bd9Sstevel@tonic-gate rpcerr.re_errno = ECONNRESET; 17607c478bd9Sstevel@tonic-gate break; 17617c478bd9Sstevel@tonic-gate } 17627c478bd9Sstevel@tonic-gate /*FALLTHROUGH*/ 17637c478bd9Sstevel@tonic-gate 17647c478bd9Sstevel@tonic-gate default: /* probably RPC_TIMEDOUT */ 17657c478bd9Sstevel@tonic-gate if (IS_UNRECOVERABLE_RPC(status)) 17667c478bd9Sstevel@tonic-gate break; 17677c478bd9Sstevel@tonic-gate 17687c478bd9Sstevel@tonic-gate /* 17697c478bd9Sstevel@tonic-gate * increment server not responding count 17707c478bd9Sstevel@tonic-gate */ 17717c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 17727c478bd9Sstevel@tonic-gate mi->mi_noresponse++; 17737c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 17747c478bd9Sstevel@tonic-gate #ifdef DEBUG 17757c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.noresponse.value.ui64++; 17767c478bd9Sstevel@tonic-gate #endif 17777c478bd9Sstevel@tonic-gate 
17787c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_HARD)) { 17797c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_SEMISOFT) || 17807c478bd9Sstevel@tonic-gate (mi->mi_acl_ss_call_type[which] == 0)) 17817c478bd9Sstevel@tonic-gate break; 17827c478bd9Sstevel@tonic-gate } 17837c478bd9Sstevel@tonic-gate 17847c478bd9Sstevel@tonic-gate /* 17857c478bd9Sstevel@tonic-gate * The call is in progress (over COTS). 17867c478bd9Sstevel@tonic-gate * Try the CLNT_CALL again, but don't 17877c478bd9Sstevel@tonic-gate * print a noisy error message. 17887c478bd9Sstevel@tonic-gate */ 17897c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) { 17907c478bd9Sstevel@tonic-gate tryagain = TRUE; 17917c478bd9Sstevel@tonic-gate break; 17927c478bd9Sstevel@tonic-gate } 17937c478bd9Sstevel@tonic-gate 17947c478bd9Sstevel@tonic-gate if (flags & RFSCALL_SOFT) 17957c478bd9Sstevel@tonic-gate break; 17967c478bd9Sstevel@tonic-gate 17977c478bd9Sstevel@tonic-gate /* 17987c478bd9Sstevel@tonic-gate * On zone shutdown, just move on. 17997c478bd9Sstevel@tonic-gate */ 18007c478bd9Sstevel@tonic-gate if (zone_status_get(curproc->p_zone) >= 18017c478bd9Sstevel@tonic-gate ZONE_IS_SHUTTING_DOWN) { 18027c478bd9Sstevel@tonic-gate rpcerr.re_status = RPC_FAILED; 18037c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 18047c478bd9Sstevel@tonic-gate break; 18057c478bd9Sstevel@tonic-gate } 18067c478bd9Sstevel@tonic-gate 18077c478bd9Sstevel@tonic-gate /* 18087c478bd9Sstevel@tonic-gate * NFS client failover support 18097c478bd9Sstevel@tonic-gate * 18107c478bd9Sstevel@tonic-gate * If the current server just failed us, we'll 18117c478bd9Sstevel@tonic-gate * start the process of finding a new server. 18127c478bd9Sstevel@tonic-gate * After that, we can just retry. 
18137c478bd9Sstevel@tonic-gate */ 18147c478bd9Sstevel@tonic-gate if (FAILOVER_MOUNT(mi) && failover_safe(fi)) { 18157c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) 18167c478bd9Sstevel@tonic-gate failover_newserver(mi); 18177c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 18187c478bd9Sstevel@tonic-gate goto failoverretry; 18197c478bd9Sstevel@tonic-gate } 18207c478bd9Sstevel@tonic-gate 18217c478bd9Sstevel@tonic-gate tryagain = TRUE; 18227c478bd9Sstevel@tonic-gate timeo = backoff(timeo); 18237c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 18247c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_PRINTED)) { 18257c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_PRINTED; 18267c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 18277c478bd9Sstevel@tonic-gate #ifdef DEBUG 18287c478bd9Sstevel@tonic-gate zprintf(zoneid, 18297c478bd9Sstevel@tonic-gate "NFS_ACL%d server %s not responding still trying\n", 18307c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 18317c478bd9Sstevel@tonic-gate #else 18327c478bd9Sstevel@tonic-gate zprintf(zoneid, 18337c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n", 18347c478bd9Sstevel@tonic-gate svp->sv_hostname); 18357c478bd9Sstevel@tonic-gate #endif 18367c478bd9Sstevel@tonic-gate } else 18377c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 18389acbbeafSnn if (*douprintf && nfs_has_ctty()) { 18397c478bd9Sstevel@tonic-gate *douprintf = 0; 18407c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 18417c478bd9Sstevel@tonic-gate #ifdef DEBUG 18427c478bd9Sstevel@tonic-gate uprintf( 18437c478bd9Sstevel@tonic-gate "NFS_ACL%d server %s not responding still trying\n", 18447c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 18457c478bd9Sstevel@tonic-gate #else 18467c478bd9Sstevel@tonic-gate uprintf( 18477c478bd9Sstevel@tonic-gate "NFS server %s not responding still trying\n", 18487c478bd9Sstevel@tonic-gate svp->sv_hostname); 18497c478bd9Sstevel@tonic-gate #endif 
18507c478bd9Sstevel@tonic-gate } 18517c478bd9Sstevel@tonic-gate 18527c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 18537c478bd9Sstevel@tonic-gate /* 18547c478bd9Sstevel@tonic-gate * If doing dynamic adjustment of transfer 18557c478bd9Sstevel@tonic-gate * size and if it's a read or write call 18567c478bd9Sstevel@tonic-gate * and if the transfer size changed while 18577c478bd9Sstevel@tonic-gate * retransmitting or if the feedback routine 18587c478bd9Sstevel@tonic-gate * changed the transfer size, 18597c478bd9Sstevel@tonic-gate * then exit rfscall so that the transfer 18607c478bd9Sstevel@tonic-gate * size can be adjusted at the vnops level. 18617c478bd9Sstevel@tonic-gate */ 18627c478bd9Sstevel@tonic-gate if ((mi->mi_flags & MI_DYNAMIC) && 18637c478bd9Sstevel@tonic-gate mi->mi_acl_timer_type[which] != 0 && 18647c478bd9Sstevel@tonic-gate (mi->mi_curread != my_rsize || 18657c478bd9Sstevel@tonic-gate mi->mi_curwrite != my_wsize || 18667c478bd9Sstevel@tonic-gate nfs_feedback(FEEDBACK_REXMIT1, which, mi))) { 18677c478bd9Sstevel@tonic-gate /* 18687c478bd9Sstevel@tonic-gate * On read or write calls, return 18697c478bd9Sstevel@tonic-gate * back to the vnode ops level if 18707c478bd9Sstevel@tonic-gate * the transfer size changed. 18717c478bd9Sstevel@tonic-gate */ 18727c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 187345916cd2Sjpk if (cred_cloned) 187445916cd2Sjpk crfree(cr); 18757c478bd9Sstevel@tonic-gate return (ENFS_TRYAGAIN); 18767c478bd9Sstevel@tonic-gate } 18777c478bd9Sstevel@tonic-gate #endif 18787c478bd9Sstevel@tonic-gate } 18797c478bd9Sstevel@tonic-gate } while (tryagain); 18807c478bd9Sstevel@tonic-gate 18817c478bd9Sstevel@tonic-gate if (status != RPC_SUCCESS) { 18827c478bd9Sstevel@tonic-gate /* 18837c478bd9Sstevel@tonic-gate * Let soft mounts use the timed out message. 
18847c478bd9Sstevel@tonic-gate */ 18857c478bd9Sstevel@tonic-gate if (status == RPC_INPROGRESS) 18867c478bd9Sstevel@tonic-gate status = RPC_TIMEDOUT; 18877c478bd9Sstevel@tonic-gate nfscl->nfscl_stat.badcalls.value.ui64++; 18887c478bd9Sstevel@tonic-gate if (status == RPC_CANTDECODERES || 18897c478bd9Sstevel@tonic-gate status == RPC_PROGUNAVAIL || 18907c478bd9Sstevel@tonic-gate status == RPC_PROCUNAVAIL || 18917c478bd9Sstevel@tonic-gate status == RPC_CANTDECODEARGS || 18927c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH) 18937c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 18947c478bd9Sstevel@tonic-gate else if (status != RPC_INTR) { 18957c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 18967c478bd9Sstevel@tonic-gate mi->mi_flags |= MI_DOWN; 18977c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 18987c478bd9Sstevel@tonic-gate CLNT_GETERR(client, &rpcerr); 18997c478bd9Sstevel@tonic-gate #ifdef DEBUG 19007c478bd9Sstevel@tonic-gate bufp = clnt_sperror(client, svp->sv_hostname); 19017c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS_ACL%d %s failed for %s\n", 19027c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_aclnames[which], bufp); 19039acbbeafSnn if (nfs_has_ctty()) { 19047c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) { 19057c478bd9Sstevel@tonic-gate uprintf("NFS_ACL%d %s failed for %s\n", 19067c478bd9Sstevel@tonic-gate mi->mi_vers, mi->mi_aclnames[which], 19077c478bd9Sstevel@tonic-gate bufp); 19087c478bd9Sstevel@tonic-gate } 19097c478bd9Sstevel@tonic-gate } 19107c478bd9Sstevel@tonic-gate kmem_free(bufp, MAXPATHLEN); 19117c478bd9Sstevel@tonic-gate #else 19127c478bd9Sstevel@tonic-gate zprintf(zoneid, 19137c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 19147c478bd9Sstevel@tonic-gate mi->mi_aclnames[which], svp->sv_hostname, 19157c478bd9Sstevel@tonic-gate status, clnt_sperrno(status)); 19169acbbeafSnn if (nfs_has_ctty()) { 19177c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 
19187c478bd9Sstevel@tonic-gate uprintf( 19197c478bd9Sstevel@tonic-gate "NFS %s failed for server %s: error %d (%s)\n", 19207c478bd9Sstevel@tonic-gate mi->mi_aclnames[which], 19217c478bd9Sstevel@tonic-gate svp->sv_hostname, status, 19227c478bd9Sstevel@tonic-gate clnt_sperrno(status)); 19237c478bd9Sstevel@tonic-gate } 19247c478bd9Sstevel@tonic-gate #endif 19257c478bd9Sstevel@tonic-gate /* 19267c478bd9Sstevel@tonic-gate * when CLNT_CALL() fails with RPC_AUTHERROR, 19277c478bd9Sstevel@tonic-gate * re_errno is set appropriately depending on 19287c478bd9Sstevel@tonic-gate * the authentication error 19297c478bd9Sstevel@tonic-gate */ 19307c478bd9Sstevel@tonic-gate if (status == RPC_VERSMISMATCH || 19317c478bd9Sstevel@tonic-gate status == RPC_PROGVERSMISMATCH) 19327c478bd9Sstevel@tonic-gate rpcerr.re_errno = EIO; 19337c478bd9Sstevel@tonic-gate } 19347c478bd9Sstevel@tonic-gate } else { 19357c478bd9Sstevel@tonic-gate /* 19367c478bd9Sstevel@tonic-gate * Test the value of mi_down and mi_printed without 19377c478bd9Sstevel@tonic-gate * holding the mi_lock mutex. If they are both zero, 19387c478bd9Sstevel@tonic-gate * then it is okay to skip the down and printed 19397c478bd9Sstevel@tonic-gate * processing. This saves on a mutex_enter and 19407c478bd9Sstevel@tonic-gate * mutex_exit pair for a normal, successful RPC. 19417c478bd9Sstevel@tonic-gate * This was just complete overhead. 
19427c478bd9Sstevel@tonic-gate */ 19437c478bd9Sstevel@tonic-gate if (mi->mi_flags & (MI_DOWN | MI_PRINTED)) { 19447c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 19457c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_DOWN; 19467c478bd9Sstevel@tonic-gate if (mi->mi_flags & MI_PRINTED) { 19477c478bd9Sstevel@tonic-gate mi->mi_flags &= ~MI_PRINTED; 19487c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 19497c478bd9Sstevel@tonic-gate #ifdef DEBUG 19507c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS_ACL%d server %s ok\n", 19517c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 19527c478bd9Sstevel@tonic-gate #else 19537c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS server %s ok\n", 19547c478bd9Sstevel@tonic-gate svp->sv_hostname); 19557c478bd9Sstevel@tonic-gate #endif 19567c478bd9Sstevel@tonic-gate } else 19577c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 19587c478bd9Sstevel@tonic-gate } 19597c478bd9Sstevel@tonic-gate 19607c478bd9Sstevel@tonic-gate if (*douprintf == 0) { 19617c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_NOPRINT)) 19627c478bd9Sstevel@tonic-gate #ifdef DEBUG 19637c478bd9Sstevel@tonic-gate uprintf("NFS_ACL%d server %s ok\n", 19647c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 19657c478bd9Sstevel@tonic-gate #else 19667c478bd9Sstevel@tonic-gate uprintf("NFS server %s ok\n", svp->sv_hostname); 19677c478bd9Sstevel@tonic-gate #endif 19687c478bd9Sstevel@tonic-gate *douprintf = 1; 19697c478bd9Sstevel@tonic-gate } 19707c478bd9Sstevel@tonic-gate } 19717c478bd9Sstevel@tonic-gate 19727c478bd9Sstevel@tonic-gate clfree_impl(client, ch, nfscl); 197345916cd2Sjpk if (cred_cloned) 197445916cd2Sjpk crfree(cr); 19757c478bd9Sstevel@tonic-gate 19767c478bd9Sstevel@tonic-gate ASSERT(rpcerr.re_status == RPC_SUCCESS || rpcerr.re_errno != 0); 19777c478bd9Sstevel@tonic-gate 19787c478bd9Sstevel@tonic-gate #if 0 /* notyet */ 19797c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_NFS, TR_RFSCALL_END, "rfscall_end:errno %d", 19807c478bd9Sstevel@tonic-gate 
rpcerr.re_errno); 19817c478bd9Sstevel@tonic-gate #endif 19827c478bd9Sstevel@tonic-gate 19837c478bd9Sstevel@tonic-gate return (rpcerr.re_errno); 19847c478bd9Sstevel@tonic-gate } 19857c478bd9Sstevel@tonic-gate 19867c478bd9Sstevel@tonic-gate int 19877c478bd9Sstevel@tonic-gate vattr_to_sattr(struct vattr *vap, struct nfssattr *sa) 19887c478bd9Sstevel@tonic-gate { 19897c478bd9Sstevel@tonic-gate uint_t mask = vap->va_mask; 19907c478bd9Sstevel@tonic-gate 19917c478bd9Sstevel@tonic-gate if (!(mask & AT_MODE)) 19927c478bd9Sstevel@tonic-gate sa->sa_mode = (uint32_t)-1; 19937c478bd9Sstevel@tonic-gate else 19947c478bd9Sstevel@tonic-gate sa->sa_mode = vap->va_mode; 19957c478bd9Sstevel@tonic-gate if (!(mask & AT_UID)) 19967c478bd9Sstevel@tonic-gate sa->sa_uid = (uint32_t)-1; 19977c478bd9Sstevel@tonic-gate else 19987c478bd9Sstevel@tonic-gate sa->sa_uid = (uint32_t)vap->va_uid; 19997c478bd9Sstevel@tonic-gate if (!(mask & AT_GID)) 20007c478bd9Sstevel@tonic-gate sa->sa_gid = (uint32_t)-1; 20017c478bd9Sstevel@tonic-gate else 20027c478bd9Sstevel@tonic-gate sa->sa_gid = (uint32_t)vap->va_gid; 20037c478bd9Sstevel@tonic-gate if (!(mask & AT_SIZE)) 20047c478bd9Sstevel@tonic-gate sa->sa_size = (uint32_t)-1; 20057c478bd9Sstevel@tonic-gate else 20067c478bd9Sstevel@tonic-gate sa->sa_size = (uint32_t)vap->va_size; 20077c478bd9Sstevel@tonic-gate if (!(mask & AT_ATIME)) 20087c478bd9Sstevel@tonic-gate sa->sa_atime.tv_sec = sa->sa_atime.tv_usec = (int32_t)-1; 20097c478bd9Sstevel@tonic-gate else { 20107c478bd9Sstevel@tonic-gate /* check time validity */ 20117c478bd9Sstevel@tonic-gate if (! 
NFS_TIME_T_OK(vap->va_atime.tv_sec)) { 20127c478bd9Sstevel@tonic-gate return (EOVERFLOW); 20137c478bd9Sstevel@tonic-gate } 20147c478bd9Sstevel@tonic-gate sa->sa_atime.tv_sec = vap->va_atime.tv_sec; 20157c478bd9Sstevel@tonic-gate sa->sa_atime.tv_usec = vap->va_atime.tv_nsec / 1000; 20167c478bd9Sstevel@tonic-gate } 20177c478bd9Sstevel@tonic-gate if (!(mask & AT_MTIME)) 20187c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_sec = sa->sa_mtime.tv_usec = (int32_t)-1; 20197c478bd9Sstevel@tonic-gate else { 20207c478bd9Sstevel@tonic-gate /* check time validity */ 20217c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_mtime.tv_sec)) { 20227c478bd9Sstevel@tonic-gate return (EOVERFLOW); 20237c478bd9Sstevel@tonic-gate } 20247c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_sec = vap->va_mtime.tv_sec; 20257c478bd9Sstevel@tonic-gate sa->sa_mtime.tv_usec = vap->va_mtime.tv_nsec / 1000; 20267c478bd9Sstevel@tonic-gate } 20277c478bd9Sstevel@tonic-gate return (0); 20287c478bd9Sstevel@tonic-gate } 20297c478bd9Sstevel@tonic-gate 20307c478bd9Sstevel@tonic-gate int 20317c478bd9Sstevel@tonic-gate vattr_to_sattr3(struct vattr *vap, sattr3 *sa) 20327c478bd9Sstevel@tonic-gate { 20337c478bd9Sstevel@tonic-gate uint_t mask = vap->va_mask; 20347c478bd9Sstevel@tonic-gate 20357c478bd9Sstevel@tonic-gate if (!(mask & AT_MODE)) 20367c478bd9Sstevel@tonic-gate sa->mode.set_it = FALSE; 20377c478bd9Sstevel@tonic-gate else { 20387c478bd9Sstevel@tonic-gate sa->mode.set_it = TRUE; 20397c478bd9Sstevel@tonic-gate sa->mode.mode = (mode3)vap->va_mode; 20407c478bd9Sstevel@tonic-gate } 20417c478bd9Sstevel@tonic-gate if (!(mask & AT_UID)) 20427c478bd9Sstevel@tonic-gate sa->uid.set_it = FALSE; 20437c478bd9Sstevel@tonic-gate else { 20447c478bd9Sstevel@tonic-gate sa->uid.set_it = TRUE; 20457c478bd9Sstevel@tonic-gate sa->uid.uid = (uid3)vap->va_uid; 20467c478bd9Sstevel@tonic-gate } 20477c478bd9Sstevel@tonic-gate if (!(mask & AT_GID)) 20487c478bd9Sstevel@tonic-gate sa->gid.set_it = FALSE; 20497c478bd9Sstevel@tonic-gate else { 
20507c478bd9Sstevel@tonic-gate sa->gid.set_it = TRUE; 20517c478bd9Sstevel@tonic-gate sa->gid.gid = (gid3)vap->va_gid; 20527c478bd9Sstevel@tonic-gate } 20537c478bd9Sstevel@tonic-gate if (!(mask & AT_SIZE)) 20547c478bd9Sstevel@tonic-gate sa->size.set_it = FALSE; 20557c478bd9Sstevel@tonic-gate else { 20567c478bd9Sstevel@tonic-gate sa->size.set_it = TRUE; 20577c478bd9Sstevel@tonic-gate sa->size.size = (size3)vap->va_size; 20587c478bd9Sstevel@tonic-gate } 20597c478bd9Sstevel@tonic-gate if (!(mask & AT_ATIME)) 20607c478bd9Sstevel@tonic-gate sa->atime.set_it = DONT_CHANGE; 20617c478bd9Sstevel@tonic-gate else { 20627c478bd9Sstevel@tonic-gate /* check time validity */ 20637c478bd9Sstevel@tonic-gate if (! NFS_TIME_T_OK(vap->va_atime.tv_sec)) { 20647c478bd9Sstevel@tonic-gate return (EOVERFLOW); 20657c478bd9Sstevel@tonic-gate } 20667c478bd9Sstevel@tonic-gate sa->atime.set_it = SET_TO_CLIENT_TIME; 20677c478bd9Sstevel@tonic-gate sa->atime.atime.seconds = (uint32)vap->va_atime.tv_sec; 20687c478bd9Sstevel@tonic-gate sa->atime.atime.nseconds = (uint32)vap->va_atime.tv_nsec; 20697c478bd9Sstevel@tonic-gate } 20707c478bd9Sstevel@tonic-gate if (!(mask & AT_MTIME)) 20717c478bd9Sstevel@tonic-gate sa->mtime.set_it = DONT_CHANGE; 20727c478bd9Sstevel@tonic-gate else { 20737c478bd9Sstevel@tonic-gate /* check time validity */ 20747c478bd9Sstevel@tonic-gate if (! 
NFS_TIME_T_OK(vap->va_mtime.tv_sec)) { 20757c478bd9Sstevel@tonic-gate return (EOVERFLOW); 20767c478bd9Sstevel@tonic-gate } 20777c478bd9Sstevel@tonic-gate sa->mtime.set_it = SET_TO_CLIENT_TIME; 20787c478bd9Sstevel@tonic-gate sa->mtime.mtime.seconds = (uint32)vap->va_mtime.tv_sec; 20797c478bd9Sstevel@tonic-gate sa->mtime.mtime.nseconds = (uint32)vap->va_mtime.tv_nsec; 20807c478bd9Sstevel@tonic-gate } 20817c478bd9Sstevel@tonic-gate return (0); 20827c478bd9Sstevel@tonic-gate } 20837c478bd9Sstevel@tonic-gate 20847c478bd9Sstevel@tonic-gate void 20857c478bd9Sstevel@tonic-gate setdiropargs(struct nfsdiropargs *da, char *nm, vnode_t *dvp) 20867c478bd9Sstevel@tonic-gate { 20877c478bd9Sstevel@tonic-gate 20887c478bd9Sstevel@tonic-gate da->da_fhandle = VTOFH(dvp); 20897c478bd9Sstevel@tonic-gate da->da_name = nm; 20907c478bd9Sstevel@tonic-gate da->da_flags = 0; 20917c478bd9Sstevel@tonic-gate } 20927c478bd9Sstevel@tonic-gate 20937c478bd9Sstevel@tonic-gate void 20947c478bd9Sstevel@tonic-gate setdiropargs3(diropargs3 *da, char *nm, vnode_t *dvp) 20957c478bd9Sstevel@tonic-gate { 20967c478bd9Sstevel@tonic-gate 20977c478bd9Sstevel@tonic-gate da->dirp = VTOFH3(dvp); 20987c478bd9Sstevel@tonic-gate da->name = nm; 20997c478bd9Sstevel@tonic-gate } 21007c478bd9Sstevel@tonic-gate 21017c478bd9Sstevel@tonic-gate int 21027c478bd9Sstevel@tonic-gate setdirgid(vnode_t *dvp, gid_t *gidp, cred_t *cr) 21037c478bd9Sstevel@tonic-gate { 21047c478bd9Sstevel@tonic-gate int error; 21057c478bd9Sstevel@tonic-gate rnode_t *rp; 21067c478bd9Sstevel@tonic-gate struct vattr va; 21077c478bd9Sstevel@tonic-gate 21087c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE | AT_GID; 2109da6c28aaSamw error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 21107c478bd9Sstevel@tonic-gate if (error) 21117c478bd9Sstevel@tonic-gate return (error); 21127c478bd9Sstevel@tonic-gate 21137c478bd9Sstevel@tonic-gate /* 21147c478bd9Sstevel@tonic-gate * To determine the expected group-id of the created file: 21157c478bd9Sstevel@tonic-gate * 1) If the 
filesystem was not mounted with the Old-BSD-compatible 21167c478bd9Sstevel@tonic-gate * GRPID option, and the directory's set-gid bit is clear, 21177c478bd9Sstevel@tonic-gate * then use the process's gid. 21187c478bd9Sstevel@tonic-gate * 2) Otherwise, set the group-id to the gid of the parent directory. 21197c478bd9Sstevel@tonic-gate */ 21207c478bd9Sstevel@tonic-gate rp = VTOR(dvp); 21217c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 21227c478bd9Sstevel@tonic-gate if (!(VTOMI(dvp)->mi_flags & MI_GRPID) && !(va.va_mode & VSGID)) 21237c478bd9Sstevel@tonic-gate *gidp = crgetgid(cr); 21247c478bd9Sstevel@tonic-gate else 21257c478bd9Sstevel@tonic-gate *gidp = va.va_gid; 21267c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 21277c478bd9Sstevel@tonic-gate return (0); 21287c478bd9Sstevel@tonic-gate } 21297c478bd9Sstevel@tonic-gate 21307c478bd9Sstevel@tonic-gate int 21317c478bd9Sstevel@tonic-gate setdirmode(vnode_t *dvp, mode_t *omp, cred_t *cr) 21327c478bd9Sstevel@tonic-gate { 21337c478bd9Sstevel@tonic-gate int error; 21347c478bd9Sstevel@tonic-gate struct vattr va; 21357c478bd9Sstevel@tonic-gate 21367c478bd9Sstevel@tonic-gate va.va_mask = AT_MODE; 2137da6c28aaSamw error = VOP_GETATTR(dvp, &va, 0, cr, NULL); 21387c478bd9Sstevel@tonic-gate if (error) 21397c478bd9Sstevel@tonic-gate return (error); 21407c478bd9Sstevel@tonic-gate 21417c478bd9Sstevel@tonic-gate /* 21427c478bd9Sstevel@tonic-gate * Modify the expected mode (om) so that the set-gid bit matches 21437c478bd9Sstevel@tonic-gate * that of the parent directory (dvp). 
21447c478bd9Sstevel@tonic-gate */ 21457c478bd9Sstevel@tonic-gate if (va.va_mode & VSGID) 21467c478bd9Sstevel@tonic-gate *omp |= VSGID; 21477c478bd9Sstevel@tonic-gate else 21487c478bd9Sstevel@tonic-gate *omp &= ~VSGID; 21497c478bd9Sstevel@tonic-gate return (0); 21507c478bd9Sstevel@tonic-gate } 21517c478bd9Sstevel@tonic-gate 21527c478bd9Sstevel@tonic-gate void 21537c478bd9Sstevel@tonic-gate nfs_setswaplike(vnode_t *vp, vattr_t *vap) 21547c478bd9Sstevel@tonic-gate { 21557c478bd9Sstevel@tonic-gate 21567c478bd9Sstevel@tonic-gate if (vp->v_type == VREG && (vap->va_mode & (VEXEC | VSVTX)) == VSVTX) { 21577c478bd9Sstevel@tonic-gate if (!(vp->v_flag & VSWAPLIKE)) { 21587c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 21597c478bd9Sstevel@tonic-gate vp->v_flag |= VSWAPLIKE; 21607c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 21617c478bd9Sstevel@tonic-gate } 21627c478bd9Sstevel@tonic-gate } else { 21637c478bd9Sstevel@tonic-gate if (vp->v_flag & VSWAPLIKE) { 21647c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock); 21657c478bd9Sstevel@tonic-gate vp->v_flag &= ~VSWAPLIKE; 21667c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock); 21677c478bd9Sstevel@tonic-gate } 21687c478bd9Sstevel@tonic-gate } 21697c478bd9Sstevel@tonic-gate } 21707c478bd9Sstevel@tonic-gate 21717c478bd9Sstevel@tonic-gate /* 21727c478bd9Sstevel@tonic-gate * Free the resources associated with an rnode. 
21737c478bd9Sstevel@tonic-gate */ 21747c478bd9Sstevel@tonic-gate static void 21757c478bd9Sstevel@tonic-gate rinactive(rnode_t *rp, cred_t *cr) 21767c478bd9Sstevel@tonic-gate { 21777c478bd9Sstevel@tonic-gate vnode_t *vp; 21787c478bd9Sstevel@tonic-gate cred_t *cred; 21797c478bd9Sstevel@tonic-gate char *contents; 21807c478bd9Sstevel@tonic-gate int size; 21817c478bd9Sstevel@tonic-gate vsecattr_t *vsp; 21827c478bd9Sstevel@tonic-gate int error; 21837c478bd9Sstevel@tonic-gate nfs3_pathconf_info *info; 21847c478bd9Sstevel@tonic-gate 21857c478bd9Sstevel@tonic-gate /* 21867c478bd9Sstevel@tonic-gate * Before freeing anything, wait until all asynchronous 21877c478bd9Sstevel@tonic-gate * activity is done on this rnode. This will allow all 21887c478bd9Sstevel@tonic-gate * asynchronous read ahead and write behind i/o's to 21897c478bd9Sstevel@tonic-gate * finish. 21907c478bd9Sstevel@tonic-gate */ 21917c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 21927c478bd9Sstevel@tonic-gate while (rp->r_count > 0) 21937c478bd9Sstevel@tonic-gate cv_wait(&rp->r_cv, &rp->r_statelock); 21947c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 21957c478bd9Sstevel@tonic-gate 21967c478bd9Sstevel@tonic-gate /* 21977c478bd9Sstevel@tonic-gate * Flush and invalidate all pages associated with the vnode. 
21987c478bd9Sstevel@tonic-gate */ 21997c478bd9Sstevel@tonic-gate vp = RTOV(rp); 22007c478bd9Sstevel@tonic-gate if (vn_has_cached_data(vp)) { 22017c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR); 22027c478bd9Sstevel@tonic-gate if ((rp->r_flags & RDIRTY) && !rp->r_error) { 2203da6c28aaSamw error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, 0, cr, NULL); 22047c478bd9Sstevel@tonic-gate if (error && (error == ENOSPC || error == EDQUOT)) { 22057c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 22067c478bd9Sstevel@tonic-gate if (!rp->r_error) 22077c478bd9Sstevel@tonic-gate rp->r_error = error; 22087c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 22097c478bd9Sstevel@tonic-gate } 22107c478bd9Sstevel@tonic-gate } 22117c478bd9Sstevel@tonic-gate nfs_invalidate_pages(vp, (u_offset_t)0, cr); 22127c478bd9Sstevel@tonic-gate } 22137c478bd9Sstevel@tonic-gate 22147c478bd9Sstevel@tonic-gate /* 22157c478bd9Sstevel@tonic-gate * Free any held credentials and caches which may be associated 22167c478bd9Sstevel@tonic-gate * with this rnode. 22177c478bd9Sstevel@tonic-gate */ 22187c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 22197c478bd9Sstevel@tonic-gate cred = rp->r_cred; 22207c478bd9Sstevel@tonic-gate rp->r_cred = NULL; 22217c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents; 22227c478bd9Sstevel@tonic-gate size = rp->r_symlink.size; 22237c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL; 22247c478bd9Sstevel@tonic-gate vsp = rp->r_secattr; 22257c478bd9Sstevel@tonic-gate rp->r_secattr = NULL; 22267c478bd9Sstevel@tonic-gate info = rp->r_pathconf; 22277c478bd9Sstevel@tonic-gate rp->r_pathconf = NULL; 22287c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 22297c478bd9Sstevel@tonic-gate 22307c478bd9Sstevel@tonic-gate /* 22317c478bd9Sstevel@tonic-gate * Free the held credential. 
22327c478bd9Sstevel@tonic-gate */ 22337c478bd9Sstevel@tonic-gate if (cred != NULL) 22347c478bd9Sstevel@tonic-gate crfree(cred); 22357c478bd9Sstevel@tonic-gate 22367c478bd9Sstevel@tonic-gate /* 22377c478bd9Sstevel@tonic-gate * Free the access cache entries. 22387c478bd9Sstevel@tonic-gate */ 22397c478bd9Sstevel@tonic-gate (void) nfs_access_purge_rp(rp); 22407c478bd9Sstevel@tonic-gate 22417c478bd9Sstevel@tonic-gate /* 22427c478bd9Sstevel@tonic-gate * Free the readdir cache entries. 22437c478bd9Sstevel@tonic-gate */ 22447c478bd9Sstevel@tonic-gate if (HAVE_RDDIR_CACHE(rp)) 22457c478bd9Sstevel@tonic-gate nfs_purge_rddir_cache(vp); 22467c478bd9Sstevel@tonic-gate 22477c478bd9Sstevel@tonic-gate /* 22487c478bd9Sstevel@tonic-gate * Free the symbolic link cache. 22497c478bd9Sstevel@tonic-gate */ 22507c478bd9Sstevel@tonic-gate if (contents != NULL) { 22517c478bd9Sstevel@tonic-gate 22527c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size); 22537c478bd9Sstevel@tonic-gate } 22547c478bd9Sstevel@tonic-gate 22557c478bd9Sstevel@tonic-gate /* 22567c478bd9Sstevel@tonic-gate * Free any cached ACL. 22577c478bd9Sstevel@tonic-gate */ 22587c478bd9Sstevel@tonic-gate if (vsp != NULL) 22597c478bd9Sstevel@tonic-gate nfs_acl_free(vsp); 22607c478bd9Sstevel@tonic-gate 22617c478bd9Sstevel@tonic-gate /* 22627c478bd9Sstevel@tonic-gate * Free any cached pathconf information. 22637c478bd9Sstevel@tonic-gate */ 22647c478bd9Sstevel@tonic-gate if (info != NULL) 22657c478bd9Sstevel@tonic-gate kmem_free(info, sizeof (*info)); 22667c478bd9Sstevel@tonic-gate } 22677c478bd9Sstevel@tonic-gate 22687c478bd9Sstevel@tonic-gate /* 22697c478bd9Sstevel@tonic-gate * Return a vnode for the given NFS Version 2 file handle. 22707c478bd9Sstevel@tonic-gate * If no rnode exists for this fhandle, create one and put it 22717c478bd9Sstevel@tonic-gate * into the hash queues. If the rnode for this fhandle 22727c478bd9Sstevel@tonic-gate * already exists, return it. 
22737c478bd9Sstevel@tonic-gate * 22747c478bd9Sstevel@tonic-gate * Note: make_rnode() may upgrade the hash bucket lock to exclusive. 22757c478bd9Sstevel@tonic-gate */ 22767c478bd9Sstevel@tonic-gate vnode_t * 22777c478bd9Sstevel@tonic-gate makenfsnode(fhandle_t *fh, struct nfsfattr *attr, struct vfs *vfsp, 22787c478bd9Sstevel@tonic-gate hrtime_t t, cred_t *cr, char *dnm, char *nm) 22797c478bd9Sstevel@tonic-gate { 22807c478bd9Sstevel@tonic-gate int newnode; 22817c478bd9Sstevel@tonic-gate int index; 22827c478bd9Sstevel@tonic-gate vnode_t *vp; 22837c478bd9Sstevel@tonic-gate nfs_fhandle nfh; 22847c478bd9Sstevel@tonic-gate vattr_t va; 22857c478bd9Sstevel@tonic-gate 22867c478bd9Sstevel@tonic-gate nfh.fh_len = NFS_FHSIZE; 22877c478bd9Sstevel@tonic-gate bcopy(fh, nfh.fh_buf, NFS_FHSIZE); 22887c478bd9Sstevel@tonic-gate 22897c478bd9Sstevel@tonic-gate index = rtablehash(&nfh); 22907c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 22917c478bd9Sstevel@tonic-gate 22927c478bd9Sstevel@tonic-gate vp = make_rnode(&nfh, &rtable[index], vfsp, nfs_vnodeops, 22937c478bd9Sstevel@tonic-gate nfs_putapage, nfs_rddir_compar, &newnode, cr, dnm, nm); 22947c478bd9Sstevel@tonic-gate 22957c478bd9Sstevel@tonic-gate if (attr != NULL) { 22967c478bd9Sstevel@tonic-gate if (!newnode) { 22977c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 22987c478bd9Sstevel@tonic-gate (void) nfs_cache_fattr(vp, attr, &va, t, cr); 22997c478bd9Sstevel@tonic-gate } else { 23007c478bd9Sstevel@tonic-gate if (attr->na_type < NFNON || attr->na_type > NFSOC) 23017c478bd9Sstevel@tonic-gate vp->v_type = VBAD; 23027c478bd9Sstevel@tonic-gate else 23037c478bd9Sstevel@tonic-gate vp->v_type = n2v_type(attr); 23047c478bd9Sstevel@tonic-gate /* 23057c478bd9Sstevel@tonic-gate * A translation here seems to be necessary 23067c478bd9Sstevel@tonic-gate * because this function can be called 23077c478bd9Sstevel@tonic-gate * with `attr' that has come from the wire, 23087c478bd9Sstevel@tonic-gate * and been operated 
on by vattr_to_nattr(). 23097c478bd9Sstevel@tonic-gate * See nfsrootvp()->VOP_GETTATTR()->nfsgetattr() 23107c478bd9Sstevel@tonic-gate * ->nfs_getattr_otw()->rfscall()->vattr_to_nattr() 23117c478bd9Sstevel@tonic-gate * ->makenfsnode(). 23127c478bd9Sstevel@tonic-gate */ 23137c478bd9Sstevel@tonic-gate if ((attr->na_rdev & 0xffff0000) == 0) 23147c478bd9Sstevel@tonic-gate vp->v_rdev = nfsv2_expdev(attr->na_rdev); 23157c478bd9Sstevel@tonic-gate else 23167c478bd9Sstevel@tonic-gate vp->v_rdev = expldev(n2v_rdev(attr)); 23177c478bd9Sstevel@tonic-gate nfs_attrcache(vp, attr, t); 23187c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 23197c478bd9Sstevel@tonic-gate } 23207c478bd9Sstevel@tonic-gate } else { 23217c478bd9Sstevel@tonic-gate if (newnode) { 23227c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 23237c478bd9Sstevel@tonic-gate } 23247c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 23257c478bd9Sstevel@tonic-gate } 23267c478bd9Sstevel@tonic-gate 23277c478bd9Sstevel@tonic-gate return (vp); 23287c478bd9Sstevel@tonic-gate } 23297c478bd9Sstevel@tonic-gate 23307c478bd9Sstevel@tonic-gate /* 23317c478bd9Sstevel@tonic-gate * Return a vnode for the given NFS Version 3 file handle. 23327c478bd9Sstevel@tonic-gate * If no rnode exists for this fhandle, create one and put it 23337c478bd9Sstevel@tonic-gate * into the hash queues. If the rnode for this fhandle 23347c478bd9Sstevel@tonic-gate * already exists, return it. 23357c478bd9Sstevel@tonic-gate * 23367c478bd9Sstevel@tonic-gate * Note: make_rnode() may upgrade the hash bucket lock to exclusive. 
23377c478bd9Sstevel@tonic-gate */ 23387c478bd9Sstevel@tonic-gate vnode_t * 23397c478bd9Sstevel@tonic-gate makenfs3node_va(nfs_fh3 *fh, vattr_t *vap, struct vfs *vfsp, hrtime_t t, 23407c478bd9Sstevel@tonic-gate cred_t *cr, char *dnm, char *nm) 23417c478bd9Sstevel@tonic-gate { 23427c478bd9Sstevel@tonic-gate int newnode; 23437c478bd9Sstevel@tonic-gate int index; 23447c478bd9Sstevel@tonic-gate vnode_t *vp; 23457c478bd9Sstevel@tonic-gate 23467c478bd9Sstevel@tonic-gate index = rtablehash((nfs_fhandle *)fh); 23477c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_READER); 23487c478bd9Sstevel@tonic-gate 23497c478bd9Sstevel@tonic-gate vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp, 23507c478bd9Sstevel@tonic-gate nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr, 23517c478bd9Sstevel@tonic-gate dnm, nm); 23527c478bd9Sstevel@tonic-gate 23537c478bd9Sstevel@tonic-gate if (vap == NULL) { 23547c478bd9Sstevel@tonic-gate if (newnode) { 23557c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE(vp); 23567c478bd9Sstevel@tonic-gate } 23577c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 23587c478bd9Sstevel@tonic-gate return (vp); 23597c478bd9Sstevel@tonic-gate } 23607c478bd9Sstevel@tonic-gate 23617c478bd9Sstevel@tonic-gate if (!newnode) { 23627c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 23637c478bd9Sstevel@tonic-gate nfs_attr_cache(vp, vap, t, cr); 23647c478bd9Sstevel@tonic-gate } else { 23657c478bd9Sstevel@tonic-gate rnode_t *rp = VTOR(vp); 23667c478bd9Sstevel@tonic-gate 23677c478bd9Sstevel@tonic-gate vp->v_type = vap->va_type; 23687c478bd9Sstevel@tonic-gate vp->v_rdev = vap->va_rdev; 23697c478bd9Sstevel@tonic-gate 23707c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 23717c478bd9Sstevel@tonic-gate if (rp->r_mtime <= t) 23727c478bd9Sstevel@tonic-gate nfs_attrcache_va(vp, vap); 23737c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 23747c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 23757c478bd9Sstevel@tonic-gate } 
	return (vp);
}

/*
 * Return a vnode for the given NFSv3 file handle, creating a new rnode
 * if one is not already cached.  If attributes are supplied, they are
 * cached for a newly created node or merged into an existing node's
 * attribute cache; if no attributes are supplied, a new node's attribute
 * cache is purged so they will be fetched on demand.
 */
vnode_t *
makenfs3node(nfs_fh3 *fh, fattr3 *attr, struct vfs *vfsp, hrtime_t t,
	cred_t *cr, char *dnm, char *nm)
{
	int newnode;
	int index;
	vnode_t *vp;
	vattr_t va;

	index = rtablehash((nfs_fhandle *)fh);
	rw_enter(&rtable[index].r_lock, RW_READER);

	vp = make_rnode((nfs_fhandle *)fh, &rtable[index], vfsp,
	    nfs3_vnodeops, nfs3_putapage, nfs3_rddir_compar, &newnode, cr,
	    dnm, nm);

	if (attr == NULL) {
		if (newnode) {
			PURGE_ATTRCACHE(vp);
		}
		rw_exit(&rtable[index].r_lock);
		return (vp);
	}

	if (!newnode) {
		rw_exit(&rtable[index].r_lock);
		(void) nfs3_cache_fattr3(vp, attr, &va, t, cr);
	} else {
		/* Out-of-range file types are mapped to VBAD. */
		if (attr->type < NF3REG || attr->type > NF3FIFO)
			vp->v_type = VBAD;
		else
			vp->v_type = nf3_to_vt[attr->type];
		vp->v_rdev = makedevice(attr->rdev.specdata1,
		    attr->rdev.specdata2);
		nfs3_attrcache(vp, attr, t);
		rw_exit(&rtable[index].r_lock);
	}

	return (vp);
}

/*
 * Read this comment before making changes to rtablehash()!
 * This is a hash function in which seemingly obvious and harmless
 * changes can cause escalations costing million dollars!
 * Know what you are doing.
 *
 * rtablehash() implements Jenkins' one-at-a-time hash algorithm.  The
 * algorithm is currently detailed here:
 *
 *	http://burtleburtle.net/bob/hash/doobs.html
 *
 * Of course, the above link may not be valid by the time you are reading
 * this, but suffice it to say that the one-at-a-time algorithm works well
 * in almost all cases.  If you are changing the algorithm be sure to verify
 * that the hash algorithm still provides even distribution in all cases and
 * with any server returning filehandles in whatever order (sequential or
 * random).
 */
static int
rtablehash(nfs_fhandle *fh)
{
	ulong_t hash, len, i;
	char *key;

	key = fh->fh_buf;
	len = (ulong_t)fh->fh_len;
	for (hash = 0, i = 0; i < len; i++) {
		hash += key[i];
		hash += (hash << 10);
		hash ^= (hash >> 6);
	}
	hash += (hash << 3);
	hash ^= (hash >> 11);
	hash += (hash << 15);
	return (hash & rtablemask);
}

/*
 * Look up the rnode for the given file handle in the hash bucket 'rhtp',
 * or construct one if none is found, recycling an rnode from the freelist
 * when the configured rnode limit (nrnode) has been reached.  On return,
 * *newnode is 1 if a new rnode was created and 0 if an existing one was
 * found.  Entered and exited with rhtp->r_lock held as a reader, although
 * the lock is dropped and re-acquired internally.
 */
static vnode_t *
make_rnode(nfs_fhandle *fh, rhashq_t *rhtp, struct vfs *vfsp,
    struct vnodeops *vops,
    int (*putapage)(vnode_t *, page_t *, u_offset_t *, size_t *, int, cred_t *),
    int (*compar)(const void *, const void *),
    int *newnode, cred_t *cr, char *dnm, char *nm)
{
	rnode_t *rp;
	rnode_t *trp;
	vnode_t *vp;
	mntinfo_t *mi;

	ASSERT(RW_READ_HELD(&rhtp->r_lock));

	mi = VFTOMI(vfsp);
start:
	if ((rp = rfind(rhtp, fh, vfsp)) != NULL) {
		vp = RTOV(rp);
		nfs_set_vroot(vp);
		*newnode = 0;
		return (vp);
	}
	rw_exit(&rhtp->r_lock);

	mutex_enter(&rpfreelist_lock);
	if (rpfreelist != NULL && rnew >= nrnode) {
		/*
		 * At or above the rnode limit: recycle the rnode at the
		 * head of the freelist instead of allocating a new one.
		 */
		rp = rpfreelist;
		rp_rmfree(rp);
		mutex_exit(&rpfreelist_lock);

		vp = RTOV(rp);

		if (rp->r_flags & RHASHED) {
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/*
				 * Someone else grabbed a reference;
				 * give up on this rnode and retry.
				 */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				rw_enter(&rhtp->r_lock, RW_READER);
				goto start;
			}
			mutex_exit(&vp->v_lock);
			rp_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}

		rinactive(rp, cr);

		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			/* A reference appeared during rinactive; retry. */
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			rw_enter(&rhtp->r_lock, RW_READER);
			goto start;
		}
		mutex_exit(&vp->v_lock);
		vn_invalid(vp);
		/*
		 * destroy old locks before bzero'ing and
		 * recreating the locks below.
		 */
		nfs_rw_destroy(&rp->r_rwlock);
		nfs_rw_destroy(&rp->r_lkserlock);
		mutex_destroy(&rp->r_statelock);
		cv_destroy(&rp->r_cv);
		cv_destroy(&rp->r_commit.c_cv);
		nfs_free_r_path(rp);
		avl_destroy(&rp->r_dir);
		/*
		 * Make sure that if rnode is recycled then
		 * VFS count is decremented properly before
		 * reuse.
		 */
		VFS_RELE(vp->v_vfsp);
		vn_reinit(vp);
	} else {
		/* Below the limit (or freelist empty): allocate fresh. */
		vnode_t *new_vp;

		mutex_exit(&rpfreelist_lock);

		rp = kmem_cache_alloc(rnode_cache, KM_SLEEP);
		new_vp = vn_alloc(KM_SLEEP);

		atomic_inc_ulong((ulong_t *)&rnew);
#ifdef DEBUG
		clstat_debug.nrnode.value.ui64++;
#endif
		vp = new_vp;
	}

	bzero(rp, sizeof (*rp));
	rp->r_vnode = vp;
	nfs_rw_init(&rp->r_rwlock, NULL, RW_DEFAULT, NULL);
	nfs_rw_init(&rp->r_lkserlock, NULL, RW_DEFAULT, NULL);
	mutex_init(&rp->r_statelock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&rp->r_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&rp->r_commit.c_cv, NULL, CV_DEFAULT, NULL);
	rp->r_fh.fh_len = fh->fh_len;
	bcopy(fh->fh_buf, rp->r_fh.fh_buf, fh->fh_len);
	rp->r_server = mi->mi_curr_serv;
	if (FAILOVER_MOUNT(mi)) {
		/*
		 * If replicated servers, stash pathnames
		 */
		if (dnm != NULL && nm != NULL) {
			char *s, *p;
			uint_t len;

			/* "<dnm>/<nm>" plus NUL */
			len = (uint_t)(strlen(dnm) + strlen(nm) + 2);
			rp->r_path = kmem_alloc(len, KM_SLEEP);
#ifdef DEBUG
			clstat_debug.rpath.value.ui64 += len;
#endif
			s = rp->r_path;
			for (p = dnm; *p; p++)
				*s++ = *p;
			*s++ = '/';
			for (p = nm; *p; p++)
				*s++ = *p;
			*s = '\0';
		} else {
			/* special case for root */
			rp->r_path = kmem_alloc(2, KM_SLEEP);
#ifdef DEBUG
			clstat_debug.rpath.value.ui64 += 2;
#endif
			*rp->r_path = '.';
			*(rp->r_path + 1) = '\0';
		}
	}
	VFS_HOLD(vfsp);
	rp->r_putapage = putapage;
	rp->r_hashq = rhtp;
	rp->r_flags = RREADDIRPLUS;
	avl_create(&rp->r_dir, compar, sizeof (rddir_cache),
	    offsetof(rddir_cache, tree));
	vn_setops(vp, vops);
	vp->v_data = (caddr_t)rp;
	vp->v_vfsp = vfsp;
	vp->v_type = VNON;
	vp->v_flag |= VMODSORT;
	nfs_set_vroot(vp);

	/*
	 * There is a race condition if someone else
	 * alloc's the rnode while no locks are held, so we
	 * check again and recover if found.
	 */
	rw_enter(&rhtp->r_lock, RW_WRITER);
	if ((trp = rfind(rhtp, fh, vfsp)) != NULL) {
		/* Lost the race: discard ours and return the winner's. */
		vp = RTOV(trp);
		nfs_set_vroot(vp);
		*newnode = 0;
		rw_exit(&rhtp->r_lock);
		rp_addfree(rp, cr);
		rw_enter(&rhtp->r_lock, RW_READER);
		return (vp);
	}
	rp_addhash(rp);
	*newnode = 1;
	return (vp);
}

/*
 * Callback function to check if the page should be marked as
 * modified.  In the positive case, p_fsdata is set to C_NOCOMMIT.
 */
int
nfs_setmod_check(page_t *pp)
{
	if (pp->p_fsdata != C_NOCOMMIT) {
		pp->p_fsdata = C_NOCOMMIT;
		return (1);
	}
	return (0);
}

/*
 * Mark the vnode VROOT if its file handle matches the file handle of
 * the current server's root.
 */
static void
nfs_set_vroot(vnode_t *vp)
{
	rnode_t *rp;
	nfs_fhandle *rootfh;

	rp = VTOR(vp);
	rootfh = &rp->r_server->sv_fhandle;
	if (rootfh->fh_len == rp->r_fh.fh_len &&
	    bcmp(rootfh->fh_buf, rp->r_fh.fh_buf, rp->r_fh.fh_len) == 0) {
		if (!(vp->v_flag & VROOT)) {
			mutex_enter(&vp->v_lock);
			vp->v_flag |= VROOT;
			mutex_exit(&vp->v_lock);
		}
	}
}

/*
 * Free the pathname stashed in rp->r_path (set up by make_rnode() for
 * failover mounts), if any.
 */
static void
nfs_free_r_path(rnode_t *rp)
{
	char *path;
	size_t len;

	path = rp->r_path;
	if (path) {
		rp->r_path = NULL;
		len = strlen(path) + 1;
		kmem_free(path, len);
#ifdef DEBUG
		clstat_debug.rpath.value.ui64 -= len;
#endif
	}
}

/*
 * Put an rnode on the free list.
 *
 * Rnodes which were allocated above and beyond the normal limit
 * are immediately freed.
 */
void
rp_addfree(rnode_t *rp, cred_t *cr)
{
	vnode_t *vp;
	struct vfs *vfsp;

	vp = RTOV(rp);
	ASSERT(vp->v_count >= 1);
	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);

	/*
	 * If we have too many rnodes allocated and there are no
	 * references to this rnode, or if the rnode is no longer
	 * accessible because it does not reside in the hash queues,
	 * or if an i/o error occurred while writing to the file,
	 * then just free it instead of putting it on the rnode
	 * freelist.
	 */
	vfsp = vp->v_vfsp;
	if (((rnew > nrnode || !(rp->r_flags & RHASHED) || rp->r_error ||
	    (vfsp->vfs_flag & VFS_UNMOUNTED)) && rp->r_count == 0)) {
		if (rp->r_flags & RHASHED) {
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/* Someone re-referenced it; leave it be. */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				return;
			}
			mutex_exit(&vp->v_lock);
			rp_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}

		rinactive(rp, cr);

		/*
		 * Recheck the vnode reference count.  We need to
		 * make sure that another reference has not been
		 * acquired while we were not holding v_lock.  The
		 * rnode is not in the rnode hash queues, so the
		 * only way for a reference to have been acquired
		 * is for a VOP_PUTPAGE because the rnode was marked
		 * with RDIRTY or for a modified page.  This
		 * reference may have been acquired before our call
		 * to rinactive.  The i/o may have been completed,
		 * thus allowing rinactive to complete, but the
		 * reference to the vnode may not have been released
		 * yet.  In any case, the rnode can not be destroyed
		 * until the other references to this vnode have been
		 * released.  The other references will take care of
		 * either destroying the rnode or placing it on the
		 * rnode freelist.  If there are no other references,
		 * then the rnode may be safely destroyed.
		 */
		mutex_enter(&vp->v_lock);
		if (vp->v_count > 1) {
			VN_RELE_LOCKED(vp);
			mutex_exit(&vp->v_lock);
			return;
		}
		mutex_exit(&vp->v_lock);

		destroy_rnode(rp);
		return;
	}

	/*
	 * Lock the hash queue and then recheck the reference count
	 * to ensure that no other threads have acquired a reference
	 * to indicate that the rnode should not be placed on the
	 * freelist.  If another reference has been acquired, then
	 * just release this one and let the other thread complete
	 * the processing of adding this rnode to the freelist.
	 */
	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);

	mutex_enter(&vp->v_lock);
	if (vp->v_count > 1) {
		VN_RELE_LOCKED(vp);
		mutex_exit(&vp->v_lock);
		rw_exit(&rp->r_hashq->r_lock);
		return;
	}
	mutex_exit(&vp->v_lock);

	/*
	 * If there is no cached data or metadata for this file, then
	 * put the rnode on the front of the freelist so that it will
	 * be reused before other rnodes which may have cached data or
	 * metadata associated with them.
	 */
	mutex_enter(&rpfreelist_lock);
	if (rpfreelist == NULL) {
		rp->r_freef = rp;
		rp->r_freeb = rp;
		rpfreelist = rp;
	} else {
		/* Insert at the tail of the circular freelist... */
		rp->r_freef = rpfreelist;
		rp->r_freeb = rpfreelist->r_freeb;
		rpfreelist->r_freeb->r_freef = rp;
		rpfreelist->r_freeb = rp;
		/* ...but make it the head if it carries no cached state. */
		if (!vn_has_cached_data(vp) &&
		    !HAVE_RDDIR_CACHE(rp) &&
		    rp->r_symlink.contents == NULL &&
		    rp->r_secattr == NULL &&
		    rp->r_pathconf == NULL)
			rpfreelist = rp;
	}
	mutex_exit(&rpfreelist_lock);

	rw_exit(&rp->r_hashq->r_lock);
}

/*
 * Remove an rnode from the free list.
 *
 * The caller must be holding rpfreelist_lock and the rnode
 * must be on the freelist.
 */
static void
rp_rmfree(rnode_t *rp)
{

	ASSERT(MUTEX_HELD(&rpfreelist_lock));
	ASSERT(rp->r_freef != NULL && rp->r_freeb != NULL);

	if (rp == rpfreelist) {
		rpfreelist = rp->r_freef;
		/* The list contained only this rnode; it is now empty. */
		if (rp == rpfreelist)
			rpfreelist = NULL;
	}

	rp->r_freeb->r_freef = rp->r_freef;
	rp->r_freef->r_freeb = rp->r_freeb;

	rp->r_freef = rp->r_freeb = NULL;
}

/*
 * Put a rnode in the hash table.
 *
 * The caller must be holding the exclusive hash queue lock.
 * The rnode is also added to its mntinfo's per-vfs mi_rnodes list.
 */
static void
rp_addhash(rnode_t *rp)
{
	mntinfo_t *mi;

	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
	ASSERT(!(rp->r_flags & RHASHED));

	rp->r_hashf = rp->r_hashq->r_hashf;
	rp->r_hashq->r_hashf = rp;
	rp->r_hashb = (rnode_t *)rp->r_hashq;
	rp->r_hashf->r_hashb = rp;

	mutex_enter(&rp->r_statelock);
	rp->r_flags |= RHASHED;
	mutex_exit(&rp->r_statelock);

	mi = VTOMI(RTOV(rp));
	mutex_enter(&mi->mi_rnodes_lock);
	list_insert_tail(&mi->mi_rnodes, rp);
	mutex_exit(&mi->mi_rnodes_lock);
}

/*
 * Remove a rnode from the hash table.
 *
 * The caller must be holding the hash queue lock.
 * The rnode is also removed from its mntinfo's mi_rnodes list,
 * if it is still linked there.
 */
static void
rp_rmhash_locked(rnode_t *rp)
{
	mntinfo_t *mi;

	ASSERT(RW_WRITE_HELD(&rp->r_hashq->r_lock));
	ASSERT(rp->r_flags & RHASHED);

	rp->r_hashb->r_hashf = rp->r_hashf;
	rp->r_hashf->r_hashb = rp->r_hashb;

	mutex_enter(&rp->r_statelock);
	rp->r_flags &= ~RHASHED;
	mutex_exit(&rp->r_statelock);

	mi = VTOMI(RTOV(rp));
	mutex_enter(&mi->mi_rnodes_lock);
	if (list_link_active(&rp->r_mi_link))
		list_remove(&mi->mi_rnodes, rp);
	mutex_exit(&mi->mi_rnodes_lock);
}

/*
 * Remove a rnode from the hash table.
 *
 * The caller must not be holding the hash queue lock.
 */
void
rp_rmhash(rnode_t *rp)
{

	rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
	rp_rmhash_locked(rp);
	rw_exit(&rp->r_hashq->r_lock);
}

/*
 * Lookup a rnode by fhandle.  Returns the rnode with a hold on its
 * vnode, or NULL if no match is found in this hash bucket.
 *
 * The caller must be holding the hash queue lock, either shared or exclusive.
 */
static rnode_t *
rfind(rhashq_t *rhtp, nfs_fhandle *fh, struct vfs *vfsp)
{
	rnode_t *rp;
	vnode_t *vp;

	ASSERT(RW_LOCK_HELD(&rhtp->r_lock));

	for (rp = rhtp->r_hashf; rp != (rnode_t *)rhtp; rp = rp->r_hashf) {
		vp = RTOV(rp);
		if (vp->v_vfsp == vfsp &&
		    rp->r_fh.fh_len == fh->fh_len &&
		    bcmp(rp->r_fh.fh_buf, fh->fh_buf, fh->fh_len) == 0) {
			/*
			 * remove rnode from free list, if necessary.
			 */
			if (rp->r_freef != NULL) {
				mutex_enter(&rpfreelist_lock);
				/*
				 * If the rnode is on the freelist,
				 * then remove it and use that reference
				 * as the new reference.  Otherwise,
				 * need to increment the reference count.
				 */
				if (rp->r_freef != NULL) {
					rp_rmfree(rp);
					mutex_exit(&rpfreelist_lock);
				} else {
					mutex_exit(&rpfreelist_lock);
					VN_HOLD(vp);
				}
			} else
				VN_HOLD(vp);
			return (rp);
		}
	}
	return (NULL);
}

/*
 * Return 1 if there is an active vnode belonging to this vfs in the
 * rtable cache.
 *
 * Several of these checks are done without holding the usual
 * locks.  This is safe because destroy_rtable(), rp_addfree(),
 * etc. will redo the necessary checks before actually destroying
 * any rnodes.
 */
int
check_rtable(struct vfs *vfsp)
{
	rnode_t *rp;
	vnode_t *vp;
	mntinfo_t *mi;

	ASSERT(vfsp != NULL);
	mi = VFTOMI(vfsp);

	mutex_enter(&mi->mi_rnodes_lock);
	for (rp = list_head(&mi->mi_rnodes); rp != NULL;
	    rp = list_next(&mi->mi_rnodes, rp)) {
		vp = RTOV(rp);

		/*
		 * Active means: not on the freelist, dirty with cached
		 * pages, or holding outstanding operations (r_count).
		 */
		if (rp->r_freef == NULL ||
		    (vn_has_cached_data(vp) && (rp->r_flags & RDIRTY)) ||
		    rp->r_count > 0) {
			mutex_exit(&mi->mi_rnodes_lock);
			return (1);
		}
	}
	mutex_exit(&mi->mi_rnodes_lock);

	return (0);
}

/*
 * Destroy inactive vnodes from the hash queues which belong to this
 * vfs.  It is essential that we destroy all inactive vnodes during a
 * forced unmount as well as during a normal unmount.
 */
void
destroy_rtable(struct vfs *vfsp, cred_t *cr)
{
	rnode_t *rp;
	mntinfo_t *mi;

	ASSERT(vfsp != NULL);

	mi = VFTOMI(vfsp);

	mutex_enter(&rpfreelist_lock);
	mutex_enter(&mi->mi_rnodes_lock);
	while ((rp = list_remove_head(&mi->mi_rnodes)) != NULL) {
		/*
		 * If the rnode is no longer on the freelist it is not
		 * ours and it will be handled by some other thread, so
		 * skip it.
		 */
		if (rp->r_freef == NULL)
			continue;
		mutex_exit(&mi->mi_rnodes_lock);

		rp_rmfree(rp);
		mutex_exit(&rpfreelist_lock);

		rp_rmhash(rp);

		/*
		 * This call to rp_addfree will end up destroying the
		 * rnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		rp_addfree(rp, cr);

		mutex_enter(&rpfreelist_lock);
		mutex_enter(&mi->mi_rnodes_lock);
	}
	mutex_exit(&mi->mi_rnodes_lock);
	mutex_exit(&rpfreelist_lock);
}

/*
 * This routine destroys all the resources associated with the rnode
 * and then the rnode itself.
 */
static void
destroy_rnode(rnode_t *rp)
{
	vnode_t *vp;
	vfs_t *vfsp;

	vp = RTOV(rp);
	vfsp = vp->v_vfsp;

	ASSERT(vp->v_count == 1);
	ASSERT(rp->r_count == 0);
	ASSERT(rp->r_lmpl == NULL);
	ASSERT(rp->r_mapcnt == 0);
	ASSERT(!(rp->r_flags & RHASHED));
	ASSERT(rp->r_freef == NULL && rp->r_freeb == NULL);
	atomic_dec_ulong((ulong_t *)&rnew);
#ifdef DEBUG
	clstat_debug.nrnode.value.ui64--;
#endif
	nfs_rw_destroy(&rp->r_rwlock);
	nfs_rw_destroy(&rp->r_lkserlock);
	mutex_destroy(&rp->r_statelock);
	cv_destroy(&rp->r_cv);
	cv_destroy(&rp->r_commit.c_cv);
	if (rp->r_flags & RDELMAPLIST)
		list_destroy(&rp->r_indelmap);
	nfs_free_r_path(rp);
	avl_destroy(&rp->r_dir);
	vn_invalid(vp);
	vn_free(vp);
	kmem_cache_free(rnode_cache, rp);
	VFS_RELE(vfsp);
}

/*
 * Flush all vnodes in this (or every) vfs.
 * Used by nfs_sync and by nfs_unmount.
 */
void
rflush(struct vfs *vfsp, cred_t *cr)
{
	int index;
	rnode_t *rp;
	vnode_t *vp, **vplist;
	long num, cnt;

	/*
	 * Check to see whether there is anything to do.
	 */
	num = rnew;
	if (num == 0)
		return;

	/*
	 * Allocate a slot for all currently active rnodes on the
	 * supposition that they all may need flushing.
	 */
	vplist = kmem_alloc(num * sizeof (*vplist), KM_SLEEP);
	cnt = 0;

	/*
	 * If the vfs is known we can do fast path by iterating all rnodes that
	 * belong to this vfs.  This is much faster than the traditional way
	 * of iterating rtable (below) in a case there is a lot of rnodes that
	 * does not belong to our vfs.
	 */
	if (vfsp != NULL) {
		mntinfo_t *mi = VFTOMI(vfsp);

		mutex_enter(&mi->mi_rnodes_lock);
		for (rp = list_head(&mi->mi_rnodes); rp != NULL;
		    rp = list_next(&mi->mi_rnodes, rp)) {
			vp = RTOV(rp);
			/*
			 * Don't bother sync'ing a vp if it
			 * is part of virtual swap device or
			 * if VFS is read-only
			 */
			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
				continue;
			/*
			 * If the vnode has pages and is marked as either dirty
			 * or mmap'd, hold and add this vnode to the list of
			 * vnodes to flush.
			 */
			ASSERT(vp->v_vfsp == vfsp);
			if (vn_has_cached_data(vp) &&
			    ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) {
				VN_HOLD(vp);
				vplist[cnt++] = vp;
				if (cnt == num) {
					/*
					 * The vplist is full because there is
					 * too many rnodes.  We are done for
					 * now.
					 */
					break;
				}
			}
		}
		mutex_exit(&mi->mi_rnodes_lock);

		goto done;
	}

	ASSERT(vfsp == NULL);

	/*
	 * Walk the hash queues looking for rnodes with page
	 * lists associated with them.  Make a list of these
	 * files.
	 */
	for (index = 0; index < rtablesize; index++) {
		rw_enter(&rtable[index].r_lock, RW_READER);
		for (rp = rtable[index].r_hashf;
		    rp != (rnode_t *)(&rtable[index]);
		    rp = rp->r_hashf) {
			vp = RTOV(rp);
			/*
			 * Don't bother sync'ing a vp if it
			 * is part of virtual swap device or
			 * if VFS is read-only
			 */
			if (IS_SWAPVP(vp) || vn_is_readonly(vp))
				continue;
			/*
			 * If the vnode has pages and is marked as either dirty
			 * or mmap'd, hold and add this vnode to the list of
			 * vnodes to flush.
31447c478bd9Sstevel@tonic-gate */ 3145*e010bda9SMarcel Telka if (vn_has_cached_data(vp) && 31467c478bd9Sstevel@tonic-gate ((rp->r_flags & RDIRTY) || rp->r_mapcnt > 0)) { 31477c478bd9Sstevel@tonic-gate VN_HOLD(vp); 31487c478bd9Sstevel@tonic-gate vplist[cnt++] = vp; 31497c478bd9Sstevel@tonic-gate if (cnt == num) { 31507c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 3151*e010bda9SMarcel Telka /* 3152*e010bda9SMarcel Telka * The vplist is full because there is 3153*e010bda9SMarcel Telka * too many rnodes. We are done for 3154*e010bda9SMarcel Telka * now. 3155*e010bda9SMarcel Telka */ 3156*e010bda9SMarcel Telka goto done; 31577c478bd9Sstevel@tonic-gate } 31587c478bd9Sstevel@tonic-gate } 31597c478bd9Sstevel@tonic-gate } 31607c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 31617c478bd9Sstevel@tonic-gate } 3162*e010bda9SMarcel Telka 3163*e010bda9SMarcel Telka done: 31647c478bd9Sstevel@tonic-gate 31657c478bd9Sstevel@tonic-gate /* 31667c478bd9Sstevel@tonic-gate * Flush and release all of the files on the list. 31677c478bd9Sstevel@tonic-gate */ 31687c478bd9Sstevel@tonic-gate while (cnt-- > 0) { 31697c478bd9Sstevel@tonic-gate vp = vplist[cnt]; 3170da6c28aaSamw (void) VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_ASYNC, cr, NULL); 31717c478bd9Sstevel@tonic-gate VN_RELE(vp); 31727c478bd9Sstevel@tonic-gate } 31737c478bd9Sstevel@tonic-gate 31747c478bd9Sstevel@tonic-gate /* 31757c478bd9Sstevel@tonic-gate * Free the space allocated to hold the list. 31767c478bd9Sstevel@tonic-gate */ 31777c478bd9Sstevel@tonic-gate kmem_free(vplist, num * sizeof (*vplist)); 31787c478bd9Sstevel@tonic-gate } 31797c478bd9Sstevel@tonic-gate 31807c478bd9Sstevel@tonic-gate /* 31817c478bd9Sstevel@tonic-gate * This probably needs to be larger than or equal to 31827c478bd9Sstevel@tonic-gate * log2(sizeof (struct rnode)) due to the way that rnodes are 31837c478bd9Sstevel@tonic-gate * allocated. 
31847c478bd9Sstevel@tonic-gate */ 31857c478bd9Sstevel@tonic-gate #define ACACHE_SHIFT_BITS 9 31867c478bd9Sstevel@tonic-gate 31877c478bd9Sstevel@tonic-gate static int 31887c478bd9Sstevel@tonic-gate acachehash(rnode_t *rp, cred_t *cr) 31897c478bd9Sstevel@tonic-gate { 31907c478bd9Sstevel@tonic-gate 31917c478bd9Sstevel@tonic-gate return ((((intptr_t)rp >> ACACHE_SHIFT_BITS) + crgetuid(cr)) & 31927c478bd9Sstevel@tonic-gate acachemask); 31937c478bd9Sstevel@tonic-gate } 31947c478bd9Sstevel@tonic-gate 31957c478bd9Sstevel@tonic-gate #ifdef DEBUG 31967c478bd9Sstevel@tonic-gate static long nfs_access_cache_hits = 0; 31977c478bd9Sstevel@tonic-gate static long nfs_access_cache_misses = 0; 31987c478bd9Sstevel@tonic-gate #endif 31997c478bd9Sstevel@tonic-gate 32007c478bd9Sstevel@tonic-gate nfs_access_type_t 32017c478bd9Sstevel@tonic-gate nfs_access_check(rnode_t *rp, uint32_t acc, cred_t *cr) 32027c478bd9Sstevel@tonic-gate { 32037c478bd9Sstevel@tonic-gate vnode_t *vp; 32047c478bd9Sstevel@tonic-gate acache_t *ap; 32057c478bd9Sstevel@tonic-gate acache_hash_t *hp; 32067c478bd9Sstevel@tonic-gate nfs_access_type_t all; 32077c478bd9Sstevel@tonic-gate 32087c478bd9Sstevel@tonic-gate vp = RTOV(rp); 32097c478bd9Sstevel@tonic-gate if (!ATTRCACHE_VALID(vp) || nfs_waitfor_purge_complete(vp)) 32107c478bd9Sstevel@tonic-gate return (NFS_ACCESS_UNKNOWN); 32117c478bd9Sstevel@tonic-gate 32127c478bd9Sstevel@tonic-gate if (rp->r_acache != NULL) { 32137c478bd9Sstevel@tonic-gate hp = &acache[acachehash(rp, cr)]; 32147c478bd9Sstevel@tonic-gate rw_enter(&hp->lock, RW_READER); 32157c478bd9Sstevel@tonic-gate ap = hp->next; 32167c478bd9Sstevel@tonic-gate while (ap != (acache_t *)hp) { 32177c478bd9Sstevel@tonic-gate if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) { 32187c478bd9Sstevel@tonic-gate if ((ap->known & acc) == acc) { 32197c478bd9Sstevel@tonic-gate #ifdef DEBUG 32207c478bd9Sstevel@tonic-gate nfs_access_cache_hits++; 32217c478bd9Sstevel@tonic-gate #endif 32227c478bd9Sstevel@tonic-gate if 
((ap->allowed & acc) == acc) 32237c478bd9Sstevel@tonic-gate all = NFS_ACCESS_ALLOWED; 32247c478bd9Sstevel@tonic-gate else 32257c478bd9Sstevel@tonic-gate all = NFS_ACCESS_DENIED; 32267c478bd9Sstevel@tonic-gate } else { 32277c478bd9Sstevel@tonic-gate #ifdef DEBUG 32287c478bd9Sstevel@tonic-gate nfs_access_cache_misses++; 32297c478bd9Sstevel@tonic-gate #endif 32307c478bd9Sstevel@tonic-gate all = NFS_ACCESS_UNKNOWN; 32317c478bd9Sstevel@tonic-gate } 32327c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 32337c478bd9Sstevel@tonic-gate return (all); 32347c478bd9Sstevel@tonic-gate } 32357c478bd9Sstevel@tonic-gate ap = ap->next; 32367c478bd9Sstevel@tonic-gate } 32377c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 32387c478bd9Sstevel@tonic-gate } 32397c478bd9Sstevel@tonic-gate 32407c478bd9Sstevel@tonic-gate #ifdef DEBUG 32417c478bd9Sstevel@tonic-gate nfs_access_cache_misses++; 32427c478bd9Sstevel@tonic-gate #endif 32437c478bd9Sstevel@tonic-gate return (NFS_ACCESS_UNKNOWN); 32447c478bd9Sstevel@tonic-gate } 32457c478bd9Sstevel@tonic-gate 32467c478bd9Sstevel@tonic-gate void 32477c478bd9Sstevel@tonic-gate nfs_access_cache(rnode_t *rp, uint32_t acc, uint32_t resacc, cred_t *cr) 32487c478bd9Sstevel@tonic-gate { 32497c478bd9Sstevel@tonic-gate acache_t *ap; 32507c478bd9Sstevel@tonic-gate acache_t *nap; 32517c478bd9Sstevel@tonic-gate acache_hash_t *hp; 32527c478bd9Sstevel@tonic-gate 32537c478bd9Sstevel@tonic-gate hp = &acache[acachehash(rp, cr)]; 32547c478bd9Sstevel@tonic-gate 32557c478bd9Sstevel@tonic-gate /* 32567c478bd9Sstevel@tonic-gate * Allocate now assuming that mostly an allocation will be 32577c478bd9Sstevel@tonic-gate * required. This allows the allocation to happen without 32587c478bd9Sstevel@tonic-gate * holding the hash bucket locked. 
32597c478bd9Sstevel@tonic-gate */ 32607c478bd9Sstevel@tonic-gate nap = kmem_cache_alloc(acache_cache, KM_NOSLEEP); 32617c478bd9Sstevel@tonic-gate if (nap != NULL) { 32627c478bd9Sstevel@tonic-gate nap->known = acc; 32637c478bd9Sstevel@tonic-gate nap->allowed = resacc; 32647c478bd9Sstevel@tonic-gate nap->rnode = rp; 32657c478bd9Sstevel@tonic-gate crhold(cr); 32667c478bd9Sstevel@tonic-gate nap->cred = cr; 32677c478bd9Sstevel@tonic-gate nap->hashq = hp; 32687c478bd9Sstevel@tonic-gate } 32697c478bd9Sstevel@tonic-gate 32707c478bd9Sstevel@tonic-gate rw_enter(&hp->lock, RW_WRITER); 32717c478bd9Sstevel@tonic-gate 32727c478bd9Sstevel@tonic-gate if (rp->r_acache != NULL) { 32737c478bd9Sstevel@tonic-gate ap = hp->next; 32747c478bd9Sstevel@tonic-gate while (ap != (acache_t *)hp) { 32757c478bd9Sstevel@tonic-gate if (crcmp(ap->cred, cr) == 0 && ap->rnode == rp) { 32767c478bd9Sstevel@tonic-gate ap->known |= acc; 32777c478bd9Sstevel@tonic-gate ap->allowed &= ~acc; 32787c478bd9Sstevel@tonic-gate ap->allowed |= resacc; 32797c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 32807c478bd9Sstevel@tonic-gate if (nap != NULL) { 32817c478bd9Sstevel@tonic-gate crfree(nap->cred); 32827c478bd9Sstevel@tonic-gate kmem_cache_free(acache_cache, nap); 32837c478bd9Sstevel@tonic-gate } 32847c478bd9Sstevel@tonic-gate return; 32857c478bd9Sstevel@tonic-gate } 32867c478bd9Sstevel@tonic-gate ap = ap->next; 32877c478bd9Sstevel@tonic-gate } 32887c478bd9Sstevel@tonic-gate } 32897c478bd9Sstevel@tonic-gate 32907c478bd9Sstevel@tonic-gate if (nap != NULL) { 32917c478bd9Sstevel@tonic-gate #ifdef DEBUG 32927c478bd9Sstevel@tonic-gate clstat_debug.access.value.ui64++; 32937c478bd9Sstevel@tonic-gate #endif 32947c478bd9Sstevel@tonic-gate nap->next = hp->next; 32957c478bd9Sstevel@tonic-gate hp->next = nap; 32967c478bd9Sstevel@tonic-gate nap->next->prev = nap; 32977c478bd9Sstevel@tonic-gate nap->prev = (acache_t *)hp; 32987c478bd9Sstevel@tonic-gate 32997c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 
33007c478bd9Sstevel@tonic-gate nap->list = rp->r_acache; 33017c478bd9Sstevel@tonic-gate rp->r_acache = nap; 33027c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33037c478bd9Sstevel@tonic-gate } 33047c478bd9Sstevel@tonic-gate 33057c478bd9Sstevel@tonic-gate rw_exit(&hp->lock); 33067c478bd9Sstevel@tonic-gate } 33077c478bd9Sstevel@tonic-gate 33087c478bd9Sstevel@tonic-gate int 33097c478bd9Sstevel@tonic-gate nfs_access_purge_rp(rnode_t *rp) 33107c478bd9Sstevel@tonic-gate { 33117c478bd9Sstevel@tonic-gate acache_t *ap; 33127c478bd9Sstevel@tonic-gate acache_t *tmpap; 33137c478bd9Sstevel@tonic-gate acache_t *rplist; 33147c478bd9Sstevel@tonic-gate 33157c478bd9Sstevel@tonic-gate /* 33167c478bd9Sstevel@tonic-gate * If there aren't any cached entries, then there is nothing 33177c478bd9Sstevel@tonic-gate * to free. 33187c478bd9Sstevel@tonic-gate */ 33197c478bd9Sstevel@tonic-gate if (rp->r_acache == NULL) 33207c478bd9Sstevel@tonic-gate return (0); 33217c478bd9Sstevel@tonic-gate 33227c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock); 33237c478bd9Sstevel@tonic-gate rplist = rp->r_acache; 33247c478bd9Sstevel@tonic-gate rp->r_acache = NULL; 33257c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock); 33267c478bd9Sstevel@tonic-gate 33277c478bd9Sstevel@tonic-gate /* 33287c478bd9Sstevel@tonic-gate * Loop through each entry in the list pointed to in the 33297c478bd9Sstevel@tonic-gate * rnode. Remove each of these entries from the hash 33307c478bd9Sstevel@tonic-gate * queue that it is on and remove it from the list in 33317c478bd9Sstevel@tonic-gate * the rnode. 
33327c478bd9Sstevel@tonic-gate */ 33337c478bd9Sstevel@tonic-gate for (ap = rplist; ap != NULL; ap = tmpap) { 33347c478bd9Sstevel@tonic-gate rw_enter(&ap->hashq->lock, RW_WRITER); 33357c478bd9Sstevel@tonic-gate ap->prev->next = ap->next; 33367c478bd9Sstevel@tonic-gate ap->next->prev = ap->prev; 33377c478bd9Sstevel@tonic-gate rw_exit(&ap->hashq->lock); 33387c478bd9Sstevel@tonic-gate 33397c478bd9Sstevel@tonic-gate tmpap = ap->list; 33407c478bd9Sstevel@tonic-gate crfree(ap->cred); 33417c478bd9Sstevel@tonic-gate kmem_cache_free(acache_cache, ap); 33427c478bd9Sstevel@tonic-gate #ifdef DEBUG 33437c478bd9Sstevel@tonic-gate clstat_debug.access.value.ui64--; 33447c478bd9Sstevel@tonic-gate #endif 33457c478bd9Sstevel@tonic-gate } 33467c478bd9Sstevel@tonic-gate 33477c478bd9Sstevel@tonic-gate return (1); 33487c478bd9Sstevel@tonic-gate } 33497c478bd9Sstevel@tonic-gate 33507c478bd9Sstevel@tonic-gate static const char prefix[] = ".nfs"; 33517c478bd9Sstevel@tonic-gate 33527c478bd9Sstevel@tonic-gate static kmutex_t newnum_lock; 33537c478bd9Sstevel@tonic-gate 33547c478bd9Sstevel@tonic-gate int 33557c478bd9Sstevel@tonic-gate newnum(void) 33567c478bd9Sstevel@tonic-gate { 33577c478bd9Sstevel@tonic-gate static uint_t newnum = 0; 33587c478bd9Sstevel@tonic-gate uint_t id; 33597c478bd9Sstevel@tonic-gate 33607c478bd9Sstevel@tonic-gate mutex_enter(&newnum_lock); 33617c478bd9Sstevel@tonic-gate if (newnum == 0) 33627c478bd9Sstevel@tonic-gate newnum = gethrestime_sec() & 0xffff; 33637c478bd9Sstevel@tonic-gate id = newnum++; 33647c478bd9Sstevel@tonic-gate mutex_exit(&newnum_lock); 33657c478bd9Sstevel@tonic-gate return (id); 33667c478bd9Sstevel@tonic-gate } 33677c478bd9Sstevel@tonic-gate 33687c478bd9Sstevel@tonic-gate char * 33697c478bd9Sstevel@tonic-gate newname(void) 33707c478bd9Sstevel@tonic-gate { 33717c478bd9Sstevel@tonic-gate char *news; 33727c478bd9Sstevel@tonic-gate char *s; 33737c478bd9Sstevel@tonic-gate const char *p; 33747c478bd9Sstevel@tonic-gate uint_t id; 
33757c478bd9Sstevel@tonic-gate 33767c478bd9Sstevel@tonic-gate id = newnum(); 33777c478bd9Sstevel@tonic-gate news = kmem_alloc(MAXNAMELEN, KM_SLEEP); 33787c478bd9Sstevel@tonic-gate s = news; 33797c478bd9Sstevel@tonic-gate p = prefix; 33807c478bd9Sstevel@tonic-gate while (*p != '\0') 33817c478bd9Sstevel@tonic-gate *s++ = *p++; 33827c478bd9Sstevel@tonic-gate while (id != 0) { 33837c478bd9Sstevel@tonic-gate *s++ = "0123456789ABCDEF"[id & 0x0f]; 33847c478bd9Sstevel@tonic-gate id >>= 4; 33857c478bd9Sstevel@tonic-gate } 33867c478bd9Sstevel@tonic-gate *s = '\0'; 33877c478bd9Sstevel@tonic-gate return (news); 33887c478bd9Sstevel@tonic-gate } 33897c478bd9Sstevel@tonic-gate 33907c478bd9Sstevel@tonic-gate /* 33917c478bd9Sstevel@tonic-gate * Snapshot callback for nfs:0:nfs_client as registered with the kstat 33927c478bd9Sstevel@tonic-gate * framework. 33937c478bd9Sstevel@tonic-gate */ 33947c478bd9Sstevel@tonic-gate static int 33957c478bd9Sstevel@tonic-gate cl_snapshot(kstat_t *ksp, void *buf, int rw) 33967c478bd9Sstevel@tonic-gate { 33977c478bd9Sstevel@tonic-gate ksp->ks_snaptime = gethrtime(); 33987c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE) { 33997c478bd9Sstevel@tonic-gate bcopy(buf, ksp->ks_private, sizeof (clstat_tmpl)); 34007c478bd9Sstevel@tonic-gate #ifdef DEBUG 34017c478bd9Sstevel@tonic-gate /* 34027c478bd9Sstevel@tonic-gate * Currently only the global zone can write to kstats, but we 34037c478bd9Sstevel@tonic-gate * add the check just for paranoia. 
34047c478bd9Sstevel@tonic-gate */ 34057c478bd9Sstevel@tonic-gate if (INGLOBALZONE(curproc)) 34067c478bd9Sstevel@tonic-gate bcopy((char *)buf + sizeof (clstat_tmpl), &clstat_debug, 34077c478bd9Sstevel@tonic-gate sizeof (clstat_debug)); 34087c478bd9Sstevel@tonic-gate #endif 34097c478bd9Sstevel@tonic-gate } else { 34107c478bd9Sstevel@tonic-gate bcopy(ksp->ks_private, buf, sizeof (clstat_tmpl)); 34117c478bd9Sstevel@tonic-gate #ifdef DEBUG 34127c478bd9Sstevel@tonic-gate /* 34137c478bd9Sstevel@tonic-gate * If we're displaying the "global" debug kstat values, we 34147c478bd9Sstevel@tonic-gate * display them as-is to all zones since in fact they apply to 34157c478bd9Sstevel@tonic-gate * the system as a whole. 34167c478bd9Sstevel@tonic-gate */ 34177c478bd9Sstevel@tonic-gate bcopy(&clstat_debug, (char *)buf + sizeof (clstat_tmpl), 34187c478bd9Sstevel@tonic-gate sizeof (clstat_debug)); 34197c478bd9Sstevel@tonic-gate #endif 34207c478bd9Sstevel@tonic-gate } 34217c478bd9Sstevel@tonic-gate return (0); 34227c478bd9Sstevel@tonic-gate } 34237c478bd9Sstevel@tonic-gate 34247c478bd9Sstevel@tonic-gate static void * 34257c478bd9Sstevel@tonic-gate clinit_zone(zoneid_t zoneid) 34267c478bd9Sstevel@tonic-gate { 34277c478bd9Sstevel@tonic-gate kstat_t *nfs_client_kstat; 34287c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 34297c478bd9Sstevel@tonic-gate uint_t ndata; 34307c478bd9Sstevel@tonic-gate 34317c478bd9Sstevel@tonic-gate nfscl = kmem_alloc(sizeof (*nfscl), KM_SLEEP); 34327c478bd9Sstevel@tonic-gate mutex_init(&nfscl->nfscl_chtable_lock, NULL, MUTEX_DEFAULT, NULL); 34337c478bd9Sstevel@tonic-gate nfscl->nfscl_chtable = NULL; 34347c478bd9Sstevel@tonic-gate nfscl->nfscl_zoneid = zoneid; 34357c478bd9Sstevel@tonic-gate 34367c478bd9Sstevel@tonic-gate bcopy(&clstat_tmpl, &nfscl->nfscl_stat, sizeof (clstat_tmpl)); 34377c478bd9Sstevel@tonic-gate ndata = sizeof (clstat_tmpl) / sizeof (kstat_named_t); 34387c478bd9Sstevel@tonic-gate #ifdef DEBUG 34397c478bd9Sstevel@tonic-gate ndata += sizeof 
(clstat_debug) / sizeof (kstat_named_t); 34407c478bd9Sstevel@tonic-gate #endif 34417c478bd9Sstevel@tonic-gate if ((nfs_client_kstat = kstat_create_zone("nfs", 0, "nfs_client", 34427c478bd9Sstevel@tonic-gate "misc", KSTAT_TYPE_NAMED, ndata, 34437c478bd9Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, zoneid)) != NULL) { 34447c478bd9Sstevel@tonic-gate nfs_client_kstat->ks_private = &nfscl->nfscl_stat; 34457c478bd9Sstevel@tonic-gate nfs_client_kstat->ks_snapshot = cl_snapshot; 34467c478bd9Sstevel@tonic-gate kstat_install(nfs_client_kstat); 34477c478bd9Sstevel@tonic-gate } 34487c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 34497c478bd9Sstevel@tonic-gate list_insert_head(&nfs_clnt_list, nfscl); 34507c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 34517c478bd9Sstevel@tonic-gate return (nfscl); 34527c478bd9Sstevel@tonic-gate } 34537c478bd9Sstevel@tonic-gate 34547c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 34557c478bd9Sstevel@tonic-gate static void 34567c478bd9Sstevel@tonic-gate clfini_zone(zoneid_t zoneid, void *arg) 34577c478bd9Sstevel@tonic-gate { 34587c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl = arg; 34597c478bd9Sstevel@tonic-gate chhead_t *chp, *next; 34607c478bd9Sstevel@tonic-gate 34617c478bd9Sstevel@tonic-gate if (nfscl == NULL) 34627c478bd9Sstevel@tonic-gate return; 34637c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 34647c478bd9Sstevel@tonic-gate list_remove(&nfs_clnt_list, nfscl); 34657c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 34667c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, 0); 34677c478bd9Sstevel@tonic-gate for (chp = nfscl->nfscl_chtable; chp != NULL; chp = next) { 34687c478bd9Sstevel@tonic-gate ASSERT(chp->ch_list == NULL); 34697c478bd9Sstevel@tonic-gate kmem_free(chp->ch_protofmly, strlen(chp->ch_protofmly) + 1); 34707c478bd9Sstevel@tonic-gate next = chp->ch_next; 34717c478bd9Sstevel@tonic-gate kmem_free(chp, sizeof (*chp)); 34727c478bd9Sstevel@tonic-gate } 34737c478bd9Sstevel@tonic-gate 
kstat_delete_byname_zone("nfs", 0, "nfs_client", zoneid); 34747c478bd9Sstevel@tonic-gate mutex_destroy(&nfscl->nfscl_chtable_lock); 34757c478bd9Sstevel@tonic-gate kmem_free(nfscl, sizeof (*nfscl)); 34767c478bd9Sstevel@tonic-gate } 34777c478bd9Sstevel@tonic-gate 34787c478bd9Sstevel@tonic-gate /* 34797c478bd9Sstevel@tonic-gate * Called by endpnt_destructor to make sure the client handles are 34807c478bd9Sstevel@tonic-gate * cleaned up before the RPC endpoints. This becomes a no-op if 34817c478bd9Sstevel@tonic-gate * clfini_zone (above) is called first. This function is needed 34827c478bd9Sstevel@tonic-gate * (rather than relying on clfini_zone to clean up) because the ZSD 34837c478bd9Sstevel@tonic-gate * callbacks have no ordering mechanism, so we have no way to ensure 34847c478bd9Sstevel@tonic-gate * that clfini_zone is called before endpnt_destructor. 34857c478bd9Sstevel@tonic-gate */ 34867c478bd9Sstevel@tonic-gate void 34877c478bd9Sstevel@tonic-gate clcleanup_zone(zoneid_t zoneid) 34887c478bd9Sstevel@tonic-gate { 34897c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl; 34907c478bd9Sstevel@tonic-gate 34917c478bd9Sstevel@tonic-gate mutex_enter(&nfs_clnt_list_lock); 34927c478bd9Sstevel@tonic-gate nfscl = list_head(&nfs_clnt_list); 34937c478bd9Sstevel@tonic-gate for (; nfscl != NULL; nfscl = list_next(&nfs_clnt_list, nfscl)) { 34947c478bd9Sstevel@tonic-gate if (nfscl->nfscl_zoneid == zoneid) { 34957c478bd9Sstevel@tonic-gate clreclaim_zone(nfscl, 0); 34967c478bd9Sstevel@tonic-gate break; 34977c478bd9Sstevel@tonic-gate } 34987c478bd9Sstevel@tonic-gate } 34997c478bd9Sstevel@tonic-gate mutex_exit(&nfs_clnt_list_lock); 35007c478bd9Sstevel@tonic-gate } 35017c478bd9Sstevel@tonic-gate 35027c478bd9Sstevel@tonic-gate int 35037c478bd9Sstevel@tonic-gate nfs_subrinit(void) 35047c478bd9Sstevel@tonic-gate { 35057c478bd9Sstevel@tonic-gate int i; 35067c478bd9Sstevel@tonic-gate ulong_t nrnode_max; 35077c478bd9Sstevel@tonic-gate 35087c478bd9Sstevel@tonic-gate /* 
35097c478bd9Sstevel@tonic-gate * Allocate and initialize the rnode hash queues 35107c478bd9Sstevel@tonic-gate */ 35117c478bd9Sstevel@tonic-gate if (nrnode <= 0) 35127c478bd9Sstevel@tonic-gate nrnode = ncsize; 35137c478bd9Sstevel@tonic-gate nrnode_max = (ulong_t)((kmem_maxavail() >> 2) / sizeof (struct rnode)); 35147c478bd9Sstevel@tonic-gate if (nrnode > nrnode_max || (nrnode == 0 && ncsize == 0)) { 35157c478bd9Sstevel@tonic-gate zcmn_err(GLOBAL_ZONEID, CE_NOTE, 3516f5654033SAlexander Eremin "!setting nrnode to max value of %ld", nrnode_max); 35177c478bd9Sstevel@tonic-gate nrnode = nrnode_max; 35187c478bd9Sstevel@tonic-gate } 35197c478bd9Sstevel@tonic-gate 35207c478bd9Sstevel@tonic-gate rtablesize = 1 << highbit(nrnode / hashlen); 35217c478bd9Sstevel@tonic-gate rtablemask = rtablesize - 1; 35227c478bd9Sstevel@tonic-gate rtable = kmem_alloc(rtablesize * sizeof (*rtable), KM_SLEEP); 35237c478bd9Sstevel@tonic-gate for (i = 0; i < rtablesize; i++) { 35247c478bd9Sstevel@tonic-gate rtable[i].r_hashf = (rnode_t *)(&rtable[i]); 35257c478bd9Sstevel@tonic-gate rtable[i].r_hashb = (rnode_t *)(&rtable[i]); 35267c478bd9Sstevel@tonic-gate rw_init(&rtable[i].r_lock, NULL, RW_DEFAULT, NULL); 35277c478bd9Sstevel@tonic-gate } 35287c478bd9Sstevel@tonic-gate rnode_cache = kmem_cache_create("rnode_cache", sizeof (rnode_t), 35297c478bd9Sstevel@tonic-gate 0, NULL, NULL, nfs_reclaim, NULL, NULL, 0); 35307c478bd9Sstevel@tonic-gate 35317c478bd9Sstevel@tonic-gate /* 35327c478bd9Sstevel@tonic-gate * Allocate and initialize the access cache 35337c478bd9Sstevel@tonic-gate */ 35347c478bd9Sstevel@tonic-gate 35357c478bd9Sstevel@tonic-gate /* 35367c478bd9Sstevel@tonic-gate * Initial guess is one access cache entry per rnode unless 35377c478bd9Sstevel@tonic-gate * nacache is set to a non-zero value and then it is used to 35387c478bd9Sstevel@tonic-gate * indicate a guess at the number of access cache entries. 
35397c478bd9Sstevel@tonic-gate */ 35407c478bd9Sstevel@tonic-gate if (nacache > 0) 35417c478bd9Sstevel@tonic-gate acachesize = 1 << highbit(nacache / hashlen); 35427c478bd9Sstevel@tonic-gate else 35437c478bd9Sstevel@tonic-gate acachesize = rtablesize; 35447c478bd9Sstevel@tonic-gate acachemask = acachesize - 1; 35457c478bd9Sstevel@tonic-gate acache = kmem_alloc(acachesize * sizeof (*acache), KM_SLEEP); 35467c478bd9Sstevel@tonic-gate for (i = 0; i < acachesize; i++) { 35477c478bd9Sstevel@tonic-gate acache[i].next = (acache_t *)&acache[i]; 35487c478bd9Sstevel@tonic-gate acache[i].prev = (acache_t *)&acache[i]; 35497c478bd9Sstevel@tonic-gate rw_init(&acache[i].lock, NULL, RW_DEFAULT, NULL); 35507c478bd9Sstevel@tonic-gate } 35517c478bd9Sstevel@tonic-gate acache_cache = kmem_cache_create("nfs_access_cache", 35527c478bd9Sstevel@tonic-gate sizeof (acache_t), 0, NULL, NULL, NULL, NULL, NULL, 0); 35537c478bd9Sstevel@tonic-gate /* 35547c478bd9Sstevel@tonic-gate * Allocate and initialize the client handle cache 35557c478bd9Sstevel@tonic-gate */ 35567c478bd9Sstevel@tonic-gate chtab_cache = kmem_cache_create("client_handle_cache", 35577106075aSmarks sizeof (struct chtab), 0, NULL, NULL, clreclaim, NULL, NULL, 0); 35587c478bd9Sstevel@tonic-gate /* 35597c478bd9Sstevel@tonic-gate * Initialize the list of per-zone client handles (and associated data). 35607c478bd9Sstevel@tonic-gate * This needs to be done before we call zone_key_create(). 35617c478bd9Sstevel@tonic-gate */ 35627c478bd9Sstevel@tonic-gate list_create(&nfs_clnt_list, sizeof (struct nfs_clnt), 35637c478bd9Sstevel@tonic-gate offsetof(struct nfs_clnt, nfscl_node)); 35647c478bd9Sstevel@tonic-gate /* 35657c478bd9Sstevel@tonic-gate * Initialize the zone_key for per-zone client handle lists. 
35667c478bd9Sstevel@tonic-gate */ 35677c478bd9Sstevel@tonic-gate zone_key_create(&nfsclnt_zone_key, clinit_zone, NULL, clfini_zone); 35687c478bd9Sstevel@tonic-gate /* 35697c478bd9Sstevel@tonic-gate * Initialize the various mutexes and reader/writer locks 35707c478bd9Sstevel@tonic-gate */ 35717c478bd9Sstevel@tonic-gate mutex_init(&rpfreelist_lock, NULL, MUTEX_DEFAULT, NULL); 35727c478bd9Sstevel@tonic-gate mutex_init(&newnum_lock, NULL, MUTEX_DEFAULT, NULL); 35737c478bd9Sstevel@tonic-gate mutex_init(&nfs_minor_lock, NULL, MUTEX_DEFAULT, NULL); 35747c478bd9Sstevel@tonic-gate 35757c478bd9Sstevel@tonic-gate /* 35767c478bd9Sstevel@tonic-gate * Assign unique major number for all nfs mounts 35777c478bd9Sstevel@tonic-gate */ 35787c478bd9Sstevel@tonic-gate if ((nfs_major = getudev()) == -1) { 35797c478bd9Sstevel@tonic-gate zcmn_err(GLOBAL_ZONEID, CE_WARN, 35807c478bd9Sstevel@tonic-gate "nfs: init: can't get unique device number"); 35817c478bd9Sstevel@tonic-gate nfs_major = 0; 35827c478bd9Sstevel@tonic-gate } 35837c478bd9Sstevel@tonic-gate nfs_minor = 0; 35847c478bd9Sstevel@tonic-gate 35857c478bd9Sstevel@tonic-gate if (nfs3_jukebox_delay == 0) 35867c478bd9Sstevel@tonic-gate nfs3_jukebox_delay = NFS3_JUKEBOX_DELAY; 35877c478bd9Sstevel@tonic-gate 35887c478bd9Sstevel@tonic-gate return (0); 35897c478bd9Sstevel@tonic-gate } 35907c478bd9Sstevel@tonic-gate 35917c478bd9Sstevel@tonic-gate void 35927c478bd9Sstevel@tonic-gate nfs_subrfini(void) 35937c478bd9Sstevel@tonic-gate { 35947c478bd9Sstevel@tonic-gate int i; 35957c478bd9Sstevel@tonic-gate 35967c478bd9Sstevel@tonic-gate /* 35977c478bd9Sstevel@tonic-gate * Deallocate the rnode hash queues 35987c478bd9Sstevel@tonic-gate */ 35997c478bd9Sstevel@tonic-gate kmem_cache_destroy(rnode_cache); 36007c478bd9Sstevel@tonic-gate 36017c478bd9Sstevel@tonic-gate for (i = 0; i < rtablesize; i++) 36027c478bd9Sstevel@tonic-gate rw_destroy(&rtable[i].r_lock); 36037c478bd9Sstevel@tonic-gate kmem_free(rtable, rtablesize * sizeof (*rtable)); 
36047c478bd9Sstevel@tonic-gate 36057c478bd9Sstevel@tonic-gate /* 36067c478bd9Sstevel@tonic-gate * Deallocated the access cache 36077c478bd9Sstevel@tonic-gate */ 36087c478bd9Sstevel@tonic-gate kmem_cache_destroy(acache_cache); 36097c478bd9Sstevel@tonic-gate 36107c478bd9Sstevel@tonic-gate for (i = 0; i < acachesize; i++) 36117c478bd9Sstevel@tonic-gate rw_destroy(&acache[i].lock); 36127c478bd9Sstevel@tonic-gate kmem_free(acache, acachesize * sizeof (*acache)); 36137c478bd9Sstevel@tonic-gate 36147c478bd9Sstevel@tonic-gate /* 36157c478bd9Sstevel@tonic-gate * Deallocate the client handle cache 36167c478bd9Sstevel@tonic-gate */ 36177c478bd9Sstevel@tonic-gate kmem_cache_destroy(chtab_cache); 36187c478bd9Sstevel@tonic-gate 36197c478bd9Sstevel@tonic-gate /* 36207c478bd9Sstevel@tonic-gate * Destroy the various mutexes and reader/writer locks 36217c478bd9Sstevel@tonic-gate */ 36227c478bd9Sstevel@tonic-gate mutex_destroy(&rpfreelist_lock); 36237c478bd9Sstevel@tonic-gate mutex_destroy(&newnum_lock); 36247c478bd9Sstevel@tonic-gate mutex_destroy(&nfs_minor_lock); 36257c478bd9Sstevel@tonic-gate (void) zone_key_delete(nfsclnt_zone_key); 36267c478bd9Sstevel@tonic-gate } 36277c478bd9Sstevel@tonic-gate 36287c478bd9Sstevel@tonic-gate enum nfsstat 36297c478bd9Sstevel@tonic-gate puterrno(int error) 36307c478bd9Sstevel@tonic-gate { 36317c478bd9Sstevel@tonic-gate 36327c478bd9Sstevel@tonic-gate switch (error) { 36337c478bd9Sstevel@tonic-gate case EOPNOTSUPP: 36347c478bd9Sstevel@tonic-gate return (NFSERR_OPNOTSUPP); 36357c478bd9Sstevel@tonic-gate case ENAMETOOLONG: 36367c478bd9Sstevel@tonic-gate return (NFSERR_NAMETOOLONG); 36377c478bd9Sstevel@tonic-gate case ENOTEMPTY: 36387c478bd9Sstevel@tonic-gate return (NFSERR_NOTEMPTY); 36397c478bd9Sstevel@tonic-gate case EDQUOT: 36407c478bd9Sstevel@tonic-gate return (NFSERR_DQUOT); 36417c478bd9Sstevel@tonic-gate case ESTALE: 36427c478bd9Sstevel@tonic-gate return (NFSERR_STALE); 36437c478bd9Sstevel@tonic-gate case EREMOTE: 36447c478bd9Sstevel@tonic-gate 
		return (NFSERR_REMOTE);
	case ENOSYS:
		return (NFSERR_OPNOTSUPP);
	case EOVERFLOW:
		return (NFSERR_INVAL);
	default:
		return ((enum nfsstat)error);
	}
	/* NOTREACHED */
}

/*
 * Map an NFS version 2 status code to a local errno.  Only the status
 * values with no identically-numbered errno are translated; everything
 * else passes through numerically unchanged.
 */
int
geterrno(enum nfsstat status)
{

	switch (status) {
	case NFSERR_OPNOTSUPP:
		return (EOPNOTSUPP);
	case NFSERR_NAMETOOLONG:
		return (ENAMETOOLONG);
	case NFSERR_NOTEMPTY:
		return (ENOTEMPTY);
	case NFSERR_DQUOT:
		return (EDQUOT);
	case NFSERR_STALE:
		return (ESTALE);
	case NFSERR_REMOTE:
		return (EREMOTE);
	case NFSERR_WFLUSH:
		return (EIO);
	default:
		return ((int)status);
	}
	/* NOTREACHED */
}

/*
 * Map a local errno to an NFS version 3 status code.
 *
 * On DEBUG kernels every expected errno is enumerated and an unexpected
 * value is reported via zcmn_err() before being passed through; on
 * non-DEBUG kernels only the values whose numbers differ from their
 * nfsstat3 counterparts are translated and the rest pass through
 * numerically.
 */
enum nfsstat3
puterrno3(int error)
{

#ifdef DEBUG
	switch (error) {
	case 0:
		return (NFS3_OK);
	case EPERM:
		return (NFS3ERR_PERM);
	case ENOENT:
		return (NFS3ERR_NOENT);
	case EIO:
		return (NFS3ERR_IO);
	case ENXIO:
		return (NFS3ERR_NXIO);
	case EACCES:
		return (NFS3ERR_ACCES);
	case EEXIST:
		return (NFS3ERR_EXIST);
	case EXDEV:
		return (NFS3ERR_XDEV);
	case ENODEV:
		return (NFS3ERR_NODEV);
	case ENOTDIR:
		return (NFS3ERR_NOTDIR);
	case EISDIR:
		return (NFS3ERR_ISDIR);
	case EINVAL:
		return (NFS3ERR_INVAL);
	case EFBIG:
		return (NFS3ERR_FBIG);
	case ENOSPC:
		return (NFS3ERR_NOSPC);
	case EROFS:
		return (NFS3ERR_ROFS);
	case EMLINK:
		return (NFS3ERR_MLINK);
	case ENAMETOOLONG:
		return (NFS3ERR_NAMETOOLONG);
	case ENOTEMPTY:
		return (NFS3ERR_NOTEMPTY);
	case EDQUOT:
		return (NFS3ERR_DQUOT);
	case ESTALE:
		return (NFS3ERR_STALE);
	case EREMOTE:
		return (NFS3ERR_REMOTE);
	case ENOSYS:
	case EOPNOTSUPP:
		return (NFS3ERR_NOTSUPP);
	case EOVERFLOW:
		return (NFS3ERR_INVAL);
	default:
		/* Unexpected errno: warn so the mapping table can be fixed */
		zcmn_err(getzoneid(), CE_WARN,
		    "puterrno3: got error %d", error);
		return ((enum nfsstat3)error);
	}
#else
	switch (error) {
	case ENAMETOOLONG:
		return (NFS3ERR_NAMETOOLONG);
	case ENOTEMPTY:
		return (NFS3ERR_NOTEMPTY);
	case EDQUOT:
		return (NFS3ERR_DQUOT);
	case ESTALE:
		return (NFS3ERR_STALE);
	case ENOSYS:
	case EOPNOTSUPP:
		return (NFS3ERR_NOTSUPP);
	case EREMOTE:
		return (NFS3ERR_REMOTE);
	case EOVERFLOW:
		return (NFS3ERR_INVAL);
	default:
		return ((enum nfsstat3)error);
	}
#endif
}

/*
 * Map an NFS version 3 status code to a local errno (inverse of
 * puterrno3()).  Protocol-only status values with no errno equivalent
 * (BADHANDLE, NOT_SYNC, BAD_COOKIE, TOOSMALL, SERVERFAULT, BADTYPE,
 * JUKEBOX) are folded onto the closest errno.  As with puterrno3(),
 * the DEBUG build enumerates everything and warns on unknown values.
 */
int
geterrno3(enum nfsstat3 status)
{

#ifdef DEBUG
	switch (status) {
	case NFS3_OK:
		return (0);
	case NFS3ERR_PERM:
		return (EPERM);
	case NFS3ERR_NOENT:
		return (ENOENT);
	case NFS3ERR_IO:
		return (EIO);
	case NFS3ERR_NXIO:
		return (ENXIO);
	case NFS3ERR_ACCES:
		return (EACCES);
	case NFS3ERR_EXIST:
		return (EEXIST);
	case NFS3ERR_XDEV:
		return (EXDEV);
	case NFS3ERR_NODEV:
		return (ENODEV);
	case NFS3ERR_NOTDIR:
		return (ENOTDIR);
	case NFS3ERR_ISDIR:
		return (EISDIR);
	case NFS3ERR_INVAL:
		return (EINVAL);
	case NFS3ERR_FBIG:
		return (EFBIG);
	case NFS3ERR_NOSPC:
		return (ENOSPC);
	case NFS3ERR_ROFS:
		return (EROFS);
	case NFS3ERR_MLINK:
		return (EMLINK);
	case NFS3ERR_NAMETOOLONG:
		return (ENAMETOOLONG);
	case NFS3ERR_NOTEMPTY:
		return (ENOTEMPTY);
	case NFS3ERR_DQUOT:
		return (EDQUOT);
	case NFS3ERR_STALE:
		return (ESTALE);
	case NFS3ERR_REMOTE:
		return (EREMOTE);
	case NFS3ERR_BADHANDLE:
		return (ESTALE);
	case NFS3ERR_NOT_SYNC:
		return (EINVAL);
	case NFS3ERR_BAD_COOKIE:
		return (ENOENT);
	case NFS3ERR_NOTSUPP:
		return (EOPNOTSUPP);
	case NFS3ERR_TOOSMALL:
		return (EINVAL);
	case NFS3ERR_SERVERFAULT:
		return (EIO);
	case NFS3ERR_BADTYPE:
		return (EINVAL);
	case NFS3ERR_JUKEBOX:
		return (ENXIO);
	default:
		/* Unexpected status: warn so the mapping table can be fixed */
		zcmn_err(getzoneid(), CE_WARN,
		    "geterrno3: got status %d", status);
		return ((int)status);
	}
#else
	switch (status) {
	case NFS3ERR_NAMETOOLONG:
		return (ENAMETOOLONG);
	case NFS3ERR_NOTEMPTY:
		return (ENOTEMPTY);
	case NFS3ERR_DQUOT:
		return (EDQUOT);
	case NFS3ERR_STALE:
	case NFS3ERR_BADHANDLE:
		return (ESTALE);
	case NFS3ERR_NOTSUPP:
		return (EOPNOTSUPP);
	case NFS3ERR_REMOTE:
		return (EREMOTE);
	case NFS3ERR_NOT_SYNC:
	case NFS3ERR_TOOSMALL:
	case NFS3ERR_BADTYPE:
		return (EINVAL);
	case NFS3ERR_BAD_COOKIE:
		return (ENOENT);
	case NFS3ERR_SERVERFAULT:
		return (EIO);
	case NFS3ERR_JUKEBOX:
		return (ENXIO);
	default:
		return ((int)status);
	}
#endif
}

/*
 * Allocate and initialize a readdir cache entry.  The entry starts with
 * a reference count of one (owned by the caller); release it with
 * rddir_cache_rele().  May return NULL if kmem_alloc() fails under the
 * given flags (e.g. KM_NOSLEEP).
 */
rddir_cache *
rddir_cache_alloc(int flags)
{
	rddir_cache *rc;

	rc = kmem_alloc(sizeof (*rc), flags);
	if (rc != NULL) {
		rc->entries = NULL;
		rc->flags = RDDIR;
		cv_init(&rc->cv, NULL, CV_DEFAULT, NULL);
		mutex_init(&rc->lock, NULL, MUTEX_DEFAULT, NULL);
		rc->count = 1;
#ifdef DEBUG
		/* DEBUG kernels count live rddir_cache entries */
		atomic_inc_64(&clstat_debug.dirent.value.ui64);
#endif
	}
	return (rc);
}

/*
 * Destroy a readdir cache entry and free its buffer.  Called only from
 * rddir_cache_rele() once the last reference is dropped.
 */
static void
rddir_cache_free(rddir_cache *rc)
{

#ifdef DEBUG
	atomic_dec_64(&clstat_debug.dirent.value.ui64);
#endif
	if (rc->entries != NULL) {
#ifdef DEBUG
		/* DEBUG path also adjusts the dirents byte counter */
		rddir_cache_buf_free(rc->entries, rc->buflen);
#else
		kmem_free(rc->entries, rc->buflen);
#endif
	}
	cv_destroy(&rc->cv);
	mutex_destroy(&rc->lock);
	kmem_free(rc, sizeof (*rc));
}

/*
 * Take an additional reference on a readdir cache entry.
 */
void
rddir_cache_hold(rddir_cache *rc)
{

	mutex_enter(&rc->lock);
	rc->count++;
	mutex_exit(&rc->lock);
}

/*
 * Drop a reference on a readdir cache entry; the last release frees it.
 * The lock is dropped before rddir_cache_free() since free destroys the
 * mutex itself.
 */
void
rddir_cache_rele(rddir_cache *rc)
{

	mutex_enter(&rc->lock);
	ASSERT(rc->count > 0);
	if (--rc->count == 0) {
		mutex_exit(&rc->lock);
		rddir_cache_free(rc);
	} else
		mutex_exit(&rc->lock);
}

#ifdef DEBUG
/*
 * DEBUG-only wrapper around kmem_alloc() that tracks the total bytes of
 * readdir buffers outstanding in clstat_debug.dirents.
 */
char *
rddir_cache_buf_alloc(size_t size, int flags)
{
	char *rc;

	rc = kmem_alloc(size, flags);
	if (rc != NULL)
		atomic_add_64(&clstat_debug.dirents.value.ui64, size);
	return (rc);
}

/*
 * DEBUG-only counterpart of rddir_cache_buf_alloc().
 */
void
rddir_cache_buf_free(void *addr, size_t size)
{

	atomic_add_64(&clstat_debug.dirents.value.ui64, -(int64_t)size);
	kmem_free(addr, size);
}
#endif

/*
 * Release the cached data (credential, symlink contents, ACL, pathconf,
 * access and readdir caches) hanging off an rnode on the free list.
 * Returns non-zero if anything was freed.  Blocks on r_statelock.
 */
static int
nfs_free_data_reclaim(rnode_t *rp)
{
	char *contents;
	int size;
	vsecattr_t *vsp;
	nfs3_pathconf_info *info;
	int freed;
	cred_t *cred;

	/*
	 * Free any held credentials and caches which
	 * may be associated with this rnode.
	 */
	mutex_enter(&rp->r_statelock);
	cred = rp->r_cred;
	rp->r_cred = NULL;
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	info = rp->r_pathconf;
	rp->r_pathconf = NULL;
	mutex_exit(&rp->r_statelock);

	if (cred != NULL)
		crfree(cred);

	/*
	 * Free the access cache entries.
	 */
	freed = nfs_access_purge_rp(rp);

	if (!HAVE_RDDIR_CACHE(rp) &&
	    contents == NULL &&
	    vsp == NULL &&
	    info == NULL)
		return (freed);

	/*
	 * Free the readdir cache entries
	 */
	if (HAVE_RDDIR_CACHE(rp))
		nfs_purge_rddir_cache(RTOV(rp));

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs_acl_free(vsp);

	/*
	 * Free any cached pathconf information.
	 */
	if (info != NULL)
		kmem_free(info, sizeof (*info));

	return (1);
}

/*
 * Like nfs_free_data_reclaim(), but for in-use rnodes: uses
 * mutex_tryenter() so the reclaim path never blocks on a busy rnode
 * (returns 0 if the lock is contended), and does not release r_cred.
 */
static int
nfs_active_data_reclaim(rnode_t *rp)
{
	char *contents;
	int size;
	vsecattr_t *vsp;
	nfs3_pathconf_info *info;
	int freed;

	/*
	 * Free any held credentials and caches which
	 * may be associated with this rnode.
	 */
	if (!mutex_tryenter(&rp->r_statelock))
		return (0);
	contents = rp->r_symlink.contents;
	size = rp->r_symlink.size;
	rp->r_symlink.contents = NULL;
	vsp = rp->r_secattr;
	rp->r_secattr = NULL;
	info = rp->r_pathconf;
	rp->r_pathconf = NULL;
	mutex_exit(&rp->r_statelock);

	/*
	 * Free the access cache entries.
	 */
	freed = nfs_access_purge_rp(rp);

	if (!HAVE_RDDIR_CACHE(rp) &&
	    contents == NULL &&
	    vsp == NULL &&
	    info == NULL)
		return (freed);

	/*
	 * Free the readdir cache entries
	 */
	if (HAVE_RDDIR_CACHE(rp))
		nfs_purge_rddir_cache(RTOV(rp));

	/*
	 * Free the symbolic link cache.
	 */
	if (contents != NULL) {

		kmem_free((void *)contents, size);
	}

	/*
	 * Free any cached ACL.
	 */
	if (vsp != NULL)
		nfs_acl_free(vsp);

	/*
	 * Free any cached pathconf information.
	 */
	if (info != NULL)
		kmem_free(info, sizeof (*info));

	return (1);
}

/*
 * Walk the rnode free list and release cached data from each entry.
 * Returns non-zero if anything was freed.
 */
static int
nfs_free_reclaim(void)
{
	int freed;
	rnode_t *rp;

#ifdef DEBUG
	clstat_debug.f_reclaim.value.ui64++;
#endif
	freed = 0;
	mutex_enter(&rpfreelist_lock);
	rp = rpfreelist;
	if (rp != NULL) {
		/* rpfreelist is a circular list; stop when we wrap around */
		do {
			if (nfs_free_data_reclaim(rp))
				freed = 1;
		} while ((rp = rp->r_freef) != rpfreelist);
	}
	mutex_exit(&rpfreelist_lock);
	return (freed);
}

/*
 * Walk every rnode hash bucket and release cached data from active
 * rnodes (non-blocking per rnode).  Returns non-zero if anything was
 * freed.
 */
static int
nfs_active_reclaim(void)
{
	int freed;
	int index;
	rnode_t *rp;

#ifdef DEBUG
	clstat_debug.a_reclaim.value.ui64++;
#endif
	freed = 0;
	for (index = 0; index < rtablesize; index++) {
		rw_enter(&rtable[index].r_lock, RW_READER);
		for (rp = rtable[index].r_hashf;
		    rp != (rnode_t *)(&rtable[index]);
		    rp = rp->r_hashf) {
			if (nfs_active_data_reclaim(rp))
				freed = 1;
		}
		rw_exit(&rtable[index].r_lock);
	}
	return (freed);
}

/*
 * Tear down rnodes sitting on the free list: unhash each one (unless it
 * regained users while unlocked) and hand it to rp_addfree(), which
 * performs the actual destruction safely.
 *
 * NOTE(review): "freed" is initialized to 0 and never set before being
 * returned, so this always reports 0; the sole caller ignores the
 * result — confirm before relying on the return value.
 */
static int
nfs_rnode_reclaim(void)
{
	int freed;
	rnode_t *rp;
	vnode_t *vp;

#ifdef DEBUG
	clstat_debug.r_reclaim.value.ui64++;
#endif
	freed = 0;
	mutex_enter(&rpfreelist_lock);
	while ((rp = rpfreelist) != NULL) {
		rp_rmfree(rp);
		mutex_exit(&rpfreelist_lock);
		if (rp->r_flags & RHASHED) {
			vp = RTOV(rp);
			rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
			mutex_enter(&vp->v_lock);
			if (vp->v_count > 1) {
				/* vnode regained users; leave it hashed */
				VN_RELE_LOCKED(vp);
				mutex_exit(&vp->v_lock);
				rw_exit(&rp->r_hashq->r_lock);
				mutex_enter(&rpfreelist_lock);
				continue;
			}
			mutex_exit(&vp->v_lock);
			rp_rmhash_locked(rp);
			rw_exit(&rp->r_hashq->r_lock);
		}
		/*
		 * This call to rp_addfree will end up destroying the
		 * rnode, but in a safe way with the appropriate set
		 * of checks done.
		 */
		rp_addfree(rp, CRED());
		mutex_enter(&rpfreelist_lock);
	}
	mutex_exit(&rpfreelist_lock);
	return (freed);
}

/*
 * Kmem cache reclaim callback: shed NFS client memory under pressure,
 * trying the cheapest strategy first and stopping as soon as one of
 * them reports progress.
 */
/*ARGSUSED*/
static void
nfs_reclaim(void *cdrarg)
{

#ifdef DEBUG
	clstat_debug.reclaim.value.ui64++;
#endif
	if (nfs_free_reclaim())
		return;

	if (nfs_active_reclaim())
		return;

	(void) nfs_rnode_reclaim();
}

/*
 * NFS client failover support
 *
 * Routines to copy filehandles
 */
void
nfscopyfh(caddr_t fhp, vnode_t *vp)
{
	fhandle_t *dest = (fhandle_t *)fhp;

	if (dest != NULL)
		*dest = *VTOFH(vp);
}

void
nfs3copyfh(caddr_t fhp, vnode_t *vp)
{
	nfs_fh3 *dest = (nfs_fh3 *)fhp;

	if (dest != NULL)
		*dest = *VTOFH3(vp);
}

/*
 * NFS client failover support
 *
 * failover_safe() will test various conditions to ensure that
 * failover is permitted for this vnode.  It will be denied
 * if:
 *	1) the operation in progress does not support failover (NULL fi)
 *	2) there are no available replicas (NULL mi_servers->sv_next)
 *	3) any locks are outstanding on this file
 */
static int
failover_safe(failinfo_t *fi)
{

	/*
	 * Does this op permit failover?
	 */
	if (fi == NULL || fi->vp == NULL)
		return (0);

	/*
	 * Are there any alternates to failover to?
	 */
	if (VTOMI(fi->vp)->mi_servers->sv_next == NULL)
		return (0);

	/*
	 * Disable check; we've forced local locking
	 *
	 * if (flk_has_remote_locks(fi->vp))
	 *	return (0);
	 */

	/*
	 * If we have no partial path, we can't do anything
	 */
	if (VTOR(fi->vp)->r_path == NULL)
		return (0);

	return (1);
}

#include <sys/thread.h>

/*
 * NFS client failover support
 *
 * failover_newserver() will start a search for a new server,
 * preferably by starting an async thread to do the work.  If
 * someone is already doing this (recognizable by MI_BINDINPROG
 * being set), it will simply return and the calling thread
 * will queue on the mi_failover_cv condition variable.
 */
static void
failover_newserver(mntinfo_t *mi)
{
	/*
	 * Check if someone else is doing this already
	 */
	mutex_enter(&mi->mi_lock);
	if (mi->mi_flags & MI_BINDINPROG) {
		mutex_exit(&mi->mi_lock);
		return;
	}
	mi->mi_flags |= MI_BINDINPROG;

	/*
	 * Need to hold the vfs struct so that it can't be released
	 * while the failover thread is selecting a new server.
	 */
	VFS_HOLD(mi->mi_vfsp);

	/*
	 * Start a thread to do the real searching.
	 */
	(void) zthread_create(NULL, 0, failover_thread, mi, 0, minclsyspri);

	mutex_exit(&mi->mi_lock);
}

/*
 * NFS client failover support
 *
 * failover_thread() will find a new server to replace the one
 * currently in use, wake up other threads waiting on this mount
 * point, and die.  It will start at the head of the server list
 * and poll servers until it finds one with an NFS server which is
 * registered and responds to a NULL procedure ping.
42917c478bd9Sstevel@tonic-gate * 42927c478bd9Sstevel@tonic-gate * XXX failover_thread is unsafe within the scope of the 42937c478bd9Sstevel@tonic-gate * present model defined for cpr to suspend the system. 42947c478bd9Sstevel@tonic-gate * Specifically, over-the-wire calls made by the thread 42957c478bd9Sstevel@tonic-gate * are unsafe. The thread needs to be reevaluated in case of 42967c478bd9Sstevel@tonic-gate * future updates to the cpr suspend model. 42977c478bd9Sstevel@tonic-gate */ 42987c478bd9Sstevel@tonic-gate static void 42997c478bd9Sstevel@tonic-gate failover_thread(mntinfo_t *mi) 43007c478bd9Sstevel@tonic-gate { 43017c478bd9Sstevel@tonic-gate servinfo_t *svp = NULL; 43027c478bd9Sstevel@tonic-gate CLIENT *cl; 43037c478bd9Sstevel@tonic-gate enum clnt_stat status; 43047c478bd9Sstevel@tonic-gate struct timeval tv; 43057c478bd9Sstevel@tonic-gate int error; 43067c478bd9Sstevel@tonic-gate int oncethru = 0; 43077c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo; 43087c478bd9Sstevel@tonic-gate rnode_t *rp; 43097c478bd9Sstevel@tonic-gate int index; 43107c478bd9Sstevel@tonic-gate char *srvnames; 43117c478bd9Sstevel@tonic-gate size_t srvnames_len; 43127c478bd9Sstevel@tonic-gate struct nfs_clnt *nfscl = NULL; 43137c478bd9Sstevel@tonic-gate zoneid_t zoneid = getzoneid(); 43147c478bd9Sstevel@tonic-gate 43157c478bd9Sstevel@tonic-gate #ifdef DEBUG 43167c478bd9Sstevel@tonic-gate /* 43177c478bd9Sstevel@tonic-gate * This is currently only needed to access counters which exist on 43187c478bd9Sstevel@tonic-gate * DEBUG kernels, hence we don't want to pay the penalty of the lookup 43197c478bd9Sstevel@tonic-gate * on non-DEBUG kernels. 
43207c478bd9Sstevel@tonic-gate */ 4321108322fbScarlsonj nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone()); 43227c478bd9Sstevel@tonic-gate ASSERT(nfscl != NULL); 43237c478bd9Sstevel@tonic-gate #endif 43247c478bd9Sstevel@tonic-gate 43257c478bd9Sstevel@tonic-gate /* 43267c478bd9Sstevel@tonic-gate * Its safe to piggyback on the mi_lock since failover_newserver() 43277c478bd9Sstevel@tonic-gate * code guarantees that there will be only one failover thread 43287c478bd9Sstevel@tonic-gate * per mountinfo at any instance. 43297c478bd9Sstevel@tonic-gate */ 43307c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mi->mi_lock, callb_generic_cpr, 43317c478bd9Sstevel@tonic-gate "failover_thread"); 43327c478bd9Sstevel@tonic-gate 43337c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 43347c478bd9Sstevel@tonic-gate while (mi->mi_readers) { 43357c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 43367c478bd9Sstevel@tonic-gate cv_wait(&mi->mi_failover_cv, &mi->mi_lock); 43377c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock); 43387c478bd9Sstevel@tonic-gate } 43397c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 43407c478bd9Sstevel@tonic-gate 43417c478bd9Sstevel@tonic-gate tv.tv_sec = 2; 43427c478bd9Sstevel@tonic-gate tv.tv_usec = 0; 43437c478bd9Sstevel@tonic-gate 43447c478bd9Sstevel@tonic-gate /* 43457c478bd9Sstevel@tonic-gate * Ping the null NFS procedure of every server in 43467c478bd9Sstevel@tonic-gate * the list until one responds. We always start 43477c478bd9Sstevel@tonic-gate * at the head of the list and always skip the one 43487c478bd9Sstevel@tonic-gate * that is current, since it's caused us a problem. 
43497c478bd9Sstevel@tonic-gate */ 43507c478bd9Sstevel@tonic-gate while (svp == NULL) { 43517c478bd9Sstevel@tonic-gate for (svp = mi->mi_servers; svp; svp = svp->sv_next) { 43527c478bd9Sstevel@tonic-gate if (!oncethru && svp == mi->mi_curr_serv) 43537c478bd9Sstevel@tonic-gate continue; 43547c478bd9Sstevel@tonic-gate 43557c478bd9Sstevel@tonic-gate /* 43567c478bd9Sstevel@tonic-gate * If the file system was forcibly umounted 43577c478bd9Sstevel@tonic-gate * while trying to do a failover, then just 43587c478bd9Sstevel@tonic-gate * give up on the failover. It won't matter 43597c478bd9Sstevel@tonic-gate * what the server is. 43607c478bd9Sstevel@tonic-gate */ 43617c478bd9Sstevel@tonic-gate if (FS_OR_ZONE_GONE(mi->mi_vfsp)) { 43627c478bd9Sstevel@tonic-gate svp = NULL; 43637c478bd9Sstevel@tonic-gate goto done; 43647c478bd9Sstevel@tonic-gate } 43657c478bd9Sstevel@tonic-gate 43667c478bd9Sstevel@tonic-gate error = clnt_tli_kcreate(svp->sv_knconf, &svp->sv_addr, 43677c478bd9Sstevel@tonic-gate NFS_PROGRAM, NFS_VERSION, 0, 1, CRED(), &cl); 43687c478bd9Sstevel@tonic-gate if (error) 43697c478bd9Sstevel@tonic-gate continue; 43707c478bd9Sstevel@tonic-gate 43717c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 43727c478bd9Sstevel@tonic-gate cl->cl_nosignal = TRUE; 43737c478bd9Sstevel@tonic-gate status = CLNT_CALL(cl, RFS_NULL, xdr_void, NULL, 43747c478bd9Sstevel@tonic-gate xdr_void, NULL, tv); 43757c478bd9Sstevel@tonic-gate if (!(mi->mi_flags & MI_INT)) 43767c478bd9Sstevel@tonic-gate cl->cl_nosignal = FALSE; 43777c478bd9Sstevel@tonic-gate AUTH_DESTROY(cl->cl_auth); 43787c478bd9Sstevel@tonic-gate CLNT_DESTROY(cl); 43797c478bd9Sstevel@tonic-gate if (status == RPC_SUCCESS) { 43807c478bd9Sstevel@tonic-gate if (svp == mi->mi_curr_serv) { 43817c478bd9Sstevel@tonic-gate #ifdef DEBUG 43827c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 43837c478bd9Sstevel@tonic-gate "NFS%d: failing over: selecting original server %s", 43847c478bd9Sstevel@tonic-gate mi->mi_vers, svp->sv_hostname); 
43857c478bd9Sstevel@tonic-gate #else 43867c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 43877c478bd9Sstevel@tonic-gate "NFS: failing over: selecting original server %s", 43887c478bd9Sstevel@tonic-gate svp->sv_hostname); 43897c478bd9Sstevel@tonic-gate #endif 43907c478bd9Sstevel@tonic-gate } else { 43917c478bd9Sstevel@tonic-gate #ifdef DEBUG 43927c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 43937c478bd9Sstevel@tonic-gate "NFS%d: failing over from %s to %s", 43947c478bd9Sstevel@tonic-gate mi->mi_vers, 43957c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_hostname, 43967c478bd9Sstevel@tonic-gate svp->sv_hostname); 43977c478bd9Sstevel@tonic-gate #else 43987c478bd9Sstevel@tonic-gate zcmn_err(zoneid, CE_NOTE, 43997c478bd9Sstevel@tonic-gate "NFS: failing over from %s to %s", 44007c478bd9Sstevel@tonic-gate mi->mi_curr_serv->sv_hostname, 44017c478bd9Sstevel@tonic-gate svp->sv_hostname); 44027c478bd9Sstevel@tonic-gate #endif 44037c478bd9Sstevel@tonic-gate } 44047c478bd9Sstevel@tonic-gate break; 44057c478bd9Sstevel@tonic-gate } 44067c478bd9Sstevel@tonic-gate } 44077c478bd9Sstevel@tonic-gate 44087c478bd9Sstevel@tonic-gate if (svp == NULL) { 44097c478bd9Sstevel@tonic-gate if (!oncethru) { 44107c478bd9Sstevel@tonic-gate srvnames = nfs_getsrvnames(mi, &srvnames_len); 44117c478bd9Sstevel@tonic-gate #ifdef DEBUG 44127c478bd9Sstevel@tonic-gate zprintf(zoneid, 44137c478bd9Sstevel@tonic-gate "NFS%d servers %s not responding " 44147c478bd9Sstevel@tonic-gate "still trying\n", mi->mi_vers, srvnames); 44157c478bd9Sstevel@tonic-gate #else 44167c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS servers %s not responding " 44177c478bd9Sstevel@tonic-gate "still trying\n", srvnames); 44187c478bd9Sstevel@tonic-gate #endif 44197c478bd9Sstevel@tonic-gate oncethru = 1; 44207c478bd9Sstevel@tonic-gate } 44217c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 44227c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo); 44237c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 
44247c478bd9Sstevel@tonic-gate delay(hz); 44257c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock); 44267c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_lock); 44277c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock); 44287c478bd9Sstevel@tonic-gate } 44297c478bd9Sstevel@tonic-gate } 44307c478bd9Sstevel@tonic-gate 44317c478bd9Sstevel@tonic-gate if (oncethru) { 44327c478bd9Sstevel@tonic-gate #ifdef DEBUG 44337c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS%d servers %s ok\n", mi->mi_vers, srvnames); 44347c478bd9Sstevel@tonic-gate #else 44357c478bd9Sstevel@tonic-gate zprintf(zoneid, "NFS servers %s ok\n", srvnames); 44367c478bd9Sstevel@tonic-gate #endif 44377c478bd9Sstevel@tonic-gate } 44387c478bd9Sstevel@tonic-gate 44397c478bd9Sstevel@tonic-gate if (svp != mi->mi_curr_serv) { 44407c478bd9Sstevel@tonic-gate (void) dnlc_purge_vfsp(mi->mi_vfsp, 0); 44417c478bd9Sstevel@tonic-gate index = rtablehash(&mi->mi_curr_serv->sv_fhandle); 44427c478bd9Sstevel@tonic-gate rw_enter(&rtable[index].r_lock, RW_WRITER); 44437c478bd9Sstevel@tonic-gate rp = rfind(&rtable[index], &mi->mi_curr_serv->sv_fhandle, 44447c478bd9Sstevel@tonic-gate mi->mi_vfsp); 44457c478bd9Sstevel@tonic-gate if (rp != NULL) { 44467c478bd9Sstevel@tonic-gate if (rp->r_flags & RHASHED) 44477c478bd9Sstevel@tonic-gate rp_rmhash_locked(rp); 44487c478bd9Sstevel@tonic-gate rw_exit(&rtable[index].r_lock); 44497c478bd9Sstevel@tonic-gate rp->r_server = svp; 44507c478bd9Sstevel@tonic-gate rp->r_fh = svp->sv_fhandle; 44517c478bd9Sstevel@tonic-gate (void) nfs_free_data_reclaim(rp); 44527c478bd9Sstevel@tonic-gate index = rtablehash(&rp->r_fh); 44537c478bd9Sstevel@tonic-gate rp->r_hashq = &rtable[index]; 44547c478bd9Sstevel@tonic-gate rw_enter(&rp->r_hashq->r_lock, RW_WRITER); 44557c478bd9Sstevel@tonic-gate vn_exists(RTOV(rp)); 44567c478bd9Sstevel@tonic-gate rp_addhash(rp); 44577c478bd9Sstevel@tonic-gate rw_exit(&rp->r_hashq->r_lock); 44587c478bd9Sstevel@tonic-gate VN_RELE(RTOV(rp)); 44597c478bd9Sstevel@tonic-gate } 
else
		rw_exit(&rtable[index].r_lock);
	}

done:
	/*
	 * The failover attempt is over: free the server-name string if
	 * it was built, clear MI_BINDINPROG, record the newly selected
	 * server (if any), and wake every thread blocked in
	 * failover_wait().
	 */
	if (oncethru)
		kmem_free(srvnames, srvnames_len);
	mutex_enter(&mi->mi_lock);
	mi->mi_flags &= ~MI_BINDINPROG;
	if (svp != NULL) {
		mi->mi_curr_serv = svp;
		mi->mi_failover++;
#ifdef DEBUG
		nfscl->nfscl_stat.failover.value.ui64++;
#endif
	}
	cv_broadcast(&mi->mi_failover_cv);
	CALLB_CPR_EXIT(&cprinfo);
	VFS_RELE(mi->mi_vfsp);
	zthread_exit();
	/* NOTREACHED */
}

/*
 * NFS client failover support
 *
 * failover_wait() will put the thread to sleep until MI_BINDINPROG
 * is cleared, meaning that failover is complete.  Called with
 * mi_lock mutex held.  Returns 0 once failover has completed, or
 * EINTR if the wait was interrupted by a signal.
 */
static int
failover_wait(mntinfo_t *mi)
{
	k_sigset_t smask;

	/*
	 * If someone else is hunting for a living server,
	 * sleep until it's done.  After our sleep, we may
	 * be bound to the right server and get off cheaply.
	 */
	while (mi->mi_flags & MI_BINDINPROG) {
		/*
		 * Mask out all signals except SIGHUP, SIGINT, SIGQUIT
		 * and SIGTERM. (Preserving the existing masks).
		 * Mask out SIGINT if mount option nointr is specified.
		 */
		sigintr(&smask, (int)mi->mi_flags & MI_INT);
		/*
		 * cv_wait_sig() returns 0 when the wait was interrupted
		 * by a signal; report EINTR to the caller in that case.
		 */
		if (!cv_wait_sig(&mi->mi_failover_cv, &mi->mi_lock)) {
			/*
			 * restore original signal mask
			 */
			sigunintr(&smask);
			return (EINTR);
		}
		/*
		 * restore original signal mask
		 */
		sigunintr(&smask);
	}
	return (0);
}

/*
 * NFS client failover support
 *
 * failover_remap() will do a partial pathname lookup and find the
 * desired vnode on the current server.  The interim vnode will be
 * discarded after we pilfer the new filehandle.
 *
 * Side effects:
 *	- This routine will also update the filehandle in the args structure
 *	  pointed to by the fi->fhp pointer if it is non-NULL.
 */
static int
failover_remap(failinfo_t *fi)
{
	vnode_t *vp, *nvp, *rootvp;
	rnode_t *rp, *nrp;
	mntinfo_t *mi;
	int error;
#ifdef DEBUG
	struct nfs_clnt *nfscl;

	nfscl = zone_getspecific(nfsclnt_zone_key, nfs_zone());
	ASSERT(nfscl != NULL);
#endif
	/*
	 * Sanity check
	 */
	if (fi == NULL || fi->vp == NULL || fi->lookupproc == NULL)
		return (EINVAL);
	vp = fi->vp;
	rp = VTOR(vp);
	mi = VTOMI(vp);

	if (!(vp->v_flag & VROOT)) {
		/*
		 * Given the root fh, use the path stored in
		 * the rnode to find the fh for the new server.
		 */
		error = VFS_ROOT(mi->mi_vfsp, &rootvp);
		if (error)
			return (error);

		error = failover_lookup(rp->r_path, rootvp,
		    fi->lookupproc, fi->xattrdirproc, &nvp);

		VN_RELE(rootvp);

		if (error)
			return (error);

		/*
		 * If we found the same rnode, we're done now
		 */
		if (nvp == vp) {
			/*
			 * Failed and the new server may physically be same
			 * OR may share a same disk subsystem. In this case
			 * file handle for a particular file path is not going
			 * to change, given the same filehandle lookup will
			 * always locate the same rnode as the existing one.
			 * All we might need to do is to update the r_server
			 * with the current servinfo.
			 */
			if (!VALID_FH(fi)) {
				rp->r_server = mi->mi_curr_serv;
			}
			VN_RELE(nvp);
			return (0);
		}

		/*
		 * Try to make it so that no one else will find this
		 * vnode because it is just a temporary to hold the
		 * new file handle until that file handle can be
		 * copied to the original vnode/rnode.
		 */
		nrp = VTOR(nvp);
		/*
		 * mi_remap_lock serializes remap attempts against this
		 * mount; held across the filehandle/hash-queue surgery
		 * below.
		 */
		mutex_enter(&mi->mi_remap_lock);
		/*
		 * Some other thread could have raced in here and could
		 * have done the remap for this particular rnode before
		 * this thread here. Check for rp->r_server and
		 * mi->mi_curr_serv and return if they are same.
		 */
		if (VALID_FH(fi)) {
			mutex_exit(&mi->mi_remap_lock);
			VN_RELE(nvp);
			return (0);
		}

		if (nrp->r_flags & RHASHED)
			rp_rmhash(nrp);

		/*
		 * As a heuristic check on the validity of the new
		 * file, check that the size and type match against
		 * that we remember from the old version.
		 */
		if (rp->r_size != nrp->r_size || vp->v_type != nvp->v_type) {
			mutex_exit(&mi->mi_remap_lock);
			zcmn_err(mi->mi_zone->zone_id, CE_WARN,
			    "NFS replicas %s and %s: file %s not same.",
			    rp->r_server->sv_hostname,
			    nrp->r_server->sv_hostname, rp->r_path);
			VN_RELE(nvp);
			return (EINVAL);
		}

		/*
		 * snarf the filehandle from the new rnode
		 * then release it, again while updating the
		 * hash queues for the rnode.
		 */
		if (rp->r_flags & RHASHED)
			rp_rmhash(rp);
		rp->r_server = mi->mi_curr_serv;
		rp->r_fh = nrp->r_fh;
		/* adopt the hash bucket that matches the new filehandle */
		rp->r_hashq = nrp->r_hashq;
		/*
		 * Copy the attributes from the new rnode to the old
		 * rnode.  This will help to reduce unnecessary page
		 * cache flushes.
		 */
		rp->r_attr = nrp->r_attr;
		rp->r_attrtime = nrp->r_attrtime;
		rp->r_mtime = nrp->r_mtime;
		(void) nfs_free_data_reclaim(rp);
		nfs_setswaplike(vp, &rp->r_attr);
		rw_enter(&rp->r_hashq->r_lock, RW_WRITER);
		rp_addhash(rp);
		rw_exit(&rp->r_hashq->r_lock);
		mutex_exit(&mi->mi_remap_lock);
		VN_RELE(nvp);
	}

	/*
	 * Update successful failover remap count
	 */
	mutex_enter(&mi->mi_lock);
	mi->mi_remap++;
	mutex_exit(&mi->mi_lock);
#ifdef DEBUG
	nfscl->nfscl_stat.remap.value.ui64++;
#endif

	/*
	 * If we have a copied filehandle to update, do it now.
	 */
	if (fi->fhp != NULL && fi->copyproc != NULL)
		(*fi->copyproc)(fi->fhp, vp);

	return (0);
}

/*
 * NFS client failover support
 *
 * We want a simple pathname lookup routine to parse the pieces
 * of path in rp->r_path.
We know that the path was created
 * as rnodes were made, so we know we have only to deal with
 * paths that look like:
 *	dir1/dir2/dir3/file
 * Any evidence of anything like .., symlinks, and ENOTDIR
 * are hard errors, because they mean something in this filesystem
 * is different from the one we came from, or has changed under
 * us in some way.  If this is true, we want the failure.
 *
 * Extended attributes: if the filesystem is mounted with extended
 * attributes enabled (-o xattr), the attribute directory will be
 * represented in the r_path as the magic name XATTR_RPATH. So if
 * we see that name in the pathname, it must be because this node
 * is an extended attribute.  Therefore, look it up that way.
 */
static int
failover_lookup(char *path, vnode_t *root,
    int (*lookupproc)(vnode_t *, char *, vnode_t **, struct pathname *, int,
    vnode_t *, cred_t *, int),
    int (*xattrdirproc)(vnode_t *, vnode_t **, bool_t, cred_t *, int),
    vnode_t **new)
{
	vnode_t *dvp, *nvp;
	int error = EINVAL;
	char *s, *p, *tmppath;
	size_t len;
	mntinfo_t *mi;
	bool_t xattr;

	/* Make local copy of path */
	len = strlen(path) + 1;
	tmppath = kmem_alloc(len, KM_SLEEP);
	(void) strcpy(tmppath, path);
	s = tmppath;

	dvp = root;
	VN_HOLD(dvp);
	mi = VTOMI(root);
	xattr = mi->mi_flags & MI_EXTATTR;

	/*
	 * Walk the path one component at a time: temporarily replace
	 * each '/' with '\0' so that s names a single component, look
	 * that component up, then restore the '/' and advance.
	 */
	do {
		p = strchr(s, '/');
		if (p != NULL)
			*p = '\0';
		if (xattr && strcmp(s, XATTR_RPATH) == 0) {
			/* this component is the extended attribute dir */
			error = (*xattrdirproc)(dvp, &nvp, FALSE, CRED(),
			    RFSCALL_SOFT);
		} else {
			error = (*lookupproc)(dvp, s, &nvp, NULL, 0, NULL,
			    CRED(), RFSCALL_SOFT);
		}
		if (p != NULL)
			*p++ = '/';
		if (error) {
			/* drop the hold on the last directory we reached */
			VN_RELE(dvp);
			kmem_free(tmppath, len);
			return (error);
		}
		s = p;
		/* step down: release the parent, continue from the child */
		VN_RELE(dvp);
		dvp = nvp;
	} while (p != NULL);

	/*
	 * On success, the hold on the final vnode is handed to the
	 * caller through *new.
	 * NOTE(review): if new == NULL the hold on nvp is not released
	 * here; callers appear to always pass a non-NULL new -- confirm.
	 */
	if (nvp != NULL && new != NULL)
		*new = nvp;
	kmem_free(tmppath, len);
	return (0);
}

/*
 * NFS client failover support
 *
 * sv_free() frees the malloc'd portion of a "servinfo_t".
 */
void
sv_free(servinfo_t *svp)
{
	servinfo_t *next;
	struct knetconfig *knconf;

	/* walk the whole list, freeing each servinfo_t and what it owns */
	while (svp != NULL) {
		next = svp->sv_next;
		if (svp->sv_secdata)
			sec_clnt_freeinfo(svp->sv_secdata);
		if (svp->sv_hostname && svp->sv_hostnamelen > 0)
			kmem_free(svp->sv_hostname, svp->sv_hostnamelen);
		knconf = svp->sv_knconf;
		if (knconf != NULL) {
			if (knconf->knc_protofmly != NULL)
				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
			if (knconf->knc_proto != NULL)
				kmem_free(knconf->knc_proto, KNC_STRSIZE);
			kmem_free(knconf, sizeof (*knconf));
		}
		/* sv_origknconf is freed the same way as sv_knconf */
		knconf = svp->sv_origknconf;
		if (knconf != NULL) {
			if (knconf->knc_protofmly != NULL)
				kmem_free(knconf->knc_protofmly, KNC_STRSIZE);
			if (knconf->knc_proto != NULL)
				kmem_free(knconf->knc_proto, KNC_STRSIZE);
			kmem_free(knconf, sizeof (*knconf));
		}
		if (svp->sv_addr.buf != NULL && svp->sv_addr.maxlen != 0)
			kmem_free(svp->sv_addr.buf, svp->sv_addr.maxlen);
		mutex_destroy(&svp->sv_lock);
		kmem_free(svp, sizeof (*svp));
		svp = next;
	}
}

/*
 * Acquire l for reading or writing, blocking as needed.
 * Only can return non-zero (EINTR) if intr != 0.
 */
int
nfs_rw_enter_sig(nfs_rwlock_t *l, krw_t rw, int intr)
{

	mutex_enter(&l->lock);

	/*
	 * If this is a nested enter, then allow it.  There
	 * must be as many exits as enters through.
48027c478bd9Sstevel@tonic-gate */ 48037c478bd9Sstevel@tonic-gate if (l->owner == curthread) { 48047c478bd9Sstevel@tonic-gate /* lock is held for writing by current thread */ 48057c478bd9Sstevel@tonic-gate ASSERT(rw == RW_READER || rw == RW_WRITER); 48067c478bd9Sstevel@tonic-gate l->count--; 48077c478bd9Sstevel@tonic-gate } else if (rw == RW_READER) { 48087c478bd9Sstevel@tonic-gate /* 48097c478bd9Sstevel@tonic-gate * While there is a writer active or writers waiting, 48107c478bd9Sstevel@tonic-gate * then wait for them to finish up and move on. Then, 48117c478bd9Sstevel@tonic-gate * increment the count to indicate that a reader is 48127c478bd9Sstevel@tonic-gate * active. 48137c478bd9Sstevel@tonic-gate */ 48147c478bd9Sstevel@tonic-gate while (l->count < 0 || l->waiters > 0) { 48157c478bd9Sstevel@tonic-gate if (intr) { 48167c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 48177c478bd9Sstevel@tonic-gate 48187c478bd9Sstevel@tonic-gate if (lwp != NULL) 48197c478bd9Sstevel@tonic-gate lwp->lwp_nostop++; 48207909625fSMarcel Telka if (cv_wait_sig(&l->cv_rd, &l->lock) == 0) { 48217c478bd9Sstevel@tonic-gate if (lwp != NULL) 48227c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 48237c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 48247c478bd9Sstevel@tonic-gate return (EINTR); 48257c478bd9Sstevel@tonic-gate } 48267c478bd9Sstevel@tonic-gate if (lwp != NULL) 48277c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 48287c478bd9Sstevel@tonic-gate } else 48297909625fSMarcel Telka cv_wait(&l->cv_rd, &l->lock); 48307c478bd9Sstevel@tonic-gate } 48317c478bd9Sstevel@tonic-gate ASSERT(l->count < INT_MAX); 48327c478bd9Sstevel@tonic-gate #ifdef DEBUG 48337c478bd9Sstevel@tonic-gate if ((l->count % 10000) == 9999) 48347c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "nfs_rw_enter_sig: count %d on" 48357106075aSmarks "rwlock @ %p\n", l->count, (void *)&l); 48367c478bd9Sstevel@tonic-gate #endif 48377c478bd9Sstevel@tonic-gate l->count++; 48387c478bd9Sstevel@tonic-gate } else { 
48397c478bd9Sstevel@tonic-gate ASSERT(rw == RW_WRITER); 48407c478bd9Sstevel@tonic-gate /* 48417c478bd9Sstevel@tonic-gate * While there are readers active or a writer 48427c478bd9Sstevel@tonic-gate * active, then wait for all of the readers 48437c478bd9Sstevel@tonic-gate * to finish or for the writer to finish. 48447c478bd9Sstevel@tonic-gate * Then, set the owner field to curthread and 48457c478bd9Sstevel@tonic-gate * decrement count to indicate that a writer 48467c478bd9Sstevel@tonic-gate * is active. 48477c478bd9Sstevel@tonic-gate */ 4848d1054fdaSMarcel Telka while (l->count != 0) { 48497c478bd9Sstevel@tonic-gate l->waiters++; 48507c478bd9Sstevel@tonic-gate if (intr) { 48517c478bd9Sstevel@tonic-gate klwp_t *lwp = ttolwp(curthread); 48527c478bd9Sstevel@tonic-gate 48537c478bd9Sstevel@tonic-gate if (lwp != NULL) 48547c478bd9Sstevel@tonic-gate lwp->lwp_nostop++; 4855d1054fdaSMarcel Telka if (cv_wait_sig(&l->cv, &l->lock) == 0) { 48567c478bd9Sstevel@tonic-gate if (lwp != NULL) 48577c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 48587c478bd9Sstevel@tonic-gate l->waiters--; 4859d1054fdaSMarcel Telka /* 4860d1054fdaSMarcel Telka * If there are readers active and no 48617909625fSMarcel Telka * writers waiting then wake up all of 48627909625fSMarcel Telka * the waiting readers (if any). 
4863d1054fdaSMarcel Telka */ 4864d1054fdaSMarcel Telka if (l->count > 0 && l->waiters == 0) 48657909625fSMarcel Telka cv_broadcast(&l->cv_rd); 48667c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 48677c478bd9Sstevel@tonic-gate return (EINTR); 48687c478bd9Sstevel@tonic-gate } 48697c478bd9Sstevel@tonic-gate if (lwp != NULL) 48707c478bd9Sstevel@tonic-gate lwp->lwp_nostop--; 48717c478bd9Sstevel@tonic-gate } else 48727c478bd9Sstevel@tonic-gate cv_wait(&l->cv, &l->lock); 48737c478bd9Sstevel@tonic-gate l->waiters--; 48747c478bd9Sstevel@tonic-gate } 4875d1054fdaSMarcel Telka ASSERT(l->owner == NULL); 48767c478bd9Sstevel@tonic-gate l->owner = curthread; 48777c478bd9Sstevel@tonic-gate l->count--; 48787c478bd9Sstevel@tonic-gate } 48797c478bd9Sstevel@tonic-gate 48807c478bd9Sstevel@tonic-gate mutex_exit(&l->lock); 48817c478bd9Sstevel@tonic-gate 48827c478bd9Sstevel@tonic-gate return (0); 48837c478bd9Sstevel@tonic-gate } 48847c478bd9Sstevel@tonic-gate 48857c478bd9Sstevel@tonic-gate /* 48867c478bd9Sstevel@tonic-gate * If the lock is available, obtain it and return non-zero. If there is 48877c478bd9Sstevel@tonic-gate * already a conflicting lock, return 0 immediately. 48887c478bd9Sstevel@tonic-gate */ 48897c478bd9Sstevel@tonic-gate 48907c478bd9Sstevel@tonic-gate int 48917c478bd9Sstevel@tonic-gate nfs_rw_tryenter(nfs_rwlock_t *l, krw_t rw) 48927c478bd9Sstevel@tonic-gate { 48937c478bd9Sstevel@tonic-gate mutex_enter(&l->lock); 48947c478bd9Sstevel@tonic-gate 48957c478bd9Sstevel@tonic-gate /* 48967c478bd9Sstevel@tonic-gate * If this is a nested enter, then allow it. There 48977c478bd9Sstevel@tonic-gate * must be as many exits as enters through. 
 */
	if (l->owner == curthread) {
		/* lock is held for writing by current thread */
		ASSERT(rw == RW_READER || rw == RW_WRITER);
		l->count--;
	} else if (rw == RW_READER) {
		/*
		 * If there is a writer active or writers waiting, deny the
		 * lock.  Otherwise, bump the count of readers.
		 */
		if (l->count < 0 || l->waiters > 0) {
			mutex_exit(&l->lock);
			return (0);
		}
		l->count++;
	} else {
		ASSERT(rw == RW_WRITER);
		/*
		 * If there are readers active or a writer active, deny the
		 * lock.  Otherwise, set the owner field to curthread and
		 * decrement count to indicate that a writer is active.
		 */
		if (l->count != 0) {
			mutex_exit(&l->lock);
			return (0);
		}
		ASSERT(l->owner == NULL);
		l->owner = curthread;
		l->count--;
	}

	mutex_exit(&l->lock);

	return (1);
}

/*
 * Release one hold on l, waking up waiters as appropriate.
 */
void
nfs_rw_exit(nfs_rwlock_t *l)
{

	mutex_enter(&l->lock);

	if (l->owner != NULL) {
		ASSERT(l->owner == curthread);

		/*
		 * To release a writer lock increment count to indicate that
		 * there is one less writer active.  If this was the last of
		 * possibly nested writer locks, then clear the owner field as
		 * well to indicate that there is no writer active.
		 */
		ASSERT(l->count < 0);
		l->count++;
		if (l->count == 0) {
			l->owner = NULL;

			/*
			 * If there are no writers waiting then wakeup all of
			 * the waiting readers (if any).
			 */
			if (l->waiters == 0)
				cv_broadcast(&l->cv_rd);
		}
	} else {
		/*
		 * To release a reader lock just decrement count to indicate
		 * that there is one less reader active.
		 */
		ASSERT(l->count > 0);
		l->count--;
	}

	/*
	 * If there are no readers active nor a writer active and there is
	 * a writer waiting, we need to wake it up.
	 */
	if (l->count == 0 && l->waiters > 0)
		cv_signal(&l->cv);
	mutex_exit(&l->lock);
}

/*
 * Report whether l is held for the given mode.
 * NOTE(review): reads l->count without taking l->lock; apparently
 * intended for ASSERT-style checks -- confirm with callers.
 */
int
nfs_rw_lock_held(nfs_rwlock_t *l, krw_t rw)
{

	if (rw == RW_READER)
		return (l->count > 0);
	ASSERT(rw == RW_WRITER);
	return (l->count < 0);
}

/*
 * Initialize an nfs_rwlock_t: no holders, no waiters.  The name,
 * type and arg arguments are unused (see ARGSUSED).
 */
/* ARGSUSED */
void
nfs_rw_init(nfs_rwlock_t *l, char *name, krw_type_t type, void *arg)
{

	l->count = 0;
	l->waiters = 0;
	l->owner = NULL;
	mutex_init(&l->lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&l->cv, NULL, CV_DEFAULT, NULL);
	cv_init(&l->cv_rd, NULL, CV_DEFAULT, NULL);
}

/*
 * Tear down an nfs_rwlock_t initialized by nfs_rw_init().
 */
void
nfs_rw_destroy(nfs_rwlock_t *l)
{

	mutex_destroy(&l->lock);
	cv_destroy(&l->cv);
	cv_destroy(&l->cv_rd);
}

/*
 * Three-way comparator (-1/0/1) for NFSv3 readdir cache entries:
 * order by nfs3_cookie first, then by buflen.
 */
int
nfs3_rddir_compar(const void *x, const void *y)
{
	rddir_cache *a = (rddir_cache *)x;
	rddir_cache *b = (rddir_cache *)y;

	if (a->nfs3_cookie == b->nfs3_cookie) {
		if (a->buflen == b->buflen)
			return (0);
		if (a->buflen < b->buflen)
			return (-1);
		return (1);
	}

	if (a->nfs3_cookie < b->nfs3_cookie)
		return (-1);

	return (1);
}

/*
 * Three-way comparator (-1/0/1) for NFSv2 readdir cache entries:
 * order by nfs_cookie first, then by buflen.
 */
int
nfs_rddir_compar(const void *x, const void *y)
{
	rddir_cache *a = (rddir_cache *)x;
	rddir_cache *b = (rddir_cache *)y;

	if (a->nfs_cookie == b->nfs_cookie) {
		if (a->buflen == b->buflen)
			return (0);
		if (a->buflen < b->buflen)
50417c478bd9Sstevel@tonic-gate return (-1); 50427c478bd9Sstevel@tonic-gate return (1); 50437c478bd9Sstevel@tonic-gate } 50447c478bd9Sstevel@tonic-gate 50457c478bd9Sstevel@tonic-gate if (a->nfs_cookie < b->nfs_cookie) 50467c478bd9Sstevel@tonic-gate return (-1); 50477c478bd9Sstevel@tonic-gate 50487c478bd9Sstevel@tonic-gate return (1); 50497c478bd9Sstevel@tonic-gate } 50507c478bd9Sstevel@tonic-gate 50517c478bd9Sstevel@tonic-gate static char * 50527c478bd9Sstevel@tonic-gate nfs_getsrvnames(mntinfo_t *mi, size_t *len) 50537c478bd9Sstevel@tonic-gate { 50547c478bd9Sstevel@tonic-gate servinfo_t *s; 50557c478bd9Sstevel@tonic-gate char *srvnames; 50567c478bd9Sstevel@tonic-gate char *namep; 50577c478bd9Sstevel@tonic-gate size_t length; 50587c478bd9Sstevel@tonic-gate 50597c478bd9Sstevel@tonic-gate /* 50607c478bd9Sstevel@tonic-gate * Calculate the length of the string required to hold all 50617c478bd9Sstevel@tonic-gate * of the server names plus either a comma or a null 50627c478bd9Sstevel@tonic-gate * character following each individual one. 
50637c478bd9Sstevel@tonic-gate */ 50647c478bd9Sstevel@tonic-gate length = 0; 50657c478bd9Sstevel@tonic-gate for (s = mi->mi_servers; s != NULL; s = s->sv_next) 50667c478bd9Sstevel@tonic-gate length += s->sv_hostnamelen; 50677c478bd9Sstevel@tonic-gate 50687c478bd9Sstevel@tonic-gate srvnames = kmem_alloc(length, KM_SLEEP); 50697c478bd9Sstevel@tonic-gate 50707c478bd9Sstevel@tonic-gate namep = srvnames; 50717c478bd9Sstevel@tonic-gate for (s = mi->mi_servers; s != NULL; s = s->sv_next) { 50727c478bd9Sstevel@tonic-gate (void) strcpy(namep, s->sv_hostname); 50737c478bd9Sstevel@tonic-gate namep += s->sv_hostnamelen - 1; 50747c478bd9Sstevel@tonic-gate *namep++ = ','; 50757c478bd9Sstevel@tonic-gate } 50767c478bd9Sstevel@tonic-gate *--namep = '\0'; 50777c478bd9Sstevel@tonic-gate 50787c478bd9Sstevel@tonic-gate *len = length; 50797c478bd9Sstevel@tonic-gate 50807c478bd9Sstevel@tonic-gate return (srvnames); 50817c478bd9Sstevel@tonic-gate } 5082108322fbScarlsonj 5083108322fbScarlsonj /* 5084108322fbScarlsonj * These two functions are temporary and designed for the upgrade-workaround 5085108322fbScarlsonj * only. They cannot be used for general zone-crossing NFS client support, and 5086108322fbScarlsonj * will be removed shortly. 5087108322fbScarlsonj * 5088108322fbScarlsonj * When the workaround is enabled, all NFS traffic is forced into the global 5089108322fbScarlsonj * zone. These functions are called when the code needs to refer to the state 5090108322fbScarlsonj * of the underlying network connection. They're not called when the function 5091108322fbScarlsonj * needs to refer to the state of the process that invoked the system call. 5092108322fbScarlsonj * (E.g., when checking whether the zone is shutting down during the mount() 5093108322fbScarlsonj * call.) 5094108322fbScarlsonj */ 5095108322fbScarlsonj 5096108322fbScarlsonj struct zone * 5097108322fbScarlsonj nfs_zone(void) 5098108322fbScarlsonj { 5099108322fbScarlsonj return (nfs_global_client_only != 0 ? 
global_zone : curproc->p_zone); 5100108322fbScarlsonj } 5101108322fbScarlsonj 5102108322fbScarlsonj zoneid_t 5103108322fbScarlsonj nfs_zoneid(void) 5104108322fbScarlsonj { 5105108322fbScarlsonj return (nfs_global_client_only != 0 ? GLOBAL_ZONEID : getzoneid()); 5106108322fbScarlsonj } 510745916cd2Sjpk 510845916cd2Sjpk /* 510945916cd2Sjpk * nfs_mount_label_policy: 511045916cd2Sjpk * Determine whether the mount is allowed according to MAC check, 511145916cd2Sjpk * by comparing (where appropriate) label of the remote server 511245916cd2Sjpk * against the label of the zone being mounted into. 511345916cd2Sjpk * 511445916cd2Sjpk * Returns: 511545916cd2Sjpk * 0 : access allowed 511645916cd2Sjpk * -1 : read-only access allowed (i.e., read-down) 511745916cd2Sjpk * >0 : error code, such as EACCES 511845916cd2Sjpk */ 511945916cd2Sjpk int 512045916cd2Sjpk nfs_mount_label_policy(vfs_t *vfsp, struct netbuf *addr, 512145916cd2Sjpk struct knetconfig *knconf, cred_t *cr) 512245916cd2Sjpk { 512345916cd2Sjpk int addr_type; 512445916cd2Sjpk void *ipaddr; 512545916cd2Sjpk bslabel_t *server_sl, *mntlabel; 512645916cd2Sjpk zone_t *mntzone = NULL; 512745916cd2Sjpk ts_label_t *zlabel; 512845916cd2Sjpk tsol_tpc_t *tp; 512945916cd2Sjpk ts_label_t *tsl = NULL; 513045916cd2Sjpk int retv; 513145916cd2Sjpk 513245916cd2Sjpk /* 513345916cd2Sjpk * Get the zone's label. Each zone on a labeled system has a label. 
513445916cd2Sjpk */ 513545916cd2Sjpk mntzone = zone_find_by_any_path(refstr_value(vfsp->vfs_mntpt), B_FALSE); 513645916cd2Sjpk zlabel = mntzone->zone_slabel; 513745916cd2Sjpk ASSERT(zlabel != NULL); 513845916cd2Sjpk label_hold(zlabel); 513945916cd2Sjpk 514045916cd2Sjpk if (strcmp(knconf->knc_protofmly, NC_INET) == 0) { 514145916cd2Sjpk addr_type = IPV4_VERSION; 514245916cd2Sjpk ipaddr = &((struct sockaddr_in *)addr->buf)->sin_addr; 514345916cd2Sjpk } else if (strcmp(knconf->knc_protofmly, NC_INET6) == 0) { 514445916cd2Sjpk addr_type = IPV6_VERSION; 514545916cd2Sjpk ipaddr = &((struct sockaddr_in6 *)addr->buf)->sin6_addr; 514645916cd2Sjpk } else { 514745916cd2Sjpk retv = 0; 514845916cd2Sjpk goto out; 514945916cd2Sjpk } 515045916cd2Sjpk 515145916cd2Sjpk retv = EACCES; /* assume the worst */ 515245916cd2Sjpk 515345916cd2Sjpk /* 515445916cd2Sjpk * Next, get the assigned label of the remote server. 515545916cd2Sjpk */ 515645916cd2Sjpk tp = find_tpc(ipaddr, addr_type, B_FALSE); 515745916cd2Sjpk if (tp == NULL) 515845916cd2Sjpk goto out; /* error getting host entry */ 515945916cd2Sjpk 516045916cd2Sjpk if (tp->tpc_tp.tp_doi != zlabel->tsl_doi) 516145916cd2Sjpk goto rel_tpc; /* invalid domain */ 516245916cd2Sjpk if ((tp->tpc_tp.host_type != SUN_CIPSO) && 516345916cd2Sjpk (tp->tpc_tp.host_type != UNLABELED)) 516445916cd2Sjpk goto rel_tpc; /* invalid hosttype */ 516545916cd2Sjpk 516645916cd2Sjpk if (tp->tpc_tp.host_type == SUN_CIPSO) { 516745916cd2Sjpk tsl = getflabel_cipso(vfsp); 516845916cd2Sjpk if (tsl == NULL) 516945916cd2Sjpk goto rel_tpc; /* error getting server lbl */ 517045916cd2Sjpk 517145916cd2Sjpk server_sl = label2bslabel(tsl); 517245916cd2Sjpk } else { /* UNLABELED */ 517345916cd2Sjpk server_sl = &tp->tpc_tp.tp_def_label; 517445916cd2Sjpk } 517545916cd2Sjpk 517645916cd2Sjpk mntlabel = label2bslabel(zlabel); 517745916cd2Sjpk 517845916cd2Sjpk /* 517945916cd2Sjpk * Now compare labels to complete the MAC check. 
If the labels 518045916cd2Sjpk * are equal or if the requestor is in the global zone and has 518145916cd2Sjpk * NET_MAC_AWARE, then allow read-write access. (Except for 518245916cd2Sjpk * mounts into the global zone itself; restrict these to 518345916cd2Sjpk * read-only.) 518445916cd2Sjpk * 518548bbca81SDaniel Hoffman * If the requestor is in some other zone, but their label 518645916cd2Sjpk * dominates the server, then allow read-down. 518745916cd2Sjpk * 518845916cd2Sjpk * Otherwise, access is denied. 518945916cd2Sjpk */ 519045916cd2Sjpk if (blequal(mntlabel, server_sl) || 519145916cd2Sjpk (crgetzoneid(cr) == GLOBAL_ZONEID && 519245916cd2Sjpk getpflags(NET_MAC_AWARE, cr) != 0)) { 519345916cd2Sjpk if ((mntzone == global_zone) || 519445916cd2Sjpk !blequal(mntlabel, server_sl)) 519545916cd2Sjpk retv = -1; /* read-only */ 519645916cd2Sjpk else 519745916cd2Sjpk retv = 0; /* access OK */ 519845916cd2Sjpk } else if (bldominates(mntlabel, server_sl)) { 519945916cd2Sjpk retv = -1; /* read-only */ 520045916cd2Sjpk } else { 520145916cd2Sjpk retv = EACCES; 520245916cd2Sjpk } 520345916cd2Sjpk 520445916cd2Sjpk if (tsl != NULL) 520545916cd2Sjpk label_rele(tsl); 520645916cd2Sjpk 520745916cd2Sjpk rel_tpc: 520845916cd2Sjpk TPC_RELE(tp); 520945916cd2Sjpk out: 521045916cd2Sjpk if (mntzone) 521145916cd2Sjpk zone_rele(mntzone); 521245916cd2Sjpk label_rele(zlabel); 521345916cd2Sjpk return (retv); 521445916cd2Sjpk } 52159acbbeafSnn 52169acbbeafSnn boolean_t 52179acbbeafSnn nfs_has_ctty(void) 52189acbbeafSnn { 52199acbbeafSnn boolean_t rv; 52209acbbeafSnn mutex_enter(&curproc->p_splock); 52219acbbeafSnn rv = (curproc->p_sessp->s_vp != NULL); 52229acbbeafSnn mutex_exit(&curproc->p_splock); 52239acbbeafSnn return (rv); 52249acbbeafSnn } 522503986916Sjarrett 522693aeed83Smarks /* 522793aeed83Smarks * See if xattr directory to see if it has any generic user attributes 522893aeed83Smarks */ 522993aeed83Smarks int 523093aeed83Smarks do_xattr_exists_check(vnode_t *vp, ulong_t *valp, cred_t *cr) 
523193aeed83Smarks { 523293aeed83Smarks struct uio uio; 523393aeed83Smarks struct iovec iov; 523493aeed83Smarks char *dbuf; 523593aeed83Smarks struct dirent64 *dp; 523693aeed83Smarks size_t dlen = 8 * 1024; 523793aeed83Smarks size_t dbuflen; 523893aeed83Smarks int eof = 0; 523993aeed83Smarks int error; 524093aeed83Smarks 524193aeed83Smarks *valp = 0; 524293aeed83Smarks dbuf = kmem_alloc(dlen, KM_SLEEP); 524393aeed83Smarks uio.uio_iov = &iov; 524493aeed83Smarks uio.uio_iovcnt = 1; 524593aeed83Smarks uio.uio_segflg = UIO_SYSSPACE; 524693aeed83Smarks uio.uio_fmode = 0; 524793aeed83Smarks uio.uio_extflg = UIO_COPY_CACHED; 524893aeed83Smarks uio.uio_loffset = 0; 524993aeed83Smarks uio.uio_resid = dlen; 525093aeed83Smarks iov.iov_base = dbuf; 525193aeed83Smarks iov.iov_len = dlen; 525293aeed83Smarks (void) VOP_RWLOCK(vp, V_WRITELOCK_FALSE, NULL); 525393aeed83Smarks error = VOP_READDIR(vp, &uio, cr, &eof, NULL, 0); 525493aeed83Smarks VOP_RWUNLOCK(vp, V_WRITELOCK_FALSE, NULL); 525593aeed83Smarks 525693aeed83Smarks dbuflen = dlen - uio.uio_resid; 525793aeed83Smarks 525893aeed83Smarks if (error || dbuflen == 0) { 525993aeed83Smarks kmem_free(dbuf, dlen); 526093aeed83Smarks return (error); 526193aeed83Smarks } 526293aeed83Smarks 526393aeed83Smarks dp = (dirent64_t *)dbuf; 526493aeed83Smarks 526593aeed83Smarks while ((intptr_t)dp < (intptr_t)dbuf + dbuflen) { 526693aeed83Smarks if (strcmp(dp->d_name, ".") == 0 || 526793aeed83Smarks strcmp(dp->d_name, "..") == 0 || strcmp(dp->d_name, 526893aeed83Smarks VIEW_READWRITE) == 0 || strcmp(dp->d_name, 526993aeed83Smarks VIEW_READONLY) == 0) { 527093aeed83Smarks dp = (dirent64_t *)((intptr_t)dp + dp->d_reclen); 527193aeed83Smarks continue; 527293aeed83Smarks } 527393aeed83Smarks 527493aeed83Smarks *valp = 1; 527593aeed83Smarks break; 527693aeed83Smarks } 527793aeed83Smarks kmem_free(dbuf, dlen); 527893aeed83Smarks return (0); 527993aeed83Smarks } 5280