17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
550a83466Sjwahlig * Common Development and Distribution License (the "License").
650a83466Sjwahlig * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate /*
22a19609f8Sjv * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
23ade42b55SSebastien Roy * Copyright (c) 2017 by Delphix. All rights reserved.
247c478bd9Sstevel@tonic-gate */
257c478bd9Sstevel@tonic-gate
267c478bd9Sstevel@tonic-gate /*
27*6dc7d057SMarcel Telka * Copyright (c) 1983,1984,1985,1986,1987,1988,1989 AT&T.
287c478bd9Sstevel@tonic-gate * All Rights Reserved
297c478bd9Sstevel@tonic-gate */
307c478bd9Sstevel@tonic-gate
317c478bd9Sstevel@tonic-gate #include <sys/param.h>
327c478bd9Sstevel@tonic-gate #include <sys/types.h>
337c478bd9Sstevel@tonic-gate #include <sys/systm.h>
347c478bd9Sstevel@tonic-gate #include <sys/thread.h>
357c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
367c478bd9Sstevel@tonic-gate #include <sys/time.h>
377c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
387c478bd9Sstevel@tonic-gate #include <sys/vfs.h>
397c478bd9Sstevel@tonic-gate #include <sys/errno.h>
407c478bd9Sstevel@tonic-gate #include <sys/buf.h>
417c478bd9Sstevel@tonic-gate #include <sys/stat.h>
427c478bd9Sstevel@tonic-gate #include <sys/cred.h>
437c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
447c478bd9Sstevel@tonic-gate #include <sys/debug.h>
457c478bd9Sstevel@tonic-gate #include <sys/dnlc.h>
467c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
477c478bd9Sstevel@tonic-gate #include <sys/flock.h>
487c478bd9Sstevel@tonic-gate #include <sys/share.h>
497c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
507c478bd9Sstevel@tonic-gate #include <sys/tiuser.h>
517c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
527c478bd9Sstevel@tonic-gate #include <sys/callb.h>
537c478bd9Sstevel@tonic-gate #include <sys/acl.h>
547c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
557c478bd9Sstevel@tonic-gate #include <sys/signal.h>
567c478bd9Sstevel@tonic-gate #include <sys/disp.h>
577c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
587c478bd9Sstevel@tonic-gate #include <sys/list.h>
597c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
607c478bd9Sstevel@tonic-gate
617c478bd9Sstevel@tonic-gate #include <rpc/types.h>
627c478bd9Sstevel@tonic-gate #include <rpc/xdr.h>
637c478bd9Sstevel@tonic-gate #include <rpc/auth.h>
647c478bd9Sstevel@tonic-gate #include <rpc/clnt.h>
657c478bd9Sstevel@tonic-gate
667c478bd9Sstevel@tonic-gate #include <nfs/nfs.h>
677c478bd9Sstevel@tonic-gate #include <nfs/nfs_clnt.h>
687c478bd9Sstevel@tonic-gate #include <nfs/nfs_acl.h>
697c478bd9Sstevel@tonic-gate
707c478bd9Sstevel@tonic-gate #include <nfs/nfs4.h>
717c478bd9Sstevel@tonic-gate #include <nfs/rnode4.h>
727c478bd9Sstevel@tonic-gate #include <nfs/nfs4_clnt.h>
737c478bd9Sstevel@tonic-gate
747c478bd9Sstevel@tonic-gate #include <vm/hat.h>
757c478bd9Sstevel@tonic-gate #include <vm/as.h>
767c478bd9Sstevel@tonic-gate #include <vm/page.h>
777c478bd9Sstevel@tonic-gate #include <vm/pvn.h>
787c478bd9Sstevel@tonic-gate #include <vm/seg.h>
797c478bd9Sstevel@tonic-gate #include <vm/seg_map.h>
807c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
817c478bd9Sstevel@tonic-gate
827c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
837c478bd9Sstevel@tonic-gate
847c478bd9Sstevel@tonic-gate /*
857c478bd9Sstevel@tonic-gate * Arguments to page-flush thread.
867c478bd9Sstevel@tonic-gate */
877c478bd9Sstevel@tonic-gate typedef struct {
887c478bd9Sstevel@tonic-gate vnode_t *vp;
897c478bd9Sstevel@tonic-gate cred_t *cr;
907c478bd9Sstevel@tonic-gate } pgflush_t;
917c478bd9Sstevel@tonic-gate
927c478bd9Sstevel@tonic-gate #ifdef DEBUG
937c478bd9Sstevel@tonic-gate int nfs4_client_lease_debug;
947c478bd9Sstevel@tonic-gate int nfs4_sharedfh_debug;
957c478bd9Sstevel@tonic-gate int nfs4_fname_debug;
967c478bd9Sstevel@tonic-gate
977c478bd9Sstevel@tonic-gate /* temporary: panic if v_type is inconsistent with r_attr va_type */
987c478bd9Sstevel@tonic-gate int nfs4_vtype_debug;
997c478bd9Sstevel@tonic-gate
1007c478bd9Sstevel@tonic-gate uint_t nfs4_tsd_key;
1017c478bd9Sstevel@tonic-gate #endif
1027c478bd9Sstevel@tonic-gate
1037c478bd9Sstevel@tonic-gate static time_t nfs4_client_resumed = 0;
1047c478bd9Sstevel@tonic-gate static callb_id_t cid = 0;
1057c478bd9Sstevel@tonic-gate
1067c478bd9Sstevel@tonic-gate static int nfs4renew(nfs4_server_t *);
1077c478bd9Sstevel@tonic-gate static void nfs4_attrcache_va(vnode_t *, nfs4_ga_res_t *, int);
1087c478bd9Sstevel@tonic-gate static void nfs4_pgflush_thread(pgflush_t *);
1097c478bd9Sstevel@tonic-gate
1107c478bd9Sstevel@tonic-gate static boolean_t nfs4_client_cpr_callb(void *, int);
1117c478bd9Sstevel@tonic-gate
1127c478bd9Sstevel@tonic-gate struct mi4_globals {
1137c478bd9Sstevel@tonic-gate kmutex_t mig_lock; /* lock protecting mig_list */
1147c478bd9Sstevel@tonic-gate list_t mig_list; /* list of NFS v4 mounts in zone */
1157c478bd9Sstevel@tonic-gate boolean_t mig_destructor_called;
1167c478bd9Sstevel@tonic-gate };
1177c478bd9Sstevel@tonic-gate
1187c478bd9Sstevel@tonic-gate static zone_key_t mi4_list_key;
1197c478bd9Sstevel@tonic-gate
1207c478bd9Sstevel@tonic-gate /*
1217c478bd9Sstevel@tonic-gate * Attributes caching:
1227c478bd9Sstevel@tonic-gate *
1237c478bd9Sstevel@tonic-gate * Attributes are cached in the rnode in struct vattr form.
1247c478bd9Sstevel@tonic-gate * There is a time associated with the cached attributes (r_time_attr_inval)
1257c478bd9Sstevel@tonic-gate * which tells whether the attributes are valid. The time is initialized
1267c478bd9Sstevel@tonic-gate * to the difference between current time and the modify time of the vnode
1277c478bd9Sstevel@tonic-gate * when new attributes are cached. This allows the attributes for
1287c478bd9Sstevel@tonic-gate * files that have changed recently to be timed out sooner than for files
1297c478bd9Sstevel@tonic-gate * that have not changed for a long time. There are minimum and maximum
1307c478bd9Sstevel@tonic-gate * timeout values that can be set per mount point.
1317c478bd9Sstevel@tonic-gate */
1327c478bd9Sstevel@tonic-gate
1337c478bd9Sstevel@tonic-gate /*
1347c478bd9Sstevel@tonic-gate * If a cache purge is in progress, wait for it to finish.
1357c478bd9Sstevel@tonic-gate *
1367c478bd9Sstevel@tonic-gate * The current thread must not be in the middle of an
1377c478bd9Sstevel@tonic-gate * nfs4_start_op/nfs4_end_op region. Otherwise, there could be a deadlock
1387c478bd9Sstevel@tonic-gate * between this thread, a recovery thread, and the page flush thread.
1397c478bd9Sstevel@tonic-gate */
1407c478bd9Sstevel@tonic-gate int
nfs4_waitfor_purge_complete(vnode_t * vp)1417c478bd9Sstevel@tonic-gate nfs4_waitfor_purge_complete(vnode_t *vp)
1427c478bd9Sstevel@tonic-gate {
1437c478bd9Sstevel@tonic-gate rnode4_t *rp;
1447c478bd9Sstevel@tonic-gate k_sigset_t smask;
1457c478bd9Sstevel@tonic-gate
1467c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
1477c478bd9Sstevel@tonic-gate if ((rp->r_serial != NULL && rp->r_serial != curthread) ||
1487c478bd9Sstevel@tonic-gate ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread)) {
1497c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
1507c478bd9Sstevel@tonic-gate sigintr(&smask, VTOMI4(vp)->mi_flags & MI4_INT);
1517c478bd9Sstevel@tonic-gate while ((rp->r_serial != NULL && rp->r_serial != curthread) ||
1527c478bd9Sstevel@tonic-gate ((rp->r_flags & R4PGFLUSH) &&
1537c478bd9Sstevel@tonic-gate rp->r_pgflush != curthread)) {
1547c478bd9Sstevel@tonic-gate if (!cv_wait_sig(&rp->r_cv, &rp->r_statelock)) {
1557c478bd9Sstevel@tonic-gate sigunintr(&smask);
1567c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
1577c478bd9Sstevel@tonic-gate return (EINTR);
1587c478bd9Sstevel@tonic-gate }
1597c478bd9Sstevel@tonic-gate }
1607c478bd9Sstevel@tonic-gate sigunintr(&smask);
1617c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
1627c478bd9Sstevel@tonic-gate }
1637c478bd9Sstevel@tonic-gate return (0);
1647c478bd9Sstevel@tonic-gate }
1657c478bd9Sstevel@tonic-gate
1667c478bd9Sstevel@tonic-gate /*
1677c478bd9Sstevel@tonic-gate * Validate caches by checking cached attributes. If they have timed out,
1687c478bd9Sstevel@tonic-gate * then get new attributes from the server. As a side effect, cache
1697c478bd9Sstevel@tonic-gate * invalidation is done if the attributes have changed.
1707c478bd9Sstevel@tonic-gate *
1717c478bd9Sstevel@tonic-gate * If the attributes have not timed out and if there is a cache
1727c478bd9Sstevel@tonic-gate * invalidation being done by some other thread, then wait until that
1737c478bd9Sstevel@tonic-gate * thread has completed the cache invalidation.
1747c478bd9Sstevel@tonic-gate */
1757c478bd9Sstevel@tonic-gate int
nfs4_validate_caches(vnode_t * vp,cred_t * cr)1767c478bd9Sstevel@tonic-gate nfs4_validate_caches(vnode_t *vp, cred_t *cr)
1777c478bd9Sstevel@tonic-gate {
1787c478bd9Sstevel@tonic-gate int error;
1797c478bd9Sstevel@tonic-gate nfs4_ga_res_t gar;
1807c478bd9Sstevel@tonic-gate
1817c478bd9Sstevel@tonic-gate if (ATTRCACHE4_VALID(vp)) {
1827c478bd9Sstevel@tonic-gate error = nfs4_waitfor_purge_complete(vp);
1837c478bd9Sstevel@tonic-gate if (error)
1847c478bd9Sstevel@tonic-gate return (error);
1857c478bd9Sstevel@tonic-gate return (0);
1867c478bd9Sstevel@tonic-gate }
1877c478bd9Sstevel@tonic-gate
1887c478bd9Sstevel@tonic-gate return (nfs4_getattr_otw(vp, &gar, cr, 0));
1897c478bd9Sstevel@tonic-gate }
1907c478bd9Sstevel@tonic-gate
1917c478bd9Sstevel@tonic-gate /*
1927c478bd9Sstevel@tonic-gate * Fill in attribute from the cache.
1937c478bd9Sstevel@tonic-gate * If valid, then return 0 to indicate that no error occurred,
1947c478bd9Sstevel@tonic-gate * otherwise return 1 to indicate that an error occurred.
1957c478bd9Sstevel@tonic-gate */
1967c478bd9Sstevel@tonic-gate static int
nfs4_getattr_cache(vnode_t * vp,struct vattr * vap)1977c478bd9Sstevel@tonic-gate nfs4_getattr_cache(vnode_t *vp, struct vattr *vap)
1987c478bd9Sstevel@tonic-gate {
1997c478bd9Sstevel@tonic-gate rnode4_t *rp;
2007c478bd9Sstevel@tonic-gate
2017c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
2027c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
2037c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statev4_lock);
2047c478bd9Sstevel@tonic-gate if (ATTRCACHE4_VALID(vp)) {
2057c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statev4_lock);
2067c478bd9Sstevel@tonic-gate /*
2077c478bd9Sstevel@tonic-gate * Cached attributes are valid
2087c478bd9Sstevel@tonic-gate */
2097c478bd9Sstevel@tonic-gate *vap = rp->r_attr;
2107c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
2117c478bd9Sstevel@tonic-gate return (0);
2127c478bd9Sstevel@tonic-gate }
2137c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statev4_lock);
2147c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
2157c478bd9Sstevel@tonic-gate return (1);
2167c478bd9Sstevel@tonic-gate }
2177c478bd9Sstevel@tonic-gate
2187c478bd9Sstevel@tonic-gate
2197c478bd9Sstevel@tonic-gate /*
2207c478bd9Sstevel@tonic-gate * If returned error is ESTALE flush all caches. The nfs4_purge_caches()
2217c478bd9Sstevel@tonic-gate * call is synchronous because all the pages were invalidated by the
2227c478bd9Sstevel@tonic-gate * nfs4_invalidate_pages() call.
2237c478bd9Sstevel@tonic-gate */
2247c478bd9Sstevel@tonic-gate void
nfs4_purge_stale_fh(int errno,vnode_t * vp,cred_t * cr)2257c478bd9Sstevel@tonic-gate nfs4_purge_stale_fh(int errno, vnode_t *vp, cred_t *cr)
2267c478bd9Sstevel@tonic-gate {
2277c478bd9Sstevel@tonic-gate struct rnode4 *rp = VTOR4(vp);
2287c478bd9Sstevel@tonic-gate
2297c478bd9Sstevel@tonic-gate /* Ensure that the ..._end_op() call has been done */
2307c478bd9Sstevel@tonic-gate ASSERT(tsd_get(nfs4_tsd_key) == NULL);
2317c478bd9Sstevel@tonic-gate
2327c478bd9Sstevel@tonic-gate if (errno != ESTALE)
2337c478bd9Sstevel@tonic-gate return;
2347c478bd9Sstevel@tonic-gate
2357c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
2367c478bd9Sstevel@tonic-gate rp->r_flags |= R4STALE;
2377c478bd9Sstevel@tonic-gate if (!rp->r_error)
2387c478bd9Sstevel@tonic-gate rp->r_error = errno;
2397c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
2407c478bd9Sstevel@tonic-gate if (nfs4_has_pages(vp))
2417c478bd9Sstevel@tonic-gate nfs4_invalidate_pages(vp, (u_offset_t)0, cr);
2427c478bd9Sstevel@tonic-gate nfs4_purge_caches(vp, NFS4_PURGE_DNLC, cr, FALSE);
2437c478bd9Sstevel@tonic-gate }
2447c478bd9Sstevel@tonic-gate
2457c478bd9Sstevel@tonic-gate /*
2467c478bd9Sstevel@tonic-gate * Purge all of the various NFS `data' caches. If "asyncpg" is TRUE, the
2477c478bd9Sstevel@tonic-gate * page purge is done asynchronously.
2487c478bd9Sstevel@tonic-gate */
2497c478bd9Sstevel@tonic-gate void
nfs4_purge_caches(vnode_t * vp,int purge_dnlc,cred_t * cr,int asyncpg)2507c478bd9Sstevel@tonic-gate nfs4_purge_caches(vnode_t *vp, int purge_dnlc, cred_t *cr, int asyncpg)
2517c478bd9Sstevel@tonic-gate {
2527c478bd9Sstevel@tonic-gate rnode4_t *rp;
2537c478bd9Sstevel@tonic-gate char *contents;
2547c478bd9Sstevel@tonic-gate vnode_t *xattr;
2557c478bd9Sstevel@tonic-gate int size;
2567c478bd9Sstevel@tonic-gate int pgflush; /* are we the page flush thread? */
2577c478bd9Sstevel@tonic-gate
2587c478bd9Sstevel@tonic-gate /*
2597c478bd9Sstevel@tonic-gate * Purge the DNLC for any entries which refer to this file.
2607c478bd9Sstevel@tonic-gate */
2617c478bd9Sstevel@tonic-gate if (vp->v_count > 1 &&
2627c478bd9Sstevel@tonic-gate (vp->v_type == VDIR || purge_dnlc == NFS4_PURGE_DNLC))
2637c478bd9Sstevel@tonic-gate dnlc_purge_vp(vp);
2647c478bd9Sstevel@tonic-gate
2657c478bd9Sstevel@tonic-gate /*
2667c478bd9Sstevel@tonic-gate * Clear any readdir state bits and purge the readlink response cache.
2677c478bd9Sstevel@tonic-gate */
2687c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
2697c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
2707c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4LOOKUP;
2717c478bd9Sstevel@tonic-gate contents = rp->r_symlink.contents;
2727c478bd9Sstevel@tonic-gate size = rp->r_symlink.size;
2737c478bd9Sstevel@tonic-gate rp->r_symlink.contents = NULL;
2747c478bd9Sstevel@tonic-gate
2757c478bd9Sstevel@tonic-gate xattr = rp->r_xattr_dir;
2767c478bd9Sstevel@tonic-gate rp->r_xattr_dir = NULL;
2777c478bd9Sstevel@tonic-gate
2787c478bd9Sstevel@tonic-gate /*
2797c478bd9Sstevel@tonic-gate * Purge pathconf cache too.
2807c478bd9Sstevel@tonic-gate */
2817c478bd9Sstevel@tonic-gate rp->r_pathconf.pc4_xattr_valid = 0;
2827c478bd9Sstevel@tonic-gate rp->r_pathconf.pc4_cache_valid = 0;
2837c478bd9Sstevel@tonic-gate
2847c478bd9Sstevel@tonic-gate pgflush = (curthread == rp->r_pgflush);
2857c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
2867c478bd9Sstevel@tonic-gate
2877c478bd9Sstevel@tonic-gate if (contents != NULL) {
2887c478bd9Sstevel@tonic-gate
2897c478bd9Sstevel@tonic-gate kmem_free((void *)contents, size);
2907c478bd9Sstevel@tonic-gate }
2917c478bd9Sstevel@tonic-gate
2927c478bd9Sstevel@tonic-gate if (xattr != NULL)
2937c478bd9Sstevel@tonic-gate VN_RELE(xattr);
2947c478bd9Sstevel@tonic-gate
2957c478bd9Sstevel@tonic-gate /*
2967c478bd9Sstevel@tonic-gate * Flush the page cache. If the current thread is the page flush
2977c478bd9Sstevel@tonic-gate * thread, don't initiate a new page flush. There's no need for
2987c478bd9Sstevel@tonic-gate * it, and doing it correctly is hard.
2997c478bd9Sstevel@tonic-gate */
3007c478bd9Sstevel@tonic-gate if (nfs4_has_pages(vp) && !pgflush) {
3017c478bd9Sstevel@tonic-gate if (!asyncpg) {
3027c478bd9Sstevel@tonic-gate (void) nfs4_waitfor_purge_complete(vp);
303d55e25c3SPavel Filipensky nfs4_flush_pages(vp, cr);
3047c478bd9Sstevel@tonic-gate } else {
3057c478bd9Sstevel@tonic-gate pgflush_t *args;
3067c478bd9Sstevel@tonic-gate
3077c478bd9Sstevel@tonic-gate /*
3087c478bd9Sstevel@tonic-gate * We don't hold r_statelock while creating the
3097c478bd9Sstevel@tonic-gate * thread, in case the call blocks. So we use a
3107c478bd9Sstevel@tonic-gate * flag to indicate that a page flush thread is
3117c478bd9Sstevel@tonic-gate * active.
3127c478bd9Sstevel@tonic-gate */
3137c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
3147c478bd9Sstevel@tonic-gate if (rp->r_flags & R4PGFLUSH) {
3157c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
3167c478bd9Sstevel@tonic-gate } else {
3177c478bd9Sstevel@tonic-gate rp->r_flags |= R4PGFLUSH;
3187c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
3197c478bd9Sstevel@tonic-gate
3207c478bd9Sstevel@tonic-gate args = kmem_alloc(sizeof (pgflush_t),
321b9238976Sth KM_SLEEP);
3227c478bd9Sstevel@tonic-gate args->vp = vp;
3237c478bd9Sstevel@tonic-gate VN_HOLD(args->vp);
3247c478bd9Sstevel@tonic-gate args->cr = cr;
3257c478bd9Sstevel@tonic-gate crhold(args->cr);
3267c478bd9Sstevel@tonic-gate (void) zthread_create(NULL, 0,
327b9238976Sth nfs4_pgflush_thread, args, 0,
328b9238976Sth minclsyspri);
3297c478bd9Sstevel@tonic-gate }
3307c478bd9Sstevel@tonic-gate }
3317c478bd9Sstevel@tonic-gate }
3327c478bd9Sstevel@tonic-gate
3337c478bd9Sstevel@tonic-gate /*
3347c478bd9Sstevel@tonic-gate * Flush the readdir response cache.
3357c478bd9Sstevel@tonic-gate */
3367c478bd9Sstevel@tonic-gate nfs4_purge_rddir_cache(vp);
3377c478bd9Sstevel@tonic-gate }
3387c478bd9Sstevel@tonic-gate
3397c478bd9Sstevel@tonic-gate /*
3407c478bd9Sstevel@tonic-gate * Invalidate all pages for the given file, after writing back the dirty
3417c478bd9Sstevel@tonic-gate * ones.
3427c478bd9Sstevel@tonic-gate */
3437c478bd9Sstevel@tonic-gate
344d55e25c3SPavel Filipensky void
nfs4_flush_pages(vnode_t * vp,cred_t * cr)345d55e25c3SPavel Filipensky nfs4_flush_pages(vnode_t *vp, cred_t *cr)
3467c478bd9Sstevel@tonic-gate {
3477c478bd9Sstevel@tonic-gate int error;
3487c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp);
3497c478bd9Sstevel@tonic-gate
350da6c28aaSamw error = VOP_PUTPAGE(vp, (u_offset_t)0, 0, B_INVAL, cr, NULL);
3517c478bd9Sstevel@tonic-gate if (error == ENOSPC || error == EDQUOT) {
3527c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
3537c478bd9Sstevel@tonic-gate if (!rp->r_error)
3547c478bd9Sstevel@tonic-gate rp->r_error = error;
3557c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
3567c478bd9Sstevel@tonic-gate }
3577c478bd9Sstevel@tonic-gate }
3587c478bd9Sstevel@tonic-gate
3597c478bd9Sstevel@tonic-gate /*
3607c478bd9Sstevel@tonic-gate * Page flush thread.
3617c478bd9Sstevel@tonic-gate */
3627c478bd9Sstevel@tonic-gate
3637c478bd9Sstevel@tonic-gate static void
nfs4_pgflush_thread(pgflush_t * args)3647c478bd9Sstevel@tonic-gate nfs4_pgflush_thread(pgflush_t *args)
3657c478bd9Sstevel@tonic-gate {
3667c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(args->vp);
3677c478bd9Sstevel@tonic-gate
3687c478bd9Sstevel@tonic-gate /* remember which thread we are, so we don't deadlock ourselves */
3697c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
3707c478bd9Sstevel@tonic-gate ASSERT(rp->r_pgflush == NULL);
3717c478bd9Sstevel@tonic-gate rp->r_pgflush = curthread;
3727c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
3737c478bd9Sstevel@tonic-gate
374d55e25c3SPavel Filipensky nfs4_flush_pages(args->vp, args->cr);
3757c478bd9Sstevel@tonic-gate
3767c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
3777c478bd9Sstevel@tonic-gate rp->r_pgflush = NULL;
3787c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4PGFLUSH;
3797c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv);
3807c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
3817c478bd9Sstevel@tonic-gate
3827c478bd9Sstevel@tonic-gate VN_RELE(args->vp);
3837c478bd9Sstevel@tonic-gate crfree(args->cr);
3847c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (pgflush_t));
3857c478bd9Sstevel@tonic-gate zthread_exit();
3867c478bd9Sstevel@tonic-gate }
3877c478bd9Sstevel@tonic-gate
3887c478bd9Sstevel@tonic-gate /*
3897c478bd9Sstevel@tonic-gate * Purge the readdir cache of all entries which are not currently
3907c478bd9Sstevel@tonic-gate * being filled.
3917c478bd9Sstevel@tonic-gate */
3927c478bd9Sstevel@tonic-gate void
nfs4_purge_rddir_cache(vnode_t * vp)3937c478bd9Sstevel@tonic-gate nfs4_purge_rddir_cache(vnode_t *vp)
3947c478bd9Sstevel@tonic-gate {
3957c478bd9Sstevel@tonic-gate rnode4_t *rp;
3967c478bd9Sstevel@tonic-gate
3977c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
3987c478bd9Sstevel@tonic-gate
3997c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
4007c478bd9Sstevel@tonic-gate rp->r_direof = NULL;
4017c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4LOOKUP;
4027c478bd9Sstevel@tonic-gate rp->r_flags |= R4READDIRWATTR;
4037c478bd9Sstevel@tonic-gate rddir4_cache_purge(rp);
4047c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
4057c478bd9Sstevel@tonic-gate }
4067c478bd9Sstevel@tonic-gate
4077c478bd9Sstevel@tonic-gate /*
4087c478bd9Sstevel@tonic-gate * Set attributes cache for given vnode using virtual attributes. There is
4097c478bd9Sstevel@tonic-gate * no cache validation, but if the attributes are deemed to be stale, they
4107c478bd9Sstevel@tonic-gate * are ignored. This corresponds to nfs3_attrcache().
4117c478bd9Sstevel@tonic-gate *
4127c478bd9Sstevel@tonic-gate * Set the timeout value on the attribute cache and fill it
4137c478bd9Sstevel@tonic-gate * with the passed in attributes.
4147c478bd9Sstevel@tonic-gate */
4157c478bd9Sstevel@tonic-gate void
nfs4_attrcache_noinval(vnode_t * vp,nfs4_ga_res_t * garp,hrtime_t t)4167c478bd9Sstevel@tonic-gate nfs4_attrcache_noinval(vnode_t *vp, nfs4_ga_res_t *garp, hrtime_t t)
4177c478bd9Sstevel@tonic-gate {
4187c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp);
4197c478bd9Sstevel@tonic-gate
4207c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
4217c478bd9Sstevel@tonic-gate if (rp->r_time_attr_saved <= t)
4227c478bd9Sstevel@tonic-gate nfs4_attrcache_va(vp, garp, FALSE);
4237c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
4247c478bd9Sstevel@tonic-gate }
4257c478bd9Sstevel@tonic-gate
4267c478bd9Sstevel@tonic-gate /*
4277c478bd9Sstevel@tonic-gate * Use the passed in virtual attributes to check to see whether the
4287c478bd9Sstevel@tonic-gate * data and metadata caches are valid, cache the new attributes, and
4297c478bd9Sstevel@tonic-gate * then do the cache invalidation if required.
4307c478bd9Sstevel@tonic-gate *
4317c478bd9Sstevel@tonic-gate * The cache validation and caching of the new attributes is done
4327c478bd9Sstevel@tonic-gate * atomically via the use of the mutex, r_statelock. If required,
4337c478bd9Sstevel@tonic-gate * the cache invalidation is done atomically w.r.t. the cache
4347c478bd9Sstevel@tonic-gate * validation and caching of the attributes via the pseudo lock,
4357c478bd9Sstevel@tonic-gate * r_serial.
4367c478bd9Sstevel@tonic-gate *
4377c478bd9Sstevel@tonic-gate * This routine is used to do cache validation and attributes caching
4387c478bd9Sstevel@tonic-gate * for operations with a single set of post operation attributes.
4397c478bd9Sstevel@tonic-gate */
4407c478bd9Sstevel@tonic-gate
4417c478bd9Sstevel@tonic-gate void
nfs4_attr_cache(vnode_t * vp,nfs4_ga_res_t * garp,hrtime_t t,cred_t * cr,int async,change_info4 * cinfo)4427c478bd9Sstevel@tonic-gate nfs4_attr_cache(vnode_t *vp, nfs4_ga_res_t *garp,
443b9238976Sth hrtime_t t, cred_t *cr, int async,
444b9238976Sth change_info4 *cinfo)
4457c478bd9Sstevel@tonic-gate {
4467c478bd9Sstevel@tonic-gate rnode4_t *rp;
4475e4df02aSvv int mtime_changed = 0;
4485e4df02aSvv int ctime_changed = 0;
4497c478bd9Sstevel@tonic-gate vsecattr_t *vsp;
4507c478bd9Sstevel@tonic-gate int was_serial, set_time_cache_inval, recov;
4517c478bd9Sstevel@tonic-gate vattr_t *vap = &garp->n4g_va;
4527c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VTOMI4(vp);
4535e4df02aSvv len_t preattr_rsize;
4545e4df02aSvv boolean_t writemodify_set = B_FALSE;
4555e4df02aSvv boolean_t cachepurge_set = B_FALSE;
4567c478bd9Sstevel@tonic-gate
4577c478bd9Sstevel@tonic-gate ASSERT(mi->mi_vfsp->vfs_dev == garp->n4g_va.va_fsid);
4587c478bd9Sstevel@tonic-gate
4597c478bd9Sstevel@tonic-gate /* Is curthread the recovery thread? */
4607c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
4617c478bd9Sstevel@tonic-gate recov = (VTOMI4(vp)->mi_recovthread == curthread);
4627c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
4637c478bd9Sstevel@tonic-gate
4647c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
4657c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
4667c478bd9Sstevel@tonic-gate was_serial = (rp->r_serial == curthread);
467*6dc7d057SMarcel Telka if (rp->r_serial != NULL && !was_serial) {
4687c478bd9Sstevel@tonic-gate /*
469*6dc7d057SMarcel Telka * Purge current attrs and bail out to avoid potential deadlock
470*6dc7d057SMarcel Telka * between another thread caching attrs (r_serial thread), this
471*6dc7d057SMarcel Telka * thread, and a thread trying to read or write pages.
4727c478bd9Sstevel@tonic-gate */
473*6dc7d057SMarcel Telka PURGE_ATTRCACHE4_LOCKED(rp);
474*6dc7d057SMarcel Telka mutex_exit(&rp->r_statelock);
475*6dc7d057SMarcel Telka return;
4767c478bd9Sstevel@tonic-gate }
4777c478bd9Sstevel@tonic-gate
4787c478bd9Sstevel@tonic-gate /*
4797c478bd9Sstevel@tonic-gate * If there is a page flush thread, the current thread needs to
4807c478bd9Sstevel@tonic-gate * bail out, to prevent a possible deadlock between the current
4817c478bd9Sstevel@tonic-gate * thread (which might be in a start_op/end_op region), the
4827c478bd9Sstevel@tonic-gate * recovery thread, and the page flush thread. Expire the
4837c478bd9Sstevel@tonic-gate * attribute cache, so that any attributes the current thread was
4847c478bd9Sstevel@tonic-gate * going to set are not lost.
4857c478bd9Sstevel@tonic-gate */
4867c478bd9Sstevel@tonic-gate if ((rp->r_flags & R4PGFLUSH) && rp->r_pgflush != curthread) {
4877c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE4_LOCKED(rp);
4887c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
4897c478bd9Sstevel@tonic-gate return;
4907c478bd9Sstevel@tonic-gate }
4917c478bd9Sstevel@tonic-gate
4927c478bd9Sstevel@tonic-gate if (rp->r_time_attr_saved > t) {
4937c478bd9Sstevel@tonic-gate /*
4947c478bd9Sstevel@tonic-gate * Attributes have been cached since these attributes were
49500fdf600Smaheshvs * probably made. If there is an inconsistency in what is
49600fdf600Smaheshvs * cached, mark them invalid. If not, don't act on them.
4977c478bd9Sstevel@tonic-gate */
49800fdf600Smaheshvs if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size))
49900fdf600Smaheshvs PURGE_ATTRCACHE4_LOCKED(rp);
5007c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
5017c478bd9Sstevel@tonic-gate return;
5027c478bd9Sstevel@tonic-gate }
5037c478bd9Sstevel@tonic-gate set_time_cache_inval = 0;
5047c478bd9Sstevel@tonic-gate if (cinfo) {
5057c478bd9Sstevel@tonic-gate /*
5067c478bd9Sstevel@tonic-gate * Only directory modifying callers pass non-NULL cinfo.
5077c478bd9Sstevel@tonic-gate */
5087c478bd9Sstevel@tonic-gate ASSERT(vp->v_type == VDIR);
5097c478bd9Sstevel@tonic-gate /*
5107c478bd9Sstevel@tonic-gate * If the cache timeout either doesn't exist or hasn't expired,
5117c478bd9Sstevel@tonic-gate * and dir didn't changed on server before dirmod op
5127c478bd9Sstevel@tonic-gate * and dir didn't change after dirmod op but before getattr
5137c478bd9Sstevel@tonic-gate * then there's a chance that the client's cached data for
5147c478bd9Sstevel@tonic-gate * this object is current (not stale). No immediate cache
5157c478bd9Sstevel@tonic-gate * flush is required.
5167c478bd9Sstevel@tonic-gate *
5177c478bd9Sstevel@tonic-gate */
5187c478bd9Sstevel@tonic-gate if ((! rp->r_time_cache_inval || t < rp->r_time_cache_inval) &&
5197c478bd9Sstevel@tonic-gate cinfo->before == rp->r_change &&
5207c478bd9Sstevel@tonic-gate (garp->n4g_change_valid &&
5217c478bd9Sstevel@tonic-gate cinfo->after == garp->n4g_change)) {
5227c478bd9Sstevel@tonic-gate
5237c478bd9Sstevel@tonic-gate /*
5247c478bd9Sstevel@tonic-gate * If atomic isn't set, then the before/after info
5257c478bd9Sstevel@tonic-gate * cannot be blindly trusted. For this case, we tell
5267c478bd9Sstevel@tonic-gate * nfs4_attrcache_va to cache the attrs but also
5277c478bd9Sstevel@tonic-gate * establish an absolute maximum cache timeout. When
5287c478bd9Sstevel@tonic-gate * the timeout is reached, caches will be flushed.
5297c478bd9Sstevel@tonic-gate */
5307c478bd9Sstevel@tonic-gate if (! cinfo->atomic)
5317c478bd9Sstevel@tonic-gate set_time_cache_inval = 1;
5327c478bd9Sstevel@tonic-gate } else {
5337c478bd9Sstevel@tonic-gate
5347c478bd9Sstevel@tonic-gate /*
5357c478bd9Sstevel@tonic-gate * We're not sure exactly what changed, but we know
5367c478bd9Sstevel@tonic-gate * what to do. flush all caches for dir. remove the
5377c478bd9Sstevel@tonic-gate * attr timeout.
5387c478bd9Sstevel@tonic-gate *
5397c478bd9Sstevel@tonic-gate * a) timeout expired. flush all caches.
5407c478bd9Sstevel@tonic-gate * b) r_change != cinfo.before. flush all caches.
5417c478bd9Sstevel@tonic-gate * c) r_change == cinfo.before, but cinfo.after !=
5427c478bd9Sstevel@tonic-gate * post-op getattr(change). flush all caches.
5437c478bd9Sstevel@tonic-gate * d) post-op getattr(change) not provided by server.
5447c478bd9Sstevel@tonic-gate * flush all caches.
5457c478bd9Sstevel@tonic-gate */
5467c478bd9Sstevel@tonic-gate mtime_changed = 1;
5477c478bd9Sstevel@tonic-gate ctime_changed = 1;
5487c478bd9Sstevel@tonic-gate rp->r_time_cache_inval = 0;
5497c478bd9Sstevel@tonic-gate }
5507c478bd9Sstevel@tonic-gate } else {
5515e4df02aSvv /*
5525e4df02aSvv * Write thread after writing data to file on remote server,
5535e4df02aSvv * will always set R4WRITEMODIFIED to indicate that file on
5545e4df02aSvv * remote server was modified with a WRITE operation and would
5555e4df02aSvv * have marked attribute cache as timed out. If R4WRITEMODIFIED
5565e4df02aSvv * is set, then do not check for mtime and ctime change.
5575e4df02aSvv */
5587c478bd9Sstevel@tonic-gate if (!(rp->r_flags & R4WRITEMODIFIED)) {
5597c478bd9Sstevel@tonic-gate if (!CACHE4_VALID(rp, vap->va_mtime, vap->va_size))
5607c478bd9Sstevel@tonic-gate mtime_changed = 1;
5615e4df02aSvv
5627c478bd9Sstevel@tonic-gate if (rp->r_attr.va_ctime.tv_sec !=
5637c478bd9Sstevel@tonic-gate vap->va_ctime.tv_sec ||
5647c478bd9Sstevel@tonic-gate rp->r_attr.va_ctime.tv_nsec !=
5657c478bd9Sstevel@tonic-gate vap->va_ctime.tv_nsec)
5667c478bd9Sstevel@tonic-gate ctime_changed = 1;
5674a695956SMarcel Telka
5684a695956SMarcel Telka /*
5694a695956SMarcel Telka * If the change attribute was not provided by server
5704a695956SMarcel Telka * or it differs, then flush all caches.
5714a695956SMarcel Telka */
5724a695956SMarcel Telka if (!garp->n4g_change_valid ||
5734a695956SMarcel Telka rp->r_change != garp->n4g_change) {
5744a695956SMarcel Telka mtime_changed = 1;
5754a695956SMarcel Telka ctime_changed = 1;
5764a695956SMarcel Telka }
5777c478bd9Sstevel@tonic-gate } else {
5785e4df02aSvv writemodify_set = B_TRUE;
5797c478bd9Sstevel@tonic-gate }
5807c478bd9Sstevel@tonic-gate }
5817c478bd9Sstevel@tonic-gate
5825e4df02aSvv preattr_rsize = rp->r_size;
5835e4df02aSvv
5847c478bd9Sstevel@tonic-gate nfs4_attrcache_va(vp, garp, set_time_cache_inval);
5857c478bd9Sstevel@tonic-gate
5865e4df02aSvv /*
5875e4df02aSvv * If we have updated filesize in nfs4_attrcache_va, as soon as we
5885e4df02aSvv * drop statelock we will be in transition of purging all
5895e4df02aSvv * our caches and updating them. It is possible for another
5905e4df02aSvv * thread to pick this new file size and read in zeroed data.
5915e4df02aSvv * stall other threads till cache purge is complete.
5925e4df02aSvv */
5935e4df02aSvv if ((!cinfo) && (rp->r_size != preattr_rsize)) {
5945e4df02aSvv /*
5955e4df02aSvv * If R4WRITEMODIFIED was set and we have updated the file
5965e4df02aSvv * size, Server's returned file size need not necessarily
5975e4df02aSvv * be because of this Client's WRITE. We need to purge
5985e4df02aSvv * all caches.
5995e4df02aSvv */
6005e4df02aSvv if (writemodify_set)
6015e4df02aSvv mtime_changed = 1;
6025e4df02aSvv
6035e4df02aSvv if (mtime_changed && !(rp->r_flags & R4INCACHEPURGE)) {
6045e4df02aSvv rp->r_flags |= R4INCACHEPURGE;
6055e4df02aSvv cachepurge_set = B_TRUE;
6065e4df02aSvv }
6075e4df02aSvv }
6085e4df02aSvv
6097c478bd9Sstevel@tonic-gate if (!mtime_changed && !ctime_changed) {
6107c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
6117c478bd9Sstevel@tonic-gate return;
6127c478bd9Sstevel@tonic-gate }
6137c478bd9Sstevel@tonic-gate
6147c478bd9Sstevel@tonic-gate rp->r_serial = curthread;
6157c478bd9Sstevel@tonic-gate
6167c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
6177c478bd9Sstevel@tonic-gate
6187c478bd9Sstevel@tonic-gate /*
6197c478bd9Sstevel@tonic-gate * If we're the recov thread, then force async nfs4_purge_caches
6207c478bd9Sstevel@tonic-gate * to avoid potential deadlock.
6217c478bd9Sstevel@tonic-gate */
6227c478bd9Sstevel@tonic-gate if (mtime_changed)
6237c478bd9Sstevel@tonic-gate nfs4_purge_caches(vp, NFS4_NOPURGE_DNLC, cr, recov ? 1 : async);
6247c478bd9Sstevel@tonic-gate
6255e4df02aSvv if ((rp->r_flags & R4INCACHEPURGE) && cachepurge_set) {
6265e4df02aSvv mutex_enter(&rp->r_statelock);
6275e4df02aSvv rp->r_flags &= ~R4INCACHEPURGE;
6285e4df02aSvv cv_broadcast(&rp->r_cv);
6295e4df02aSvv mutex_exit(&rp->r_statelock);
6305e4df02aSvv cachepurge_set = B_FALSE;
6315e4df02aSvv }
6325e4df02aSvv
6337c478bd9Sstevel@tonic-gate if (ctime_changed) {
6347c478bd9Sstevel@tonic-gate (void) nfs4_access_purge_rp(rp);
6357c478bd9Sstevel@tonic-gate if (rp->r_secattr != NULL) {
6367c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
6377c478bd9Sstevel@tonic-gate vsp = rp->r_secattr;
6387c478bd9Sstevel@tonic-gate rp->r_secattr = NULL;
6397c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
6407c478bd9Sstevel@tonic-gate if (vsp != NULL)
6417c478bd9Sstevel@tonic-gate nfs4_acl_free_cache(vsp);
6427c478bd9Sstevel@tonic-gate }
6437c478bd9Sstevel@tonic-gate }
6447c478bd9Sstevel@tonic-gate
6457c478bd9Sstevel@tonic-gate if (!was_serial) {
6467c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
6477c478bd9Sstevel@tonic-gate rp->r_serial = NULL;
6487c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv);
6497c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
6507c478bd9Sstevel@tonic-gate }
6517c478bd9Sstevel@tonic-gate }
6527c478bd9Sstevel@tonic-gate
/*
 * Set attributes cache for given vnode using virtual attributes.
 *
 * Set the timeout value on the attribute cache and fill it
 * with the passed in attributes.
 *
 *	vp	- vnode whose rnode attribute cache is updated; if vp is
 *		  a shadow vnode, the master vnode is used for flag checks
 *	garp	- GETATTR results carrying the new attributes (n4g_va
 *		  must have the full AT_ALL mask)
 *	set_cache_timeout - nonzero to establish r_time_cache_inval when
 *		  it is not already set; an existing timeout is never
 *		  extended or cleared here
 *
 * The caller must be holding r_statelock.
 */
static void
nfs4_attrcache_va(vnode_t *vp, nfs4_ga_res_t *garp, int set_cache_timeout)
{
	rnode4_t *rp;
	mntinfo4_t *mi;
	hrtime_t delta;		/* attribute cache lifetime, in nanoseconds */
	hrtime_t now;
	vattr_t *vap = &garp->n4g_va;

	rp = VTOR4(vp);

	ASSERT(MUTEX_HELD(&rp->r_statelock));
	ASSERT(vap->va_mask == AT_ALL);

	/* Switch to master before checking v_flag */
	if (IS_SHADOW(vp, rp))
		vp = RTOV4(rp);

	now = gethrtime();

	mi = VTOMI4(vp);

	/*
	 * Only establish a new cache timeout (if requested). Never
	 * extend a timeout. Never clear a timeout. Clearing a timeout
	 * is done by nfs4_update_dircaches (ancestor in our call chain)
	 */
	if (set_cache_timeout && ! rp->r_time_cache_inval)
		rp->r_time_cache_inval = now + mi->mi_acdirmax;

	/*
	 * Delta is the number of nanoseconds that we will
	 * cache the attributes of the file. It is based on
	 * the number of nanoseconds since the last time that
	 * we detected a change. The assumption is that files
	 * that changed recently are likely to change again.
	 * There is a minimum and a maximum for regular files
	 * and for directories which is enforced though.
	 *
	 * Using the time since last change was detected
	 * eliminates direct comparison or calculation
	 * using mixed client and server times. NFS does
	 * not make any assumptions regarding the client
	 * and server clocks being synchronized.
	 */
	if (vap->va_mtime.tv_sec != rp->r_attr.va_mtime.tv_sec ||
	    vap->va_mtime.tv_nsec != rp->r_attr.va_mtime.tv_nsec ||
	    vap->va_size != rp->r_attr.va_size) {
		rp->r_time_attr_saved = now;
	}

	/* Attribute caching disabled (noac mount or VNOCACHE): expire now */
	if ((mi->mi_flags & MI4_NOAC) || (vp->v_flag & VNOCACHE))
		delta = 0;
	else {
		delta = now - rp->r_time_attr_saved;
		/* Clamp delta to the mount's ac{dir,reg}{min,max} bounds */
		if (vp->v_type == VDIR) {
			if (delta < mi->mi_acdirmin)
				delta = mi->mi_acdirmin;
			else if (delta > mi->mi_acdirmax)
				delta = mi->mi_acdirmax;
		} else {
			if (delta < mi->mi_acregmin)
				delta = mi->mi_acregmin;
			else if (delta > mi->mi_acregmax)
				delta = mi->mi_acregmax;
		}
	}
	rp->r_time_attr_inval = now + delta;

	rp->r_attr = *vap;
	if (garp->n4g_change_valid)
		rp->r_change = garp->n4g_change;

	/*
	 * The attributes that were returned may be valid and can
	 * be used, but they may not be allowed to be cached.
	 * Reset the timers to cause immediate invalidation and
	 * clear r_change so no VERIFY operations will succeed
	 */
	if (garp->n4g_attrwhy == NFS4_GETATTR_NOCACHE_OK) {
		rp->r_time_attr_inval = now;
		rp->r_time_attr_saved = now;
		rp->r_change = 0;
	}

	/*
	 * If mounted_on_fileid returned AND the object is a stub,
	 * then set object's va_nodeid to the mounted over fid
	 * returned by server.
	 *
	 * If mounted_on_fileid not provided/supported, then
	 * just set it to 0 for now. Eventually it would be
	 * better to set it to a hashed version of FH. This
	 * would probably be good enough to provide a unique
	 * fid/d_ino within a dir.
	 *
	 * We don't need to carry mounted_on_fileid in the
	 * rnode as long as the client never requests fileid
	 * without also requesting mounted_on_fileid. For
	 * now, it stays.
	 */
	if (garp->n4g_mon_fid_valid) {
		rp->r_mntd_fid = garp->n4g_mon_fid;

		if (RP_ISSTUB(rp))
			rp->r_attr.va_nodeid = rp->r_mntd_fid;
	}

	/*
	 * Check to see if there are valid pathconf bits to
	 * cache in the rnode.
	 */
	if (garp->n4g_ext_res) {
		if (garp->n4g_ext_res->n4g_pc4.pc4_cache_valid) {
			rp->r_pathconf = garp->n4g_ext_res->n4g_pc4;
		} else {
			/* Full pathconf invalid; cache the xattr bit alone */
			if (garp->n4g_ext_res->n4g_pc4.pc4_xattr_valid) {
				rp->r_pathconf.pc4_xattr_valid = TRUE;
				rp->r_pathconf.pc4_xattr_exists =
				    garp->n4g_ext_res->n4g_pc4.pc4_xattr_exists;
			}
		}
	}
	/*
	 * Update the size of the file if there is no cached data or if
	 * the cached data is clean and there is no data being written
	 * out.
	 */
	if (rp->r_size != vap->va_size &&
	    (!vn_has_cached_data(vp) ||
	    (!(rp->r_flags & R4DIRTY) && rp->r_count == 0))) {
		rp->r_size = vap->va_size;
	}
	nfs_setswaplike(vp, vap);
	/* Attributes are fresh now; clear the pending write-modified hint */
	rp->r_flags &= ~R4WRITEMODIFIED;
}
7977c478bd9Sstevel@tonic-gate
/*
 * Get attributes over-the-wire and update attributes cache
 * if no error occurred in the over-the-wire operation.
 * Return 0 if successful, otherwise error.
 *
 *	vp	- vnode whose attributes are fetched
 *	garp	- filled in with the GETATTR results on success
 *	cr	- credential used for the over-the-wire calls
 *	get_acl	- nonzero to also request the ACL attribute
 */
int
nfs4_getattr_otw(vnode_t *vp, nfs4_ga_res_t *garp, cred_t *cr, int get_acl)
{
	mntinfo4_t *mi = VTOMI4(vp);
	hrtime_t t;
	nfs4_recov_state_t recov_state;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

	/* Save the original mount point security flavor */
	(void) save_mnt_secinfo(mi->mi_curr_serv);

recov_retry:

	/* Begin the filesystem operation; may fail during unmount/recovery */
	if ((e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR,
	    &recov_state, NULL))) {
		(void) check_mnt_secinfo(mi->mi_curr_serv, vp);
		return (e.error);
	}

	/* Timestamp taken before the call; passed to the attr cache below */
	t = gethrtime();

	nfs4_getattr_otw_norecovery(vp, garp, &e, cr, get_acl);

	/*
	 * If the failure is recoverable, initiate recovery and retry the
	 * whole compound unless recovery decides to abort.
	 */
	if (nfs4_needs_recovery(&e, FALSE, vp->v_vfsp)) {
		if (nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL,
		    NULL, OP_GETATTR, NULL, NULL, NULL) == FALSE) {
			nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR,
			    &recov_state, 1);
			goto recov_retry;
		}
	}

	nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state, 0);

	if (!e.error) {
		if (e.stat == NFS4_OK) {
			/* Success: fold the new attributes into the cache */
			nfs4_attr_cache(vp, garp, t, cr, FALSE, NULL);
		} else {
			e.error = geterrno4(e.stat);

			/* Purge cached state if the file handle went stale */
			nfs4_purge_stale_fh(e.error, vp, cr);
		}
	}

	/*
	 * If getattr a node that is a stub for a crossed
	 * mount point, keep the original secinfo flavor for
	 * the current file system, not the crossed one.
	 */
	(void) check_mnt_secinfo(mi->mi_curr_serv, vp);

	return (e.error);
}
8597c478bd9Sstevel@tonic-gate
/*
 * Generate a compound to get attributes over-the-wire.
 *
 * Issues a PUTFH/GETATTR compound for vp with no recovery handling;
 * RPC and NFS errors are reported to the caller through ep. On
 * success the GETATTR results are copied into *garp before the
 * decoded reply is freed.
 */
void
nfs4_getattr_otw_norecovery(vnode_t *vp, nfs4_ga_res_t *garp,
    nfs4_error_t *ep, cred_t *cr, int get_acl)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue;
	rnode4_t *rp = VTOR4(vp);
	nfs_argop4 argop[2];

	args.ctag = TAG_GETATTR;

	args.array_len = 2;
	args.array = argop;

	/* putfh */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = rp->r_fh;

	/* getattr */
	/*
	 * Unlike nfs version 2 and 3, where getattr returns all the
	 * attributes, nfs version 4 returns only the ones explicitly
	 * asked for. This creates problems, as some system functions
	 * (e.g. cache check) require certain attributes and if the
	 * cached node lacks some attributes such as uid/gid, it can
	 * affect system utilities (e.g. "ls") that rely on the information
	 * to be there. This can lead to anything from system crashes to
	 * corrupted information processed by user apps.
	 * So to ensure that all bases are covered, request at least
	 * the AT_ALL attribute mask.
	 */
	argop[1].argop = OP_GETATTR;
	argop[1].nfs_argop4_u.opgetattr.attr_request = NFS4_VATTR_MASK;
	if (get_acl)
		argop[1].nfs_argop4_u.opgetattr.attr_request |= FATTR4_ACL_MASK;
	argop[1].nfs_argop4_u.opgetattr.mi = VTOMI4(vp);

	doqueue = 1;

	rfs4call(VTOMI4(vp), &args, &res, cr, &doqueue, 0, ep);

	/* RPC-level failure: no reply was decoded, nothing to free */
	if (ep->error)
		return;

	/* NFS-level failure: status is in ep already; free the reply */
	if (res.status != NFS4_OK) {
		xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
		return;
	}

	/* Copy the attribute results out before freeing the decoded reply */
	*garp = res.array[1].nfs_resop4_u.opgetattr.ga_res;

	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
}
9177c478bd9Sstevel@tonic-gate
/*
 * Return either cached or remote attributes. If get remote attr
 * use them to check and invalidate caches, then cache the new attributes.
 *
 * Returns 0 on success with *vap filled from the (possibly freshly
 * updated) rnode attribute cache, otherwise the errno from the
 * over-the-wire getattr.
 */
int
nfs4getattr(vnode_t *vp, vattr_t *vap, cred_t *cr)
{
	int error;
	rnode4_t *rp;
	nfs4_ga_res_t gar;

	ASSERT(nfs4_consistent_type(vp));

	/*
	 * If we've got cached attributes, we're done, otherwise go
	 * to the server to get attributes, which will update the cache
	 * in the process. Either way, use the cached attributes for
	 * the caller's vattr_t.
	 *
	 * Note that we ignore the gar set by the OTW call: the attr caching
	 * code may make adjustments when storing to the rnode, and we want
	 * to see those changes here.
	 */
	rp = VTOR4(vp);
	error = 0;
	mutex_enter(&rp->r_statelock);
	if (!ATTRCACHE4_VALID(vp)) {
		/* Drop the lock across the OTW call, then revalidate */
		mutex_exit(&rp->r_statelock);
		error = nfs4_getattr_otw(vp, &gar, cr, 0);
		mutex_enter(&rp->r_statelock);
	}

	if (!error)
		*vap = rp->r_attr;

	/*
	 * Return the client's view of file size.
	 * NOTE(review): va_size is stored even when error != 0, into a
	 * *vap that was not otherwise filled in; callers presumably
	 * ignore *vap on error -- confirm before relying on it.
	 */
	vap->va_size = rp->r_size;

	mutex_exit(&rp->r_statelock);

	ASSERT(nfs4_consistent_type(vp));

	return (error);
}
9627c478bd9Sstevel@tonic-gate
/*
 * Get attributes over-the-wire using the caller-supplied attribute
 * bitmap and compound tag, with full recovery handling. Unlike
 * nfs4_getattr_otw() this does not update the attribute cache; the
 * results are only copied into *garp. The caller's n4g_ext_res
 * buffer (if any) is preserved and filled in, not replaced by the
 * reply's pointer. Return 0 if successful, otherwise error.
 */
int
nfs4_attr_otw(vnode_t *vp, nfs4_tag_type_t tag_type,
    nfs4_ga_res_t *garp, bitmap4 reqbitmap, cred_t *cr)
{
	COMPOUND4args_clnt args;
	COMPOUND4res_clnt res;
	int doqueue;
	nfs_argop4 argop[2];
	mntinfo4_t *mi = VTOMI4(vp);
	bool_t needrecov = FALSE;
	nfs4_recov_state_t recov_state;
	nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
	nfs4_ga_ext_res_t *gerp;

	recov_state.rs_flags = 0;
	recov_state.rs_num_retry_despite_err = 0;

recov_retry:
	args.ctag = tag_type;

	args.array_len = 2;
	args.array = argop;

	e.error = nfs4_start_fop(mi, vp, NULL, OH_GETATTR, &recov_state, NULL);
	if (e.error)
		return (e.error);

	/* putfh */
	argop[0].argop = OP_CPUTFH;
	argop[0].nfs_argop4_u.opcputfh.sfh = VTOR4(vp)->r_fh;

	/* getattr */
	argop[1].argop = OP_GETATTR;
	argop[1].nfs_argop4_u.opgetattr.attr_request = reqbitmap;
	argop[1].nfs_argop4_u.opgetattr.mi = mi;

	doqueue = 1;

	NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
	    "nfs4_attr_otw: %s call, rp %s", needrecov ? "recov" : "first",
	    rnode4info(VTOR4(vp))));

	rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);

	needrecov = nfs4_needs_recovery(&e, FALSE, vp->v_vfsp);
	/* Unrecoverable RPC error: end the operation and bail out */
	if (!needrecov && e.error) {
		nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
		    needrecov);
		return (e.error);
	}

	if (needrecov) {
		bool_t abort;

		NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
		    "nfs4_attr_otw: initiating recovery\n"));

		abort = nfs4_start_recovery(&e, VTOMI4(vp), vp, NULL, NULL,
		    NULL, OP_GETATTR, NULL, NULL, NULL);
		nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
		    needrecov);
		/*
		 * If the RPC itself succeeded, a reply was decoded: free
		 * it and convert the NFS status into an errno.
		 */
		if (!e.error) {
			xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
			e.error = geterrno4(res.status);
		}
		if (abort == FALSE)
			goto recov_retry;
		return (e.error);
	}

	if (res.status) {
		e.error = geterrno4(res.status);
	} else {
		/*
		 * Copy the results to the caller while preserving the
		 * caller's own n4g_ext_res buffer: stash the pointer,
		 * overwrite *garp with the reply, restore the pointer,
		 * then copy the extended results into it (only when both
		 * sides have one).
		 */
		gerp = garp->n4g_ext_res;
		bcopy(&res.array[1].nfs_resop4_u.opgetattr.ga_res,
		    garp, sizeof (nfs4_ga_res_t));
		garp->n4g_ext_res = gerp;
		if (garp->n4g_ext_res &&
		    res.array[1].nfs_resop4_u.opgetattr.ga_res.n4g_ext_res)
			bcopy(res.array[1].nfs_resop4_u.opgetattr.
			    ga_res.n4g_ext_res,
			    garp->n4g_ext_res, sizeof (nfs4_ga_ext_res_t));
	}
	xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
	nfs4_end_fop(VTOMI4(vp), vp, NULL, OH_GETATTR, &recov_state,
	    needrecov);
	return (e.error);
}
10517c478bd9Sstevel@tonic-gate
10527c478bd9Sstevel@tonic-gate /*
10537c478bd9Sstevel@tonic-gate * Asynchronous I/O parameters. nfs_async_threads is the high-water mark
10547c478bd9Sstevel@tonic-gate * for the demand-based allocation of async threads per-mount. The
10557c478bd9Sstevel@tonic-gate * nfs_async_timeout is the amount of time a thread will live after it
10567c478bd9Sstevel@tonic-gate * becomes idle, unless new I/O requests are received before the thread
10577c478bd9Sstevel@tonic-gate * dies. See nfs4_async_putpage and nfs4_async_start.
10587c478bd9Sstevel@tonic-gate */
10597c478bd9Sstevel@tonic-gate
10607c478bd9Sstevel@tonic-gate static void nfs4_async_start(struct vfs *);
10610776f5e6SVallish Vaidyeshwara static void nfs4_async_pgops_start(struct vfs *);
10620776f5e6SVallish Vaidyeshwara static void nfs4_async_common_start(struct vfs *, int);
10637c478bd9Sstevel@tonic-gate
10647c478bd9Sstevel@tonic-gate static void
free_async_args4(struct nfs4_async_reqs * args)10657c478bd9Sstevel@tonic-gate free_async_args4(struct nfs4_async_reqs *args)
10667c478bd9Sstevel@tonic-gate {
10677c478bd9Sstevel@tonic-gate rnode4_t *rp;
10687c478bd9Sstevel@tonic-gate
10697c478bd9Sstevel@tonic-gate if (args->a_io != NFS4_INACTIVE) {
10707c478bd9Sstevel@tonic-gate rp = VTOR4(args->a_vp);
10717c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
10727c478bd9Sstevel@tonic-gate rp->r_count--;
10737c478bd9Sstevel@tonic-gate if (args->a_io == NFS4_PUTAPAGE ||
10747c478bd9Sstevel@tonic-gate args->a_io == NFS4_PAGEIO)
10757c478bd9Sstevel@tonic-gate rp->r_awcount--;
10767c478bd9Sstevel@tonic-gate cv_broadcast(&rp->r_cv);
10777c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
10787c478bd9Sstevel@tonic-gate VN_RELE(args->a_vp);
10797c478bd9Sstevel@tonic-gate }
10807c478bd9Sstevel@tonic-gate crfree(args->a_cred);
10817c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args));
10827c478bd9Sstevel@tonic-gate }
10837c478bd9Sstevel@tonic-gate
10847c478bd9Sstevel@tonic-gate /*
10857c478bd9Sstevel@tonic-gate * Cross-zone thread creation and NFS access is disallowed, yet fsflush() and
10867c478bd9Sstevel@tonic-gate * pageout(), running in the global zone, have legitimate reasons to do
10877c478bd9Sstevel@tonic-gate * VOP_PUTPAGE(B_ASYNC) on other zones' NFS mounts. We avoid the problem by
10887c478bd9Sstevel@tonic-gate * use of a per-mount "asynchronous requests manager thread" which is
10897c478bd9Sstevel@tonic-gate * signaled by the various asynchronous work routines when there is
10907c478bd9Sstevel@tonic-gate * asynchronous work to be done. It is responsible for creating new
10917c478bd9Sstevel@tonic-gate * worker threads if necessary, and notifying existing worker threads
10927c478bd9Sstevel@tonic-gate * that there is work to be done.
10937c478bd9Sstevel@tonic-gate *
10947c478bd9Sstevel@tonic-gate * In other words, it will "take the specifications from the customers and
10957c478bd9Sstevel@tonic-gate * give them to the engineers."
10967c478bd9Sstevel@tonic-gate *
10977c478bd9Sstevel@tonic-gate * Worker threads die off of their own accord if they are no longer
10987c478bd9Sstevel@tonic-gate * needed.
10997c478bd9Sstevel@tonic-gate *
11007c478bd9Sstevel@tonic-gate * This thread is killed when the zone is going away or the filesystem
11017c478bd9Sstevel@tonic-gate * is being unmounted.
11027c478bd9Sstevel@tonic-gate */
11037c478bd9Sstevel@tonic-gate void
nfs4_async_manager(vfs_t * vfsp)11047c478bd9Sstevel@tonic-gate nfs4_async_manager(vfs_t *vfsp)
11057c478bd9Sstevel@tonic-gate {
11067c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo;
11077c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
11087c478bd9Sstevel@tonic-gate uint_t max_threads;
11097c478bd9Sstevel@tonic-gate
11107c478bd9Sstevel@tonic-gate mi = VFTOMI4(vfsp);
11117c478bd9Sstevel@tonic-gate
11127c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr,
1113b9238976Sth "nfs4_async_manager");
11147c478bd9Sstevel@tonic-gate
11157c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
11167c478bd9Sstevel@tonic-gate /*
11177c478bd9Sstevel@tonic-gate * We want to stash the max number of threads that this mount was
11187c478bd9Sstevel@tonic-gate * allowed so we can use it later when the variable is set to zero as
11197c478bd9Sstevel@tonic-gate * part of the zone/mount going away.
11207c478bd9Sstevel@tonic-gate *
11217c478bd9Sstevel@tonic-gate * We want to be able to create at least one thread to handle
1122388e50fcSMarcel Telka * asynchronous inactive calls.
11237c478bd9Sstevel@tonic-gate */
11247c478bd9Sstevel@tonic-gate max_threads = MAX(mi->mi_max_threads, 1);
11257c478bd9Sstevel@tonic-gate /*
11267c478bd9Sstevel@tonic-gate * We don't want to wait for mi_max_threads to go to zero, since that
11277c478bd9Sstevel@tonic-gate * happens as part of a failed unmount, but this thread should only
11287c478bd9Sstevel@tonic-gate * exit when the mount is really going away.
11297c478bd9Sstevel@tonic-gate *
11307c478bd9Sstevel@tonic-gate * Once MI4_ASYNC_MGR_STOP is set, no more async operations will be
11317c478bd9Sstevel@tonic-gate * attempted: the various _async_*() functions know to do things
11327c478bd9Sstevel@tonic-gate * inline if mi_max_threads == 0. Henceforth we just drain out the
11337c478bd9Sstevel@tonic-gate * outstanding requests.
11347c478bd9Sstevel@tonic-gate *
11357c478bd9Sstevel@tonic-gate * Note that we still create zthreads even if we notice the zone is
11367c478bd9Sstevel@tonic-gate * shutting down (MI4_ASYNC_MGR_STOP is set); this may cause the zone
11377c478bd9Sstevel@tonic-gate * shutdown sequence to take slightly longer in some cases, but
11387c478bd9Sstevel@tonic-gate * doesn't violate the protocol, as all threads will exit as soon as
11397c478bd9Sstevel@tonic-gate * they're done processing the remaining requests.
11407c478bd9Sstevel@tonic-gate */
1141388e50fcSMarcel Telka for (;;) {
11427c478bd9Sstevel@tonic-gate while (mi->mi_async_req_count > 0) {
11437c478bd9Sstevel@tonic-gate /*
11447c478bd9Sstevel@tonic-gate * Paranoia: If the mount started out having
11457c478bd9Sstevel@tonic-gate * (mi->mi_max_threads == 0), and the value was
11467c478bd9Sstevel@tonic-gate * later changed (via a debugger or somesuch),
11477c478bd9Sstevel@tonic-gate * we could be confused since we will think we
11487c478bd9Sstevel@tonic-gate * can't create any threads, and the calling
11497c478bd9Sstevel@tonic-gate * code (which looks at the current value of
11507c478bd9Sstevel@tonic-gate * mi->mi_max_threads, now non-zero) thinks we
11517c478bd9Sstevel@tonic-gate * can.
11527c478bd9Sstevel@tonic-gate *
11537c478bd9Sstevel@tonic-gate * So, because we're paranoid, we create threads
11547c478bd9Sstevel@tonic-gate * up to the maximum of the original and the
11557c478bd9Sstevel@tonic-gate * current value. This means that future
11567c478bd9Sstevel@tonic-gate * (debugger-induced) alterations of
11577c478bd9Sstevel@tonic-gate * mi->mi_max_threads are ignored for our
11587c478bd9Sstevel@tonic-gate * purposes, but who told them they could change
11597c478bd9Sstevel@tonic-gate * random values on a live kernel anyhow?
11607c478bd9Sstevel@tonic-gate */
11610776f5e6SVallish Vaidyeshwara if (mi->mi_threads[NFS4_ASYNC_QUEUE] <
11627c478bd9Sstevel@tonic-gate MAX(mi->mi_max_threads, max_threads)) {
11630776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_QUEUE]++;
11647c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
116550a83466Sjwahlig MI4_HOLD(mi);
11667c478bd9Sstevel@tonic-gate VFS_HOLD(vfsp); /* hold for new thread */
11677c478bd9Sstevel@tonic-gate (void) zthread_create(NULL, 0, nfs4_async_start,
11687c478bd9Sstevel@tonic-gate vfsp, 0, minclsyspri);
11697c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
11700776f5e6SVallish Vaidyeshwara } else if (mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] <
11710776f5e6SVallish Vaidyeshwara NUM_ASYNC_PGOPS_THREADS) {
11720776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE]++;
11730776f5e6SVallish Vaidyeshwara mutex_exit(&mi->mi_async_lock);
11740776f5e6SVallish Vaidyeshwara MI4_HOLD(mi);
11750776f5e6SVallish Vaidyeshwara VFS_HOLD(vfsp); /* hold for new thread */
11760776f5e6SVallish Vaidyeshwara (void) zthread_create(NULL, 0,
11770776f5e6SVallish Vaidyeshwara nfs4_async_pgops_start, vfsp, 0,
11780776f5e6SVallish Vaidyeshwara minclsyspri);
11790776f5e6SVallish Vaidyeshwara mutex_enter(&mi->mi_async_lock);
11807c478bd9Sstevel@tonic-gate }
11810776f5e6SVallish Vaidyeshwara NFS4_WAKE_ASYNC_WORKER(mi->mi_async_work_cv);
11827c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0);
11837c478bd9Sstevel@tonic-gate mi->mi_async_req_count--;
11847c478bd9Sstevel@tonic-gate }
1185388e50fcSMarcel Telka
11867c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
1187388e50fcSMarcel Telka if (mi->mi_flags & MI4_ASYNC_MGR_STOP) {
1188388e50fcSMarcel Telka mutex_exit(&mi->mi_lock);
1189388e50fcSMarcel Telka break;
1190388e50fcSMarcel Telka }
1191388e50fcSMarcel Telka mutex_exit(&mi->mi_lock);
1192388e50fcSMarcel Telka
1193388e50fcSMarcel Telka CALLB_CPR_SAFE_BEGIN(&cprinfo);
1194388e50fcSMarcel Telka cv_wait(&mi->mi_async_reqs_cv, &mi->mi_async_lock);
1195388e50fcSMarcel Telka CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock);
11967c478bd9Sstevel@tonic-gate }
11977c478bd9Sstevel@tonic-gate
11987c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
11997c478bd9Sstevel@tonic-gate "nfs4_async_manager exiting for vfs %p\n", (void *)mi->mi_vfsp));
12007c478bd9Sstevel@tonic-gate /*
12017c478bd9Sstevel@tonic-gate * Let everyone know we're done.
12027c478bd9Sstevel@tonic-gate */
12037c478bd9Sstevel@tonic-gate mi->mi_manager_thread = NULL;
12047c478bd9Sstevel@tonic-gate /*
12057c478bd9Sstevel@tonic-gate * Wake up the inactive thread.
12067c478bd9Sstevel@tonic-gate */
12077c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_inact_req_cv);
12087c478bd9Sstevel@tonic-gate /*
12097c478bd9Sstevel@tonic-gate * Wake up anyone sitting in nfs4_async_manager_stop()
12107c478bd9Sstevel@tonic-gate */
12117c478bd9Sstevel@tonic-gate cv_broadcast(&mi->mi_async_cv);
12127c478bd9Sstevel@tonic-gate /*
12137c478bd9Sstevel@tonic-gate * There is no explicit call to mutex_exit(&mi->mi_async_lock)
12147c478bd9Sstevel@tonic-gate * since CALLB_CPR_EXIT is actually responsible for releasing
12157c478bd9Sstevel@tonic-gate * 'mi_async_lock'.
12167c478bd9Sstevel@tonic-gate */
12177c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo);
12187c478bd9Sstevel@tonic-gate VFS_RELE(vfsp); /* release thread's hold */
121950a83466Sjwahlig MI4_RELE(mi);
12207c478bd9Sstevel@tonic-gate zthread_exit();
12217c478bd9Sstevel@tonic-gate }
12227c478bd9Sstevel@tonic-gate
/*
 * Signal (and wait for) the async manager thread to clean up and go away.
 *
 * Sets MI4_ASYNC_MGR_STOP so the manager's main loop (and the various
 * _async_*() queuing functions) stop accepting new work, then wakes the
 * manager, which drains any remaining requests before clearing
 * mi_manager_thread and broadcasting mi_async_cv.
 *
 * Lock ordering: mi_async_lock is taken before mi_lock, matching the
 * order used by the async manager itself.
 */
void
nfs4_async_manager_stop(vfs_t *vfsp)
{
	mntinfo4_t *mi = VFTOMI4(vfsp);

	mutex_enter(&mi->mi_async_lock);
	mutex_enter(&mi->mi_lock);
	mi->mi_flags |= MI4_ASYNC_MGR_STOP;
	mutex_exit(&mi->mi_lock);
	/* Wake the manager if it is blocked waiting for new requests. */
	cv_broadcast(&mi->mi_async_reqs_cv);
	/*
	 * Wait for the async manager thread to die.  The manager signals
	 * mi_async_cv just before it exits; mi_async_lock is dropped by
	 * cv_wait() while we sleep, allowing the manager to proceed.
	 */
	while (mi->mi_manager_thread != NULL)
		cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);
	mutex_exit(&mi->mi_async_lock);
}
12437c478bd9Sstevel@tonic-gate
/*
 * Queue an asynchronous readahead request for the given block/address of
 * vnode 'vp', to be serviced by an async worker thread via the supplied
 * 'readahead' callback.
 *
 * Returns 0 if the request was successfully queued, -1 if the readahead
 * was punted (out of memory, address outside the segment, a lock
 * operation pending on the rnode, or async I/O disabled for this mount).
 * A -1 return means no readahead will occur; the caller loses nothing
 * but the optimization.
 */
int
nfs4_async_readahead(vnode_t *vp, u_offset_t blkoff, caddr_t addr,
    struct seg *seg, cred_t *cr, void (*readahead)(vnode_t *,
    u_offset_t, caddr_t, struct seg *, cred_t *))
{
	rnode4_t *rp;
	mntinfo4_t *mi;
	struct nfs4_async_reqs *args;

	rp = VTOR4(vp);
	ASSERT(rp->r_freef == NULL);

	mi = VTOMI4(vp);

	/*
	 * If addr falls in a different segment, don't bother doing readahead.
	 */
	if (addr >= seg->s_base + seg->s_size)
		return (-1);

	/*
	 * If we can't allocate a request structure, punt on the readahead.
	 * KM_NOSLEEP: readahead is an optimization, not worth blocking for.
	 */
	if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
		return (-1);

	/*
	 * If a lock operation is pending, don't initiate any new
	 * readaheads.  Otherwise, bump r_count to indicate the new
	 * asynchronous I/O.
	 */
	if (!nfs_rw_tryenter(&rp->r_lkserlock, RW_READER)) {
		kmem_free(args, sizeof (*args));
		return (-1);
	}
	mutex_enter(&rp->r_statelock);
	rp->r_count++;
	mutex_exit(&rp->r_statelock);
	nfs_rw_exit(&rp->r_lkserlock);

	/* Fill in the request; the worker thread drops these holds. */
	args->a_next = NULL;
#ifdef DEBUG
	args->a_queuer = curthread;
#endif
	VN_HOLD(vp);
	args->a_vp = vp;
	ASSERT(cr != NULL);
	crhold(cr);
	args->a_cred = cr;
	args->a_io = NFS4_READ_AHEAD;
	args->a_nfs4_readahead = readahead;
	args->a_nfs4_blkoff = blkoff;
	args->a_nfs4_seg = seg;
	args->a_nfs4_addr = addr;

	mutex_enter(&mi->mi_async_lock);

	/*
	 * If asyncio has been disabled, don't bother readahead.
	 */
	if (mi->mi_max_threads == 0) {
		mutex_exit(&mi->mi_async_lock);
		goto noasync;
	}

	/*
	 * Link request structure into the async list and
	 * wakeup async thread to do the i/o.
	 */
	if (mi->mi_async_reqs[NFS4_READ_AHEAD] == NULL) {
		mi->mi_async_reqs[NFS4_READ_AHEAD] = args;
		mi->mi_async_tail[NFS4_READ_AHEAD] = args;
	} else {
		mi->mi_async_tail[NFS4_READ_AHEAD]->a_next = args;
		mi->mi_async_tail[NFS4_READ_AHEAD] = args;
	}

	if (mi->mi_io_kstats) {
		mutex_enter(&mi->mi_lock);
		kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
		mutex_exit(&mi->mi_lock);
	}

	mi->mi_async_req_count++;
	ASSERT(mi->mi_async_req_count != 0);
	/* Wake the async manager so it can dispatch a worker thread. */
	cv_signal(&mi->mi_async_reqs_cv);
	mutex_exit(&mi->mi_async_lock);
	return (0);

noasync:
	/*
	 * Unwind everything acquired above: drop the r_count I/O
	 * reference (waking anyone in rp4_addfree/sync waiting on r_cv),
	 * release the vnode and cred holds, and free the request.
	 */
	mutex_enter(&rp->r_statelock);
	rp->r_count--;
	cv_broadcast(&rp->r_cv);
	mutex_exit(&rp->r_statelock);
	VN_RELE(vp);
	crfree(cr);
	kmem_free(args, sizeof (*args));
	return (-1);
}
13437c478bd9Sstevel@tonic-gate
/*
 * Entry point for a general-purpose async worker thread; services the
 * full set of async request types (NFS4_ASYNC_QUEUE).
 */
static void
nfs4_async_start(struct vfs *vfsp)
{
	nfs4_async_common_start(vfsp, NFS4_ASYNC_QUEUE);
}
13490776f5e6SVallish Vaidyeshwara
/*
 * Entry point for a page-operations async worker thread; services only
 * the pageout/pageio request types (NFS4_ASYNC_PGOPS_QUEUE).
 */
static void
nfs4_async_pgops_start(struct vfs *vfsp)
{
	nfs4_async_common_start(vfsp, NFS4_ASYNC_PGOPS_QUEUE);
}
13550776f5e6SVallish Vaidyeshwara
/*
 * The async queues for each mounted file system are arranged as a
 * set of queues, one for each async i/o type.  Requests are taken
 * from the queues in a round-robin fashion.  A number of consecutive
 * requests are taken from each queue before moving on to the next
 * queue.  This functionality may allow the NFS Version 2 server to do
 * write clustering, even if the client is mixing writes and reads
 * because it will take multiple write requests from the queue
 * before processing any of the other async i/o types.
 *
 * XXX The nfs4_async_common_start thread is unsafe in the light of the present
 * model defined by cpr to suspend the system. Specifically over the
 * wire calls are cpr-unsafe. The thread should be reevaluated in
 * case of future updates to the cpr model.
 */
/*
 * Main loop of an async worker thread.  'async_queue' selects which
 * queue set (general or page-ops) this thread services.  The thread
 * self-reaps via zthread_exit() when async I/O is disabled
 * (mi_max_threads == 0) or when it has been idle past nfs_async_timeout.
 * mi_async_lock is held except while an individual request is executed.
 */
static void
nfs4_async_common_start(struct vfs *vfsp, int async_queue)
{
	struct nfs4_async_reqs *args;
	mntinfo4_t *mi = VFTOMI4(vfsp);
	clock_t time_left = 1;
	callb_cpr_t cprinfo;
	int i;
	extern int nfs_async_timeout;
	int async_types;	/* number of queues this thread scans */
	kcondvar_t *async_work_cv;	/* cv this thread sleeps on when idle */

	if (async_queue == NFS4_ASYNC_QUEUE) {
		async_types = NFS4_ASYNC_TYPES;
		async_work_cv = &mi->mi_async_work_cv[NFS4_ASYNC_QUEUE];
	} else {
		async_types = NFS4_ASYNC_PGOPS_TYPES;
		async_work_cv = &mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE];
	}

	/*
	 * Dynamic initialization of nfs_async_timeout to allow nfs to be
	 * built in an implementation independent manner.
	 */
	if (nfs_async_timeout == -1)
		nfs_async_timeout = NFS_ASYNC_TIMEOUT;

	CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr, "nas");

	mutex_enter(&mi->mi_async_lock);
	for (;;) {
		/*
		 * Find the next queue containing an entry.  We start
		 * at the current queue pointer and then round robin
		 * through all of them until we either find a non-empty
		 * queue or have looked through all of them.
		 */
		for (i = 0; i < async_types; i++) {
			args = *mi->mi_async_curr[async_queue];
			if (args != NULL)
				break;
			mi->mi_async_curr[async_queue]++;
			if (mi->mi_async_curr[async_queue] ==
			    &mi->mi_async_reqs[async_types]) {
				/* wrap back to the first queue */
				mi->mi_async_curr[async_queue] =
				    &mi->mi_async_reqs[0];
			}
		}
		/*
		 * If we didn't find a entry, then block until woken up
		 * again and then look through the queues again.
		 */
		if (args == NULL) {
			/*
			 * Exiting is considered to be safe for CPR as well
			 */
			CALLB_CPR_SAFE_BEGIN(&cprinfo);

			/*
			 * Wakeup thread waiting to unmount the file
			 * system only if all async threads are inactive.
			 *
			 * If we've timed-out and there's nothing to do,
			 * then get rid of this thread.
			 */
			if (mi->mi_max_threads == 0 || time_left <= 0) {
				--mi->mi_threads[async_queue];

				if (mi->mi_threads[NFS4_ASYNC_QUEUE] == 0 &&
				    mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] == 0)
					cv_signal(&mi->mi_async_cv);
				/* CALLB_CPR_EXIT drops mi_async_lock */
				CALLB_CPR_EXIT(&cprinfo);
				VFS_RELE(vfsp);	/* release thread's hold */
				MI4_RELE(mi);
				zthread_exit();
				/* NOTREACHED */
			}
			/*
			 * Sleep until new work arrives or the idle
			 * timeout expires (time_left <= 0 on timeout).
			 */
			time_left = cv_reltimedwait(async_work_cv,
			    &mi->mi_async_lock, nfs_async_timeout,
			    TR_CLOCK_TICK);

			CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock);

			continue;
		} else {
			time_left = 1;
		}

		/*
		 * Remove the request from the async queue and then
		 * update the current async request queue pointer.  If
		 * the current queue is empty or we have removed enough
		 * consecutive entries from it, then reset the counter
		 * for this queue and then move the current pointer to
		 * the next queue.
		 */
		*mi->mi_async_curr[async_queue] = args->a_next;
		if (*mi->mi_async_curr[async_queue] == NULL ||
		    --mi->mi_async_clusters[args->a_io] == 0) {
			mi->mi_async_clusters[args->a_io] =
			    mi->mi_async_init_clusters;
			mi->mi_async_curr[async_queue]++;
			if (mi->mi_async_curr[async_queue] ==
			    &mi->mi_async_reqs[async_types]) {
				mi->mi_async_curr[async_queue] =
				    &mi->mi_async_reqs[0];
			}
		}

		/* NFS4_INACTIVE requests are not accounted in io kstats */
		if (args->a_io != NFS4_INACTIVE && mi->mi_io_kstats) {
			mutex_enter(&mi->mi_lock);
			kstat_waitq_exit(KSTAT_IO_PTR(mi->mi_io_kstats));
			mutex_exit(&mi->mi_lock);
		}

		/* Drop the lock while executing the (over-the-wire) op. */
		mutex_exit(&mi->mi_async_lock);

		/*
		 * Obtain arguments from the async request structure.
		 */
		if (args->a_io == NFS4_READ_AHEAD && mi->mi_max_threads > 0) {
			(*args->a_nfs4_readahead)(args->a_vp,
			    args->a_nfs4_blkoff, args->a_nfs4_addr,
			    args->a_nfs4_seg, args->a_cred);
		} else if (args->a_io == NFS4_PUTAPAGE) {
			(void) (*args->a_nfs4_putapage)(args->a_vp,
			    args->a_nfs4_pp, args->a_nfs4_off,
			    args->a_nfs4_len, args->a_nfs4_flags,
			    args->a_cred);
		} else if (args->a_io == NFS4_PAGEIO) {
			(void) (*args->a_nfs4_pageio)(args->a_vp,
			    args->a_nfs4_pp, args->a_nfs4_off,
			    args->a_nfs4_len, args->a_nfs4_flags,
			    args->a_cred);
		} else if (args->a_io == NFS4_READDIR) {
			(void) ((*args->a_nfs4_readdir)(args->a_vp,
			    args->a_nfs4_rdc, args->a_cred));
		} else if (args->a_io == NFS4_COMMIT) {
			(*args->a_nfs4_commit)(args->a_vp, args->a_nfs4_plist,
			    args->a_nfs4_offset, args->a_nfs4_count,
			    args->a_cred);
		} else if (args->a_io == NFS4_INACTIVE) {
			nfs4_inactive_otw(args->a_vp, args->a_cred);
		}

		/*
		 * Now, release the vnode and free the credentials
		 * structure.
		 */
		free_async_args4(args);
		/*
		 * Reacquire the mutex because it will be needed above.
		 */
		mutex_enter(&mi->mi_async_lock);
	}
}
15277c478bd9Sstevel@tonic-gate
/*
 * nfs4_inactive_thread - look for vnodes that need over-the-wire calls as
 * part of VOP_INACTIVE.
 *
 * One such thread exists per mount.  It drains the NFS4_INACTIVE queue,
 * performing the over-the-wire inactive processing for each vnode, and
 * exits only after the async manager thread has gone away (the manager
 * broadcasts mi_inact_req_cv when it is done).
 */

void
nfs4_inactive_thread(mntinfo4_t *mi)
{
	struct nfs4_async_reqs *args;
	callb_cpr_t cprinfo;
	vfs_t *vfsp = mi->mi_vfsp;

	CALLB_CPR_INIT(&cprinfo, &mi->mi_async_lock, callb_generic_cpr,
	    "nfs4_inactive_thread");

	for (;;) {
		mutex_enter(&mi->mi_async_lock);
		args = mi->mi_async_reqs[NFS4_INACTIVE];
		if (args == NULL) {
			mutex_enter(&mi->mi_lock);
			/*
			 * We don't want to exit until the async manager is done
			 * with its work; hence the check for mi_manager_thread
			 * being NULL.
			 *
			 * The async manager thread will cv_broadcast() on
			 * mi_inact_req_cv when it's done, at which point we'll
			 * wake up and exit.
			 */
			if (mi->mi_manager_thread == NULL)
				goto die;
			/* Mark ourselves idle so nfs4_async_stop() can see it */
			mi->mi_flags |= MI4_INACTIVE_IDLE;
			mutex_exit(&mi->mi_lock);
			/* Wake anyone waiting for the queue to drain. */
			cv_signal(&mi->mi_async_cv);
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&mi->mi_inact_req_cv, &mi->mi_async_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &mi->mi_async_lock);
			mutex_exit(&mi->mi_async_lock);
		} else {
			mutex_enter(&mi->mi_lock);
			mi->mi_flags &= ~MI4_INACTIVE_IDLE;
			mutex_exit(&mi->mi_lock);
			/* Dequeue the head request and process it unlocked. */
			mi->mi_async_reqs[NFS4_INACTIVE] = args->a_next;
			mutex_exit(&mi->mi_async_lock);
			nfs4_inactive_otw(args->a_vp, args->a_cred);
			crfree(args->a_cred);
			kmem_free(args, sizeof (*args));
		}
	}
die:
	/* Reached with both mi_lock and mi_async_lock held. */
	mutex_exit(&mi->mi_lock);
	mi->mi_inactive_thread = NULL;
	/* Wake anyone in nfs4_async_stop()/nfs4_async_stop_sig(). */
	cv_signal(&mi->mi_async_cv);

	/*
	 * There is no explicit call to mutex_exit(&mi->mi_async_lock) since
	 * CALLB_CPR_EXIT is actually responsible for releasing 'mi_async_lock'.
	 */
	CALLB_CPR_EXIT(&cprinfo);

	NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
	    "nfs4_inactive_thread exiting for vfs %p\n", (void *)vfsp));

	MI4_RELE(mi);
	zthread_exit();
	/* NOTREACHED */
}
15957c478bd9Sstevel@tonic-gate
/*
 * nfs4_async_stop:
 * Wait for all outstanding putpage operations and the inactive thread to
 * complete; nfs4_async_stop_sig() without interruptibility.
 *
 * Disables async I/O (mi_max_threads = 0) so all worker threads exit,
 * then waits for the worker-thread counts to reach zero and for the
 * inactive thread to become idle with an empty NFS4_INACTIVE queue.
 */
void
nfs4_async_stop(struct vfs *vfsp)
{
	mntinfo4_t *mi = VFTOMI4(vfsp);

	/*
	 * Wait for all outstanding async operations to complete and for
	 * worker threads to exit.
	 */
	mutex_enter(&mi->mi_async_lock);
	mi->mi_max_threads = 0;
	NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
	while (mi->mi_threads[NFS4_ASYNC_QUEUE] != 0 ||
	    mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] != 0)
		cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);

	/*
	 * Wait for the inactive thread to finish doing what it's doing.  It
	 * won't exit until the last reference to the vfs_t goes away.
	 */
	if (mi->mi_inactive_thread != NULL) {
		mutex_enter(&mi->mi_lock);
		while (!(mi->mi_flags & MI4_INACTIVE_IDLE) ||
		    (mi->mi_async_reqs[NFS4_INACTIVE] != NULL)) {
			/* drop mi_lock across the wait; mi_async_lock held */
			mutex_exit(&mi->mi_lock);
			cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);
			mutex_enter(&mi->mi_lock);
		}
		mutex_exit(&mi->mi_lock);
	}
	mutex_exit(&mi->mi_async_lock);
}
16337c478bd9Sstevel@tonic-gate
/*
 * nfs4_async_stop_sig:
 * Wait for all outstanding putpage operations and the inactive thread to
 * complete.  If a signal is delivered we will abort and return non-zero;
 * otherwise return 0.  Since this routine is called from nfs4_unmount, we
 * need to make it interruptible.
 *
 * On interruption the original mi_max_threads value is restored so async
 * I/O resumes and the mount stays usable.
 */
int
nfs4_async_stop_sig(struct vfs *vfsp)
{
	mntinfo4_t *mi = VFTOMI4(vfsp);
	ushort_t omax;		/* saved mi_max_threads for rollback */
	bool_t intr = FALSE;	/* TRUE if interrupted by a signal */

	/*
	 * Wait for all outstanding putpage operations to complete and for
	 * worker threads to exit.
	 */
	mutex_enter(&mi->mi_async_lock);
	omax = mi->mi_max_threads;
	mi->mi_max_threads = 0;
	NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
	while (mi->mi_threads[NFS4_ASYNC_QUEUE] != 0 ||
	    mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] != 0) {
		/* cv_wait_sig() returns 0 when interrupted by a signal */
		if (!cv_wait_sig(&mi->mi_async_cv, &mi->mi_async_lock)) {
			intr = TRUE;
			goto interrupted;
		}
	}

	/*
	 * Wait for the inactive thread to finish doing what it's doing.  It
	 * won't exit until the last reference to the vfs_t goes away.
	 */
	if (mi->mi_inactive_thread != NULL) {
		mutex_enter(&mi->mi_lock);
		while (!(mi->mi_flags & MI4_INACTIVE_IDLE) ||
		    (mi->mi_async_reqs[NFS4_INACTIVE] != NULL)) {
			mutex_exit(&mi->mi_lock);
			if (!cv_wait_sig(&mi->mi_async_cv,
			    &mi->mi_async_lock)) {
				intr = TRUE;
				goto interrupted;
			}
			mutex_enter(&mi->mi_lock);
		}
		mutex_exit(&mi->mi_lock);
	}
interrupted:
	/* Roll back the async-disable if we didn't finish the drain. */
	if (intr)
		mi->mi_max_threads = omax;
	mutex_exit(&mi->mi_async_lock);

	return (intr);
}
16897c478bd9Sstevel@tonic-gate
16907c478bd9Sstevel@tonic-gate int
nfs4_async_putapage(vnode_t * vp,page_t * pp,u_offset_t off,size_t len,int flags,cred_t * cr,int (* putapage)(vnode_t *,page_t *,u_offset_t,size_t,int,cred_t *))16917c478bd9Sstevel@tonic-gate nfs4_async_putapage(vnode_t *vp, page_t *pp, u_offset_t off, size_t len,
1692b9238976Sth int flags, cred_t *cr, int (*putapage)(vnode_t *, page_t *,
1693b9238976Sth u_offset_t, size_t, int, cred_t *))
16947c478bd9Sstevel@tonic-gate {
16957c478bd9Sstevel@tonic-gate rnode4_t *rp;
16967c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
16977c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args;
16987c478bd9Sstevel@tonic-gate
16997c478bd9Sstevel@tonic-gate ASSERT(flags & B_ASYNC);
17007c478bd9Sstevel@tonic-gate ASSERT(vp->v_vfsp != NULL);
17017c478bd9Sstevel@tonic-gate
17027c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
17037c478bd9Sstevel@tonic-gate ASSERT(rp->r_count > 0);
17047c478bd9Sstevel@tonic-gate
17057c478bd9Sstevel@tonic-gate mi = VTOMI4(vp);
17067c478bd9Sstevel@tonic-gate
17077c478bd9Sstevel@tonic-gate /*
17087c478bd9Sstevel@tonic-gate * If we can't allocate a request structure, do the putpage
17097c478bd9Sstevel@tonic-gate * operation synchronously in this thread's context.
17107c478bd9Sstevel@tonic-gate */
17117c478bd9Sstevel@tonic-gate if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
17127c478bd9Sstevel@tonic-gate goto noasync;
17137c478bd9Sstevel@tonic-gate
17147c478bd9Sstevel@tonic-gate args->a_next = NULL;
17157c478bd9Sstevel@tonic-gate #ifdef DEBUG
17167c478bd9Sstevel@tonic-gate args->a_queuer = curthread;
17177c478bd9Sstevel@tonic-gate #endif
17187c478bd9Sstevel@tonic-gate VN_HOLD(vp);
17197c478bd9Sstevel@tonic-gate args->a_vp = vp;
17207c478bd9Sstevel@tonic-gate ASSERT(cr != NULL);
17217c478bd9Sstevel@tonic-gate crhold(cr);
17227c478bd9Sstevel@tonic-gate args->a_cred = cr;
17237c478bd9Sstevel@tonic-gate args->a_io = NFS4_PUTAPAGE;
17247c478bd9Sstevel@tonic-gate args->a_nfs4_putapage = putapage;
17257c478bd9Sstevel@tonic-gate args->a_nfs4_pp = pp;
17267c478bd9Sstevel@tonic-gate args->a_nfs4_off = off;
17277c478bd9Sstevel@tonic-gate args->a_nfs4_len = (uint_t)len;
17287c478bd9Sstevel@tonic-gate args->a_nfs4_flags = flags;
17297c478bd9Sstevel@tonic-gate
17307c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
17317c478bd9Sstevel@tonic-gate
17327c478bd9Sstevel@tonic-gate /*
17337c478bd9Sstevel@tonic-gate * If asyncio has been disabled, then make a synchronous request.
17347c478bd9Sstevel@tonic-gate * This check is done a second time in case async io was diabled
17357c478bd9Sstevel@tonic-gate * while this thread was blocked waiting for memory pressure to
17367c478bd9Sstevel@tonic-gate * reduce or for the queue to drain.
17377c478bd9Sstevel@tonic-gate */
17387c478bd9Sstevel@tonic-gate if (mi->mi_max_threads == 0) {
17397c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
17407c478bd9Sstevel@tonic-gate
17417c478bd9Sstevel@tonic-gate VN_RELE(vp);
17427c478bd9Sstevel@tonic-gate crfree(cr);
17437c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args));
17447c478bd9Sstevel@tonic-gate goto noasync;
17457c478bd9Sstevel@tonic-gate }
17467c478bd9Sstevel@tonic-gate
17477c478bd9Sstevel@tonic-gate /*
17487c478bd9Sstevel@tonic-gate * Link request structure into the async list and
17497c478bd9Sstevel@tonic-gate * wakeup async thread to do the i/o.
17507c478bd9Sstevel@tonic-gate */
17517c478bd9Sstevel@tonic-gate if (mi->mi_async_reqs[NFS4_PUTAPAGE] == NULL) {
17527c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_PUTAPAGE] = args;
17537c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PUTAPAGE] = args;
17547c478bd9Sstevel@tonic-gate } else {
17557c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PUTAPAGE]->a_next = args;
17567c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PUTAPAGE] = args;
17577c478bd9Sstevel@tonic-gate }
17587c478bd9Sstevel@tonic-gate
17597c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
17607c478bd9Sstevel@tonic-gate rp->r_count++;
17617c478bd9Sstevel@tonic-gate rp->r_awcount++;
17627c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
17637c478bd9Sstevel@tonic-gate
17647c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) {
17657c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
17667c478bd9Sstevel@tonic-gate kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
17677c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
17687c478bd9Sstevel@tonic-gate }
17697c478bd9Sstevel@tonic-gate
17707c478bd9Sstevel@tonic-gate mi->mi_async_req_count++;
17717c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0);
17727c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_reqs_cv);
17737c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
17747c478bd9Sstevel@tonic-gate return (0);
17757c478bd9Sstevel@tonic-gate
17767c478bd9Sstevel@tonic-gate noasync:
17777c478bd9Sstevel@tonic-gate
1778aea676fdSArne Jansen if (curproc == proc_pageout || curproc == proc_fsflush) {
17797c478bd9Sstevel@tonic-gate /*
17807c478bd9Sstevel@tonic-gate * If we get here in the context of the pageout/fsflush,
17817c478bd9Sstevel@tonic-gate * or we have run out of memory or we're attempting to
17827c478bd9Sstevel@tonic-gate * unmount we refuse to do a sync write, because this may
17837c478bd9Sstevel@tonic-gate * hang pageout/fsflush and the machine. In this case,
17847c478bd9Sstevel@tonic-gate * we just re-mark the page as dirty and punt on the page.
17857c478bd9Sstevel@tonic-gate *
17867c478bd9Sstevel@tonic-gate * Make sure B_FORCE isn't set. We can re-mark the
17877c478bd9Sstevel@tonic-gate * pages as dirty and unlock the pages in one swoop by
17887c478bd9Sstevel@tonic-gate * passing in B_ERROR to pvn_write_done(). However,
17897c478bd9Sstevel@tonic-gate * we should make sure B_FORCE isn't set - we don't
17907c478bd9Sstevel@tonic-gate * want the page tossed before it gets written out.
17917c478bd9Sstevel@tonic-gate */
17927c478bd9Sstevel@tonic-gate if (flags & B_FORCE)
17937c478bd9Sstevel@tonic-gate flags &= ~(B_INVAL | B_FORCE);
17947c478bd9Sstevel@tonic-gate pvn_write_done(pp, flags | B_ERROR);
17957c478bd9Sstevel@tonic-gate return (0);
17967c478bd9Sstevel@tonic-gate }
17977c478bd9Sstevel@tonic-gate
1798aea676fdSArne Jansen if (nfs_zone() != mi->mi_zone) {
1799aea676fdSArne Jansen /*
1800aea676fdSArne Jansen * So this was a cross-zone sync putpage.
1801aea676fdSArne Jansen *
1802aea676fdSArne Jansen * We pass in B_ERROR to pvn_write_done() to re-mark the pages
1803aea676fdSArne Jansen * as dirty and unlock them.
1804aea676fdSArne Jansen *
1805aea676fdSArne Jansen * We don't want to clear B_FORCE here as the caller presumably
1806aea676fdSArne Jansen * knows what they're doing if they set it.
1807aea676fdSArne Jansen */
1808aea676fdSArne Jansen pvn_write_done(pp, flags | B_ERROR);
1809aea676fdSArne Jansen return (EPERM);
1810aea676fdSArne Jansen }
1811aea676fdSArne Jansen return ((*putapage)(vp, pp, off, len, flags, cr));
18127c478bd9Sstevel@tonic-gate }
18137c478bd9Sstevel@tonic-gate
18147c478bd9Sstevel@tonic-gate int
nfs4_async_pageio(vnode_t * vp,page_t * pp,u_offset_t io_off,size_t io_len,int flags,cred_t * cr,int (* pageio)(vnode_t *,page_t *,u_offset_t,size_t,int,cred_t *))18157c478bd9Sstevel@tonic-gate nfs4_async_pageio(vnode_t *vp, page_t *pp, u_offset_t io_off, size_t io_len,
1816b9238976Sth int flags, cred_t *cr, int (*pageio)(vnode_t *, page_t *, u_offset_t,
1817b9238976Sth size_t, int, cred_t *))
18187c478bd9Sstevel@tonic-gate {
18197c478bd9Sstevel@tonic-gate rnode4_t *rp;
18207c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
18217c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args;
18227c478bd9Sstevel@tonic-gate
18237c478bd9Sstevel@tonic-gate ASSERT(flags & B_ASYNC);
18247c478bd9Sstevel@tonic-gate ASSERT(vp->v_vfsp != NULL);
18257c478bd9Sstevel@tonic-gate
18267c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
18277c478bd9Sstevel@tonic-gate ASSERT(rp->r_count > 0);
18287c478bd9Sstevel@tonic-gate
18297c478bd9Sstevel@tonic-gate mi = VTOMI4(vp);
18307c478bd9Sstevel@tonic-gate
18317c478bd9Sstevel@tonic-gate /*
18327c478bd9Sstevel@tonic-gate * If we can't allocate a request structure, do the pageio
18337c478bd9Sstevel@tonic-gate * request synchronously in this thread's context.
18347c478bd9Sstevel@tonic-gate */
18357c478bd9Sstevel@tonic-gate if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
18367c478bd9Sstevel@tonic-gate goto noasync;
18377c478bd9Sstevel@tonic-gate
18387c478bd9Sstevel@tonic-gate args->a_next = NULL;
18397c478bd9Sstevel@tonic-gate #ifdef DEBUG
18407c478bd9Sstevel@tonic-gate args->a_queuer = curthread;
18417c478bd9Sstevel@tonic-gate #endif
18427c478bd9Sstevel@tonic-gate VN_HOLD(vp);
18437c478bd9Sstevel@tonic-gate args->a_vp = vp;
18447c478bd9Sstevel@tonic-gate ASSERT(cr != NULL);
18457c478bd9Sstevel@tonic-gate crhold(cr);
18467c478bd9Sstevel@tonic-gate args->a_cred = cr;
18477c478bd9Sstevel@tonic-gate args->a_io = NFS4_PAGEIO;
18487c478bd9Sstevel@tonic-gate args->a_nfs4_pageio = pageio;
18497c478bd9Sstevel@tonic-gate args->a_nfs4_pp = pp;
18507c478bd9Sstevel@tonic-gate args->a_nfs4_off = io_off;
18517c478bd9Sstevel@tonic-gate args->a_nfs4_len = (uint_t)io_len;
18527c478bd9Sstevel@tonic-gate args->a_nfs4_flags = flags;
18537c478bd9Sstevel@tonic-gate
18547c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
18557c478bd9Sstevel@tonic-gate
18567c478bd9Sstevel@tonic-gate /*
18577c478bd9Sstevel@tonic-gate * If asyncio has been disabled, then make a synchronous request.
18587c478bd9Sstevel@tonic-gate * This check is done a second time in case async io was diabled
18597c478bd9Sstevel@tonic-gate * while this thread was blocked waiting for memory pressure to
18607c478bd9Sstevel@tonic-gate * reduce or for the queue to drain.
18617c478bd9Sstevel@tonic-gate */
18627c478bd9Sstevel@tonic-gate if (mi->mi_max_threads == 0) {
18637c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
18647c478bd9Sstevel@tonic-gate
18657c478bd9Sstevel@tonic-gate VN_RELE(vp);
18667c478bd9Sstevel@tonic-gate crfree(cr);
18677c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args));
18687c478bd9Sstevel@tonic-gate goto noasync;
18697c478bd9Sstevel@tonic-gate }
18707c478bd9Sstevel@tonic-gate
18717c478bd9Sstevel@tonic-gate /*
18727c478bd9Sstevel@tonic-gate * Link request structure into the async list and
18737c478bd9Sstevel@tonic-gate * wakeup async thread to do the i/o.
18747c478bd9Sstevel@tonic-gate */
18757c478bd9Sstevel@tonic-gate if (mi->mi_async_reqs[NFS4_PAGEIO] == NULL) {
18767c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_PAGEIO] = args;
18777c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PAGEIO] = args;
18787c478bd9Sstevel@tonic-gate } else {
18797c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PAGEIO]->a_next = args;
18807c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_PAGEIO] = args;
18817c478bd9Sstevel@tonic-gate }
18827c478bd9Sstevel@tonic-gate
18837c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
18847c478bd9Sstevel@tonic-gate rp->r_count++;
18857c478bd9Sstevel@tonic-gate rp->r_awcount++;
18867c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
18877c478bd9Sstevel@tonic-gate
18887c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) {
18897c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
18907c478bd9Sstevel@tonic-gate kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
18917c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
18927c478bd9Sstevel@tonic-gate }
18937c478bd9Sstevel@tonic-gate
18947c478bd9Sstevel@tonic-gate mi->mi_async_req_count++;
18957c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0);
18967c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_reqs_cv);
18977c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
18987c478bd9Sstevel@tonic-gate return (0);
18997c478bd9Sstevel@tonic-gate
19007c478bd9Sstevel@tonic-gate noasync:
19017c478bd9Sstevel@tonic-gate /*
19027c478bd9Sstevel@tonic-gate * If we can't do it ASYNC, for reads we do nothing (but cleanup
19037c478bd9Sstevel@tonic-gate * the page list), for writes we do it synchronously, except for
19047c478bd9Sstevel@tonic-gate * proc_pageout/proc_fsflush as described below.
19057c478bd9Sstevel@tonic-gate */
19067c478bd9Sstevel@tonic-gate if (flags & B_READ) {
19077c478bd9Sstevel@tonic-gate pvn_read_done(pp, flags | B_ERROR);
19087c478bd9Sstevel@tonic-gate return (0);
19097c478bd9Sstevel@tonic-gate }
19107c478bd9Sstevel@tonic-gate
19117c478bd9Sstevel@tonic-gate if (curproc == proc_pageout || curproc == proc_fsflush) {
19127c478bd9Sstevel@tonic-gate /*
19137c478bd9Sstevel@tonic-gate * If we get here in the context of the pageout/fsflush,
19147c478bd9Sstevel@tonic-gate * we refuse to do a sync write, because this may hang
19157c478bd9Sstevel@tonic-gate * pageout/fsflush (and the machine). In this case, we just
19167c478bd9Sstevel@tonic-gate * re-mark the page as dirty and punt on the page.
19177c478bd9Sstevel@tonic-gate *
19187c478bd9Sstevel@tonic-gate * Make sure B_FORCE isn't set. We can re-mark the
19197c478bd9Sstevel@tonic-gate * pages as dirty and unlock the pages in one swoop by
19207c478bd9Sstevel@tonic-gate * passing in B_ERROR to pvn_write_done(). However,
19217c478bd9Sstevel@tonic-gate * we should make sure B_FORCE isn't set - we don't
19227c478bd9Sstevel@tonic-gate * want the page tossed before it gets written out.
19237c478bd9Sstevel@tonic-gate */
19247c478bd9Sstevel@tonic-gate if (flags & B_FORCE)
19257c478bd9Sstevel@tonic-gate flags &= ~(B_INVAL | B_FORCE);
19267c478bd9Sstevel@tonic-gate pvn_write_done(pp, flags | B_ERROR);
19277c478bd9Sstevel@tonic-gate return (0);
19287c478bd9Sstevel@tonic-gate }
19297c478bd9Sstevel@tonic-gate
1930108322fbScarlsonj if (nfs_zone() != mi->mi_zone) {
19317c478bd9Sstevel@tonic-gate /*
19327c478bd9Sstevel@tonic-gate * So this was a cross-zone sync pageio. We pass in B_ERROR
19337c478bd9Sstevel@tonic-gate * to pvn_write_done() to re-mark the pages as dirty and unlock
19347c478bd9Sstevel@tonic-gate * them.
19357c478bd9Sstevel@tonic-gate *
19367c478bd9Sstevel@tonic-gate * We don't want to clear B_FORCE here as the caller presumably
19377c478bd9Sstevel@tonic-gate * knows what they're doing if they set it.
19387c478bd9Sstevel@tonic-gate */
19397c478bd9Sstevel@tonic-gate pvn_write_done(pp, flags | B_ERROR);
19407c478bd9Sstevel@tonic-gate return (EPERM);
19417c478bd9Sstevel@tonic-gate }
19427c478bd9Sstevel@tonic-gate return ((*pageio)(vp, pp, io_off, io_len, flags, cr));
19437c478bd9Sstevel@tonic-gate }
19447c478bd9Sstevel@tonic-gate
19457c478bd9Sstevel@tonic-gate void
nfs4_async_readdir(vnode_t * vp,rddir4_cache * rdc,cred_t * cr,int (* readdir)(vnode_t *,rddir4_cache *,cred_t *))19467c478bd9Sstevel@tonic-gate nfs4_async_readdir(vnode_t *vp, rddir4_cache *rdc, cred_t *cr,
1947b9238976Sth int (*readdir)(vnode_t *, rddir4_cache *, cred_t *))
19487c478bd9Sstevel@tonic-gate {
19497c478bd9Sstevel@tonic-gate rnode4_t *rp;
19507c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
19517c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args;
19527c478bd9Sstevel@tonic-gate
19537c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
19547c478bd9Sstevel@tonic-gate ASSERT(rp->r_freef == NULL);
19557c478bd9Sstevel@tonic-gate
19567c478bd9Sstevel@tonic-gate mi = VTOMI4(vp);
19577c478bd9Sstevel@tonic-gate
19587c478bd9Sstevel@tonic-gate /*
19597c478bd9Sstevel@tonic-gate * If we can't allocate a request structure, skip the readdir.
19607c478bd9Sstevel@tonic-gate */
19617c478bd9Sstevel@tonic-gate if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
19627c478bd9Sstevel@tonic-gate goto noasync;
19637c478bd9Sstevel@tonic-gate
19647c478bd9Sstevel@tonic-gate args->a_next = NULL;
19657c478bd9Sstevel@tonic-gate #ifdef DEBUG
19667c478bd9Sstevel@tonic-gate args->a_queuer = curthread;
19677c478bd9Sstevel@tonic-gate #endif
19687c478bd9Sstevel@tonic-gate VN_HOLD(vp);
19697c478bd9Sstevel@tonic-gate args->a_vp = vp;
19707c478bd9Sstevel@tonic-gate ASSERT(cr != NULL);
19717c478bd9Sstevel@tonic-gate crhold(cr);
19727c478bd9Sstevel@tonic-gate args->a_cred = cr;
19737c478bd9Sstevel@tonic-gate args->a_io = NFS4_READDIR;
19747c478bd9Sstevel@tonic-gate args->a_nfs4_readdir = readdir;
19757c478bd9Sstevel@tonic-gate args->a_nfs4_rdc = rdc;
19767c478bd9Sstevel@tonic-gate
19777c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
19787c478bd9Sstevel@tonic-gate
19797c478bd9Sstevel@tonic-gate /*
19807c478bd9Sstevel@tonic-gate * If asyncio has been disabled, then skip this request
19817c478bd9Sstevel@tonic-gate */
19827c478bd9Sstevel@tonic-gate if (mi->mi_max_threads == 0) {
19837c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
19847c478bd9Sstevel@tonic-gate
19857c478bd9Sstevel@tonic-gate VN_RELE(vp);
19867c478bd9Sstevel@tonic-gate crfree(cr);
19877c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args));
19887c478bd9Sstevel@tonic-gate goto noasync;
19897c478bd9Sstevel@tonic-gate }
19907c478bd9Sstevel@tonic-gate
19917c478bd9Sstevel@tonic-gate /*
19927c478bd9Sstevel@tonic-gate * Link request structure into the async list and
19937c478bd9Sstevel@tonic-gate * wakeup async thread to do the i/o.
19947c478bd9Sstevel@tonic-gate */
19957c478bd9Sstevel@tonic-gate if (mi->mi_async_reqs[NFS4_READDIR] == NULL) {
19967c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_READDIR] = args;
19977c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_READDIR] = args;
19987c478bd9Sstevel@tonic-gate } else {
19997c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_READDIR]->a_next = args;
20007c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_READDIR] = args;
20017c478bd9Sstevel@tonic-gate }
20027c478bd9Sstevel@tonic-gate
20037c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
20047c478bd9Sstevel@tonic-gate rp->r_count++;
20057c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
20067c478bd9Sstevel@tonic-gate
20077c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) {
20087c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
20097c478bd9Sstevel@tonic-gate kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
20107c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
20117c478bd9Sstevel@tonic-gate }
20127c478bd9Sstevel@tonic-gate
20137c478bd9Sstevel@tonic-gate mi->mi_async_req_count++;
20147c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0);
20157c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_reqs_cv);
20167c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
20177c478bd9Sstevel@tonic-gate return;
20187c478bd9Sstevel@tonic-gate
20197c478bd9Sstevel@tonic-gate noasync:
20207c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
20217c478bd9Sstevel@tonic-gate rdc->entries = NULL;
20227c478bd9Sstevel@tonic-gate /*
20237c478bd9Sstevel@tonic-gate * Indicate that no one is trying to fill this entry and
20247c478bd9Sstevel@tonic-gate * it still needs to be filled.
20257c478bd9Sstevel@tonic-gate */
20267c478bd9Sstevel@tonic-gate rdc->flags &= ~RDDIR;
20277c478bd9Sstevel@tonic-gate rdc->flags |= RDDIRREQ;
20287c478bd9Sstevel@tonic-gate rddir4_cache_rele(rp, rdc);
20297c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
20307c478bd9Sstevel@tonic-gate }
20317c478bd9Sstevel@tonic-gate
20327c478bd9Sstevel@tonic-gate void
nfs4_async_commit(vnode_t * vp,page_t * plist,offset3 offset,count3 count,cred_t * cr,void (* commit)(vnode_t *,page_t *,offset3,count3,cred_t *))20337c478bd9Sstevel@tonic-gate nfs4_async_commit(vnode_t *vp, page_t *plist, offset3 offset, count3 count,
2034b9238976Sth cred_t *cr, void (*commit)(vnode_t *, page_t *, offset3, count3,
2035b9238976Sth cred_t *))
20367c478bd9Sstevel@tonic-gate {
20377c478bd9Sstevel@tonic-gate rnode4_t *rp;
20387c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
20397c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args;
20407c478bd9Sstevel@tonic-gate page_t *pp;
20417c478bd9Sstevel@tonic-gate
20427c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
20437c478bd9Sstevel@tonic-gate mi = VTOMI4(vp);
20447c478bd9Sstevel@tonic-gate
20457c478bd9Sstevel@tonic-gate /*
20467c478bd9Sstevel@tonic-gate * If we can't allocate a request structure, do the commit
20477c478bd9Sstevel@tonic-gate * operation synchronously in this thread's context.
20487c478bd9Sstevel@tonic-gate */
20497c478bd9Sstevel@tonic-gate if ((args = kmem_alloc(sizeof (*args), KM_NOSLEEP)) == NULL)
20507c478bd9Sstevel@tonic-gate goto noasync;
20517c478bd9Sstevel@tonic-gate
20527c478bd9Sstevel@tonic-gate args->a_next = NULL;
20537c478bd9Sstevel@tonic-gate #ifdef DEBUG
20547c478bd9Sstevel@tonic-gate args->a_queuer = curthread;
20557c478bd9Sstevel@tonic-gate #endif
20567c478bd9Sstevel@tonic-gate VN_HOLD(vp);
20577c478bd9Sstevel@tonic-gate args->a_vp = vp;
20587c478bd9Sstevel@tonic-gate ASSERT(cr != NULL);
20597c478bd9Sstevel@tonic-gate crhold(cr);
20607c478bd9Sstevel@tonic-gate args->a_cred = cr;
20617c478bd9Sstevel@tonic-gate args->a_io = NFS4_COMMIT;
20627c478bd9Sstevel@tonic-gate args->a_nfs4_commit = commit;
20637c478bd9Sstevel@tonic-gate args->a_nfs4_plist = plist;
20647c478bd9Sstevel@tonic-gate args->a_nfs4_offset = offset;
20657c478bd9Sstevel@tonic-gate args->a_nfs4_count = count;
20667c478bd9Sstevel@tonic-gate
20677c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
20687c478bd9Sstevel@tonic-gate
20697c478bd9Sstevel@tonic-gate /*
20707c478bd9Sstevel@tonic-gate * If asyncio has been disabled, then make a synchronous request.
20717c478bd9Sstevel@tonic-gate * This check is done a second time in case async io was diabled
20727c478bd9Sstevel@tonic-gate * while this thread was blocked waiting for memory pressure to
20737c478bd9Sstevel@tonic-gate * reduce or for the queue to drain.
20747c478bd9Sstevel@tonic-gate */
20757c478bd9Sstevel@tonic-gate if (mi->mi_max_threads == 0) {
20767c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
20777c478bd9Sstevel@tonic-gate
20787c478bd9Sstevel@tonic-gate VN_RELE(vp);
20797c478bd9Sstevel@tonic-gate crfree(cr);
20807c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args));
20817c478bd9Sstevel@tonic-gate goto noasync;
20827c478bd9Sstevel@tonic-gate }
20837c478bd9Sstevel@tonic-gate
20847c478bd9Sstevel@tonic-gate /*
20857c478bd9Sstevel@tonic-gate * Link request structure into the async list and
20867c478bd9Sstevel@tonic-gate * wakeup async thread to do the i/o.
20877c478bd9Sstevel@tonic-gate */
20887c478bd9Sstevel@tonic-gate if (mi->mi_async_reqs[NFS4_COMMIT] == NULL) {
20897c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_COMMIT] = args;
20907c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_COMMIT] = args;
20917c478bd9Sstevel@tonic-gate } else {
20927c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_COMMIT]->a_next = args;
20937c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_COMMIT] = args;
20947c478bd9Sstevel@tonic-gate }
20957c478bd9Sstevel@tonic-gate
20967c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
20977c478bd9Sstevel@tonic-gate rp->r_count++;
20987c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
20997c478bd9Sstevel@tonic-gate
21007c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) {
21017c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
21027c478bd9Sstevel@tonic-gate kstat_waitq_enter(KSTAT_IO_PTR(mi->mi_io_kstats));
21037c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
21047c478bd9Sstevel@tonic-gate }
21057c478bd9Sstevel@tonic-gate
21067c478bd9Sstevel@tonic-gate mi->mi_async_req_count++;
21077c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0);
21087c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_reqs_cv);
21097c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
21107c478bd9Sstevel@tonic-gate return;
21117c478bd9Sstevel@tonic-gate
21127c478bd9Sstevel@tonic-gate noasync:
21137c478bd9Sstevel@tonic-gate if (curproc == proc_pageout || curproc == proc_fsflush ||
2114108322fbScarlsonj nfs_zone() != mi->mi_zone) {
21157c478bd9Sstevel@tonic-gate while (plist != NULL) {
21167c478bd9Sstevel@tonic-gate pp = plist;
21177c478bd9Sstevel@tonic-gate page_sub(&plist, pp);
21187c478bd9Sstevel@tonic-gate pp->p_fsdata = C_COMMIT;
21197c478bd9Sstevel@tonic-gate page_unlock(pp);
21207c478bd9Sstevel@tonic-gate }
21217c478bd9Sstevel@tonic-gate return;
21227c478bd9Sstevel@tonic-gate }
21237c478bd9Sstevel@tonic-gate (*commit)(vp, plist, offset, count, cr);
21247c478bd9Sstevel@tonic-gate }
21257c478bd9Sstevel@tonic-gate
21267c478bd9Sstevel@tonic-gate /*
21277c478bd9Sstevel@tonic-gate * nfs4_async_inactive - hand off a VOP_INACTIVE call to a thread. The
21287c478bd9Sstevel@tonic-gate * reference to the vnode is handed over to the thread; the caller should
21297c478bd9Sstevel@tonic-gate * no longer refer to the vnode.
21307c478bd9Sstevel@tonic-gate *
21317c478bd9Sstevel@tonic-gate * Unlike most of the async routines, this handoff is needed for
21327c478bd9Sstevel@tonic-gate * correctness reasons, not just performance. So doing operations in the
21337c478bd9Sstevel@tonic-gate * context of the current thread is not an option.
21347c478bd9Sstevel@tonic-gate */
21357c478bd9Sstevel@tonic-gate void
nfs4_async_inactive(vnode_t * vp,cred_t * cr)21367c478bd9Sstevel@tonic-gate nfs4_async_inactive(vnode_t *vp, cred_t *cr)
21377c478bd9Sstevel@tonic-gate {
21387c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
21397c478bd9Sstevel@tonic-gate struct nfs4_async_reqs *args;
21407c478bd9Sstevel@tonic-gate boolean_t signal_inactive_thread = B_FALSE;
21417c478bd9Sstevel@tonic-gate
21427c478bd9Sstevel@tonic-gate mi = VTOMI4(vp);
21437c478bd9Sstevel@tonic-gate
21447c478bd9Sstevel@tonic-gate args = kmem_alloc(sizeof (*args), KM_SLEEP);
21457c478bd9Sstevel@tonic-gate args->a_next = NULL;
21467c478bd9Sstevel@tonic-gate #ifdef DEBUG
21477c478bd9Sstevel@tonic-gate args->a_queuer = curthread;
21487c478bd9Sstevel@tonic-gate #endif
21497c478bd9Sstevel@tonic-gate args->a_vp = vp;
21507c478bd9Sstevel@tonic-gate ASSERT(cr != NULL);
21517c478bd9Sstevel@tonic-gate crhold(cr);
21527c478bd9Sstevel@tonic-gate args->a_cred = cr;
21537c478bd9Sstevel@tonic-gate args->a_io = NFS4_INACTIVE;
21547c478bd9Sstevel@tonic-gate
21557c478bd9Sstevel@tonic-gate /*
21567c478bd9Sstevel@tonic-gate * Note that we don't check mi->mi_max_threads here, since we
21577c478bd9Sstevel@tonic-gate * *need* to get rid of this vnode regardless of whether someone
21587c478bd9Sstevel@tonic-gate * set nfs4_max_threads to zero in /etc/system.
21597c478bd9Sstevel@tonic-gate *
21607c478bd9Sstevel@tonic-gate * The manager thread knows about this and is willing to create
2161da6c28aaSamw * at least one thread to accommodate us.
21627c478bd9Sstevel@tonic-gate */
21637c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
21647c478bd9Sstevel@tonic-gate if (mi->mi_inactive_thread == NULL) {
21657c478bd9Sstevel@tonic-gate rnode4_t *rp;
21667c478bd9Sstevel@tonic-gate vnode_t *unldvp = NULL;
21677c478bd9Sstevel@tonic-gate char *unlname;
21687c478bd9Sstevel@tonic-gate cred_t *unlcred;
21697c478bd9Sstevel@tonic-gate
21707c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
21717c478bd9Sstevel@tonic-gate /*
21727c478bd9Sstevel@tonic-gate * We just need to free up the memory associated with the
21737c478bd9Sstevel@tonic-gate * vnode, which can be safely done from within the current
21747c478bd9Sstevel@tonic-gate * context.
21757c478bd9Sstevel@tonic-gate */
21767c478bd9Sstevel@tonic-gate crfree(cr); /* drop our reference */
21777c478bd9Sstevel@tonic-gate kmem_free(args, sizeof (*args));
21787c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
21797c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
21807c478bd9Sstevel@tonic-gate if (rp->r_unldvp != NULL) {
21817c478bd9Sstevel@tonic-gate unldvp = rp->r_unldvp;
21827c478bd9Sstevel@tonic-gate rp->r_unldvp = NULL;
21837c478bd9Sstevel@tonic-gate unlname = rp->r_unlname;
21847c478bd9Sstevel@tonic-gate rp->r_unlname = NULL;
21857c478bd9Sstevel@tonic-gate unlcred = rp->r_unlcred;
21867c478bd9Sstevel@tonic-gate rp->r_unlcred = NULL;
21877c478bd9Sstevel@tonic-gate }
21887c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
21897c478bd9Sstevel@tonic-gate /*
21907c478bd9Sstevel@tonic-gate * No need to explicitly throw away any cached pages. The
21917c478bd9Sstevel@tonic-gate * eventual r4inactive() will attempt a synchronous
21927c478bd9Sstevel@tonic-gate * VOP_PUTPAGE() which will immediately fail since the request
21937c478bd9Sstevel@tonic-gate * is coming from the wrong zone, and then will proceed to call
21947c478bd9Sstevel@tonic-gate * nfs4_invalidate_pages() which will clean things up for us.
21957c478bd9Sstevel@tonic-gate *
21967c478bd9Sstevel@tonic-gate * Throw away the delegation here so rp4_addfree()'s attempt to
21977c478bd9Sstevel@tonic-gate * return any existing delegations becomes a no-op.
21987c478bd9Sstevel@tonic-gate */
219950a83466Sjwahlig if (rp->r_deleg_type != OPEN_DELEGATE_NONE) {
220050a83466Sjwahlig (void) nfs_rw_enter_sig(&mi->mi_recovlock, RW_READER,
2201b9238976Sth FALSE);
22027c478bd9Sstevel@tonic-gate (void) nfs4delegreturn(rp, NFS4_DR_DISCARD);
220350a83466Sjwahlig nfs_rw_exit(&mi->mi_recovlock);
220450a83466Sjwahlig }
22057c478bd9Sstevel@tonic-gate nfs4_clear_open_streams(rp);
22067c478bd9Sstevel@tonic-gate
22077c478bd9Sstevel@tonic-gate rp4_addfree(rp, cr);
22087c478bd9Sstevel@tonic-gate if (unldvp != NULL) {
22097c478bd9Sstevel@tonic-gate kmem_free(unlname, MAXNAMELEN);
22107c478bd9Sstevel@tonic-gate VN_RELE(unldvp);
22117c478bd9Sstevel@tonic-gate crfree(unlcred);
22127c478bd9Sstevel@tonic-gate }
22137c478bd9Sstevel@tonic-gate return;
22147c478bd9Sstevel@tonic-gate }
22157c478bd9Sstevel@tonic-gate
22167c478bd9Sstevel@tonic-gate if (mi->mi_manager_thread == NULL) {
22177c478bd9Sstevel@tonic-gate /*
22187c478bd9Sstevel@tonic-gate * We want to talk to the inactive thread.
22197c478bd9Sstevel@tonic-gate */
22207c478bd9Sstevel@tonic-gate signal_inactive_thread = B_TRUE;
22217c478bd9Sstevel@tonic-gate }
22227c478bd9Sstevel@tonic-gate
22237c478bd9Sstevel@tonic-gate /*
22247c478bd9Sstevel@tonic-gate * Enqueue the vnode and wake up either the special thread (empty
22257c478bd9Sstevel@tonic-gate * list) or an async thread.
22267c478bd9Sstevel@tonic-gate */
22277c478bd9Sstevel@tonic-gate if (mi->mi_async_reqs[NFS4_INACTIVE] == NULL) {
22287c478bd9Sstevel@tonic-gate mi->mi_async_reqs[NFS4_INACTIVE] = args;
22297c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_INACTIVE] = args;
22307c478bd9Sstevel@tonic-gate signal_inactive_thread = B_TRUE;
22317c478bd9Sstevel@tonic-gate } else {
22327c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_INACTIVE]->a_next = args;
22337c478bd9Sstevel@tonic-gate mi->mi_async_tail[NFS4_INACTIVE] = args;
22347c478bd9Sstevel@tonic-gate }
22357c478bd9Sstevel@tonic-gate if (signal_inactive_thread) {
22367c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_inact_req_cv);
22377c478bd9Sstevel@tonic-gate } else {
22387c478bd9Sstevel@tonic-gate mi->mi_async_req_count++;
22397c478bd9Sstevel@tonic-gate ASSERT(mi->mi_async_req_count != 0);
22407c478bd9Sstevel@tonic-gate cv_signal(&mi->mi_async_reqs_cv);
22417c478bd9Sstevel@tonic-gate }
22427c478bd9Sstevel@tonic-gate
22437c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_async_lock);
22447c478bd9Sstevel@tonic-gate }
22457c478bd9Sstevel@tonic-gate
22467c478bd9Sstevel@tonic-gate int
writerp4(rnode4_t * rp,caddr_t base,int tcount,struct uio * uio,int pgcreated)22477c478bd9Sstevel@tonic-gate writerp4(rnode4_t *rp, caddr_t base, int tcount, struct uio *uio, int pgcreated)
22487c478bd9Sstevel@tonic-gate {
22497c478bd9Sstevel@tonic-gate int pagecreate;
22507c478bd9Sstevel@tonic-gate int n;
22517c478bd9Sstevel@tonic-gate int saved_n;
22527c478bd9Sstevel@tonic-gate caddr_t saved_base;
22537c478bd9Sstevel@tonic-gate u_offset_t offset;
22547c478bd9Sstevel@tonic-gate int error;
22557c478bd9Sstevel@tonic-gate int sm_error;
2256a5652762Spraks vnode_t *vp = RTOV(rp);
22577c478bd9Sstevel@tonic-gate
22587c478bd9Sstevel@tonic-gate ASSERT(tcount <= MAXBSIZE && tcount <= uio->uio_resid);
22597c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&rp->r_rwlock, RW_WRITER));
2260a5652762Spraks if (!vpm_enable) {
2261a5652762Spraks ASSERT(((uintptr_t)base & MAXBOFFSET) + tcount <= MAXBSIZE);
2262a5652762Spraks }
22637c478bd9Sstevel@tonic-gate
22647c478bd9Sstevel@tonic-gate /*
22657c478bd9Sstevel@tonic-gate * Move bytes in at most PAGESIZE chunks. We must avoid
22667c478bd9Sstevel@tonic-gate * spanning pages in uiomove() because page faults may cause
22677c478bd9Sstevel@tonic-gate * the cache to be invalidated out from under us. The r_size is not
22687c478bd9Sstevel@tonic-gate * updated until after the uiomove. If we push the last page of a
22697c478bd9Sstevel@tonic-gate * file before r_size is correct, we will lose the data written past
22707c478bd9Sstevel@tonic-gate * the current (and invalid) r_size.
22717c478bd9Sstevel@tonic-gate */
22727c478bd9Sstevel@tonic-gate do {
22737c478bd9Sstevel@tonic-gate offset = uio->uio_loffset;
22747c478bd9Sstevel@tonic-gate pagecreate = 0;
22757c478bd9Sstevel@tonic-gate
22767c478bd9Sstevel@tonic-gate /*
22777c478bd9Sstevel@tonic-gate * n is the number of bytes required to satisfy the request
22787c478bd9Sstevel@tonic-gate * or the number of bytes to fill out the page.
22797c478bd9Sstevel@tonic-gate */
2280a5652762Spraks n = (int)MIN((PAGESIZE - (offset & PAGEOFFSET)), tcount);
22817c478bd9Sstevel@tonic-gate
22827c478bd9Sstevel@tonic-gate /*
22837c478bd9Sstevel@tonic-gate * Check to see if we can skip reading in the page
22847c478bd9Sstevel@tonic-gate * and just allocate the memory. We can do this
22857c478bd9Sstevel@tonic-gate * if we are going to rewrite the entire mapping
22867c478bd9Sstevel@tonic-gate * or if we are going to write to or beyond the current
22877c478bd9Sstevel@tonic-gate * end of file from the beginning of the mapping.
22887c478bd9Sstevel@tonic-gate *
22897c478bd9Sstevel@tonic-gate * The read of r_size is now protected by r_statelock.
22907c478bd9Sstevel@tonic-gate */
22917c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
22927c478bd9Sstevel@tonic-gate /*
22937c478bd9Sstevel@tonic-gate * When pgcreated is nonzero the caller has already done
22947c478bd9Sstevel@tonic-gate * a segmap_getmapflt with forcefault 0 and S_WRITE. With
22957c478bd9Sstevel@tonic-gate * segkpm this means we already have at least one page
22967c478bd9Sstevel@tonic-gate * created and mapped at base.
22977c478bd9Sstevel@tonic-gate */
22987c478bd9Sstevel@tonic-gate pagecreate = pgcreated ||
2299b9238976Sth ((offset & PAGEOFFSET) == 0 &&
2300b9238976Sth (n == PAGESIZE || ((offset + n) >= rp->r_size)));
23017c478bd9Sstevel@tonic-gate
23027c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
23037c478bd9Sstevel@tonic-gate
2304a5652762Spraks if (!vpm_enable && pagecreate) {
23057c478bd9Sstevel@tonic-gate /*
23067c478bd9Sstevel@tonic-gate * The last argument tells segmap_pagecreate() to
23077c478bd9Sstevel@tonic-gate * always lock the page, as opposed to sometimes
23087c478bd9Sstevel@tonic-gate * returning with the page locked. This way we avoid a
23097c478bd9Sstevel@tonic-gate * fault on the ensuing uiomove(), but also
23107c478bd9Sstevel@tonic-gate * more importantly (to fix bug 1094402) we can
23117c478bd9Sstevel@tonic-gate * call segmap_fault() to unlock the page in all
23127c478bd9Sstevel@tonic-gate * cases. An alternative would be to modify
23137c478bd9Sstevel@tonic-gate * segmap_pagecreate() to tell us when it is
23147c478bd9Sstevel@tonic-gate * locking a page, but that's a fairly major
23157c478bd9Sstevel@tonic-gate * interface change.
23167c478bd9Sstevel@tonic-gate */
23177c478bd9Sstevel@tonic-gate if (pgcreated == 0)
23187c478bd9Sstevel@tonic-gate (void) segmap_pagecreate(segkmap, base,
2319b9238976Sth (uint_t)n, 1);
23207c478bd9Sstevel@tonic-gate saved_base = base;
23217c478bd9Sstevel@tonic-gate saved_n = n;
23227c478bd9Sstevel@tonic-gate }
23237c478bd9Sstevel@tonic-gate
23247c478bd9Sstevel@tonic-gate /*
23257c478bd9Sstevel@tonic-gate * The number of bytes of data in the last page can not
23267c478bd9Sstevel@tonic-gate * be accurately be determined while page is being
23277c478bd9Sstevel@tonic-gate * uiomove'd to and the size of the file being updated.
23287c478bd9Sstevel@tonic-gate * Thus, inform threads which need to know accurately
23297c478bd9Sstevel@tonic-gate * how much data is in the last page of the file. They
23307c478bd9Sstevel@tonic-gate * will not do the i/o immediately, but will arrange for
23317c478bd9Sstevel@tonic-gate * the i/o to happen later when this modify operation
23327c478bd9Sstevel@tonic-gate * will have finished.
23337c478bd9Sstevel@tonic-gate */
23347c478bd9Sstevel@tonic-gate ASSERT(!(rp->r_flags & R4MODINPROGRESS));
23357c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
23367c478bd9Sstevel@tonic-gate rp->r_flags |= R4MODINPROGRESS;
23377c478bd9Sstevel@tonic-gate rp->r_modaddr = (offset & MAXBMASK);
23387c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
23397c478bd9Sstevel@tonic-gate
2340a5652762Spraks if (vpm_enable) {
2341a5652762Spraks /*
2342a5652762Spraks * Copy data. If new pages are created, part of
2343a5652762Spraks * the page that is not written will be initizliazed
2344a5652762Spraks * with zeros.
2345a5652762Spraks */
2346a5652762Spraks error = vpm_data_copy(vp, offset, n, uio,
2347b9238976Sth !pagecreate, NULL, 0, S_WRITE);
2348a5652762Spraks } else {
2349a5652762Spraks error = uiomove(base, n, UIO_WRITE, uio);
2350a5652762Spraks }
23517c478bd9Sstevel@tonic-gate
23527c478bd9Sstevel@tonic-gate /*
23537c478bd9Sstevel@tonic-gate * r_size is the maximum number of
23547c478bd9Sstevel@tonic-gate * bytes known to be in the file.
23557c478bd9Sstevel@tonic-gate * Make sure it is at least as high as the
23567c478bd9Sstevel@tonic-gate * first unwritten byte pointed to by uio_loffset.
23577c478bd9Sstevel@tonic-gate */
23587c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
23597c478bd9Sstevel@tonic-gate if (rp->r_size < uio->uio_loffset)
23607c478bd9Sstevel@tonic-gate rp->r_size = uio->uio_loffset;
23617c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4MODINPROGRESS;
23627c478bd9Sstevel@tonic-gate rp->r_flags |= R4DIRTY;
23637c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
23647c478bd9Sstevel@tonic-gate
23657c478bd9Sstevel@tonic-gate /* n = # of bytes written */
23667c478bd9Sstevel@tonic-gate n = (int)(uio->uio_loffset - offset);
2367a5652762Spraks
2368a5652762Spraks if (!vpm_enable) {
2369a5652762Spraks base += n;
2370a5652762Spraks }
2371a5652762Spraks
23727c478bd9Sstevel@tonic-gate tcount -= n;
23737c478bd9Sstevel@tonic-gate /*
23747c478bd9Sstevel@tonic-gate * If we created pages w/o initializing them completely,
23757c478bd9Sstevel@tonic-gate * we need to zero the part that wasn't set up.
23767c478bd9Sstevel@tonic-gate * This happens on a most EOF write cases and if
23777c478bd9Sstevel@tonic-gate * we had some sort of error during the uiomove.
23787c478bd9Sstevel@tonic-gate */
2379a5652762Spraks if (!vpm_enable && pagecreate) {
23807c478bd9Sstevel@tonic-gate if ((uio->uio_loffset & PAGEOFFSET) || n == 0)
23817c478bd9Sstevel@tonic-gate (void) kzero(base, PAGESIZE - n);
23827c478bd9Sstevel@tonic-gate
23837c478bd9Sstevel@tonic-gate if (pgcreated) {
23847c478bd9Sstevel@tonic-gate /*
23857c478bd9Sstevel@tonic-gate * Caller is responsible for this page,
23867c478bd9Sstevel@tonic-gate * it was not created in this loop.
23877c478bd9Sstevel@tonic-gate */
23887c478bd9Sstevel@tonic-gate pgcreated = 0;
23897c478bd9Sstevel@tonic-gate } else {
23907c478bd9Sstevel@tonic-gate /*
23917c478bd9Sstevel@tonic-gate * For bug 1094402: segmap_pagecreate locks
23927c478bd9Sstevel@tonic-gate * page. Unlock it. This also unlocks the
23937c478bd9Sstevel@tonic-gate * pages allocated by page_create_va() in
23947c478bd9Sstevel@tonic-gate * segmap_pagecreate().
23957c478bd9Sstevel@tonic-gate */
23967c478bd9Sstevel@tonic-gate sm_error = segmap_fault(kas.a_hat, segkmap,
2397b9238976Sth saved_base, saved_n,
2398b9238976Sth F_SOFTUNLOCK, S_WRITE);
23997c478bd9Sstevel@tonic-gate if (error == 0)
24007c478bd9Sstevel@tonic-gate error = sm_error;
24017c478bd9Sstevel@tonic-gate }
24027c478bd9Sstevel@tonic-gate }
24037c478bd9Sstevel@tonic-gate } while (tcount > 0 && error == 0);
24047c478bd9Sstevel@tonic-gate
24057c478bd9Sstevel@tonic-gate return (error);
24067c478bd9Sstevel@tonic-gate }
24077c478bd9Sstevel@tonic-gate
24087c478bd9Sstevel@tonic-gate int
nfs4_putpages(vnode_t * vp,u_offset_t off,size_t len,int flags,cred_t * cr)24097c478bd9Sstevel@tonic-gate nfs4_putpages(vnode_t *vp, u_offset_t off, size_t len, int flags, cred_t *cr)
24107c478bd9Sstevel@tonic-gate {
24117c478bd9Sstevel@tonic-gate rnode4_t *rp;
24127c478bd9Sstevel@tonic-gate page_t *pp;
24137c478bd9Sstevel@tonic-gate u_offset_t eoff;
24147c478bd9Sstevel@tonic-gate u_offset_t io_off;
24157c478bd9Sstevel@tonic-gate size_t io_len;
24167c478bd9Sstevel@tonic-gate int error;
24177c478bd9Sstevel@tonic-gate int rdirty;
24187c478bd9Sstevel@tonic-gate int err;
24197c478bd9Sstevel@tonic-gate
24207c478bd9Sstevel@tonic-gate rp = VTOR4(vp);
24217c478bd9Sstevel@tonic-gate ASSERT(rp->r_count > 0);
24227c478bd9Sstevel@tonic-gate
24237c478bd9Sstevel@tonic-gate if (!nfs4_has_pages(vp))
24247c478bd9Sstevel@tonic-gate return (0);
24257c478bd9Sstevel@tonic-gate
24267c478bd9Sstevel@tonic-gate ASSERT(vp->v_type != VCHR);
24277c478bd9Sstevel@tonic-gate
24287c478bd9Sstevel@tonic-gate /*
24297c478bd9Sstevel@tonic-gate * If R4OUTOFSPACE is set, then all writes turn into B_INVAL
24307c478bd9Sstevel@tonic-gate * writes. B_FORCE is set to force the VM system to actually
24317c478bd9Sstevel@tonic-gate * invalidate the pages, even if the i/o failed. The pages
24327c478bd9Sstevel@tonic-gate * need to get invalidated because they can't be written out
24337c478bd9Sstevel@tonic-gate * because there isn't any space left on either the server's
24347c478bd9Sstevel@tonic-gate * file system or in the user's disk quota. The B_FREE bit
24357c478bd9Sstevel@tonic-gate * is cleared to avoid confusion as to whether this is a
24367c478bd9Sstevel@tonic-gate * request to place the page on the freelist or to destroy
24377c478bd9Sstevel@tonic-gate * it.
24387c478bd9Sstevel@tonic-gate */
24397c478bd9Sstevel@tonic-gate if ((rp->r_flags & R4OUTOFSPACE) ||
24407c478bd9Sstevel@tonic-gate (vp->v_vfsp->vfs_flag & VFS_UNMOUNTED))
24417c478bd9Sstevel@tonic-gate flags = (flags & ~B_FREE) | B_INVAL | B_FORCE;
24427c478bd9Sstevel@tonic-gate
24437c478bd9Sstevel@tonic-gate if (len == 0) {
24447c478bd9Sstevel@tonic-gate /*
24457c478bd9Sstevel@tonic-gate * If doing a full file synchronous operation, then clear
24467c478bd9Sstevel@tonic-gate * the R4DIRTY bit. If a page gets dirtied while the flush
24477c478bd9Sstevel@tonic-gate * is happening, then R4DIRTY will get set again. The
24487c478bd9Sstevel@tonic-gate * R4DIRTY bit must get cleared before the flush so that
24497c478bd9Sstevel@tonic-gate * we don't lose this information.
245084d68d8eSthurlow *
245184d68d8eSthurlow * If there are no full file async write operations
245284d68d8eSthurlow * pending and RDIRTY bit is set, clear it.
24537c478bd9Sstevel@tonic-gate */
24547c478bd9Sstevel@tonic-gate if (off == (u_offset_t)0 &&
24557c478bd9Sstevel@tonic-gate !(flags & B_ASYNC) &&
24567c478bd9Sstevel@tonic-gate (rp->r_flags & R4DIRTY)) {
24577c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
24587c478bd9Sstevel@tonic-gate rdirty = (rp->r_flags & R4DIRTY);
24597c478bd9Sstevel@tonic-gate rp->r_flags &= ~R4DIRTY;
24607c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
246184d68d8eSthurlow } else if (flags & B_ASYNC && off == (u_offset_t)0) {
246284d68d8eSthurlow mutex_enter(&rp->r_statelock);
246384d68d8eSthurlow if (rp->r_flags & R4DIRTY && rp->r_awcount == 0) {
246484d68d8eSthurlow rdirty = (rp->r_flags & R4DIRTY);
246584d68d8eSthurlow rp->r_flags &= ~R4DIRTY;
246684d68d8eSthurlow }
246784d68d8eSthurlow mutex_exit(&rp->r_statelock);
24687c478bd9Sstevel@tonic-gate } else
24697c478bd9Sstevel@tonic-gate rdirty = 0;
24707c478bd9Sstevel@tonic-gate
24717c478bd9Sstevel@tonic-gate /*
24727c478bd9Sstevel@tonic-gate * Search the entire vp list for pages >= off, and flush
24737c478bd9Sstevel@tonic-gate * the dirty pages.
24747c478bd9Sstevel@tonic-gate */
24757c478bd9Sstevel@tonic-gate error = pvn_vplist_dirty(vp, off, rp->r_putapage,
2476b9238976Sth flags, cr);
24777c478bd9Sstevel@tonic-gate
24787c478bd9Sstevel@tonic-gate /*
2479da6c28aaSamw * If an error occurred and the file was marked as dirty
24807c478bd9Sstevel@tonic-gate * before and we aren't forcibly invalidating pages, then
24817c478bd9Sstevel@tonic-gate * reset the R4DIRTY flag.
24827c478bd9Sstevel@tonic-gate */
24837c478bd9Sstevel@tonic-gate if (error && rdirty &&
24847c478bd9Sstevel@tonic-gate (flags & (B_INVAL | B_FORCE)) != (B_INVAL | B_FORCE)) {
24857c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
24867c478bd9Sstevel@tonic-gate rp->r_flags |= R4DIRTY;
24877c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
24887c478bd9Sstevel@tonic-gate }
24897c478bd9Sstevel@tonic-gate } else {
24907c478bd9Sstevel@tonic-gate /*
24917c478bd9Sstevel@tonic-gate * Do a range from [off...off + len) looking for pages
24927c478bd9Sstevel@tonic-gate * to deal with.
24937c478bd9Sstevel@tonic-gate */
24947c478bd9Sstevel@tonic-gate error = 0;
24957c478bd9Sstevel@tonic-gate io_len = 0;
24967c478bd9Sstevel@tonic-gate eoff = off + len;
24977c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
24987c478bd9Sstevel@tonic-gate for (io_off = off; io_off < eoff && io_off < rp->r_size;
24997c478bd9Sstevel@tonic-gate io_off += io_len) {
25007c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
25017c478bd9Sstevel@tonic-gate /*
25027c478bd9Sstevel@tonic-gate * If we are not invalidating, synchronously
25037c478bd9Sstevel@tonic-gate * freeing or writing pages use the routine
25047c478bd9Sstevel@tonic-gate * page_lookup_nowait() to prevent reclaiming
25057c478bd9Sstevel@tonic-gate * them from the free list.
25067c478bd9Sstevel@tonic-gate */
25077c478bd9Sstevel@tonic-gate if ((flags & B_INVAL) || !(flags & B_ASYNC)) {
25087c478bd9Sstevel@tonic-gate pp = page_lookup(vp, io_off,
25097c478bd9Sstevel@tonic-gate (flags & (B_INVAL | B_FREE)) ?
25107c478bd9Sstevel@tonic-gate SE_EXCL : SE_SHARED);
25117c478bd9Sstevel@tonic-gate } else {
25127c478bd9Sstevel@tonic-gate pp = page_lookup_nowait(vp, io_off,
25137c478bd9Sstevel@tonic-gate (flags & B_FREE) ? SE_EXCL : SE_SHARED);
25147c478bd9Sstevel@tonic-gate }
25157c478bd9Sstevel@tonic-gate
25167c478bd9Sstevel@tonic-gate if (pp == NULL || !pvn_getdirty(pp, flags))
25177c478bd9Sstevel@tonic-gate io_len = PAGESIZE;
25187c478bd9Sstevel@tonic-gate else {
25197c478bd9Sstevel@tonic-gate err = (*rp->r_putapage)(vp, pp, &io_off,
25207c478bd9Sstevel@tonic-gate &io_len, flags, cr);
25217c478bd9Sstevel@tonic-gate if (!error)
25227c478bd9Sstevel@tonic-gate error = err;
25237c478bd9Sstevel@tonic-gate /*
25247c478bd9Sstevel@tonic-gate * "io_off" and "io_len" are returned as
25257c478bd9Sstevel@tonic-gate * the range of pages we actually wrote.
25267c478bd9Sstevel@tonic-gate * This allows us to skip ahead more quickly
25277c478bd9Sstevel@tonic-gate * since several pages may've been dealt
25287c478bd9Sstevel@tonic-gate * with by this iteration of the loop.
25297c478bd9Sstevel@tonic-gate */
25307c478bd9Sstevel@tonic-gate }
25317c478bd9Sstevel@tonic-gate mutex_enter(&rp->r_statelock);
25327c478bd9Sstevel@tonic-gate }
25337c478bd9Sstevel@tonic-gate mutex_exit(&rp->r_statelock);
25347c478bd9Sstevel@tonic-gate }
25357c478bd9Sstevel@tonic-gate
25367c478bd9Sstevel@tonic-gate return (error);
25377c478bd9Sstevel@tonic-gate }
25387c478bd9Sstevel@tonic-gate
/*
 * Invalidate cached pages for vp from offset off through end of file
 * (used on truncation).  Only one invalidation runs per rnode at a
 * time; concurrent callers serialize on the R4TRUNCATE flag and are
 * woken via r_cv when the current pass completes.  r_truncaddr records
 * the truncation offset while the operation is in progress.
 */
void
nfs4_invalidate_pages(vnode_t *vp, u_offset_t off, cred_t *cr)
{
	rnode4_t *rp;

	rp = VTOR4(vp);
	/* Operate on the real vnode, not a shadow for the same rnode. */
	if (IS_SHADOW(vp, rp))
		vp = RTOV4(rp);
	mutex_enter(&rp->r_statelock);
	/* Wait for any truncation already in progress on this rnode. */
	while (rp->r_flags & R4TRUNCATE)
		cv_wait(&rp->r_cv, &rp->r_statelock);
	rp->r_flags |= R4TRUNCATE;
	if (off == (u_offset_t)0) {
		/*
		 * Whole-file invalidation: no dirty pages will remain,
		 * and any stashed write error is cleared unless the
		 * rnode has gone stale.
		 */
		rp->r_flags &= ~R4DIRTY;
		if (!(rp->r_flags & R4STALE))
			rp->r_error = 0;
	}
	rp->r_truncaddr = off;
	mutex_exit(&rp->r_statelock);
	/* Destroy all pages >= off; B_TRUNC | B_INVAL forces them out. */
	(void) pvn_vplist_dirty(vp, off, rp->r_putapage,
	    B_INVAL | B_TRUNC, cr);
	mutex_enter(&rp->r_statelock);
	rp->r_flags &= ~R4TRUNCATE;
	/* Wake any threads waiting to start their own truncation. */
	cv_broadcast(&rp->r_cv);
	mutex_exit(&rp->r_statelock);
}
25657c478bd9Sstevel@tonic-gate
25667c478bd9Sstevel@tonic-gate static int
nfs4_mnt_kstat_update(kstat_t * ksp,int rw)25677c478bd9Sstevel@tonic-gate nfs4_mnt_kstat_update(kstat_t *ksp, int rw)
25687c478bd9Sstevel@tonic-gate {
25697c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
25707c478bd9Sstevel@tonic-gate struct mntinfo_kstat *mik;
25717c478bd9Sstevel@tonic-gate vfs_t *vfsp;
25727c478bd9Sstevel@tonic-gate
25737c478bd9Sstevel@tonic-gate /* this is a read-only kstat. Bail out on a write */
25747c478bd9Sstevel@tonic-gate if (rw == KSTAT_WRITE)
25757c478bd9Sstevel@tonic-gate return (EACCES);
25767c478bd9Sstevel@tonic-gate
25777c478bd9Sstevel@tonic-gate
25787c478bd9Sstevel@tonic-gate /*
25797c478bd9Sstevel@tonic-gate * We don't want to wait here as kstat_chain_lock could be held by
25807c478bd9Sstevel@tonic-gate * dounmount(). dounmount() takes vfs_reflock before the chain lock
25817c478bd9Sstevel@tonic-gate * and thus could lead to a deadlock.
25827c478bd9Sstevel@tonic-gate */
25837c478bd9Sstevel@tonic-gate vfsp = (struct vfs *)ksp->ks_private;
25847c478bd9Sstevel@tonic-gate
25857c478bd9Sstevel@tonic-gate mi = VFTOMI4(vfsp);
25867c478bd9Sstevel@tonic-gate mik = (struct mntinfo_kstat *)ksp->ks_data;
25877c478bd9Sstevel@tonic-gate
25887c478bd9Sstevel@tonic-gate (void) strcpy(mik->mik_proto, mi->mi_curr_serv->sv_knconf->knc_proto);
25897c478bd9Sstevel@tonic-gate
25907c478bd9Sstevel@tonic-gate mik->mik_vers = (uint32_t)mi->mi_vers;
25917c478bd9Sstevel@tonic-gate mik->mik_flags = mi->mi_flags;
25927c478bd9Sstevel@tonic-gate /*
25937c478bd9Sstevel@tonic-gate * The sv_secdata holds the flavor the client specifies.
25947c478bd9Sstevel@tonic-gate * If the client uses default and a security negotiation
25957c478bd9Sstevel@tonic-gate * occurs, sv_currsec will point to the current flavor
25967c478bd9Sstevel@tonic-gate * selected from the server flavor list.
25977c478bd9Sstevel@tonic-gate * sv_currsec is NULL if no security negotiation takes place.
25987c478bd9Sstevel@tonic-gate */
25997c478bd9Sstevel@tonic-gate mik->mik_secmod = mi->mi_curr_serv->sv_currsec ?
2600b9238976Sth mi->mi_curr_serv->sv_currsec->secmod :
2601b9238976Sth mi->mi_curr_serv->sv_secdata->secmod;
26027c478bd9Sstevel@tonic-gate mik->mik_curread = (uint32_t)mi->mi_curread;
26037c478bd9Sstevel@tonic-gate mik->mik_curwrite = (uint32_t)mi->mi_curwrite;
26047c478bd9Sstevel@tonic-gate mik->mik_retrans = mi->mi_retrans;
26057c478bd9Sstevel@tonic-gate mik->mik_timeo = mi->mi_timeo;
26067c478bd9Sstevel@tonic-gate mik->mik_acregmin = HR2SEC(mi->mi_acregmin);
26077c478bd9Sstevel@tonic-gate mik->mik_acregmax = HR2SEC(mi->mi_acregmax);
26087c478bd9Sstevel@tonic-gate mik->mik_acdirmin = HR2SEC(mi->mi_acdirmin);
26097c478bd9Sstevel@tonic-gate mik->mik_acdirmax = HR2SEC(mi->mi_acdirmax);
26107c478bd9Sstevel@tonic-gate mik->mik_noresponse = (uint32_t)mi->mi_noresponse;
26117c478bd9Sstevel@tonic-gate mik->mik_failover = (uint32_t)mi->mi_failover;
26127c478bd9Sstevel@tonic-gate mik->mik_remap = (uint32_t)mi->mi_remap;
26137c478bd9Sstevel@tonic-gate
26147c478bd9Sstevel@tonic-gate (void) strcpy(mik->mik_curserver, mi->mi_curr_serv->sv_hostname);
26157c478bd9Sstevel@tonic-gate
26167c478bd9Sstevel@tonic-gate return (0);
26177c478bd9Sstevel@tonic-gate }
26187c478bd9Sstevel@tonic-gate
26197c478bd9Sstevel@tonic-gate void
nfs4_mnt_kstat_init(struct vfs * vfsp)26207c478bd9Sstevel@tonic-gate nfs4_mnt_kstat_init(struct vfs *vfsp)
26217c478bd9Sstevel@tonic-gate {
26227c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VFTOMI4(vfsp);
26237c478bd9Sstevel@tonic-gate
26247c478bd9Sstevel@tonic-gate /*
26257c478bd9Sstevel@tonic-gate * PSARC 2001/697 Contract Private Interface
26267c478bd9Sstevel@tonic-gate * All nfs kstats are under SunMC contract
26277c478bd9Sstevel@tonic-gate * Please refer to the PSARC listed above and contact
26287c478bd9Sstevel@tonic-gate * SunMC before making any changes!
26297c478bd9Sstevel@tonic-gate *
26307c478bd9Sstevel@tonic-gate * Changes must be reviewed by Solaris File Sharing
26317c478bd9Sstevel@tonic-gate * Changes must be communicated to contract-2001-697@sun.com
26327c478bd9Sstevel@tonic-gate *
26337c478bd9Sstevel@tonic-gate */
26347c478bd9Sstevel@tonic-gate
26357c478bd9Sstevel@tonic-gate mi->mi_io_kstats = kstat_create_zone("nfs", getminor(vfsp->vfs_dev),
26367c478bd9Sstevel@tonic-gate NULL, "nfs", KSTAT_TYPE_IO, 1, 0, mi->mi_zone->zone_id);
26377c478bd9Sstevel@tonic-gate if (mi->mi_io_kstats) {
26387c478bd9Sstevel@tonic-gate if (mi->mi_zone->zone_id != GLOBAL_ZONEID)
26397c478bd9Sstevel@tonic-gate kstat_zone_add(mi->mi_io_kstats, GLOBAL_ZONEID);
26407c478bd9Sstevel@tonic-gate mi->mi_io_kstats->ks_lock = &mi->mi_lock;
26417c478bd9Sstevel@tonic-gate kstat_install(mi->mi_io_kstats);
26427c478bd9Sstevel@tonic-gate }
26437c478bd9Sstevel@tonic-gate
26447c478bd9Sstevel@tonic-gate if ((mi->mi_ro_kstats = kstat_create_zone("nfs",
26457c478bd9Sstevel@tonic-gate getminor(vfsp->vfs_dev), "mntinfo", "misc", KSTAT_TYPE_RAW,
26467c478bd9Sstevel@tonic-gate sizeof (struct mntinfo_kstat), 0, mi->mi_zone->zone_id)) != NULL) {
26477c478bd9Sstevel@tonic-gate if (mi->mi_zone->zone_id != GLOBAL_ZONEID)
26487c478bd9Sstevel@tonic-gate kstat_zone_add(mi->mi_ro_kstats, GLOBAL_ZONEID);
26497c478bd9Sstevel@tonic-gate mi->mi_ro_kstats->ks_update = nfs4_mnt_kstat_update;
26507c478bd9Sstevel@tonic-gate mi->mi_ro_kstats->ks_private = (void *)vfsp;
26517c478bd9Sstevel@tonic-gate kstat_install(mi->mi_ro_kstats);
26527c478bd9Sstevel@tonic-gate }
26537c478bd9Sstevel@tonic-gate
26547c478bd9Sstevel@tonic-gate nfs4_mnt_recov_kstat_init(vfsp);
26557c478bd9Sstevel@tonic-gate }
26567c478bd9Sstevel@tonic-gate
/*
 * Report a write error on vp to the console of the mount's zone.
 * Messages are suppressed entirely during a forced unmount and once
 * recovery has permanently failed.  ENOSPC/EDQUOT messages are
 * rate-limited per mount via mi_printftime so one full filesystem
 * cannot flood the console.
 */
void
nfs4_write_error(vnode_t *vp, int error, cred_t *cr)
{
	mntinfo4_t *mi;
	clock_t now = ddi_get_lbolt();

	mi = VTOMI4(vp);
	/*
	 * In case of forced unmount, do not print any messages
	 * since it can flood the console with error messages.
	 */
	if (mi->mi_vfsp->vfs_flag & VFS_UNMOUNTED)
		return;

	/*
	 * If the mount point is dead, not recoverable, do not
	 * print error messages that can flood the console.
	 */
	if (mi->mi_flags & MI4_RECOV_FAIL)
		return;

	/*
	 * No use in flooding the console with ENOSPC
	 * messages from the same file system.  Other errors print
	 * unconditionally; ENOSPC/EDQUOT print only after the
	 * rate-limit window (mi_printftime) has passed.
	 */
	if ((error != ENOSPC && error != EDQUOT) ||
	    now - mi->mi_printftime > 0) {
		zoneid_t zoneid = mi->mi_zone->zone_id;

#ifdef DEBUG
		nfs_perror(error, "NFS%ld write error on host %s: %m.\n",
		    mi->mi_vers, VTOR4(vp)->r_server->sv_hostname, NULL);
#else
		nfs_perror(error, "NFS write error on host %s: %m.\n",
		    VTOR4(vp)->r_server->sv_hostname, NULL);
#endif
		if (error == ENOSPC || error == EDQUOT) {
			/* Identify the file owner... */
			zcmn_err(zoneid, CE_CONT,
			    "^File: userid=%d, groupid=%d\n",
			    crgetuid(cr), crgetgid(cr));
			/* ...and the acting user, if different. */
			if (crgetuid(curthread->t_cred) != crgetuid(cr) ||
			    crgetgid(curthread->t_cred) != crgetgid(cr)) {
				zcmn_err(zoneid, CE_CONT,
				    "^User: userid=%d, groupid=%d\n",
				    crgetuid(curthread->t_cred),
				    crgetgid(curthread->t_cred));
			}
			/*
			 * Arm the rate limiter: no further ENOSPC/EDQUOT
			 * reports for nfs_write_error_interval seconds.
			 */
			mi->mi_printftime = now +
			    nfs_write_error_interval * hz;
		}
		sfh4_printfhandle(VTOR4(vp)->r_fh);
#ifdef DEBUG
		if (error == EACCES) {
			zcmn_err(zoneid, CE_CONT,
			    "nfs_bio: cred is%s kcred\n",
			    cr == kcred ? "" : " not");
		}
#endif
	}
}
27177c478bd9Sstevel@tonic-gate
27187c478bd9Sstevel@tonic-gate /*
27197c478bd9Sstevel@tonic-gate * Return non-zero if the given file can be safely memory mapped. Locks
27207c478bd9Sstevel@tonic-gate * are safe if whole-file (length and offset are both zero).
27217c478bd9Sstevel@tonic-gate */
27227c478bd9Sstevel@tonic-gate
27237c478bd9Sstevel@tonic-gate #define SAFE_LOCK(flk) ((flk).l_start == 0 && (flk).l_len == 0)
27247c478bd9Sstevel@tonic-gate
27257c478bd9Sstevel@tonic-gate static int
nfs4_safemap(const vnode_t * vp)27267c478bd9Sstevel@tonic-gate nfs4_safemap(const vnode_t *vp)
27277c478bd9Sstevel@tonic-gate {
27287c478bd9Sstevel@tonic-gate locklist_t *llp, *next_llp;
27297c478bd9Sstevel@tonic-gate int safe = 1;
27307c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp);
27317c478bd9Sstevel@tonic-gate
27327c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&rp->r_lkserlock, RW_WRITER));
27337c478bd9Sstevel@tonic-gate
27347c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, "nfs4_safemap: "
2735b9238976Sth "vp = %p", (void *)vp));
27367c478bd9Sstevel@tonic-gate
27377c478bd9Sstevel@tonic-gate /*
27387c478bd9Sstevel@tonic-gate * Review all the locks for the vnode, both ones that have been
27397c478bd9Sstevel@tonic-gate * acquired and ones that are pending. We assume that
27407c478bd9Sstevel@tonic-gate * flk_active_locks_for_vp() has merged any locks that can be
27417c478bd9Sstevel@tonic-gate * merged (so that if a process has the entire file locked, it is
27427c478bd9Sstevel@tonic-gate * represented as a single lock).
27437c478bd9Sstevel@tonic-gate *
27447c478bd9Sstevel@tonic-gate * Note that we can't bail out of the loop if we find a non-safe
27457c478bd9Sstevel@tonic-gate * lock, because we have to free all the elements in the llp list.
27467c478bd9Sstevel@tonic-gate * We might be able to speed up this code slightly by not looking
27477c478bd9Sstevel@tonic-gate * at each lock's l_start and l_len fields once we've found a
27487c478bd9Sstevel@tonic-gate * non-safe lock.
27497c478bd9Sstevel@tonic-gate */
27507c478bd9Sstevel@tonic-gate
27517c478bd9Sstevel@tonic-gate llp = flk_active_locks_for_vp(vp);
27527c478bd9Sstevel@tonic-gate while (llp) {
27537c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE,
27547c478bd9Sstevel@tonic-gate "nfs4_safemap: active lock (%" PRId64 ", %" PRId64 ")",
27557c478bd9Sstevel@tonic-gate llp->ll_flock.l_start, llp->ll_flock.l_len));
27567c478bd9Sstevel@tonic-gate if (!SAFE_LOCK(llp->ll_flock)) {
27577c478bd9Sstevel@tonic-gate safe = 0;
27587c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE,
27597c478bd9Sstevel@tonic-gate "nfs4_safemap: unsafe active lock (%" PRId64
27607c478bd9Sstevel@tonic-gate ", %" PRId64 ")", llp->ll_flock.l_start,
27617c478bd9Sstevel@tonic-gate llp->ll_flock.l_len));
27627c478bd9Sstevel@tonic-gate }
27637c478bd9Sstevel@tonic-gate next_llp = llp->ll_next;
27647c478bd9Sstevel@tonic-gate VN_RELE(llp->ll_vp);
27657c478bd9Sstevel@tonic-gate kmem_free(llp, sizeof (*llp));
27667c478bd9Sstevel@tonic-gate llp = next_llp;
27677c478bd9Sstevel@tonic-gate }
27687c478bd9Sstevel@tonic-gate
27697c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_map_debug, (CE_NOTE, "nfs4_safemap: %s",
2770b9238976Sth safe ? "safe" : "unsafe"));
27717c478bd9Sstevel@tonic-gate return (safe);
27727c478bd9Sstevel@tonic-gate }
27737c478bd9Sstevel@tonic-gate
27747c478bd9Sstevel@tonic-gate /*
27757c478bd9Sstevel@tonic-gate * Return whether there is a lost LOCK or LOCKU queued up for the given
27767c478bd9Sstevel@tonic-gate * file that would make an mmap request unsafe. cf. nfs4_safemap().
27777c478bd9Sstevel@tonic-gate */
27787c478bd9Sstevel@tonic-gate
27797c478bd9Sstevel@tonic-gate bool_t
nfs4_map_lost_lock_conflict(vnode_t * vp)27807c478bd9Sstevel@tonic-gate nfs4_map_lost_lock_conflict(vnode_t *vp)
27817c478bd9Sstevel@tonic-gate {
27827c478bd9Sstevel@tonic-gate bool_t conflict = FALSE;
27837c478bd9Sstevel@tonic-gate nfs4_lost_rqst_t *lrp;
27847c478bd9Sstevel@tonic-gate mntinfo4_t *mi = VTOMI4(vp);
27857c478bd9Sstevel@tonic-gate
27867c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
27877c478bd9Sstevel@tonic-gate for (lrp = list_head(&mi->mi_lost_state); lrp != NULL;
27887c478bd9Sstevel@tonic-gate lrp = list_next(&mi->mi_lost_state, lrp)) {
27897c478bd9Sstevel@tonic-gate if (lrp->lr_op != OP_LOCK && lrp->lr_op != OP_LOCKU)
27907c478bd9Sstevel@tonic-gate continue;
27917c478bd9Sstevel@tonic-gate ASSERT(lrp->lr_vp != NULL);
2792da6c28aaSamw if (!VOP_CMP(lrp->lr_vp, vp, NULL))
27937c478bd9Sstevel@tonic-gate continue; /* different file */
27947c478bd9Sstevel@tonic-gate if (!SAFE_LOCK(*lrp->lr_flk)) {
27957c478bd9Sstevel@tonic-gate conflict = TRUE;
27967c478bd9Sstevel@tonic-gate break;
27977c478bd9Sstevel@tonic-gate }
27987c478bd9Sstevel@tonic-gate }
27997c478bd9Sstevel@tonic-gate
28007c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
28017c478bd9Sstevel@tonic-gate return (conflict);
28027c478bd9Sstevel@tonic-gate }
28037c478bd9Sstevel@tonic-gate
28047c478bd9Sstevel@tonic-gate /*
28057c478bd9Sstevel@tonic-gate * nfs_lockcompletion:
28067c478bd9Sstevel@tonic-gate *
28077c478bd9Sstevel@tonic-gate * If the vnode has a lock that makes it unsafe to cache the file, mark it
28087c478bd9Sstevel@tonic-gate * as non cachable (set VNOCACHE bit).
28097c478bd9Sstevel@tonic-gate */
28107c478bd9Sstevel@tonic-gate
28117c478bd9Sstevel@tonic-gate void
nfs4_lockcompletion(vnode_t * vp,int cmd)28127c478bd9Sstevel@tonic-gate nfs4_lockcompletion(vnode_t *vp, int cmd)
28137c478bd9Sstevel@tonic-gate {
28147c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp);
28157c478bd9Sstevel@tonic-gate
28167c478bd9Sstevel@tonic-gate ASSERT(nfs_rw_lock_held(&rp->r_lkserlock, RW_WRITER));
28177c478bd9Sstevel@tonic-gate ASSERT(!IS_SHADOW(vp, rp));
28187c478bd9Sstevel@tonic-gate
28197c478bd9Sstevel@tonic-gate if (cmd == F_SETLK || cmd == F_SETLKW) {
28207c478bd9Sstevel@tonic-gate
28217c478bd9Sstevel@tonic-gate if (!nfs4_safemap(vp)) {
28227c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
28237c478bd9Sstevel@tonic-gate vp->v_flag |= VNOCACHE;
28247c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
28257c478bd9Sstevel@tonic-gate } else {
28267c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
28277c478bd9Sstevel@tonic-gate vp->v_flag &= ~VNOCACHE;
28287c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
28297c478bd9Sstevel@tonic-gate }
28307c478bd9Sstevel@tonic-gate }
28317c478bd9Sstevel@tonic-gate /*
28327c478bd9Sstevel@tonic-gate * The cached attributes of the file are stale after acquiring
28337c478bd9Sstevel@tonic-gate * the lock on the file. They were updated when the file was
28347c478bd9Sstevel@tonic-gate * opened, but not updated when the lock was acquired. Therefore the
28357c478bd9Sstevel@tonic-gate * cached attributes are invalidated after the lock is obtained.
28367c478bd9Sstevel@tonic-gate */
28377c478bd9Sstevel@tonic-gate PURGE_ATTRCACHE4(vp);
28387c478bd9Sstevel@tonic-gate }
28397c478bd9Sstevel@tonic-gate
28407c478bd9Sstevel@tonic-gate /* ARGSUSED */
28417c478bd9Sstevel@tonic-gate static void *
nfs4_mi_init(zoneid_t zoneid)28427c478bd9Sstevel@tonic-gate nfs4_mi_init(zoneid_t zoneid)
28437c478bd9Sstevel@tonic-gate {
28447c478bd9Sstevel@tonic-gate struct mi4_globals *mig;
28457c478bd9Sstevel@tonic-gate
28467c478bd9Sstevel@tonic-gate mig = kmem_alloc(sizeof (*mig), KM_SLEEP);
28477c478bd9Sstevel@tonic-gate mutex_init(&mig->mig_lock, NULL, MUTEX_DEFAULT, NULL);
28487c478bd9Sstevel@tonic-gate list_create(&mig->mig_list, sizeof (mntinfo4_t),
28497c478bd9Sstevel@tonic-gate offsetof(mntinfo4_t, mi_zone_node));
28507c478bd9Sstevel@tonic-gate mig->mig_destructor_called = B_FALSE;
28517c478bd9Sstevel@tonic-gate return (mig);
28527c478bd9Sstevel@tonic-gate }
28537c478bd9Sstevel@tonic-gate
28547c478bd9Sstevel@tonic-gate /*
28557c478bd9Sstevel@tonic-gate * Callback routine to tell all NFSv4 mounts in the zone to start tearing down
28567c478bd9Sstevel@tonic-gate * state and killing off threads.
28577c478bd9Sstevel@tonic-gate */
28587c478bd9Sstevel@tonic-gate /* ARGSUSED */
28597c478bd9Sstevel@tonic-gate static void
nfs4_mi_shutdown(zoneid_t zoneid,void * data)28607c478bd9Sstevel@tonic-gate nfs4_mi_shutdown(zoneid_t zoneid, void *data)
28617c478bd9Sstevel@tonic-gate {
28627c478bd9Sstevel@tonic-gate struct mi4_globals *mig = data;
28637c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
28647c478bd9Sstevel@tonic-gate nfs4_server_t *np;
28657c478bd9Sstevel@tonic-gate
28667c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
28677c478bd9Sstevel@tonic-gate "nfs4_mi_shutdown zone %d\n", zoneid));
28687c478bd9Sstevel@tonic-gate ASSERT(mig != NULL);
	/*
	 * Drain the zone's mount list: repeatedly detach the list head,
	 * stop its worker/manager/inactive threads, wait out recovery,
	 * and drop the holds taken in nfs4_mi_zonelist_add(), until the
	 * list is empty.
	 */
286950a83466Sjwahlig for (;;) {
287050a83466Sjwahlig mutex_enter(&mig->mig_lock);
287150a83466Sjwahlig mi = list_head(&mig->mig_list);
287250a83466Sjwahlig if (mi == NULL) {
287350a83466Sjwahlig mutex_exit(&mig->mig_lock);
287450a83466Sjwahlig break;
287550a83466Sjwahlig }
28763fd6cc29Sthurlow
28777c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
28787c478bd9Sstevel@tonic-gate "nfs4_mi_shutdown stopping vfs %p\n", (void *)mi->mi_vfsp));
28797c478bd9Sstevel@tonic-gate /*
28807c478bd9Sstevel@tonic-gate * purge the DNLC for this filesystem
28817c478bd9Sstevel@tonic-gate */
28827c478bd9Sstevel@tonic-gate (void) dnlc_purge_vfsp(mi->mi_vfsp, 0);
28837c478bd9Sstevel@tonic-gate /*
28847c478bd9Sstevel@tonic-gate * Tell existing async worker threads to exit.
28857c478bd9Sstevel@tonic-gate */
288650a83466Sjwahlig mutex_enter(&mi->mi_async_lock);
28877c478bd9Sstevel@tonic-gate mi->mi_max_threads = 0;
28880776f5e6SVallish Vaidyeshwara NFS4_WAKEALL_ASYNC_WORKERS(mi->mi_async_work_cv);
28897c478bd9Sstevel@tonic-gate /*
289050a83466Sjwahlig * Set the appropriate flags, signal and wait for both the
289150a83466Sjwahlig * async manager and the inactive thread to exit when they're
289250a83466Sjwahlig * done with their current work.
28937c478bd9Sstevel@tonic-gate */
28947c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
28957c478bd9Sstevel@tonic-gate mi->mi_flags |= (MI4_ASYNC_MGR_STOP|MI4_DEAD);
28967c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
289750a83466Sjwahlig mutex_exit(&mi->mi_async_lock);
289850a83466Sjwahlig if (mi->mi_manager_thread) {
289950a83466Sjwahlig nfs4_async_manager_stop(mi->mi_vfsp);
290050a83466Sjwahlig }
290150a83466Sjwahlig if (mi->mi_inactive_thread) {
290250a83466Sjwahlig mutex_enter(&mi->mi_async_lock);
290350a83466Sjwahlig cv_signal(&mi->mi_inact_req_cv);
290450a83466Sjwahlig /*
290550a83466Sjwahlig * Wait for the inactive thread to exit.
290650a83466Sjwahlig */
290750a83466Sjwahlig while (mi->mi_inactive_thread != NULL) {
290850a83466Sjwahlig cv_wait(&mi->mi_async_cv, &mi->mi_async_lock);
290950a83466Sjwahlig }
291050a83466Sjwahlig mutex_exit(&mi->mi_async_lock);
291150a83466Sjwahlig }
29127c478bd9Sstevel@tonic-gate /*
291350a83466Sjwahlig * Wait for the recovery thread to complete, that is, it will
291450a83466Sjwahlig * signal when it is done using the "mi" structure and about
291550a83466Sjwahlig * to exit
29167c478bd9Sstevel@tonic-gate */
291750a83466Sjwahlig mutex_enter(&mi->mi_lock);
291850a83466Sjwahlig while (mi->mi_in_recovery > 0)
291950a83466Sjwahlig cv_wait(&mi->mi_cv_in_recov, &mi->mi_lock);
292050a83466Sjwahlig mutex_exit(&mi->mi_lock);
29213fd6cc29Sthurlow /*
29223fd6cc29Sthurlow * We're done when every mi has been done or the list is empty.
292350a83466Sjwahlig * This one is done, remove it from the list.
29243fd6cc29Sthurlow */
292550a83466Sjwahlig list_remove(&mig->mig_list, mi);
29263fd6cc29Sthurlow mutex_exit(&mig->mig_lock);
2927a19609f8Sjv zone_rele_ref(&mi->mi_zone_ref, ZONE_REF_NFSV4);
2928a19609f8Sjv
292950a83466Sjwahlig /*
293050a83466Sjwahlig * Release hold on vfs and mi done to prevent race with zone
293150a83466Sjwahlig * shutdown. This releases the hold in nfs4_mi_zonelist_add.
293250a83466Sjwahlig */
29333fd6cc29Sthurlow VFS_RELE(mi->mi_vfsp);
293450a83466Sjwahlig MI4_RELE(mi);
29357c478bd9Sstevel@tonic-gate }
29367c478bd9Sstevel@tonic-gate /*
29377c478bd9Sstevel@tonic-gate * Tell each renew thread in the zone to exit
29387c478bd9Sstevel@tonic-gate */
29397c478bd9Sstevel@tonic-gate mutex_enter(&nfs4_server_lst_lock);
29407c478bd9Sstevel@tonic-gate for (np = nfs4_server_lst.forw; np != &nfs4_server_lst; np = np->forw) {
29417c478bd9Sstevel@tonic-gate mutex_enter(&np->s_lock);
29427c478bd9Sstevel@tonic-gate if (np->zoneid == zoneid) {
29437c478bd9Sstevel@tonic-gate /*
29447c478bd9Sstevel@tonic-gate * We add another hold onto the nfs4_server_t
29457c478bd9Sstevel@tonic-gate * because this will make sure that the nfs4_server_t
29467c478bd9Sstevel@tonic-gate * stays around until nfs4_callback_fini_zone destroys
29477c478bd9Sstevel@tonic-gate * the zone. This way, the renew thread can
29487c478bd9Sstevel@tonic-gate * unconditionally release its holds on the
29497c478bd9Sstevel@tonic-gate * nfs4_server_t.
29507c478bd9Sstevel@tonic-gate */
29517c478bd9Sstevel@tonic-gate np->s_refcnt++;
29527c478bd9Sstevel@tonic-gate nfs4_mark_srv_dead(np);
29537c478bd9Sstevel@tonic-gate }
29547c478bd9Sstevel@tonic-gate mutex_exit(&np->s_lock);
29557c478bd9Sstevel@tonic-gate }
29567c478bd9Sstevel@tonic-gate mutex_exit(&nfs4_server_lst_lock);
29577c478bd9Sstevel@tonic-gate }
29587c478bd9Sstevel@tonic-gate
29597c478bd9Sstevel@tonic-gate static void
nfs4_mi_free_globals(struct mi4_globals * mig)29607c478bd9Sstevel@tonic-gate nfs4_mi_free_globals(struct mi4_globals *mig)
29617c478bd9Sstevel@tonic-gate {
29627c478bd9Sstevel@tonic-gate list_destroy(&mig->mig_list); /* makes sure the list is empty */
29637c478bd9Sstevel@tonic-gate mutex_destroy(&mig->mig_lock);
29647c478bd9Sstevel@tonic-gate kmem_free(mig, sizeof (*mig));
29657c478bd9Sstevel@tonic-gate }
29667c478bd9Sstevel@tonic-gate
29677c478bd9Sstevel@tonic-gate /* ARGSUSED */
29687c478bd9Sstevel@tonic-gate static void
nfs4_mi_destroy(zoneid_t zoneid,void * data)29697c478bd9Sstevel@tonic-gate nfs4_mi_destroy(zoneid_t zoneid, void *data)
29707c478bd9Sstevel@tonic-gate {
29717c478bd9Sstevel@tonic-gate struct mi4_globals *mig = data;
29727c478bd9Sstevel@tonic-gate
29737c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_zone_debug, (CE_NOTE,
29747c478bd9Sstevel@tonic-gate "nfs4_mi_destroy zone %d\n", zoneid));
29757c478bd9Sstevel@tonic-gate ASSERT(mig != NULL);
29767c478bd9Sstevel@tonic-gate mutex_enter(&mig->mig_lock);
29777c478bd9Sstevel@tonic-gate if (list_head(&mig->mig_list) != NULL) {
29787c478bd9Sstevel@tonic-gate /* Still waiting for VFS_FREEVFS() */
29797c478bd9Sstevel@tonic-gate mig->mig_destructor_called = B_TRUE;
29807c478bd9Sstevel@tonic-gate mutex_exit(&mig->mig_lock);
29817c478bd9Sstevel@tonic-gate return;
29827c478bd9Sstevel@tonic-gate }
29837c478bd9Sstevel@tonic-gate nfs4_mi_free_globals(mig);
29847c478bd9Sstevel@tonic-gate }
29857c478bd9Sstevel@tonic-gate
29867c478bd9Sstevel@tonic-gate /*
29877c478bd9Sstevel@tonic-gate * Add an NFS mount to the per-zone list of NFS mounts.
29887c478bd9Sstevel@tonic-gate */
29897c478bd9Sstevel@tonic-gate void
nfs4_mi_zonelist_add(mntinfo4_t * mi)29907c478bd9Sstevel@tonic-gate nfs4_mi_zonelist_add(mntinfo4_t *mi)
29917c478bd9Sstevel@tonic-gate {
29927c478bd9Sstevel@tonic-gate struct mi4_globals *mig;
29937c478bd9Sstevel@tonic-gate
29947c478bd9Sstevel@tonic-gate mig = zone_getspecific(mi4_list_key, mi->mi_zone);
29957c478bd9Sstevel@tonic-gate mutex_enter(&mig->mig_lock);
29967c478bd9Sstevel@tonic-gate list_insert_head(&mig->mig_list, mi);
299750a83466Sjwahlig /*
299850a83466Sjwahlig * hold added to eliminate race with zone shutdown -this will be
299950a83466Sjwahlig * released in mi_shutdown
300050a83466Sjwahlig */
300150a83466Sjwahlig MI4_HOLD(mi);
300250a83466Sjwahlig VFS_HOLD(mi->mi_vfsp);
30037c478bd9Sstevel@tonic-gate mutex_exit(&mig->mig_lock);
30047c478bd9Sstevel@tonic-gate }
30057c478bd9Sstevel@tonic-gate
30067c478bd9Sstevel@tonic-gate /*
30077c478bd9Sstevel@tonic-gate * Remove an NFS mount from the per-zone list of NFS mounts.
30087c478bd9Sstevel@tonic-gate */
300950a83466Sjwahlig int
nfs4_mi_zonelist_remove(mntinfo4_t * mi)30107c478bd9Sstevel@tonic-gate nfs4_mi_zonelist_remove(mntinfo4_t *mi)
30117c478bd9Sstevel@tonic-gate {
30127c478bd9Sstevel@tonic-gate struct mi4_globals *mig;
301350a83466Sjwahlig int ret = 0;
30147c478bd9Sstevel@tonic-gate
30157c478bd9Sstevel@tonic-gate mig = zone_getspecific(mi4_list_key, mi->mi_zone);
30167c478bd9Sstevel@tonic-gate mutex_enter(&mig->mig_lock);
301750a83466Sjwahlig mutex_enter(&mi->mi_lock);
301850a83466Sjwahlig /* if this mi is marked dead, then the zone already released it */
301950a83466Sjwahlig if (!(mi->mi_flags & MI4_DEAD)) {
302050a83466Sjwahlig list_remove(&mig->mig_list, mi);
30211dc00f28SJames Wahlig mutex_exit(&mi->mi_lock);
302250a83466Sjwahlig
302350a83466Sjwahlig /* release the holds put on in zonelist_add(). */
302450a83466Sjwahlig VFS_RELE(mi->mi_vfsp);
302550a83466Sjwahlig MI4_RELE(mi);
302650a83466Sjwahlig ret = 1;
30271dc00f28SJames Wahlig } else {
30281dc00f28SJames Wahlig mutex_exit(&mi->mi_lock);
302950a83466Sjwahlig }
303050a83466Sjwahlig
30317c478bd9Sstevel@tonic-gate /*
30327c478bd9Sstevel@tonic-gate * We can be called asynchronously by VFS_FREEVFS() after the zone
30337c478bd9Sstevel@tonic-gate * shutdown/destroy callbacks have executed; if so, clean up the zone's
30347c478bd9Sstevel@tonic-gate * mi globals.
30357c478bd9Sstevel@tonic-gate */
30367c478bd9Sstevel@tonic-gate if (list_head(&mig->mig_list) == NULL &&
30377c478bd9Sstevel@tonic-gate mig->mig_destructor_called == B_TRUE) {
30387c478bd9Sstevel@tonic-gate nfs4_mi_free_globals(mig);
303950a83466Sjwahlig return (ret);
30407c478bd9Sstevel@tonic-gate }
30417c478bd9Sstevel@tonic-gate mutex_exit(&mig->mig_lock);
304250a83466Sjwahlig return (ret);
30437c478bd9Sstevel@tonic-gate }
30447c478bd9Sstevel@tonic-gate
30457c478bd9Sstevel@tonic-gate void
nfs_free_mi4(mntinfo4_t * mi)30467c478bd9Sstevel@tonic-gate nfs_free_mi4(mntinfo4_t *mi)
30477c478bd9Sstevel@tonic-gate {
30487c478bd9Sstevel@tonic-gate nfs4_open_owner_t *foop;
304950a83466Sjwahlig nfs4_oo_hash_bucket_t *bucketp;
30507c478bd9Sstevel@tonic-gate nfs4_debug_msg_t *msgp;
30517c478bd9Sstevel@tonic-gate int i;
3052*6dc7d057SMarcel Telka servinfo4_t *svp;
30537c478bd9Sstevel@tonic-gate
3054f0558703SVallish Vaidyeshwara /*
3055f0558703SVallish Vaidyeshwara * Code introduced here should be carefully evaluated to make
3056f0558703SVallish Vaidyeshwara * sure none of the freed resources are accessed either directly
3057f0558703SVallish Vaidyeshwara * or indirectly after freeing them. For eg: Introducing calls to
3058f0558703SVallish Vaidyeshwara * NFS4_DEBUG that use mntinfo4_t structure member after freeing
3059f0558703SVallish Vaidyeshwara * the structure members or other routines calling back into NFS
3060f0558703SVallish Vaidyeshwara * accessing freed mntinfo4_t structure member.
3061f0558703SVallish Vaidyeshwara */
30627c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_lock);
30637c478bd9Sstevel@tonic-gate ASSERT(mi->mi_recovthread == NULL);
30647c478bd9Sstevel@tonic-gate ASSERT(mi->mi_flags & MI4_ASYNC_MGR_STOP);
30657c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_lock);
30667c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_async_lock);
30670776f5e6SVallish Vaidyeshwara ASSERT(mi->mi_threads[NFS4_ASYNC_QUEUE] == 0 &&
30680776f5e6SVallish Vaidyeshwara mi->mi_threads[NFS4_ASYNC_PGOPS_QUEUE] == 0);
30697c478bd9Sstevel@tonic-gate ASSERT(mi->mi_manager_thread == NULL);
307050a83466Sjwahlig mutex_exit(&mi->mi_async_lock);
307150a83466Sjwahlig if (mi->mi_io_kstats) {
307250a83466Sjwahlig kstat_delete(mi->mi_io_kstats);
307350a83466Sjwahlig mi->mi_io_kstats = NULL;
30747c478bd9Sstevel@tonic-gate }
307550a83466Sjwahlig if (mi->mi_ro_kstats) {
307650a83466Sjwahlig kstat_delete(mi->mi_ro_kstats);
307750a83466Sjwahlig mi->mi_ro_kstats = NULL;
307850a83466Sjwahlig }
307950a83466Sjwahlig if (mi->mi_recov_ksp) {
308050a83466Sjwahlig kstat_delete(mi->mi_recov_ksp);
308150a83466Sjwahlig mi->mi_recov_ksp = NULL;
30827c478bd9Sstevel@tonic-gate }
30837c478bd9Sstevel@tonic-gate mutex_enter(&mi->mi_msg_list_lock);
30847c478bd9Sstevel@tonic-gate while (msgp = list_head(&mi->mi_msg_list)) {
30857c478bd9Sstevel@tonic-gate list_remove(&mi->mi_msg_list, msgp);
30867c478bd9Sstevel@tonic-gate nfs4_free_msg(msgp);
30877c478bd9Sstevel@tonic-gate }
30887c478bd9Sstevel@tonic-gate mutex_exit(&mi->mi_msg_list_lock);
30897c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_msg_list);
3090bbf2a467SNagakiran Rajashekar if (mi->mi_fname != NULL)
3091bbf2a467SNagakiran Rajashekar fn_rele(&mi->mi_fname);
30927c478bd9Sstevel@tonic-gate if (mi->mi_rootfh != NULL)
30937c478bd9Sstevel@tonic-gate sfh4_rele(&mi->mi_rootfh);
30947c478bd9Sstevel@tonic-gate if (mi->mi_srvparentfh != NULL)
30957c478bd9Sstevel@tonic-gate sfh4_rele(&mi->mi_srvparentfh);
3096f0558703SVallish Vaidyeshwara svp = mi->mi_servers;
3097f0558703SVallish Vaidyeshwara sv4_free(svp);
30987c478bd9Sstevel@tonic-gate mutex_destroy(&mi->mi_lock);
30997c478bd9Sstevel@tonic-gate mutex_destroy(&mi->mi_async_lock);
31007c478bd9Sstevel@tonic-gate mutex_destroy(&mi->mi_msg_list_lock);
3101e010bda9SMarcel Telka mutex_destroy(&mi->mi_rnodes_lock);
31027c478bd9Sstevel@tonic-gate nfs_rw_destroy(&mi->mi_recovlock);
31037c478bd9Sstevel@tonic-gate nfs_rw_destroy(&mi->mi_rename_lock);
31047c478bd9Sstevel@tonic-gate nfs_rw_destroy(&mi->mi_fh_lock);
31057c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_failover_cv);
31067c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_async_reqs_cv);
31070776f5e6SVallish Vaidyeshwara cv_destroy(&mi->mi_async_work_cv[NFS4_ASYNC_QUEUE]);
31080776f5e6SVallish Vaidyeshwara cv_destroy(&mi->mi_async_work_cv[NFS4_ASYNC_PGOPS_QUEUE]);
31097c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_async_cv);
31107c478bd9Sstevel@tonic-gate cv_destroy(&mi->mi_inact_req_cv);
31117c478bd9Sstevel@tonic-gate /*
31127c478bd9Sstevel@tonic-gate * Destroy the oo hash lists and mutexes for the cred hash table.
31137c478bd9Sstevel@tonic-gate */
31147c478bd9Sstevel@tonic-gate for (i = 0; i < NFS4_NUM_OO_BUCKETS; i++) {
31157c478bd9Sstevel@tonic-gate bucketp = &(mi->mi_oo_list[i]);
31167c478bd9Sstevel@tonic-gate /* Destroy any remaining open owners on the list */
31177c478bd9Sstevel@tonic-gate foop = list_head(&bucketp->b_oo_hash_list);
31187c478bd9Sstevel@tonic-gate while (foop != NULL) {
31197c478bd9Sstevel@tonic-gate list_remove(&bucketp->b_oo_hash_list, foop);
31207c478bd9Sstevel@tonic-gate nfs4_destroy_open_owner(foop);
31217c478bd9Sstevel@tonic-gate foop = list_head(&bucketp->b_oo_hash_list);
31227c478bd9Sstevel@tonic-gate }
31237c478bd9Sstevel@tonic-gate list_destroy(&bucketp->b_oo_hash_list);
31247c478bd9Sstevel@tonic-gate mutex_destroy(&bucketp->b_lock);
31257c478bd9Sstevel@tonic-gate }
31267c478bd9Sstevel@tonic-gate /*
31277c478bd9Sstevel@tonic-gate * Empty and destroy the freed open owner list.
31287c478bd9Sstevel@tonic-gate */
31297c478bd9Sstevel@tonic-gate foop = list_head(&mi->mi_foo_list);
31307c478bd9Sstevel@tonic-gate while (foop != NULL) {
31317c478bd9Sstevel@tonic-gate list_remove(&mi->mi_foo_list, foop);
31327c478bd9Sstevel@tonic-gate nfs4_destroy_open_owner(foop);
31337c478bd9Sstevel@tonic-gate foop = list_head(&mi->mi_foo_list);
31347c478bd9Sstevel@tonic-gate }
31357c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_foo_list);
31367c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_bseqid_list);
31377c478bd9Sstevel@tonic-gate list_destroy(&mi->mi_lost_state);
3138e010bda9SMarcel Telka list_destroy(&mi->mi_rnodes);
31397c478bd9Sstevel@tonic-gate avl_destroy(&mi->mi_filehandles);
31407c478bd9Sstevel@tonic-gate kmem_free(mi, sizeof (*mi));
31417c478bd9Sstevel@tonic-gate }
314250a83466Sjwahlig void
mi_hold(mntinfo4_t * mi)314350a83466Sjwahlig mi_hold(mntinfo4_t *mi)
314450a83466Sjwahlig {
31451a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&mi->mi_count);
314650a83466Sjwahlig ASSERT(mi->mi_count != 0);
314750a83466Sjwahlig }
314850a83466Sjwahlig
314950a83466Sjwahlig void
mi_rele(mntinfo4_t * mi)315050a83466Sjwahlig mi_rele(mntinfo4_t *mi)
315150a83466Sjwahlig {
315250a83466Sjwahlig ASSERT(mi->mi_count != 0);
31531a5e258fSJosef 'Jeff' Sipek if (atomic_dec_32_nv(&mi->mi_count) == 0) {
315450a83466Sjwahlig nfs_free_mi4(mi);
315550a83466Sjwahlig }
315650a83466Sjwahlig }
31577c478bd9Sstevel@tonic-gate
31587c478bd9Sstevel@tonic-gate vnode_t nfs4_xattr_notsupp_vnode;
31597c478bd9Sstevel@tonic-gate
31607c478bd9Sstevel@tonic-gate void
nfs4_clnt_init(void)31617c478bd9Sstevel@tonic-gate nfs4_clnt_init(void)
31627c478bd9Sstevel@tonic-gate {
31637c478bd9Sstevel@tonic-gate nfs4_vnops_init();
31647c478bd9Sstevel@tonic-gate (void) nfs4_rnode_init();
31657c478bd9Sstevel@tonic-gate (void) nfs4_shadow_init();
31667c478bd9Sstevel@tonic-gate (void) nfs4_acache_init();
31677c478bd9Sstevel@tonic-gate (void) nfs4_subr_init();
31687c478bd9Sstevel@tonic-gate nfs4_acl_init();
31697c478bd9Sstevel@tonic-gate nfs_idmap_init();
31707c478bd9Sstevel@tonic-gate nfs4_callback_init();
31717c478bd9Sstevel@tonic-gate nfs4_secinfo_init();
31727c478bd9Sstevel@tonic-gate #ifdef DEBUG
31737c478bd9Sstevel@tonic-gate tsd_create(&nfs4_tsd_key, NULL);
31747c478bd9Sstevel@tonic-gate #endif
31757c478bd9Sstevel@tonic-gate
31767c478bd9Sstevel@tonic-gate /*
31777c478bd9Sstevel@tonic-gate * Add a CPR callback so that we can update client
31787c478bd9Sstevel@tonic-gate * lease after a suspend and resume.
31797c478bd9Sstevel@tonic-gate */
31807c478bd9Sstevel@tonic-gate cid = callb_add(nfs4_client_cpr_callb, 0, CB_CL_CPR_RPC, "nfs4");
31817c478bd9Sstevel@tonic-gate
31827c478bd9Sstevel@tonic-gate zone_key_create(&mi4_list_key, nfs4_mi_init, nfs4_mi_shutdown,
31837c478bd9Sstevel@tonic-gate nfs4_mi_destroy);
31847c478bd9Sstevel@tonic-gate
31857c478bd9Sstevel@tonic-gate /*
3186ade42b55SSebastien Roy * Initialize the reference count of the notsupp xattr cache vnode to 1
31877da74b76SPrakash Surya * so that it never goes away (VOP_INACTIVE isn't called on it).
31887da74b76SPrakash Surya */
31897da74b76SPrakash Surya vn_reinit(&nfs4_xattr_notsupp_vnode);
31907c478bd9Sstevel@tonic-gate }
31917c478bd9Sstevel@tonic-gate
31927c478bd9Sstevel@tonic-gate void
nfs4_clnt_fini(void)31937c478bd9Sstevel@tonic-gate nfs4_clnt_fini(void)
31947c478bd9Sstevel@tonic-gate {
31957c478bd9Sstevel@tonic-gate (void) zone_key_delete(mi4_list_key);
31967c478bd9Sstevel@tonic-gate nfs4_vnops_fini();
31977c478bd9Sstevel@tonic-gate (void) nfs4_rnode_fini();
31987c478bd9Sstevel@tonic-gate (void) nfs4_shadow_fini();
31997c478bd9Sstevel@tonic-gate (void) nfs4_acache_fini();
32007c478bd9Sstevel@tonic-gate (void) nfs4_subr_fini();
32017c478bd9Sstevel@tonic-gate nfs_idmap_fini();
32027c478bd9Sstevel@tonic-gate nfs4_callback_fini();
32037c478bd9Sstevel@tonic-gate nfs4_secinfo_fini();
32047c478bd9Sstevel@tonic-gate #ifdef DEBUG
32057c478bd9Sstevel@tonic-gate tsd_destroy(&nfs4_tsd_key);
32067c478bd9Sstevel@tonic-gate #endif
32077c478bd9Sstevel@tonic-gate if (cid)
32087c478bd9Sstevel@tonic-gate (void) callb_delete(cid);
32097c478bd9Sstevel@tonic-gate }
32107c478bd9Sstevel@tonic-gate
32117c478bd9Sstevel@tonic-gate /*ARGSUSED*/
32127c478bd9Sstevel@tonic-gate static boolean_t
nfs4_client_cpr_callb(void * arg,int code)32137c478bd9Sstevel@tonic-gate nfs4_client_cpr_callb(void *arg, int code)
32147c478bd9Sstevel@tonic-gate {
32157c478bd9Sstevel@tonic-gate /*
32167c478bd9Sstevel@tonic-gate * We get called for Suspend and Resume events.
32177c478bd9Sstevel@tonic-gate * For the suspend case we simply don't care!
32187c478bd9Sstevel@tonic-gate */
32197c478bd9Sstevel@tonic-gate if (code == CB_CODE_CPR_CHKPT) {
32207c478bd9Sstevel@tonic-gate return (B_TRUE);
32217c478bd9Sstevel@tonic-gate }
32227c478bd9Sstevel@tonic-gate
32237c478bd9Sstevel@tonic-gate /*
32247c478bd9Sstevel@tonic-gate * When we get to here we are in the process of
32257c478bd9Sstevel@tonic-gate * resuming the system from a previous suspend.
32267c478bd9Sstevel@tonic-gate */
32277c478bd9Sstevel@tonic-gate nfs4_client_resumed = gethrestime_sec();
32287c478bd9Sstevel@tonic-gate return (B_TRUE);
32297c478bd9Sstevel@tonic-gate }
32307c478bd9Sstevel@tonic-gate
32317c478bd9Sstevel@tonic-gate void
nfs4_renew_lease_thread(nfs4_server_t * sp)32327c478bd9Sstevel@tonic-gate nfs4_renew_lease_thread(nfs4_server_t *sp)
32337c478bd9Sstevel@tonic-gate {
32347c478bd9Sstevel@tonic-gate int error = 0;
32357c478bd9Sstevel@tonic-gate time_t tmp_last_renewal_time, tmp_time, tmp_now_time, kip_secs;
32367c478bd9Sstevel@tonic-gate clock_t tick_delay = 0;
32377c478bd9Sstevel@tonic-gate clock_t time_left = 0;
32387c478bd9Sstevel@tonic-gate callb_cpr_t cpr_info;
32397c478bd9Sstevel@tonic-gate kmutex_t cpr_lock;
32407c478bd9Sstevel@tonic-gate
	/*
	 * Per-server lease renewal thread.  Sleeps for roughly half the
	 * server's lease period (minus a propagation-delay fudge factor)
	 * and issues a RENEW op whenever no other over-the-wire call has
	 * implicitly renewed the lease in the meantime.  Runs until
	 * s_thread_exit is set to NFS4_THREAD_EXIT.  All sleeping is
	 * bracketed by CALLB_CPR_SAFE_BEGIN/END so the thread is safe to
	 * suspend for checkpoint/resume (CPR).
	 */
32417c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3242b9238976Sth "nfs4_renew_lease_thread: acting on sp 0x%p", (void*)sp));
32437c478bd9Sstevel@tonic-gate mutex_init(&cpr_lock, NULL, MUTEX_DEFAULT, NULL);
32447c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cpr_info, &cpr_lock, callb_generic_cpr, "nfsv4Lease");
32457c478bd9Sstevel@tonic-gate
32467c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock);
32477c478bd9Sstevel@tonic-gate /* sp->s_lease_time is set via a GETATTR */
32487c478bd9Sstevel@tonic-gate sp->last_renewal_time = gethrestime_sec();
32497c478bd9Sstevel@tonic-gate sp->lease_valid = NFS4_LEASE_UNINITIALIZED;
32507c478bd9Sstevel@tonic-gate ASSERT(sp->s_refcnt >= 1);
32517c478bd9Sstevel@tonic-gate
32527c478bd9Sstevel@tonic-gate for (;;) {
32537c478bd9Sstevel@tonic-gate if (!sp->state_ref_count ||
3254b9238976Sth sp->lease_valid != NFS4_LEASE_VALID) {
32557c478bd9Sstevel@tonic-gate
		/*
		 * Nothing to renew: either no state is held on this
		 * server or the lease is not (yet) valid.  Sleep for
		 * about half a lease period and re-check.
		 */
32567c478bd9Sstevel@tonic-gate kip_secs = MAX((sp->s_lease_time >> 1) -
3257b9238976Sth (3 * sp->propagation_delay.tv_sec), 1);
32587c478bd9Sstevel@tonic-gate
32597c478bd9Sstevel@tonic-gate tick_delay = SEC_TO_TICK(kip_secs);
32607c478bd9Sstevel@tonic-gate
32617c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3262b9238976Sth "nfs4_renew_lease_thread: no renew : thread "
3263b9238976Sth "wait %ld secs", kip_secs));
32647c478bd9Sstevel@tonic-gate
32657c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3266b9238976Sth "nfs4_renew_lease_thread: no renew : "
3267b9238976Sth "state_ref_count %d, lease_valid %d",
3268b9238976Sth sp->state_ref_count, sp->lease_valid));
32697c478bd9Sstevel@tonic-gate
32707c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock);
32717c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cpr_info);
32727c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock);
3273d3d50737SRafael Vanoni time_left = cv_reltimedwait(&sp->cv_thread_exit,
3274d3d50737SRafael Vanoni &sp->s_lock, tick_delay, TR_CLOCK_TICK);
32757c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock);
32767c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
32777c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock);
32787c478bd9Sstevel@tonic-gate
32797c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3280b9238976Sth "nfs4_renew_lease_thread: no renew: "
3281b9238976Sth "time left %ld", time_left));
32827c478bd9Sstevel@tonic-gate
32837c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT)
32847c478bd9Sstevel@tonic-gate goto die;
32857c478bd9Sstevel@tonic-gate continue;
32867c478bd9Sstevel@tonic-gate }
32877c478bd9Sstevel@tonic-gate
	/*
	 * Lease is valid and state is held: remember when it was last
	 * renewed, then sleep until roughly the lease half-life.
	 */
32887c478bd9Sstevel@tonic-gate tmp_last_renewal_time = sp->last_renewal_time;
32897c478bd9Sstevel@tonic-gate
32907c478bd9Sstevel@tonic-gate tmp_time = gethrestime_sec() - sp->last_renewal_time +
3291b9238976Sth (3 * sp->propagation_delay.tv_sec);
32927c478bd9Sstevel@tonic-gate
32937c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3294b9238976Sth "nfs4_renew_lease_thread: tmp_time %ld, "
3295b9238976Sth "sp->last_renewal_time %ld", tmp_time,
3296b9238976Sth sp->last_renewal_time));
32977c478bd9Sstevel@tonic-gate
32987c478bd9Sstevel@tonic-gate kip_secs = MAX((sp->s_lease_time >> 1) - tmp_time, 1);
32997c478bd9Sstevel@tonic-gate
33007c478bd9Sstevel@tonic-gate tick_delay = SEC_TO_TICK(kip_secs);
33017c478bd9Sstevel@tonic-gate
33027c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3303b9238976Sth "nfs4_renew_lease_thread: valid lease: sleep for %ld "
3304b9238976Sth "secs", kip_secs));
33057c478bd9Sstevel@tonic-gate
33067c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock);
33077c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cpr_info);
33087c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock);
3309d3d50737SRafael Vanoni time_left = cv_reltimedwait(&sp->cv_thread_exit, &sp->s_lock,
3310d3d50737SRafael Vanoni tick_delay, TR_CLOCK_TICK);
33117c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock);
33127c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
33137c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock);
33147c478bd9Sstevel@tonic-gate
33157c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3316b9238976Sth "nfs4_renew_lease_thread: valid lease: time left %ld :"
3317b9238976Sth "sp last_renewal_time %ld, nfs4_client_resumed %ld, "
3318b9238976Sth "tmp_last_renewal_time %ld", time_left,
3319b9238976Sth sp->last_renewal_time, nfs4_client_resumed,
3320b9238976Sth tmp_last_renewal_time));
33217c478bd9Sstevel@tonic-gate
33227c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT)
33237c478bd9Sstevel@tonic-gate goto die;
33247c478bd9Sstevel@tonic-gate
	/*
	 * Renew explicitly if no other OTW call renewed the lease while
	 * we slept, or if the system was suspended/resumed since the
	 * last renewal (the lease may have expired during the suspend).
	 */
33257c478bd9Sstevel@tonic-gate if (tmp_last_renewal_time == sp->last_renewal_time ||
3326b9238976Sth (nfs4_client_resumed != 0 &&
3327b9238976Sth nfs4_client_resumed > sp->last_renewal_time)) {
33287c478bd9Sstevel@tonic-gate /*
33297c478bd9Sstevel@tonic-gate * Issue RENEW op since we haven't renewed the lease
33307c478bd9Sstevel@tonic-gate * since we slept.
33317c478bd9Sstevel@tonic-gate */
33327c478bd9Sstevel@tonic-gate tmp_now_time = gethrestime_sec();
33337c478bd9Sstevel@tonic-gate error = nfs4renew(sp);
33347c478bd9Sstevel@tonic-gate /*
33357c478bd9Sstevel@tonic-gate * Need to re-acquire sp's lock, nfs4renew()
33367c478bd9Sstevel@tonic-gate * relinquishes it.
33377c478bd9Sstevel@tonic-gate */
33387c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock);
33397c478bd9Sstevel@tonic-gate
33407c478bd9Sstevel@tonic-gate /*
33417c478bd9Sstevel@tonic-gate * See if someone changed s_thread_exit while we gave
33427c478bd9Sstevel@tonic-gate * up s_lock.
33437c478bd9Sstevel@tonic-gate */
33447c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT)
33457c478bd9Sstevel@tonic-gate goto die;
33467c478bd9Sstevel@tonic-gate
33477c478bd9Sstevel@tonic-gate if (!error) {
33487c478bd9Sstevel@tonic-gate /*
33497c478bd9Sstevel@tonic-gate * check to see if we implicitly renewed while
33507c478bd9Sstevel@tonic-gate * we waited for a reply for our RENEW call.
33517c478bd9Sstevel@tonic-gate */
33527c478bd9Sstevel@tonic-gate if (tmp_last_renewal_time ==
3353b9238976Sth sp->last_renewal_time) {
33547c478bd9Sstevel@tonic-gate /* no implicit renew came */
33557c478bd9Sstevel@tonic-gate sp->last_renewal_time = tmp_now_time;
33567c478bd9Sstevel@tonic-gate } else {
33577c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug,
3358b9238976Sth (CE_NOTE, "renew_thread: did "
3359b9238976Sth "implicit renewal before reply "
3360b9238976Sth "from server for RENEW"));
33617c478bd9Sstevel@tonic-gate }
33627c478bd9Sstevel@tonic-gate } else {
33637c478bd9Sstevel@tonic-gate /* figure out error */
33647c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3365b9238976Sth "renew_thread: nfs4renew returned error"
3366b9238976Sth " %d", error));
33677c478bd9Sstevel@tonic-gate }
33687c478bd9Sstevel@tonic-gate
33697c478bd9Sstevel@tonic-gate }
33707c478bd9Sstevel@tonic-gate }
33717c478bd9Sstevel@tonic-gate
33727c478bd9Sstevel@tonic-gate die:
33737c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3374b9238976Sth "nfs4_renew_lease_thread: thread exiting"));
33757c478bd9Sstevel@tonic-gate
	/*
	 * Drain any outstanding over-the-wire calls (remaining CPR-safe
	 * while we block) before dropping our references and exiting.
	 */
33767c478bd9Sstevel@tonic-gate while (sp->s_otw_call_count != 0) {
33777c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3378b9238976Sth "nfs4_renew_lease_thread: waiting for outstanding "
3379b9238976Sth "otw calls to finish for sp 0x%p, current "
3380b9238976Sth "s_otw_call_count %d", (void *)sp,
3381b9238976Sth sp->s_otw_call_count));
33827c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock);
33837c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cpr_info);
33847c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock);
33857c478bd9Sstevel@tonic-gate cv_wait(&sp->s_cv_otw_count, &sp->s_lock);
33867c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock);
33877c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cpr_info, &cpr_lock);
33887c478bd9Sstevel@tonic-gate mutex_exit(&cpr_lock);
33897c478bd9Sstevel@tonic-gate }
33907c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
33917c478bd9Sstevel@tonic-gate
33927c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); /* free the thread's reference */
33937c478bd9Sstevel@tonic-gate nfs4_server_rele(sp); /* free the list's reference */
33947c478bd9Sstevel@tonic-gate sp = NULL;
33957c478bd9Sstevel@tonic-gate
	/* Reached by falling through from above; nothing jumps here. */
33967c478bd9Sstevel@tonic-gate done:
33977c478bd9Sstevel@tonic-gate mutex_enter(&cpr_lock);
33987c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cpr_info); /* drops cpr_lock */
33997c478bd9Sstevel@tonic-gate mutex_destroy(&cpr_lock);
34007c478bd9Sstevel@tonic-gate
34017c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3402b9238976Sth "nfs4_renew_lease_thread: renew thread exit officially"));
34037c478bd9Sstevel@tonic-gate
34047c478bd9Sstevel@tonic-gate zthread_exit();
34057c478bd9Sstevel@tonic-gate /* NOT REACHED */
34067c478bd9Sstevel@tonic-gate }
34077c478bd9Sstevel@tonic-gate
34087c478bd9Sstevel@tonic-gate /*
34097c478bd9Sstevel@tonic-gate * Send out a RENEW op to the server.
34107c478bd9Sstevel@tonic-gate * Assumes sp is locked down.
34117c478bd9Sstevel@tonic-gate */
34127c478bd9Sstevel@tonic-gate static int
nfs4renew(nfs4_server_t * sp)34137c478bd9Sstevel@tonic-gate nfs4renew(nfs4_server_t *sp)
34147c478bd9Sstevel@tonic-gate {
34157c478bd9Sstevel@tonic-gate COMPOUND4args_clnt args;
34167c478bd9Sstevel@tonic-gate COMPOUND4res_clnt res;
34177c478bd9Sstevel@tonic-gate nfs_argop4 argop[1];
34187c478bd9Sstevel@tonic-gate int doqueue = 1;
34197c478bd9Sstevel@tonic-gate int rpc_error;
34207c478bd9Sstevel@tonic-gate cred_t *cr;
34217c478bd9Sstevel@tonic-gate mntinfo4_t *mi;
34227c478bd9Sstevel@tonic-gate timespec_t prop_time, after_time;
34237c478bd9Sstevel@tonic-gate int needrecov = FALSE;
34247c478bd9Sstevel@tonic-gate nfs4_recov_state_t recov_state;
34257c478bd9Sstevel@tonic-gate nfs4_error_t e = { 0, NFS4_OK, RPC_SUCCESS };
34267c478bd9Sstevel@tonic-gate
34277c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "nfs4renew"));
34287c478bd9Sstevel@tonic-gate
34297c478bd9Sstevel@tonic-gate recov_state.rs_flags = 0;
34307c478bd9Sstevel@tonic-gate recov_state.rs_num_retry_despite_err = 0;
34317c478bd9Sstevel@tonic-gate
34327c478bd9Sstevel@tonic-gate recov_retry:
34337c478bd9Sstevel@tonic-gate mi = sp->mntinfo4_list;
34347c478bd9Sstevel@tonic-gate VFS_HOLD(mi->mi_vfsp);
34357c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
34367c478bd9Sstevel@tonic-gate ASSERT(mi != NULL);
34377c478bd9Sstevel@tonic-gate
34387c478bd9Sstevel@tonic-gate e.error = nfs4_start_op(mi, NULL, NULL, &recov_state);
34397c478bd9Sstevel@tonic-gate if (e.error) {
34407c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
34417c478bd9Sstevel@tonic-gate return (e.error);
34427c478bd9Sstevel@tonic-gate }
34437c478bd9Sstevel@tonic-gate
34447c478bd9Sstevel@tonic-gate /* Check to see if we're dealing with a marked-dead sp */
34457c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock);
34467c478bd9Sstevel@tonic-gate if (sp->s_thread_exit == NFS4_THREAD_EXIT) {
34477c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
34487c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
34497c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
34507c478bd9Sstevel@tonic-gate return (0);
34517c478bd9Sstevel@tonic-gate }
34527c478bd9Sstevel@tonic-gate
34537c478bd9Sstevel@tonic-gate /* Make sure mi hasn't changed on us */
34547c478bd9Sstevel@tonic-gate if (mi != sp->mntinfo4_list) {
34557c478bd9Sstevel@tonic-gate /* Must drop sp's lock to avoid a recursive mutex enter */
34567c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
34577c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
34587c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
34597c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock);
34607c478bd9Sstevel@tonic-gate goto recov_retry;
34617c478bd9Sstevel@tonic-gate }
34627c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
34637c478bd9Sstevel@tonic-gate
34647c478bd9Sstevel@tonic-gate args.ctag = TAG_RENEW;
34657c478bd9Sstevel@tonic-gate
34667c478bd9Sstevel@tonic-gate args.array_len = 1;
34677c478bd9Sstevel@tonic-gate args.array = argop;
34687c478bd9Sstevel@tonic-gate
34697c478bd9Sstevel@tonic-gate argop[0].argop = OP_RENEW;
34707c478bd9Sstevel@tonic-gate
34717c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock);
34727c478bd9Sstevel@tonic-gate argop[0].nfs_argop4_u.oprenew.clientid = sp->clientid;
34737c478bd9Sstevel@tonic-gate cr = sp->s_cred;
34747c478bd9Sstevel@tonic-gate crhold(cr);
34757c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
34767c478bd9Sstevel@tonic-gate
34777c478bd9Sstevel@tonic-gate ASSERT(cr != NULL);
34787c478bd9Sstevel@tonic-gate
34797c478bd9Sstevel@tonic-gate /* used to figure out RTT for sp */
34807c478bd9Sstevel@tonic-gate gethrestime(&prop_time);
34817c478bd9Sstevel@tonic-gate
34827c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_call_debug, (CE_NOTE,
34837c478bd9Sstevel@tonic-gate "nfs4renew: %s call, sp 0x%p", needrecov ? "recov" : "first",
34847c478bd9Sstevel@tonic-gate (void*)sp));
34857c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "before: %ld s %ld ns ",
3486b9238976Sth prop_time.tv_sec, prop_time.tv_nsec));
34877c478bd9Sstevel@tonic-gate
34887c478bd9Sstevel@tonic-gate DTRACE_PROBE2(nfs4__renew__start, nfs4_server_t *, sp,
3489b9238976Sth mntinfo4_t *, mi);
34907c478bd9Sstevel@tonic-gate
34917c478bd9Sstevel@tonic-gate rfs4call(mi, &args, &res, cr, &doqueue, 0, &e);
34927c478bd9Sstevel@tonic-gate crfree(cr);
34937c478bd9Sstevel@tonic-gate
34947c478bd9Sstevel@tonic-gate DTRACE_PROBE2(nfs4__renew__end, nfs4_server_t *, sp,
3495b9238976Sth mntinfo4_t *, mi);
34967c478bd9Sstevel@tonic-gate
34977c478bd9Sstevel@tonic-gate gethrestime(&after_time);
34987c478bd9Sstevel@tonic-gate
34997c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock);
35007c478bd9Sstevel@tonic-gate sp->propagation_delay.tv_sec =
3501b9238976Sth MAX(1, after_time.tv_sec - prop_time.tv_sec);
35027c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
35037c478bd9Sstevel@tonic-gate
35047c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE, "after : %ld s %ld ns ",
3505b9238976Sth after_time.tv_sec, after_time.tv_nsec));
35067c478bd9Sstevel@tonic-gate
35077c478bd9Sstevel@tonic-gate if (e.error == 0 && res.status == NFS4ERR_CB_PATH_DOWN) {
3508a17ce845SMarcel Telka xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
35097c478bd9Sstevel@tonic-gate nfs4_delegreturn_all(sp);
35107c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
35117c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
35127c478bd9Sstevel@tonic-gate /*
35137c478bd9Sstevel@tonic-gate * If the server returns CB_PATH_DOWN, it has renewed
35147c478bd9Sstevel@tonic-gate * the lease and informed us that the callback path is
35157c478bd9Sstevel@tonic-gate * down. Since the lease is renewed, just return 0 and
35167c478bd9Sstevel@tonic-gate * let the renew thread proceed as normal.
35177c478bd9Sstevel@tonic-gate */
35187c478bd9Sstevel@tonic-gate return (0);
35197c478bd9Sstevel@tonic-gate }
35207c478bd9Sstevel@tonic-gate
35217c478bd9Sstevel@tonic-gate needrecov = nfs4_needs_recovery(&e, FALSE, mi->mi_vfsp);
35227c478bd9Sstevel@tonic-gate if (!needrecov && e.error) {
35237c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
35247c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
35257c478bd9Sstevel@tonic-gate return (e.error);
35267c478bd9Sstevel@tonic-gate }
35277c478bd9Sstevel@tonic-gate
35287c478bd9Sstevel@tonic-gate rpc_error = e.error;
35297c478bd9Sstevel@tonic-gate
35307c478bd9Sstevel@tonic-gate if (needrecov) {
35317c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_recov_debug, (CE_NOTE,
35327c478bd9Sstevel@tonic-gate "nfs4renew: initiating recovery\n"));
35337c478bd9Sstevel@tonic-gate
35347c478bd9Sstevel@tonic-gate if (nfs4_start_recovery(&e, mi, NULL, NULL, NULL, NULL,
35352f172c55SRobert Thurlow OP_RENEW, NULL, NULL, NULL) == FALSE) {
35367c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
35377c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
35387c478bd9Sstevel@tonic-gate if (!e.error)
3539a17ce845SMarcel Telka xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
35407c478bd9Sstevel@tonic-gate mutex_enter(&sp->s_lock);
35417c478bd9Sstevel@tonic-gate goto recov_retry;
35427c478bd9Sstevel@tonic-gate }
35437c478bd9Sstevel@tonic-gate /* fall through for res.status case */
35447c478bd9Sstevel@tonic-gate }
35457c478bd9Sstevel@tonic-gate
35467c478bd9Sstevel@tonic-gate if (res.status) {
35477c478bd9Sstevel@tonic-gate if (res.status == NFS4ERR_LEASE_MOVED) {
35487c478bd9Sstevel@tonic-gate /*EMPTY*/
35497c478bd9Sstevel@tonic-gate /*
35507c478bd9Sstevel@tonic-gate * XXX need to try every mntinfo4 in sp->mntinfo4_list
35517c478bd9Sstevel@tonic-gate * to renew the lease on that server
35527c478bd9Sstevel@tonic-gate */
35537c478bd9Sstevel@tonic-gate }
35547c478bd9Sstevel@tonic-gate e.error = geterrno4(res.status);
35557c478bd9Sstevel@tonic-gate }
35567c478bd9Sstevel@tonic-gate
35577c478bd9Sstevel@tonic-gate if (!rpc_error)
3558a17ce845SMarcel Telka xdr_free(xdr_COMPOUND4res_clnt, (caddr_t)&res);
35597c478bd9Sstevel@tonic-gate
35607c478bd9Sstevel@tonic-gate nfs4_end_op(mi, NULL, NULL, &recov_state, needrecov);
35617c478bd9Sstevel@tonic-gate
35627c478bd9Sstevel@tonic-gate VFS_RELE(mi->mi_vfsp);
35637c478bd9Sstevel@tonic-gate
35647c478bd9Sstevel@tonic-gate return (e.error);
35657c478bd9Sstevel@tonic-gate }
35667c478bd9Sstevel@tonic-gate
35677c478bd9Sstevel@tonic-gate void
nfs4_inc_state_ref_count(mntinfo4_t * mi)35687c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count(mntinfo4_t *mi)
35697c478bd9Sstevel@tonic-gate {
35707c478bd9Sstevel@tonic-gate nfs4_server_t *sp;
35717c478bd9Sstevel@tonic-gate
35727c478bd9Sstevel@tonic-gate /* this locks down sp if it is found */
35737c478bd9Sstevel@tonic-gate sp = find_nfs4_server(mi);
35747c478bd9Sstevel@tonic-gate
35757c478bd9Sstevel@tonic-gate if (sp != NULL) {
35767c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count_nolock(sp, mi);
35777c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
35787c478bd9Sstevel@tonic-gate nfs4_server_rele(sp);
35797c478bd9Sstevel@tonic-gate }
35807c478bd9Sstevel@tonic-gate }
35817c478bd9Sstevel@tonic-gate
35827c478bd9Sstevel@tonic-gate /*
35837c478bd9Sstevel@tonic-gate * Bump the number of OPEN files (ie: those with state) so we know if this
35847c478bd9Sstevel@tonic-gate * nfs4_server has any state to maintain a lease for or not.
35857c478bd9Sstevel@tonic-gate *
35867c478bd9Sstevel@tonic-gate * Also, marks the nfs4_server's lease valid if it hasn't been done so already.
35877c478bd9Sstevel@tonic-gate */
35887c478bd9Sstevel@tonic-gate void
nfs4_inc_state_ref_count_nolock(nfs4_server_t * sp,mntinfo4_t * mi)35897c478bd9Sstevel@tonic-gate nfs4_inc_state_ref_count_nolock(nfs4_server_t *sp, mntinfo4_t *mi)
35907c478bd9Sstevel@tonic-gate {
35917c478bd9Sstevel@tonic-gate ASSERT(mutex_owned(&sp->s_lock));
35927c478bd9Sstevel@tonic-gate
35937c478bd9Sstevel@tonic-gate sp->state_ref_count++;
35947c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3595b9238976Sth "nfs4_inc_state_ref_count: state_ref_count now %d",
3596b9238976Sth sp->state_ref_count));
35977c478bd9Sstevel@tonic-gate
35987c478bd9Sstevel@tonic-gate if (sp->lease_valid == NFS4_LEASE_UNINITIALIZED)
35997c478bd9Sstevel@tonic-gate sp->lease_valid = NFS4_LEASE_VALID;
36007c478bd9Sstevel@tonic-gate
36017c478bd9Sstevel@tonic-gate /*
36027c478bd9Sstevel@tonic-gate * If this call caused the lease to be marked valid and/or
36037c478bd9Sstevel@tonic-gate * took the state_ref_count from 0 to 1, then start the time
36047c478bd9Sstevel@tonic-gate * on lease renewal.
36057c478bd9Sstevel@tonic-gate */
36067c478bd9Sstevel@tonic-gate if (sp->lease_valid == NFS4_LEASE_VALID && sp->state_ref_count == 1)
36077c478bd9Sstevel@tonic-gate sp->last_renewal_time = gethrestime_sec();
36087c478bd9Sstevel@tonic-gate
36097c478bd9Sstevel@tonic-gate /* update the number of open files for mi */
36107c478bd9Sstevel@tonic-gate mi->mi_open_files++;
36117c478bd9Sstevel@tonic-gate }
36127c478bd9Sstevel@tonic-gate
36137c478bd9Sstevel@tonic-gate void
nfs4_dec_state_ref_count(mntinfo4_t * mi)36147c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count(mntinfo4_t *mi)
36157c478bd9Sstevel@tonic-gate {
36167c478bd9Sstevel@tonic-gate nfs4_server_t *sp;
36177c478bd9Sstevel@tonic-gate
36187c478bd9Sstevel@tonic-gate /* this locks down sp if it is found */
36197c478bd9Sstevel@tonic-gate sp = find_nfs4_server_all(mi, 1);
36207c478bd9Sstevel@tonic-gate
36217c478bd9Sstevel@tonic-gate if (sp != NULL) {
36227c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count_nolock(sp, mi);
36237c478bd9Sstevel@tonic-gate mutex_exit(&sp->s_lock);
36247c478bd9Sstevel@tonic-gate nfs4_server_rele(sp);
36257c478bd9Sstevel@tonic-gate }
36267c478bd9Sstevel@tonic-gate }
36277c478bd9Sstevel@tonic-gate
36287c478bd9Sstevel@tonic-gate /*
36297c478bd9Sstevel@tonic-gate * Decrement the number of OPEN files (ie: those with state) so we know if
36307c478bd9Sstevel@tonic-gate * this nfs4_server has any state to maintain a lease for or not.
36317c478bd9Sstevel@tonic-gate */
36327c478bd9Sstevel@tonic-gate void
nfs4_dec_state_ref_count_nolock(nfs4_server_t * sp,mntinfo4_t * mi)36337c478bd9Sstevel@tonic-gate nfs4_dec_state_ref_count_nolock(nfs4_server_t *sp, mntinfo4_t *mi)
36347c478bd9Sstevel@tonic-gate {
36357c478bd9Sstevel@tonic-gate ASSERT(mutex_owned(&sp->s_lock));
36367c478bd9Sstevel@tonic-gate ASSERT(sp->state_ref_count != 0);
36377c478bd9Sstevel@tonic-gate sp->state_ref_count--;
36387c478bd9Sstevel@tonic-gate
36397c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3640b9238976Sth "nfs4_dec_state_ref_count: state ref count now %d",
3641b9238976Sth sp->state_ref_count));
36427c478bd9Sstevel@tonic-gate
36437c478bd9Sstevel@tonic-gate mi->mi_open_files--;
36447c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3645b9238976Sth "nfs4_dec_state_ref_count: mi open files %d, v4 flags 0x%x",
3646b9238976Sth mi->mi_open_files, mi->mi_flags));
36477c478bd9Sstevel@tonic-gate
36487c478bd9Sstevel@tonic-gate /* We don't have to hold the mi_lock to test mi_flags */
36497c478bd9Sstevel@tonic-gate if (mi->mi_open_files == 0 &&
36507c478bd9Sstevel@tonic-gate (mi->mi_flags & MI4_REMOVE_ON_LAST_CLOSE)) {
36517c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_client_lease_debug, (CE_NOTE,
3652b9238976Sth "nfs4_dec_state_ref_count: remove mntinfo4 %p since "
3653b9238976Sth "we have closed the last open file", (void*)mi));
36547c478bd9Sstevel@tonic-gate nfs4_remove_mi_from_server(mi, sp);
36557c478bd9Sstevel@tonic-gate }
36567c478bd9Sstevel@tonic-gate }
36577c478bd9Sstevel@tonic-gate
36587c478bd9Sstevel@tonic-gate bool_t
inlease(nfs4_server_t * sp)36597c478bd9Sstevel@tonic-gate inlease(nfs4_server_t *sp)
36607c478bd9Sstevel@tonic-gate {
36617c478bd9Sstevel@tonic-gate bool_t result;
36627c478bd9Sstevel@tonic-gate
36637c478bd9Sstevel@tonic-gate ASSERT(mutex_owned(&sp->s_lock));
36647c478bd9Sstevel@tonic-gate
36657c478bd9Sstevel@tonic-gate if (sp->lease_valid == NFS4_LEASE_VALID &&
36667c478bd9Sstevel@tonic-gate gethrestime_sec() < sp->last_renewal_time + sp->s_lease_time)
36677c478bd9Sstevel@tonic-gate result = TRUE;
36687c478bd9Sstevel@tonic-gate else
36697c478bd9Sstevel@tonic-gate result = FALSE;
36707c478bd9Sstevel@tonic-gate
36717c478bd9Sstevel@tonic-gate return (result);
36727c478bd9Sstevel@tonic-gate }
36737c478bd9Sstevel@tonic-gate
36747c478bd9Sstevel@tonic-gate
36757c478bd9Sstevel@tonic-gate /*
36767c478bd9Sstevel@tonic-gate * Return non-zero if the given nfs4_server_t is going through recovery.
36777c478bd9Sstevel@tonic-gate */
36787c478bd9Sstevel@tonic-gate
36797c478bd9Sstevel@tonic-gate int
nfs4_server_in_recovery(nfs4_server_t * sp)36807c478bd9Sstevel@tonic-gate nfs4_server_in_recovery(nfs4_server_t *sp)
36817c478bd9Sstevel@tonic-gate {
36827c478bd9Sstevel@tonic-gate return (nfs_rw_lock_held(&sp->s_recovlock, RW_WRITER));
36837c478bd9Sstevel@tonic-gate }
36847c478bd9Sstevel@tonic-gate
36857c478bd9Sstevel@tonic-gate /*
36867c478bd9Sstevel@tonic-gate * Compare two shared filehandle objects. Returns -1, 0, or +1, if the
36877c478bd9Sstevel@tonic-gate * first is less than, equal to, or greater than the second.
36887c478bd9Sstevel@tonic-gate */
36897c478bd9Sstevel@tonic-gate
36907c478bd9Sstevel@tonic-gate int
sfh4cmp(const void * p1,const void * p2)36917c478bd9Sstevel@tonic-gate sfh4cmp(const void *p1, const void *p2)
36927c478bd9Sstevel@tonic-gate {
36937c478bd9Sstevel@tonic-gate const nfs4_sharedfh_t *sfh1 = (const nfs4_sharedfh_t *)p1;
36947c478bd9Sstevel@tonic-gate const nfs4_sharedfh_t *sfh2 = (const nfs4_sharedfh_t *)p2;
36957c478bd9Sstevel@tonic-gate
36967c478bd9Sstevel@tonic-gate return (nfs4cmpfh(&sfh1->sfh_fh, &sfh2->sfh_fh));
36977c478bd9Sstevel@tonic-gate }
36987c478bd9Sstevel@tonic-gate
/*
 * Create a table for shared filehandle objects.
 */

void
sfh4_createtab(avl_tree_t *tab)
{
	/* entries ordered by filehandle (sfh4cmp), linked via sfh_tree */
	avl_create(tab, sfh4cmp, sizeof (nfs4_sharedfh_t),
	    offsetof(nfs4_sharedfh_t, sfh_tree));
}
37097c478bd9Sstevel@tonic-gate
37107c478bd9Sstevel@tonic-gate /*
37117c478bd9Sstevel@tonic-gate * Return a shared filehandle object for the given filehandle. The caller
37127c478bd9Sstevel@tonic-gate * is responsible for eventually calling sfh4_rele().
37137c478bd9Sstevel@tonic-gate */
37147c478bd9Sstevel@tonic-gate
37157c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *
sfh4_put(const nfs_fh4 * fh,mntinfo4_t * mi,nfs4_sharedfh_t * key)37167c478bd9Sstevel@tonic-gate sfh4_put(const nfs_fh4 *fh, mntinfo4_t *mi, nfs4_sharedfh_t *key)
37177c478bd9Sstevel@tonic-gate {
37187c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *sfh, *nsfh;
37197c478bd9Sstevel@tonic-gate avl_index_t where;
37207c478bd9Sstevel@tonic-gate nfs4_sharedfh_t skey;
37217c478bd9Sstevel@tonic-gate
37227c478bd9Sstevel@tonic-gate if (!key) {
37237c478bd9Sstevel@tonic-gate skey.sfh_fh = *fh;
37247c478bd9Sstevel@tonic-gate key = &skey;
37257c478bd9Sstevel@tonic-gate }
37267c478bd9Sstevel@tonic-gate
37277c478bd9Sstevel@tonic-gate nsfh = kmem_alloc(sizeof (nfs4_sharedfh_t), KM_SLEEP);
37287c478bd9Sstevel@tonic-gate nsfh->sfh_fh.nfs_fh4_len = fh->nfs_fh4_len;
37297c478bd9Sstevel@tonic-gate /*
37307c478bd9Sstevel@tonic-gate * We allocate the largest possible filehandle size because it's
37317c478bd9Sstevel@tonic-gate * not that big, and it saves us from possibly having to resize the
37327c478bd9Sstevel@tonic-gate * buffer later.
37337c478bd9Sstevel@tonic-gate */
37347c478bd9Sstevel@tonic-gate nsfh->sfh_fh.nfs_fh4_val = kmem_alloc(NFS4_FHSIZE, KM_SLEEP);
37357c478bd9Sstevel@tonic-gate bcopy(fh->nfs_fh4_val, nsfh->sfh_fh.nfs_fh4_val, fh->nfs_fh4_len);
37367c478bd9Sstevel@tonic-gate mutex_init(&nsfh->sfh_lock, NULL, MUTEX_DEFAULT, NULL);
37377c478bd9Sstevel@tonic-gate nsfh->sfh_refcnt = 1;
37387c478bd9Sstevel@tonic-gate nsfh->sfh_flags = SFH4_IN_TREE;
37397c478bd9Sstevel@tonic-gate nsfh->sfh_mi = mi;
37407c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE, "sfh4_get: new object (%p)",
3741b9238976Sth (void *)nsfh));
37427c478bd9Sstevel@tonic-gate
37437c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0);
37447c478bd9Sstevel@tonic-gate sfh = avl_find(&mi->mi_filehandles, key, &where);
37457c478bd9Sstevel@tonic-gate if (sfh != NULL) {
37467c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock);
37477c478bd9Sstevel@tonic-gate sfh->sfh_refcnt++;
37487c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock);
37497c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock);
37507c478bd9Sstevel@tonic-gate /* free our speculative allocs */
37517c478bd9Sstevel@tonic-gate kmem_free(nsfh->sfh_fh.nfs_fh4_val, NFS4_FHSIZE);
37527c478bd9Sstevel@tonic-gate kmem_free(nsfh, sizeof (nfs4_sharedfh_t));
37537c478bd9Sstevel@tonic-gate return (sfh);
37547c478bd9Sstevel@tonic-gate }
37557c478bd9Sstevel@tonic-gate
37567c478bd9Sstevel@tonic-gate avl_insert(&mi->mi_filehandles, nsfh, where);
37577c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock);
37587c478bd9Sstevel@tonic-gate
37597c478bd9Sstevel@tonic-gate return (nsfh);
37607c478bd9Sstevel@tonic-gate }
37617c478bd9Sstevel@tonic-gate
37627c478bd9Sstevel@tonic-gate /*
37637c478bd9Sstevel@tonic-gate * Return a shared filehandle object for the given filehandle. The caller
37647c478bd9Sstevel@tonic-gate * is responsible for eventually calling sfh4_rele().
37657c478bd9Sstevel@tonic-gate */
37667c478bd9Sstevel@tonic-gate
37677c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *
sfh4_get(const nfs_fh4 * fh,mntinfo4_t * mi)37687c478bd9Sstevel@tonic-gate sfh4_get(const nfs_fh4 *fh, mntinfo4_t *mi)
37697c478bd9Sstevel@tonic-gate {
37707c478bd9Sstevel@tonic-gate nfs4_sharedfh_t *sfh;
37717c478bd9Sstevel@tonic-gate nfs4_sharedfh_t key;
37727c478bd9Sstevel@tonic-gate
37737c478bd9Sstevel@tonic-gate ASSERT(fh->nfs_fh4_len <= NFS4_FHSIZE);
37747c478bd9Sstevel@tonic-gate
37757c478bd9Sstevel@tonic-gate #ifdef DEBUG
37767c478bd9Sstevel@tonic-gate if (nfs4_sharedfh_debug) {
37777c478bd9Sstevel@tonic-gate nfs4_fhandle_t fhandle;
37787c478bd9Sstevel@tonic-gate
37797c478bd9Sstevel@tonic-gate fhandle.fh_len = fh->nfs_fh4_len;
37807c478bd9Sstevel@tonic-gate bcopy(fh->nfs_fh4_val, fhandle.fh_buf, fhandle.fh_len);
37817c478bd9Sstevel@tonic-gate zcmn_err(mi->mi_zone->zone_id, CE_NOTE, "sfh4_get:");
37827c478bd9Sstevel@tonic-gate nfs4_printfhandle(&fhandle);
37837c478bd9Sstevel@tonic-gate }
37847c478bd9Sstevel@tonic-gate #endif
37857c478bd9Sstevel@tonic-gate
37867c478bd9Sstevel@tonic-gate /*
37877c478bd9Sstevel@tonic-gate * If there's already an object for the given filehandle, bump the
37887c478bd9Sstevel@tonic-gate * reference count and return it. Otherwise, create a new object
37897c478bd9Sstevel@tonic-gate * and add it to the AVL tree.
37907c478bd9Sstevel@tonic-gate */
37917c478bd9Sstevel@tonic-gate
37927c478bd9Sstevel@tonic-gate key.sfh_fh = *fh;
37937c478bd9Sstevel@tonic-gate
37947c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
37957c478bd9Sstevel@tonic-gate sfh = avl_find(&mi->mi_filehandles, &key, NULL);
37967c478bd9Sstevel@tonic-gate if (sfh != NULL) {
37977c478bd9Sstevel@tonic-gate mutex_enter(&sfh->sfh_lock);
37987c478bd9Sstevel@tonic-gate sfh->sfh_refcnt++;
37997c478bd9Sstevel@tonic-gate NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
3800b9238976Sth "sfh4_get: found existing %p, new refcnt=%d",
3801b9238976Sth (void *)sfh, sfh->sfh_refcnt));
38027c478bd9Sstevel@tonic-gate mutex_exit(&sfh->sfh_lock);
38037c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock);
38047c478bd9Sstevel@tonic-gate return (sfh);
38057c478bd9Sstevel@tonic-gate }
38067c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock);
38077c478bd9Sstevel@tonic-gate
38087c478bd9Sstevel@tonic-gate return (sfh4_put(fh, mi, &key));
38097c478bd9Sstevel@tonic-gate }
38107c478bd9Sstevel@tonic-gate
/*
 * Get a reference to the given shared filehandle object.
 */

void
sfh4_hold(nfs4_sharedfh_t *sfh)
{
	/* caller must already hold a reference, so refcnt can't be 0 */
	ASSERT(sfh->sfh_refcnt > 0);

	mutex_enter(&sfh->sfh_lock);
	sfh->sfh_refcnt++;
	NFS4_DEBUG(nfs4_sharedfh_debug,
	    (CE_NOTE, "sfh4_hold %p, new refcnt=%d",
	    (void *)sfh, sfh->sfh_refcnt));
	mutex_exit(&sfh->sfh_lock);
}
38277c478bd9Sstevel@tonic-gate
/*
 * Release a reference to the given shared filehandle object and null out
 * the given pointer.
 */

void
sfh4_rele(nfs4_sharedfh_t **sfhpp)
{
	mntinfo4_t *mi;
	nfs4_sharedfh_t *sfh = *sfhpp;

	ASSERT(sfh->sfh_refcnt > 0);

	mutex_enter(&sfh->sfh_lock);
	if (sfh->sfh_refcnt > 1) {
		/* Fast path: not the last reference, just decrement. */
		sfh->sfh_refcnt--;
		NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
		    "sfh4_rele %p, new refcnt=%d",
		    (void *)sfh, sfh->sfh_refcnt));
		mutex_exit(&sfh->sfh_lock);
		goto finish;
	}
	mutex_exit(&sfh->sfh_lock);

	/*
	 * Possibly the last reference, so get the lock for the table in
	 * case it's time to remove the object from the table.
	 */
	mi = sfh->sfh_mi;
	(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0);
	mutex_enter(&sfh->sfh_lock);
	/*
	 * Re-check under the table lock: another thread may have taken
	 * a new hold between dropping sfh_lock above and acquiring
	 * mi_fh_lock here.
	 */
	sfh->sfh_refcnt--;
	if (sfh->sfh_refcnt > 0) {
		NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
		    "sfh4_rele %p, new refcnt=%d",
		    (void *)sfh, sfh->sfh_refcnt));
		mutex_exit(&sfh->sfh_lock);
		nfs_rw_exit(&mi->mi_fh_lock);
		goto finish;
	}

	/* Last reference: unhook from the table and free everything. */
	NFS4_DEBUG(nfs4_sharedfh_debug, (CE_NOTE,
	    "sfh4_rele %p, last ref", (void *)sfh));
	if (sfh->sfh_flags & SFH4_IN_TREE) {
		avl_remove(&mi->mi_filehandles, sfh);
		sfh->sfh_flags &= ~SFH4_IN_TREE;
	}
	mutex_exit(&sfh->sfh_lock);
	nfs_rw_exit(&mi->mi_fh_lock);
	mutex_destroy(&sfh->sfh_lock);
	kmem_free(sfh->sfh_fh.nfs_fh4_val, NFS4_FHSIZE);
	kmem_free(sfh, sizeof (nfs4_sharedfh_t));

finish:
	*sfhpp = NULL;
}
38847c478bd9Sstevel@tonic-gate
/*
 * Update the filehandle for the given shared filehandle object.
 */

int nfs4_warn_dupfh = 0; /* if set, always warn about dup fhs below */

void
sfh4_update(nfs4_sharedfh_t *sfh, const nfs_fh4 *newfh)
{
	mntinfo4_t *mi = sfh->sfh_mi;
	nfs4_sharedfh_t *dupsfh;
	avl_index_t where;
	nfs4_sharedfh_t key;

#ifdef DEBUG
	mutex_enter(&sfh->sfh_lock);
	ASSERT(sfh->sfh_refcnt > 0);
	mutex_exit(&sfh->sfh_lock);
#endif
	ASSERT(newfh->nfs_fh4_len <= NFS4_FHSIZE);

	/*
	 * The basic plan is to remove the shared filehandle object from
	 * the table, update it to have the new filehandle, then reinsert
	 * it.
	 */

	(void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_WRITER, 0);
	mutex_enter(&sfh->sfh_lock);
	if (sfh->sfh_flags & SFH4_IN_TREE) {
		avl_remove(&mi->mi_filehandles, sfh);
		sfh->sfh_flags &= ~SFH4_IN_TREE;
	}
	mutex_exit(&sfh->sfh_lock);
	/*
	 * NOTE(review): the filehandle bytes are overwritten without
	 * sfh_lock; holding mi_fh_lock as writer appears to provide the
	 * needed exclusion from lookups/updates here — confirm.
	 */
	sfh->sfh_fh.nfs_fh4_len = newfh->nfs_fh4_len;
	bcopy(newfh->nfs_fh4_val, sfh->sfh_fh.nfs_fh4_val,
	    sfh->sfh_fh.nfs_fh4_len);

	/*
	 * XXX If there is already a shared filehandle object with the new
	 * filehandle, we're in trouble, because the rnode code assumes
	 * that there is only one shared filehandle object for a given
	 * filehandle. So issue a warning (for read-write mounts only)
	 * and don't try to re-insert the given object into the table.
	 * Hopefully the given object will quickly go away and everyone
	 * will use the new object.
	 */
	key.sfh_fh = *newfh;
	dupsfh = avl_find(&mi->mi_filehandles, &key, &where);
	if (dupsfh != NULL) {
		if (!(mi->mi_vfsp->vfs_flag & VFS_RDONLY) || nfs4_warn_dupfh) {
			zcmn_err(mi->mi_zone->zone_id, CE_WARN, "sfh4_update: "
			    "duplicate filehandle detected");
			sfh4_printfhandle(dupsfh);
		}
	} else {
		avl_insert(&mi->mi_filehandles, sfh, where);
		mutex_enter(&sfh->sfh_lock);
		sfh->sfh_flags |= SFH4_IN_TREE;
		mutex_exit(&sfh->sfh_lock);
	}
	nfs_rw_exit(&mi->mi_fh_lock);
}
39487c478bd9Sstevel@tonic-gate
39497c478bd9Sstevel@tonic-gate /*
39507c478bd9Sstevel@tonic-gate * Copy out the current filehandle for the given shared filehandle object.
39517c478bd9Sstevel@tonic-gate */
39527c478bd9Sstevel@tonic-gate
39537c478bd9Sstevel@tonic-gate void
sfh4_copyval(const nfs4_sharedfh_t * sfh,nfs4_fhandle_t * fhp)39547c478bd9Sstevel@tonic-gate sfh4_copyval(const nfs4_sharedfh_t *sfh, nfs4_fhandle_t *fhp)
39557c478bd9Sstevel@tonic-gate {
39567c478bd9Sstevel@tonic-gate mntinfo4_t *mi = sfh->sfh_mi;
39577c478bd9Sstevel@tonic-gate
39587c478bd9Sstevel@tonic-gate ASSERT(sfh->sfh_refcnt > 0);
39597c478bd9Sstevel@tonic-gate
39607c478bd9Sstevel@tonic-gate (void) nfs_rw_enter_sig(&mi->mi_fh_lock, RW_READER, 0);
39617c478bd9Sstevel@tonic-gate fhp->fh_len = sfh->sfh_fh.nfs_fh4_len;
39627c478bd9Sstevel@tonic-gate ASSERT(fhp->fh_len <= NFS4_FHSIZE);
39637c478bd9Sstevel@tonic-gate bcopy(sfh->sfh_fh.nfs_fh4_val, fhp->fh_buf, fhp->fh_len);
39647c478bd9Sstevel@tonic-gate nfs_rw_exit(&mi->mi_fh_lock);
39657c478bd9Sstevel@tonic-gate }
39667c478bd9Sstevel@tonic-gate
/*
 * Print out the filehandle for the given shared filehandle object.
 */

void
sfh4_printfhandle(const nfs4_sharedfh_t *sfh)
{
	nfs4_fhandle_t fhandle;

	/* snapshot the handle under the table lock, then print the copy */
	sfh4_copyval(sfh, &fhandle);
	nfs4_printfhandle(&fhandle);
}
39797c478bd9Sstevel@tonic-gate
39807c478bd9Sstevel@tonic-gate /*
39817c478bd9Sstevel@tonic-gate * Compare 2 fnames. Returns -1 if the first is "less" than the second, 0
39827c478bd9Sstevel@tonic-gate * if they're the same, +1 if the first is "greater" than the second. The
39837c478bd9Sstevel@tonic-gate * caller (or whoever's calling the AVL package) is responsible for
39847c478bd9Sstevel@tonic-gate * handling locking issues.
39857c478bd9Sstevel@tonic-gate */
39867c478bd9Sstevel@tonic-gate
39877c478bd9Sstevel@tonic-gate static int
fncmp(const void * p1,const void * p2)39887c478bd9Sstevel@tonic-gate fncmp(const void *p1, const void *p2)
39897c478bd9Sstevel@tonic-gate {
39907c478bd9Sstevel@tonic-gate const nfs4_fname_t *f1 = p1;
39917c478bd9Sstevel@tonic-gate const nfs4_fname_t *f2 = p2;
39927c478bd9Sstevel@tonic-gate int res;
39937c478bd9Sstevel@tonic-gate
39947c478bd9Sstevel@tonic-gate res = strcmp(f1->fn_name, f2->fn_name);
39957c478bd9Sstevel@tonic-gate /*
39967c478bd9Sstevel@tonic-gate * The AVL package wants +/-1, not arbitrary positive or negative
39977c478bd9Sstevel@tonic-gate * integers.
39987c478bd9Sstevel@tonic-gate */
39997c478bd9Sstevel@tonic-gate if (res > 0)
40007c478bd9Sstevel@tonic-gate res = 1;
40017c478bd9Sstevel@tonic-gate else if (res < 0)
40027c478bd9Sstevel@tonic-gate res = -1;
40037c478bd9Sstevel@tonic-gate return (res);
40047c478bd9Sstevel@tonic-gate }
40057c478bd9Sstevel@tonic-gate
40067c478bd9Sstevel@tonic-gate /*
40077c478bd9Sstevel@tonic-gate * Get or create an fname with the given name, as a child of the given
40087c478bd9Sstevel@tonic-gate * fname. The caller is responsible for eventually releasing the reference
40097c478bd9Sstevel@tonic-gate * (fn_rele()). parent may be NULL.
40107c478bd9Sstevel@tonic-gate */
40117c478bd9Sstevel@tonic-gate
nfs4_fname_t *
fn_get(nfs4_fname_t *parent, char *name, nfs4_sharedfh_t *sfh)
{
	nfs4_fname_t key;
	nfs4_fname_t *fnp;
	avl_index_t where;

	/* Only fn_name is needed for the AVL lookup; fncmp compares names. */
	key.fn_name = name;

	/*
	 * If there's already an fname registered with the given name, bump
	 * its reference count and return it.  Otherwise, create a new one
	 * and add it to the parent's AVL tree.
	 *
	 * fname entries we are looking for should match both the name
	 * and the sfh stored in the fname.
	 */
again:
	if (parent != NULL) {
		mutex_enter(&parent->fn_lock);
		fnp = avl_find(&parent->fn_children, &key, &where);
		if (fnp != NULL) {
			/*
			 * This hold on fnp is released below later,
			 * in case this is not the fnp we want.
			 */
			fn_hold(fnp);

			if (fnp->fn_sfh == sfh) {
				/*
				 * We have found our entry; return it
				 * with the hold taken above.
				 */
				mutex_exit(&parent->fn_lock);
				return (fnp);
			}

			/*
			 * We have found an entry that has a mismatching
			 * fn_sfh.  This could be a stale entry due to
			 * server side rename.  We will remove this entry
			 * and make sure no such entries exist.
			 *
			 * Lock order: drop parent->fn_lock before taking
			 * fnp->fn_lock, and only re-take the parent lock
			 * once we know fnp still belongs to parent (it may
			 * have been moved or unlinked in the window).
			 */
			mutex_exit(&parent->fn_lock);
			mutex_enter(&fnp->fn_lock);
			if (fnp->fn_parent == parent) {
				/*
				 * Remove ourselves from parent's
				 * fn_children tree.
				 */
				mutex_enter(&parent->fn_lock);
				avl_remove(&parent->fn_children, fnp);
				mutex_exit(&parent->fn_lock);
				fn_rele(&fnp->fn_parent);
			}
			mutex_exit(&fnp->fn_lock);
			fn_rele(&fnp);
			/* Retry the lookup; another thread may have raced. */
			goto again;
		}
	}

	/* No matching entry: build a fresh fname with one reference. */
	fnp = kmem_alloc(sizeof (nfs4_fname_t), KM_SLEEP);
	mutex_init(&fnp->fn_lock, NULL, MUTEX_DEFAULT, NULL);
	fnp->fn_parent = parent;
	if (parent != NULL)
		fn_hold(parent);	/* a child holds a ref on its parent */
	fnp->fn_len = strlen(name);
	ASSERT(fnp->fn_len < MAXNAMELEN);
	fnp->fn_name = kmem_alloc(fnp->fn_len + 1, KM_SLEEP);
	(void) strcpy(fnp->fn_name, name);
	fnp->fn_refcnt = 1;

	/*
	 * This hold on sfh is later released
	 * when we do the final fn_rele() on this fname.
	 */
	sfh4_hold(sfh);
	fnp->fn_sfh = sfh;

	avl_create(&fnp->fn_children, fncmp, sizeof (nfs4_fname_t),
	    offsetof(nfs4_fname_t, fn_tree));
	NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
	    "fn_get %p:%s, a new nfs4_fname_t!",
	    (void *)fnp, fnp->fn_name));
	if (parent != NULL) {
		/*
		 * "where" from the failed avl_find() above is still valid:
		 * parent->fn_lock has been held continuously since then.
		 */
		avl_insert(&parent->fn_children, fnp, where);
		mutex_exit(&parent->fn_lock);
	}

	return (fnp);
}
41037c478bd9Sstevel@tonic-gate
/*
 * Bump the reference count on the given fname.  The matching release
 * is fn_rele().
 */
void
fn_hold(nfs4_fname_t *fnp)
{
	atomic_inc_32(&fnp->fn_refcnt);
	/*
	 * NOTE(review): the debug message re-reads fn_refcnt after the
	 * increment, so under contention it may report another thread's
	 * count rather than the value this increment produced.  Harmless
	 * for a diagnostic, but don't rely on the printed value.
	 */
	NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
	    "fn_hold %p:%s, new refcnt=%d",
	    (void *)fnp, fnp->fn_name, fnp->fn_refcnt));
}
41127c478bd9Sstevel@tonic-gate
41137c478bd9Sstevel@tonic-gate /*
41147c478bd9Sstevel@tonic-gate * Decrement the reference count of the given fname, and destroy it if its
41157c478bd9Sstevel@tonic-gate * reference count goes to zero. Nulls out the given pointer.
41167c478bd9Sstevel@tonic-gate */
41177c478bd9Sstevel@tonic-gate
void
fn_rele(nfs4_fname_t **fnpp)
{
	nfs4_fname_t *parent;
	uint32_t newref;
	nfs4_fname_t *fnp;

recur:
	fnp = *fnpp;
	*fnpp = NULL;	/* null out the caller's pointer, per the contract */

	mutex_enter(&fnp->fn_lock);
	parent = fnp->fn_parent;
	if (parent != NULL)
		mutex_enter(&parent->fn_lock); /* prevent new references */
	newref = atomic_dec_32_nv(&fnp->fn_refcnt);
	if (newref > 0) {
		/* Others still hold references; just drop ours and return. */
		NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
		    "fn_rele %p:%s, new refcnt=%d",
		    (void *)fnp, fnp->fn_name, fnp->fn_refcnt));
		if (parent != NULL)
			mutex_exit(&parent->fn_lock);
		mutex_exit(&fnp->fn_lock);
		return;
	}

	/*
	 * Last reference: unlink fnp from its parent's tree and free it.
	 * New lookups can't find it once it is out of fn_children.
	 */
	NFS4_DEBUG(nfs4_fname_debug, (CE_NOTE,
	    "fn_rele %p:%s, last reference, deleting...",
	    (void *)fnp, fnp->fn_name));
	if (parent != NULL) {
		avl_remove(&parent->fn_children, fnp);
		mutex_exit(&parent->fn_lock);
	}
	kmem_free(fnp->fn_name, fnp->fn_len + 1);
	sfh4_rele(&fnp->fn_sfh);	/* drop the hold taken in fn_get() */
	mutex_destroy(&fnp->fn_lock);
	avl_destroy(&fnp->fn_children);
	kmem_free(fnp, sizeof (nfs4_fname_t));
	/*
	 * Recursively fn_rele the parent.
	 * Use goto instead of a recursive call to avoid stack overflow.
	 */
	if (parent != NULL) {
		fnpp = &parent;
		goto recur;
	}
}
41657c478bd9Sstevel@tonic-gate
41667c478bd9Sstevel@tonic-gate /*
41677c478bd9Sstevel@tonic-gate * Returns the single component name of the given fname, in a MAXNAMELEN
41687c478bd9Sstevel@tonic-gate * string buffer, which the caller is responsible for freeing. Note that
41697c478bd9Sstevel@tonic-gate * the name may become invalid as a result of fn_move().
41707c478bd9Sstevel@tonic-gate */
41717c478bd9Sstevel@tonic-gate
41727c478bd9Sstevel@tonic-gate char *
fn_name(nfs4_fname_t * fnp)41737c478bd9Sstevel@tonic-gate fn_name(nfs4_fname_t *fnp)
41747c478bd9Sstevel@tonic-gate {
41757c478bd9Sstevel@tonic-gate char *name;
41767c478bd9Sstevel@tonic-gate
41777c478bd9Sstevel@tonic-gate ASSERT(fnp->fn_len < MAXNAMELEN);
41787c478bd9Sstevel@tonic-gate name = kmem_alloc(MAXNAMELEN, KM_SLEEP);
41797c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock);
41807c478bd9Sstevel@tonic-gate (void) strcpy(name, fnp->fn_name);
41817c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock);
41827c478bd9Sstevel@tonic-gate
41837c478bd9Sstevel@tonic-gate return (name);
41847c478bd9Sstevel@tonic-gate }
41857c478bd9Sstevel@tonic-gate
41867c478bd9Sstevel@tonic-gate
41877c478bd9Sstevel@tonic-gate /*
41887c478bd9Sstevel@tonic-gate * fn_path_realloc
41897c478bd9Sstevel@tonic-gate *
41907c478bd9Sstevel@tonic-gate * This function, used only by fn_path, constructs
41917c478bd9Sstevel@tonic-gate * a new string which looks like "prepend" + "/" + "current".
41927c478bd9Sstevel@tonic-gate * by allocating a new string and freeing the old one.
41937c478bd9Sstevel@tonic-gate */
41947c478bd9Sstevel@tonic-gate static void
fn_path_realloc(char ** curses,char * prepend)41957c478bd9Sstevel@tonic-gate fn_path_realloc(char **curses, char *prepend)
41967c478bd9Sstevel@tonic-gate {
41977c478bd9Sstevel@tonic-gate int len, curlen = 0;
41987c478bd9Sstevel@tonic-gate char *news;
41997c478bd9Sstevel@tonic-gate
42007c478bd9Sstevel@tonic-gate if (*curses == NULL) {
42017c478bd9Sstevel@tonic-gate /*
42027c478bd9Sstevel@tonic-gate * Prime the pump, allocate just the
42037c478bd9Sstevel@tonic-gate * space for prepend and return that.
42047c478bd9Sstevel@tonic-gate */
42057c478bd9Sstevel@tonic-gate len = strlen(prepend) + 1;
42067c478bd9Sstevel@tonic-gate news = kmem_alloc(len, KM_SLEEP);
42077c478bd9Sstevel@tonic-gate (void) strncpy(news, prepend, len);
42087c478bd9Sstevel@tonic-gate } else {
42097c478bd9Sstevel@tonic-gate /*
42107c478bd9Sstevel@tonic-gate * Allocate the space for a new string
42117c478bd9Sstevel@tonic-gate * +1 +1 is for the "/" and the NULL
42127c478bd9Sstevel@tonic-gate * byte at the end of it all.
42137c478bd9Sstevel@tonic-gate */
42147c478bd9Sstevel@tonic-gate curlen = strlen(*curses);
42157c478bd9Sstevel@tonic-gate len = curlen + strlen(prepend) + 1 + 1;
42167c478bd9Sstevel@tonic-gate news = kmem_alloc(len, KM_SLEEP);
42177c478bd9Sstevel@tonic-gate (void) strncpy(news, prepend, len);
42187c478bd9Sstevel@tonic-gate (void) strcat(news, "/");
42197c478bd9Sstevel@tonic-gate (void) strcat(news, *curses);
42207c478bd9Sstevel@tonic-gate kmem_free(*curses, curlen + 1);
42217c478bd9Sstevel@tonic-gate }
42227c478bd9Sstevel@tonic-gate *curses = news;
42237c478bd9Sstevel@tonic-gate }
42247c478bd9Sstevel@tonic-gate
42257c478bd9Sstevel@tonic-gate /*
42267c478bd9Sstevel@tonic-gate * Returns the path name (starting from the fs root) for the given fname.
42277c478bd9Sstevel@tonic-gate * The caller is responsible for freeing. Note that the path may be or
42287c478bd9Sstevel@tonic-gate * become invalid as a result of fn_move().
42297c478bd9Sstevel@tonic-gate */
42307c478bd9Sstevel@tonic-gate
42317c478bd9Sstevel@tonic-gate char *
fn_path(nfs4_fname_t * fnp)42327c478bd9Sstevel@tonic-gate fn_path(nfs4_fname_t *fnp)
42337c478bd9Sstevel@tonic-gate {
42347c478bd9Sstevel@tonic-gate char *path;
42357c478bd9Sstevel@tonic-gate nfs4_fname_t *nextfnp;
42367c478bd9Sstevel@tonic-gate
42377c478bd9Sstevel@tonic-gate if (fnp == NULL)
42387c478bd9Sstevel@tonic-gate return (NULL);
42397c478bd9Sstevel@tonic-gate
42407c478bd9Sstevel@tonic-gate path = NULL;
42417c478bd9Sstevel@tonic-gate
42427c478bd9Sstevel@tonic-gate /* walk up the tree constructing the pathname. */
42437c478bd9Sstevel@tonic-gate
42447c478bd9Sstevel@tonic-gate fn_hold(fnp); /* adjust for later rele */
42457c478bd9Sstevel@tonic-gate do {
42467c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock);
42477c478bd9Sstevel@tonic-gate /*
42487c478bd9Sstevel@tonic-gate * Add fn_name in front of the current path
42497c478bd9Sstevel@tonic-gate */
42507c478bd9Sstevel@tonic-gate fn_path_realloc(&path, fnp->fn_name);
42517c478bd9Sstevel@tonic-gate nextfnp = fnp->fn_parent;
42527c478bd9Sstevel@tonic-gate if (nextfnp != NULL)
42537c478bd9Sstevel@tonic-gate fn_hold(nextfnp);
42547c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock);
42557c478bd9Sstevel@tonic-gate fn_rele(&fnp);
42567c478bd9Sstevel@tonic-gate fnp = nextfnp;
42577c478bd9Sstevel@tonic-gate } while (fnp != NULL);
42587c478bd9Sstevel@tonic-gate
42597c478bd9Sstevel@tonic-gate return (path);
42607c478bd9Sstevel@tonic-gate }
42617c478bd9Sstevel@tonic-gate
42627c478bd9Sstevel@tonic-gate /*
42637c478bd9Sstevel@tonic-gate * Return a reference to the parent of the given fname, which the caller is
42647c478bd9Sstevel@tonic-gate * responsible for eventually releasing.
42657c478bd9Sstevel@tonic-gate */
42667c478bd9Sstevel@tonic-gate
42677c478bd9Sstevel@tonic-gate nfs4_fname_t *
fn_parent(nfs4_fname_t * fnp)42687c478bd9Sstevel@tonic-gate fn_parent(nfs4_fname_t *fnp)
42697c478bd9Sstevel@tonic-gate {
42707c478bd9Sstevel@tonic-gate nfs4_fname_t *parent;
42717c478bd9Sstevel@tonic-gate
42727c478bd9Sstevel@tonic-gate mutex_enter(&fnp->fn_lock);
42737c478bd9Sstevel@tonic-gate parent = fnp->fn_parent;
42747c478bd9Sstevel@tonic-gate if (parent != NULL)
42757c478bd9Sstevel@tonic-gate fn_hold(parent);
42767c478bd9Sstevel@tonic-gate mutex_exit(&fnp->fn_lock);
42777c478bd9Sstevel@tonic-gate
42787c478bd9Sstevel@tonic-gate return (parent);
42797c478bd9Sstevel@tonic-gate }
42807c478bd9Sstevel@tonic-gate
42817c478bd9Sstevel@tonic-gate /*
42827c478bd9Sstevel@tonic-gate * Update fnp so that its parent is newparent and its name is newname.
42837c478bd9Sstevel@tonic-gate */
42847c478bd9Sstevel@tonic-gate
void
fn_move(nfs4_fname_t *fnp, nfs4_fname_t *newparent, char *newname)
{
	nfs4_fname_t *parent, *tmpfnp;
	ssize_t newlen;
	nfs4_fname_t key;
	avl_index_t where;

	/*
	 * This assert exists to catch the client trying to rename
	 * a dir to be a child of itself.  This happened at a recent
	 * bakeoff against a 3rd party (broken) server which allowed
	 * the rename to succeed.  If it trips it means that:
	 * a) the code in nfs4rename that detects this case is broken
	 * b) the server is broken (since it allowed the bogus rename)
	 *
	 * For non-DEBUG kernels, prepare for a recursive mutex_enter
	 * panic below from: mutex_enter(&newparent->fn_lock);
	 */
	ASSERT(fnp != newparent);

	/*
	 * Remove fnp from its current parent, change its name, then add it
	 * to newparent.  It might happen that fnp was replaced by another
	 * nfs4_fname_t with the same fn_name in parent->fn_children.
	 * In such case, fnp->fn_parent is NULL and we skip the removal
	 * of fnp from its current parent.
	 */
	mutex_enter(&fnp->fn_lock);
	parent = fnp->fn_parent;
	if (parent != NULL) {
		mutex_enter(&parent->fn_lock);
		avl_remove(&parent->fn_children, fnp);
		mutex_exit(&parent->fn_lock);
		fn_rele(&fnp->fn_parent);
	}

	/* Reallocate fn_name only when the length actually changes. */
	newlen = strlen(newname);
	if (newlen != fnp->fn_len) {
		ASSERT(newlen < MAXNAMELEN);
		kmem_free(fnp->fn_name, fnp->fn_len + 1);
		fnp->fn_name = kmem_alloc(newlen + 1, KM_SLEEP);
		fnp->fn_len = newlen;
	}
	(void) strcpy(fnp->fn_name, newname);

again:
	mutex_enter(&newparent->fn_lock);
	key.fn_name = fnp->fn_name;
	tmpfnp = avl_find(&newparent->fn_children, &key, &where);
	if (tmpfnp != NULL) {
		/*
		 * This could be due to a file that was unlinked while
		 * open, or perhaps the rnode is in the free list.  Remove
		 * it from newparent and let it go away on its own.  The
		 * contorted code is to deal with lock order issues and
		 * race conditions: drop newparent's lock before taking
		 * tmpfnp's, then re-check that tmpfnp is still a child
		 * of newparent before unlinking it.
		 */
		fn_hold(tmpfnp);
		mutex_exit(&newparent->fn_lock);
		mutex_enter(&tmpfnp->fn_lock);
		if (tmpfnp->fn_parent == newparent) {
			mutex_enter(&newparent->fn_lock);
			avl_remove(&newparent->fn_children, tmpfnp);
			mutex_exit(&newparent->fn_lock);
			fn_rele(&tmpfnp->fn_parent);
		}
		mutex_exit(&tmpfnp->fn_lock);
		fn_rele(&tmpfnp);
		/* Re-run the lookup; "where" must be recomputed anyway. */
		goto again;
	}
	fnp->fn_parent = newparent;
	fn_hold(newparent);	/* a child holds a ref on its parent */
	avl_insert(&newparent->fn_children, fnp, where);
	mutex_exit(&newparent->fn_lock);
	mutex_exit(&fnp->fn_lock);
}
43627c478bd9Sstevel@tonic-gate
43637c478bd9Sstevel@tonic-gate #ifdef DEBUG
43647c478bd9Sstevel@tonic-gate /*
43657c478bd9Sstevel@tonic-gate * Return non-zero if the type information makes sense for the given vnode.
43667c478bd9Sstevel@tonic-gate * Otherwise panic.
43677c478bd9Sstevel@tonic-gate */
43687c478bd9Sstevel@tonic-gate int
nfs4_consistent_type(vnode_t * vp)43697c478bd9Sstevel@tonic-gate nfs4_consistent_type(vnode_t *vp)
43707c478bd9Sstevel@tonic-gate {
43717c478bd9Sstevel@tonic-gate rnode4_t *rp = VTOR4(vp);
43727c478bd9Sstevel@tonic-gate
43737c478bd9Sstevel@tonic-gate if (nfs4_vtype_debug && vp->v_type != VNON &&
43747c478bd9Sstevel@tonic-gate rp->r_attr.va_type != VNON && vp->v_type != rp->r_attr.va_type) {
43757c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "vnode %p type mismatch; v_type=%d, "
4376b9238976Sth "rnode attr type=%d", (void *)vp, vp->v_type,
4377b9238976Sth rp->r_attr.va_type);
43787c478bd9Sstevel@tonic-gate }
43797c478bd9Sstevel@tonic-gate
43807c478bd9Sstevel@tonic-gate return (1);
43817c478bd9Sstevel@tonic-gate }
43827c478bd9Sstevel@tonic-gate #endif /* DEBUG */
4383