xref: /illumos-gate/usr/src/uts/common/vm/page_lock.c (revision af4c679f)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
58bc68872Selowe  * Common Development and Distribution License (the "License").
68bc68872Selowe  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*af4c679fSSean McEnroe  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate  * VM - page locking primitives
297c478bd9Sstevel@tonic-gate  */
307c478bd9Sstevel@tonic-gate #include <sys/param.h>
317c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
327c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
337c478bd9Sstevel@tonic-gate #include <sys/debug.h>
347c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
357c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
367c478bd9Sstevel@tonic-gate #include <sys/lockstat.h>
37d7d93655Sblakej #include <sys/sysmacros.h>
387c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h>
397c478bd9Sstevel@tonic-gate #include <vm/page.h>
407c478bd9Sstevel@tonic-gate #include <vm/seg_enum.h>
417c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h>
42*af4c679fSSean McEnroe #include <vm/seg_kmem.h>
437c478bd9Sstevel@tonic-gate 
447c478bd9Sstevel@tonic-gate /*
457c478bd9Sstevel@tonic-gate  * This global mutex is for logical page locking.
467c478bd9Sstevel@tonic-gate  * The following fields in the page structure are protected
477c478bd9Sstevel@tonic-gate  * by this lock:
487c478bd9Sstevel@tonic-gate  *
497c478bd9Sstevel@tonic-gate  *	p_lckcnt
507c478bd9Sstevel@tonic-gate  *	p_cowcnt
517c478bd9Sstevel@tonic-gate  */
527c478bd9Sstevel@tonic-gate kmutex_t page_llock;
537c478bd9Sstevel@tonic-gate 
547c478bd9Sstevel@tonic-gate /*
557c478bd9Sstevel@tonic-gate  * This is a global lock for the logical page free list.  The
567c478bd9Sstevel@tonic-gate  * logical free list, in this implementation, is maintained as two
577c478bd9Sstevel@tonic-gate  * separate physical lists - the cache list and the free list.
587c478bd9Sstevel@tonic-gate  */
597c478bd9Sstevel@tonic-gate kmutex_t  page_freelock;
607c478bd9Sstevel@tonic-gate 
617c478bd9Sstevel@tonic-gate /*
627c478bd9Sstevel@tonic-gate  * The hash table, page_hash[], the p_selock fields, and the
637c478bd9Sstevel@tonic-gate  * list of pages associated with vnodes are protected by arrays of mutexes.
647c478bd9Sstevel@tonic-gate  *
657c478bd9Sstevel@tonic-gate  * Unless the hashes are changed radically, the table sizes must be
667c478bd9Sstevel@tonic-gate  * a power of two.  Also, we typically need more mutexes for the
677c478bd9Sstevel@tonic-gate  * vnodes since these locks are occasionally held for long periods.
687c478bd9Sstevel@tonic-gate  * And since there seem to be two special vnodes (kvp and swapvp),
697c478bd9Sstevel@tonic-gate  * we make room for private mutexes for them.
707c478bd9Sstevel@tonic-gate  *
717c478bd9Sstevel@tonic-gate  * The pse_mutex[] array holds the mutexes to protect the p_selock
727c478bd9Sstevel@tonic-gate  * fields of all page_t structures.
737c478bd9Sstevel@tonic-gate  *
747c478bd9Sstevel@tonic-gate  * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
757c478bd9Sstevel@tonic-gate  * when given a pointer to a page_t.
767c478bd9Sstevel@tonic-gate  *
77d7d93655Sblakej  * PIO_TABLE_SIZE must be a power of two.  One could argue that we
787c478bd9Sstevel@tonic-gate  * should go to the trouble of setting it up at run time and base it
797c478bd9Sstevel@tonic-gate  * on memory size rather than the number of compile time CPUs.
807c478bd9Sstevel@tonic-gate  *
81d7d93655Sblakej  * XX64	We should be using physmem size to calculate PIO_SHIFT.
827c478bd9Sstevel@tonic-gate  *
837c478bd9Sstevel@tonic-gate  *	These might break in 64 bit world.
847c478bd9Sstevel@tonic-gate  */
85d7d93655Sblakej #define	PIO_SHIFT	7	/* log2(sizeof(page_t)) */
86d7d93655Sblakej #define	PIO_TABLE_SIZE	128	/* number of io mutexes to have */
877c478bd9Sstevel@tonic-gate 
887c478bd9Sstevel@tonic-gate pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
897c478bd9Sstevel@tonic-gate kmutex_t	pio_mutex[PIO_TABLE_SIZE];
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate #define	PAGE_IO_MUTEX(pp) \
927c478bd9Sstevel@tonic-gate 	    &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]
937c478bd9Sstevel@tonic-gate 
94d7d93655Sblakej /*
95d7d93655Sblakej  * The pse_mutex[] array is allocated in the platform startup code
96d7d93655Sblakej  * based on the size of the machine at startup.
97d7d93655Sblakej  */
98d7d93655Sblakej extern pad_mutex_t *pse_mutex;		/* Locks protecting pp->p_selock */
99d7d93655Sblakej extern size_t pse_table_size;		/* Number of mutexes in pse_mutex[] */
100d7d93655Sblakej extern int pse_shift;			/* log2(pse_table_size) */
101d7d93655Sblakej #define	PAGE_SE_MUTEX(pp)	&pse_mutex[				\
102d7d93655Sblakej 	((((uintptr_t)(pp) >> pse_shift) ^ ((uintptr_t)(pp))) >> 7) &	\
103d7d93655Sblakej 	(pse_table_size - 1)].pad_mutex
104d7d93655Sblakej 
1057c478bd9Sstevel@tonic-gate #define	PSZC_MTX_TABLE_SIZE	128
1067c478bd9Sstevel@tonic-gate #define	PSZC_MTX_TABLE_SHIFT	7
1077c478bd9Sstevel@tonic-gate 
1087c478bd9Sstevel@tonic-gate static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];
1097c478bd9Sstevel@tonic-gate 
1107c478bd9Sstevel@tonic-gate #define	PAGE_SZC_MUTEX(_pp) \
1117c478bd9Sstevel@tonic-gate 	    &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
1127c478bd9Sstevel@tonic-gate 		((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
1137c478bd9Sstevel@tonic-gate 		((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
1147c478bd9Sstevel@tonic-gate 		(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate /*
1177c478bd9Sstevel@tonic-gate  * The vph_mutex[] array  holds the mutexes to protect the vnode chains,
1187c478bd9Sstevel@tonic-gate  * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
1197c478bd9Sstevel@tonic-gate  * and p_vpnext).
1207c478bd9Sstevel@tonic-gate  *
1217c478bd9Sstevel@tonic-gate  * The page_vnode_mutex(vp) function returns the address of the appropriate
1227c478bd9Sstevel@tonic-gate  * mutex from this array given a pointer to a vnode.  It is complicated
1237c478bd9Sstevel@tonic-gate  * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
1257c478bd9Sstevel@tonic-gate  *
1267c478bd9Sstevel@tonic-gate  * The VP_HASH_FUNC returns the index into the vph_mutex array given
1277c478bd9Sstevel@tonic-gate  * an address of a vnode.
1287c478bd9Sstevel@tonic-gate  */
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate /*
1317c478bd9Sstevel@tonic-gate  * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
1327c478bd9Sstevel@tonic-gate  *	Need to review again.
1337c478bd9Sstevel@tonic-gate  */
134ac52b00eSqiao #if defined(_LP64)
135ac52b00eSqiao #define	VPH_TABLE_SIZE  (1 << (VP_SHIFT + 3))
136ac52b00eSqiao #else	/* 32 bits */
1377c478bd9Sstevel@tonic-gate #define	VPH_TABLE_SIZE	(2 << VP_SHIFT)
138ac52b00eSqiao #endif
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate #define	VP_HASH_FUNC(vp) \
1417c478bd9Sstevel@tonic-gate 	((((uintptr_t)(vp) >> 6) + \
1427c478bd9Sstevel@tonic-gate 	    ((uintptr_t)(vp) >> 8) + \
1437c478bd9Sstevel@tonic-gate 	    ((uintptr_t)(vp) >> 10) + \
1447c478bd9Sstevel@tonic-gate 	    ((uintptr_t)(vp) >> 12)) \
1457c478bd9Sstevel@tonic-gate 	    & (VPH_TABLE_SIZE - 1))
1467c478bd9Sstevel@tonic-gate 
147ad23a2dbSjohansen /*
148ad23a2dbSjohansen  * Two slots after VPH_TABLE_SIZE are reserved in vph_mutex for kernel vnodes.
149ad23a2dbSjohansen  * The lock for kvp is VPH_TABLE_SIZE + 0, and the lock for zvp is
150ad23a2dbSjohansen  * VPH_TABLE_SIZE + 1.
151ad23a2dbSjohansen  */
152ad23a2dbSjohansen 
1537c478bd9Sstevel@tonic-gate kmutex_t	vph_mutex[VPH_TABLE_SIZE + 2];
1547c478bd9Sstevel@tonic-gate 
1557c478bd9Sstevel@tonic-gate /*
1567c478bd9Sstevel@tonic-gate  * Initialize the locks used by the Virtual Memory Management system.
1577c478bd9Sstevel@tonic-gate  */
void
page_lock_init(void)
{
	/*
	 * Intentionally empty: nothing in this file needs explicit
	 * initialization here.  The explicit (void) parameter list makes
	 * this definition a proper prototype.
	 */
}
1627c478bd9Sstevel@tonic-gate 
163d7d93655Sblakej /*
164d7d93655Sblakej  * Return a value for pse_shift based on npg (the number of physical pages)
165d7d93655Sblakej  * and ncpu (the maximum number of CPUs).  This is called by platform startup
166d7d93655Sblakej  * code.
167d7d93655Sblakej  *
168d7d93655Sblakej  * Lockstat data from TPC-H runs showed that contention on the pse_mutex[]
169d7d93655Sblakej  * locks grew approximately as the square of the number of threads executing.
170d7d93655Sblakej  * So the primary scaling factor used is NCPU^2.  The size of the machine in
171d7d93655Sblakej  * megabytes is used as an upper bound, particularly for sun4v machines which
172d7d93655Sblakej  * all claim to have 256 CPUs maximum, and the old value of PSE_TABLE_SIZE
173d7d93655Sblakej  * (128) is used as a minimum.  Since the size of the table has to be a power
174d7d93655Sblakej  * of two, the calculated size is rounded up to the next power of two.
175d7d93655Sblakej  */
176d7d93655Sblakej /*ARGSUSED*/
177d7d93655Sblakej int
178d7d93655Sblakej size_pse_array(pgcnt_t npg, int ncpu)
179d7d93655Sblakej {
180d7d93655Sblakej 	size_t size;
181d7d93655Sblakej 	pgcnt_t pp_per_mb = (1024 * 1024) / PAGESIZE;
182d7d93655Sblakej 
183d7d93655Sblakej 	size = MAX(128, MIN(npg / pp_per_mb, 2 * ncpu * ncpu));
184d7d93655Sblakej 	size += (1 << (highbit(size) - 1)) - 1;
185d7d93655Sblakej 	return (highbit(size) - 1);
186d7d93655Sblakej }
187d7d93655Sblakej 
1887c478bd9Sstevel@tonic-gate /*
1897c478bd9Sstevel@tonic-gate  * At present we only use page ownership to aid debugging, so it's
1907c478bd9Sstevel@tonic-gate  * OK if the owner field isn't exact.  In the 32-bit world two thread ids
1917c478bd9Sstevel@tonic-gate  * can map to the same owner because we just 'or' in 0x80000000 and
1927c478bd9Sstevel@tonic-gate  * then clear the second highest bit, so that (for example) 0x2faced00
1937c478bd9Sstevel@tonic-gate  * and 0xafaced00 both map to 0xafaced00.
1947c478bd9Sstevel@tonic-gate  * In the 64-bit world, p_selock may not be large enough to hold a full
1957c478bd9Sstevel@tonic-gate  * thread pointer.  If we ever need precise ownership (e.g. if we implement
1967c478bd9Sstevel@tonic-gate  * priority inheritance for page locks) then p_selock should become a
1977c478bd9Sstevel@tonic-gate  * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
1987c478bd9Sstevel@tonic-gate  */
/*
 * SE_WRITER: the value stored in p_selock by an exclusive holder.  It is
 * the current thread pointer with the sign bit (INT_MIN) forced on and the
 * SE_EWANTED bit forced off.
 * SE_READER: the increment applied to p_selock for each shared holder.
 */
#define	SE_WRITER	((INT_MIN | (selock_t)(ulong_t)curthread) & ~SE_EWANTED)
#define	SE_READER	1

/*
 * SE_DELETED is the marker placed in p_selock by page_lock_delete() for a
 * page that has been deleted; the page must be exclusively locked when it
 * is marked.  The value has to differ from every possible SE_WRITER value.
 */
#define	SE_DELETED	(INT_MIN | 1)
2097c478bd9Sstevel@tonic-gate 
#ifdef VM_STATS
/*
 * Statistics counters, compiled in only when VM_STATS is defined.
 */

/* vph_* counters (not referenced in the portion of the file shown here) */
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;

/* page_lock_es() outcomes */
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_retired;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

/* non-blocking lock attempts */
uint_t	page_trylock_locked;
uint_t	page_trylock_failed;
uint_t	page_trylock_missed;

/* shared request on a free page in page_try_reclaim_lock() */
uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */
2347c478bd9Sstevel@tonic-gate 
2357c478bd9Sstevel@tonic-gate /*
2367c478bd9Sstevel@tonic-gate  * Acquire the "shared/exclusive" lock on a page.
2377c478bd9Sstevel@tonic-gate  *
2387c478bd9Sstevel@tonic-gate  * Returns 1 on success and locks the page appropriately.
2397c478bd9Sstevel@tonic-gate  *	   0 on failure and does not lock the page.
2407c478bd9Sstevel@tonic-gate  *
2417c478bd9Sstevel@tonic-gate  * If `lock' is non-NULL, it will be dropped and reacquired in the
2427c478bd9Sstevel@tonic-gate  * failure case.  This routine can block, and if it does
2437c478bd9Sstevel@tonic-gate  * it will always return a failure since the page identity [vp, off]
2447c478bd9Sstevel@tonic-gate  * or state may have changed.
2457c478bd9Sstevel@tonic-gate  */
2467c478bd9Sstevel@tonic-gate 
2477c478bd9Sstevel@tonic-gate int
2487c478bd9Sstevel@tonic-gate page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
2497c478bd9Sstevel@tonic-gate {
2507c478bd9Sstevel@tonic-gate 	return (page_lock_es(pp, se, lock, reclaim, 0));
2517c478bd9Sstevel@tonic-gate }
2527c478bd9Sstevel@tonic-gate 
2537c478bd9Sstevel@tonic-gate /*
2547c478bd9Sstevel@tonic-gate  * With the addition of reader-writer lock semantics to page_lock_es,
2557c478bd9Sstevel@tonic-gate  * callers wanting an exclusive (writer) lock may prevent shared-lock
2567c478bd9Sstevel@tonic-gate  * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
2577c478bd9Sstevel@tonic-gate  * In this case, when an exclusive lock cannot be acquired, p_selock's
258db874c57Selowe  * SE_EWANTED bit is set. Shared-lock (reader) requests are also denied
259db874c57Selowe  * if the page is slated for retirement.
260db874c57Selowe  *
261db874c57Selowe  * The se and es parameters determine if the lock should be granted
262db874c57Selowe  * based on the following decision table:
263db874c57Selowe  *
264db874c57Selowe  * Lock wanted   es flags     p_selock/SE_EWANTED  Action
265db874c57Selowe  * ----------- -------------- -------------------  ---------
266db874c57Selowe  * SE_EXCL        any [1][2]   unlocked/any        grant lock, clear SE_EWANTED
267db874c57Selowe  * SE_EXCL        SE_EWANTED   any lock/any        deny, set SE_EWANTED
268db874c57Selowe  * SE_EXCL        none         any lock/any        deny
2698bc68872Selowe  * SE_SHARED      n/a [2]        shared/0          grant
2708bc68872Selowe  * SE_SHARED      n/a [2]      unlocked/0          grant
271db874c57Selowe  * SE_SHARED      n/a            shared/1          deny
272db874c57Selowe  * SE_SHARED      n/a          unlocked/1          deny
273db874c57Selowe  * SE_SHARED      n/a              excl/any        deny
274db874c57Selowe  *
275db874c57Selowe  * Notes:
276db874c57Selowe  * [1] The code grants an exclusive lock to the caller and clears the bit
277db874c57Selowe  *   SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED
278db874c57Selowe  *   bit's value.  This was deemed acceptable as we are not concerned about
279db874c57Selowe  *   exclusive-lock starvation. If this ever becomes an issue, a priority or
280db874c57Selowe  *   fifo mechanism should also be implemented. Meantime, the thread that
281db874c57Selowe  *   set SE_EWANTED should be prepared to catch this condition and reset it
282db874c57Selowe  *
283db874c57Selowe  * [2] Retired pages may not be locked at any time, regardless of the
 *   disposition of se, unless the es parameter has SE_RETIRED flag set.
2857c478bd9Sstevel@tonic-gate  *
286db874c57Selowe  * Notes on values of "es":
287db874c57Selowe  *
288db874c57Selowe  *   es & 1: page_lookup_create will attempt page relocation
289db874c57Selowe  *   es & SE_EXCL_WANTED: caller wants SE_EWANTED set (eg. delete
290db874c57Selowe  *       memory thread); this prevents reader-starvation of waiting
291db874c57Selowe  *       writer thread(s) by giving priority to writers over readers.
292db874c57Selowe  *   es & SE_RETIRED: caller wants to lock pages even if they are
293db874c57Selowe  *       retired.  Default is to deny the lock if the page is retired.
294db874c57Selowe  *
295db874c57Selowe  * And yes, we know, the semantics of this function are too complicated.
296db874c57Selowe  * It's on the list to be cleaned up.
2977c478bd9Sstevel@tonic-gate  */
2987c478bd9Sstevel@tonic-gate int
2997c478bd9Sstevel@tonic-gate page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
3007c478bd9Sstevel@tonic-gate {
3017c478bd9Sstevel@tonic-gate 	int		retval;
3027c478bd9Sstevel@tonic-gate 	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
3037c478bd9Sstevel@tonic-gate 	int		upgraded;
3047c478bd9Sstevel@tonic-gate 	int		reclaim_it;
3057c478bd9Sstevel@tonic-gate 
3067c478bd9Sstevel@tonic-gate 	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);
3077c478bd9Sstevel@tonic-gate 
3087c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(page_lock_count);
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate 	upgraded = 0;
3117c478bd9Sstevel@tonic-gate 	reclaim_it = 0;
3127c478bd9Sstevel@tonic-gate 
3137c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
3147c478bd9Sstevel@tonic-gate 
3157c478bd9Sstevel@tonic-gate 	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
316db874c57Selowe 	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));
317db874c57Selowe 
318db874c57Selowe 	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
319db874c57Selowe 		mutex_exit(pse);
320db874c57Selowe 		VM_STAT_ADD(page_lock_retired);
321db874c57Selowe 		return (0);
322db874c57Selowe 	}
3237c478bd9Sstevel@tonic-gate 
3247c478bd9Sstevel@tonic-gate 	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
3257c478bd9Sstevel@tonic-gate 		se = SE_EXCL;
3267c478bd9Sstevel@tonic-gate 	}
3277c478bd9Sstevel@tonic-gate 
3287c478bd9Sstevel@tonic-gate 	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {
3297c478bd9Sstevel@tonic-gate 
3307c478bd9Sstevel@tonic-gate 		reclaim_it = 1;
3317c478bd9Sstevel@tonic-gate 		if (se == SE_SHARED) {
3327c478bd9Sstevel@tonic-gate 			/*
3337c478bd9Sstevel@tonic-gate 			 * This is an interesting situation.
3347c478bd9Sstevel@tonic-gate 			 *
3357c478bd9Sstevel@tonic-gate 			 * Remember that p_free can only change if
3367c478bd9Sstevel@tonic-gate 			 * p_selock < 0.
3377c478bd9Sstevel@tonic-gate 			 * p_free does not depend on our holding `pse'.
3387c478bd9Sstevel@tonic-gate 			 * And, since we hold `pse', p_selock can not change.
3397c478bd9Sstevel@tonic-gate 			 * So, if p_free changes on us, the page is already
3407c478bd9Sstevel@tonic-gate 			 * exclusively held, and we would fail to get p_selock
3417c478bd9Sstevel@tonic-gate 			 * regardless.
3427c478bd9Sstevel@tonic-gate 			 *
3437c478bd9Sstevel@tonic-gate 			 * We want to avoid getting the share
3447c478bd9Sstevel@tonic-gate 			 * lock on a free page that needs to be reclaimed.
3457c478bd9Sstevel@tonic-gate 			 * It is possible that some other thread has the share
3467c478bd9Sstevel@tonic-gate 			 * lock and has left the free page on the cache list.
3477c478bd9Sstevel@tonic-gate 			 * pvn_vplist_dirty() does this for brief periods.
3487c478bd9Sstevel@tonic-gate 			 * If the se_share is currently SE_EXCL, we will fail
3497c478bd9Sstevel@tonic-gate 			 * to acquire p_selock anyway.  Blocking is the
3507c478bd9Sstevel@tonic-gate 			 * right thing to do.
3517c478bd9Sstevel@tonic-gate 			 * If we need to reclaim this page, we must get
3527c478bd9Sstevel@tonic-gate 			 * exclusive access to it, force the upgrade now.
3537c478bd9Sstevel@tonic-gate 			 * Again, we will fail to acquire p_selock if the
3547c478bd9Sstevel@tonic-gate 			 * page is not free and block.
3557c478bd9Sstevel@tonic-gate 			 */
3567c478bd9Sstevel@tonic-gate 			upgraded = 1;
3577c478bd9Sstevel@tonic-gate 			se = SE_EXCL;
3587c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_lock_upgrade);
3597c478bd9Sstevel@tonic-gate 		}
3607c478bd9Sstevel@tonic-gate 	}
3617c478bd9Sstevel@tonic-gate 
3627c478bd9Sstevel@tonic-gate 	if (se == SE_EXCL) {
363db874c57Selowe 		if (!(es & SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
3647c478bd9Sstevel@tonic-gate 			/*
3657c478bd9Sstevel@tonic-gate 			 * if the caller wants a writer lock (but did not
3667c478bd9Sstevel@tonic-gate 			 * specify exclusive access), and there is a pending
3677c478bd9Sstevel@tonic-gate 			 * writer that wants exclusive access, return failure
3687c478bd9Sstevel@tonic-gate 			 */
3697c478bd9Sstevel@tonic-gate 			retval = 0;
3707c478bd9Sstevel@tonic-gate 		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
3717c478bd9Sstevel@tonic-gate 			/* no reader/writer lock held */
3727c478bd9Sstevel@tonic-gate 			THREAD_KPRI_REQUEST();
3737c478bd9Sstevel@tonic-gate 			/* this clears our setting of the SE_EWANTED bit */
3747c478bd9Sstevel@tonic-gate 			pp->p_selock = SE_WRITER;
3757c478bd9Sstevel@tonic-gate 			retval = 1;
3767c478bd9Sstevel@tonic-gate 		} else {
3777c478bd9Sstevel@tonic-gate 			/* page is locked */
378db874c57Selowe 			if (es & SE_EXCL_WANTED) {
3797c478bd9Sstevel@tonic-gate 				/* set the SE_EWANTED bit */
3807c478bd9Sstevel@tonic-gate 				pp->p_selock |= SE_EWANTED;
3817c478bd9Sstevel@tonic-gate 			}
3827c478bd9Sstevel@tonic-gate 			retval = 0;
3837c478bd9Sstevel@tonic-gate 		}
3847c478bd9Sstevel@tonic-gate 	} else {
3857c478bd9Sstevel@tonic-gate 		retval = 0;
3867c478bd9Sstevel@tonic-gate 		if (pp->p_selock >= 0) {
387db874c57Selowe 			if ((pp->p_selock & SE_EWANTED) == 0) {
3888bc68872Selowe 				pp->p_selock += SE_READER;
3898bc68872Selowe 				retval = 1;
3907c478bd9Sstevel@tonic-gate 			}
3917c478bd9Sstevel@tonic-gate 		}
3927c478bd9Sstevel@tonic-gate 	}
3937c478bd9Sstevel@tonic-gate 
3947c478bd9Sstevel@tonic-gate 	if (retval == 0) {
3957c478bd9Sstevel@tonic-gate 		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
3967c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_lock_deleted);
3977c478bd9Sstevel@tonic-gate 			mutex_exit(pse);
3987c478bd9Sstevel@tonic-gate 			return (retval);
3997c478bd9Sstevel@tonic-gate 		}
4007c478bd9Sstevel@tonic-gate 
4017c478bd9Sstevel@tonic-gate #ifdef VM_STATS
4027c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(page_lock_miss);
4037c478bd9Sstevel@tonic-gate 		if (upgraded) {
4047c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_lock_upgrade_failed);
4057c478bd9Sstevel@tonic-gate 		}
4067c478bd9Sstevel@tonic-gate #endif
4077c478bd9Sstevel@tonic-gate 		if (lock) {
4087c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_lock_miss_lock);
4097c478bd9Sstevel@tonic-gate 			mutex_exit(lock);
4107c478bd9Sstevel@tonic-gate 		}
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate 		/*
4137c478bd9Sstevel@tonic-gate 		 * Now, wait for the page to be unlocked and
4147c478bd9Sstevel@tonic-gate 		 * release the lock protecting p_cv and p_selock.
4157c478bd9Sstevel@tonic-gate 		 */
4167c478bd9Sstevel@tonic-gate 		cv_wait(&pp->p_cv, pse);
4177c478bd9Sstevel@tonic-gate 		mutex_exit(pse);
4187c478bd9Sstevel@tonic-gate 
4197c478bd9Sstevel@tonic-gate 		/*
4207c478bd9Sstevel@tonic-gate 		 * The page identity may have changed while we were
4217c478bd9Sstevel@tonic-gate 		 * blocked.  If we are willing to depend on "pp"
4227c478bd9Sstevel@tonic-gate 		 * still pointing to a valid page structure (i.e.,
4237c478bd9Sstevel@tonic-gate 		 * assuming page structures are not dynamically allocated
4247c478bd9Sstevel@tonic-gate 		 * or freed), we could try to lock the page if its
4257c478bd9Sstevel@tonic-gate 		 * identity hasn't changed.
4267c478bd9Sstevel@tonic-gate 		 *
4277c478bd9Sstevel@tonic-gate 		 * This needs to be measured, since we come back from
4287c478bd9Sstevel@tonic-gate 		 * cv_wait holding pse (the expensive part of this
4297c478bd9Sstevel@tonic-gate 		 * operation) we might as well try the cheap part.
4307c478bd9Sstevel@tonic-gate 		 * Though we would also have to confirm that dropping
4317c478bd9Sstevel@tonic-gate 		 * `lock' did not cause any grief to the callers.
4327c478bd9Sstevel@tonic-gate 		 */
4337c478bd9Sstevel@tonic-gate 		if (lock) {
4347c478bd9Sstevel@tonic-gate 			mutex_enter(lock);
4357c478bd9Sstevel@tonic-gate 		}
4367c478bd9Sstevel@tonic-gate 	} else {
4377c478bd9Sstevel@tonic-gate 		/*
4387c478bd9Sstevel@tonic-gate 		 * We have the page lock.
4397c478bd9Sstevel@tonic-gate 		 * If we needed to reclaim the page, and the page
4407c478bd9Sstevel@tonic-gate 		 * needed reclaiming (ie, it was free), then we
4417c478bd9Sstevel@tonic-gate 		 * have the page exclusively locked.  We may need
4427c478bd9Sstevel@tonic-gate 		 * to downgrade the page.
4437c478bd9Sstevel@tonic-gate 		 */
4447c478bd9Sstevel@tonic-gate 		ASSERT((upgraded) ?
4457c478bd9Sstevel@tonic-gate 		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
4467c478bd9Sstevel@tonic-gate 		mutex_exit(pse);
4477c478bd9Sstevel@tonic-gate 
4487c478bd9Sstevel@tonic-gate 		/*
4497c478bd9Sstevel@tonic-gate 		 * We now hold this page's lock, either shared or
4507c478bd9Sstevel@tonic-gate 		 * exclusive.  This will prevent its identity from changing.
4517c478bd9Sstevel@tonic-gate 		 * The page, however, may or may not be free.  If the caller
4527c478bd9Sstevel@tonic-gate 		 * requested, and it is free, go reclaim it from the
4537c478bd9Sstevel@tonic-gate 		 * free list.  If the page can't be reclaimed, return failure
4547c478bd9Sstevel@tonic-gate 		 * so that the caller can start all over again.
4557c478bd9Sstevel@tonic-gate 		 *
4567c478bd9Sstevel@tonic-gate 		 * NOTE:page_reclaim() releases the page lock (p_selock)
4577c478bd9Sstevel@tonic-gate 		 *	if it can't be reclaimed.
4587c478bd9Sstevel@tonic-gate 		 */
4597c478bd9Sstevel@tonic-gate 		if (reclaim_it) {
4607c478bd9Sstevel@tonic-gate 			if (!page_reclaim(pp, lock)) {
4617c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(page_lock_bad_reclaim);
4627c478bd9Sstevel@tonic-gate 				retval = 0;
4637c478bd9Sstevel@tonic-gate 			} else {
4647c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(page_lock_reclaim);
4657c478bd9Sstevel@tonic-gate 				if (upgraded) {
4667c478bd9Sstevel@tonic-gate 					page_downgrade(pp);
4677c478bd9Sstevel@tonic-gate 				}
4687c478bd9Sstevel@tonic-gate 			}
4697c478bd9Sstevel@tonic-gate 		}
4707c478bd9Sstevel@tonic-gate 	}
4717c478bd9Sstevel@tonic-gate 	return (retval);
4727c478bd9Sstevel@tonic-gate }
4737c478bd9Sstevel@tonic-gate 
4747c478bd9Sstevel@tonic-gate /*
4757c478bd9Sstevel@tonic-gate  * Clear the SE_EWANTED bit from p_selock.  This function allows
4767c478bd9Sstevel@tonic-gate  * callers of page_lock_es and page_try_reclaim_lock to clear
4777c478bd9Sstevel@tonic-gate  * their setting of this bit if they decide they no longer wish
4787c478bd9Sstevel@tonic-gate  * to gain exclusive access to the page.  Currently only
4797c478bd9Sstevel@tonic-gate  * delete_memory_thread uses this when the delete memory
4807c478bd9Sstevel@tonic-gate  * operation is cancelled.
4817c478bd9Sstevel@tonic-gate  */
4827c478bd9Sstevel@tonic-gate void
4837c478bd9Sstevel@tonic-gate page_lock_clr_exclwanted(page_t *pp)
4847c478bd9Sstevel@tonic-gate {
4857c478bd9Sstevel@tonic-gate 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
4867c478bd9Sstevel@tonic-gate 
4877c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
4887c478bd9Sstevel@tonic-gate 	pp->p_selock &= ~SE_EWANTED;
4897c478bd9Sstevel@tonic-gate 	if (CV_HAS_WAITERS(&pp->p_cv))
4907c478bd9Sstevel@tonic-gate 		cv_broadcast(&pp->p_cv);
4917c478bd9Sstevel@tonic-gate 	mutex_exit(pse);
4927c478bd9Sstevel@tonic-gate }
4937c478bd9Sstevel@tonic-gate 
4947c478bd9Sstevel@tonic-gate /*
4957c478bd9Sstevel@tonic-gate  * Read the comments inside of page_lock_es() carefully.
4967c478bd9Sstevel@tonic-gate  *
4977c478bd9Sstevel@tonic-gate  * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the
4987c478bd9Sstevel@tonic-gate  * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained.
4997c478bd9Sstevel@tonic-gate  * This is used by threads subject to reader-starvation (eg. memory delete).
5007c478bd9Sstevel@tonic-gate  *
5017c478bd9Sstevel@tonic-gate  * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock,
5027c478bd9Sstevel@tonic-gate  * it is expected that it will retry at a later time.  Threads that will
5037c478bd9Sstevel@tonic-gate  * not retry the lock *must* call page_lock_clr_exclwanted to clear the
5047c478bd9Sstevel@tonic-gate  * SE_EWANTED bit.  (When a thread using SE_EXCL_WANTED obtains the lock,
5057c478bd9Sstevel@tonic-gate  * the bit is cleared.)
5067c478bd9Sstevel@tonic-gate  */
5077c478bd9Sstevel@tonic-gate int
5087c478bd9Sstevel@tonic-gate page_try_reclaim_lock(page_t *pp, se_t se, int es)
5097c478bd9Sstevel@tonic-gate {
5107c478bd9Sstevel@tonic-gate 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
5117c478bd9Sstevel@tonic-gate 	selock_t old;
5127c478bd9Sstevel@tonic-gate 
5137c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
5147c478bd9Sstevel@tonic-gate 
5157c478bd9Sstevel@tonic-gate 	old = pp->p_selock;
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate 	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
518db874c57Selowe 	    ((es & SE_EXCL_WANTED) && (se == SE_EXCL)));
519db874c57Selowe 
520db874c57Selowe 	if (PP_RETIRED(pp) && !(es & SE_RETIRED)) {
521db874c57Selowe 		mutex_exit(pse);
522db874c57Selowe 		VM_STAT_ADD(page_trylock_failed);
523db874c57Selowe 		return (0);
524db874c57Selowe 	}
5257c478bd9Sstevel@tonic-gate 
5267c478bd9Sstevel@tonic-gate 	if (se == SE_SHARED && es == 1 && old == 0) {
5277c478bd9Sstevel@tonic-gate 		se = SE_EXCL;
5287c478bd9Sstevel@tonic-gate 	}
5297c478bd9Sstevel@tonic-gate 
5307c478bd9Sstevel@tonic-gate 	if (se == SE_SHARED) {
5317c478bd9Sstevel@tonic-gate 		if (!PP_ISFREE(pp)) {
5327c478bd9Sstevel@tonic-gate 			if (old >= 0) {
533db874c57Selowe 				/*
534db874c57Selowe 				 * Readers are not allowed when excl wanted
535db874c57Selowe 				 */
536db874c57Selowe 				if ((old & SE_EWANTED) == 0) {
5378bc68872Selowe 					pp->p_selock = old + SE_READER;
5388bc68872Selowe 					mutex_exit(pse);
5398bc68872Selowe 					return (1);
5407c478bd9Sstevel@tonic-gate 				}
5417c478bd9Sstevel@tonic-gate 			}
5427c478bd9Sstevel@tonic-gate 			mutex_exit(pse);
5437c478bd9Sstevel@tonic-gate 			return (0);
5447c478bd9Sstevel@tonic-gate 		}
5457c478bd9Sstevel@tonic-gate 		/*
5467c478bd9Sstevel@tonic-gate 		 * The page is free, so we really want SE_EXCL (below)
5477c478bd9Sstevel@tonic-gate 		 */
5487c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(page_try_reclaim_upgrade);
5497c478bd9Sstevel@tonic-gate 	}
5507c478bd9Sstevel@tonic-gate 
5517c478bd9Sstevel@tonic-gate 	/*
5527c478bd9Sstevel@tonic-gate 	 * The caller wants a writer lock.  We try for it only if
5537c478bd9Sstevel@tonic-gate 	 * SE_EWANTED is not set, or if the caller specified
5547c478bd9Sstevel@tonic-gate 	 * SE_EXCL_WANTED.
5557c478bd9Sstevel@tonic-gate 	 */
556db874c57Selowe 	if (!(old & SE_EWANTED) || (es & SE_EXCL_WANTED)) {
5577c478bd9Sstevel@tonic-gate 		if ((old & ~SE_EWANTED) == 0) {
5587c478bd9Sstevel@tonic-gate 			/* no reader/writer lock held */
5597c478bd9Sstevel@tonic-gate 			THREAD_KPRI_REQUEST();
5607c478bd9Sstevel@tonic-gate 			/* this clears out our setting of the SE_EWANTED bit */
5617c478bd9Sstevel@tonic-gate 			pp->p_selock = SE_WRITER;
5627c478bd9Sstevel@tonic-gate 			mutex_exit(pse);
5637c478bd9Sstevel@tonic-gate 			return (1);
5647c478bd9Sstevel@tonic-gate 		}
5657c478bd9Sstevel@tonic-gate 	}
566db874c57Selowe 	if (es & SE_EXCL_WANTED) {
5677c478bd9Sstevel@tonic-gate 		/* page is locked, set the SE_EWANTED bit */
5687c478bd9Sstevel@tonic-gate 		pp->p_selock |= SE_EWANTED;
5697c478bd9Sstevel@tonic-gate 	}
5707c478bd9Sstevel@tonic-gate 	mutex_exit(pse);
5717c478bd9Sstevel@tonic-gate 	return (0);
5727c478bd9Sstevel@tonic-gate }
5737c478bd9Sstevel@tonic-gate 
5747c478bd9Sstevel@tonic-gate /*
5757c478bd9Sstevel@tonic-gate  * Acquire a page's "shared/exclusive" lock, but never block.
5767c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
5777c478bd9Sstevel@tonic-gate  */
5787c478bd9Sstevel@tonic-gate int
5797c478bd9Sstevel@tonic-gate page_trylock(page_t *pp, se_t se)
5807c478bd9Sstevel@tonic-gate {
5817c478bd9Sstevel@tonic-gate 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
5827c478bd9Sstevel@tonic-gate 
5837c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
584db874c57Selowe 	if (pp->p_selock & SE_EWANTED || PP_RETIRED(pp) ||
58524e9c58bSelowe 	    (se == SE_SHARED && PP_PR_NOSHARE(pp))) {
586db874c57Selowe 		/*
587db874c57Selowe 		 * Fail if a thread wants exclusive access and page is
588db874c57Selowe 		 * retired, if the page is slated for retirement, or a
589db874c57Selowe 		 * share lock is requested.
590db874c57Selowe 		 */
5917c478bd9Sstevel@tonic-gate 		mutex_exit(pse);
592db874c57Selowe 		VM_STAT_ADD(page_trylock_failed);
5937c478bd9Sstevel@tonic-gate 		return (0);
5947c478bd9Sstevel@tonic-gate 	}
5957c478bd9Sstevel@tonic-gate 
5967c478bd9Sstevel@tonic-gate 	if (se == SE_EXCL) {
5977c478bd9Sstevel@tonic-gate 		if (pp->p_selock == 0) {
5987c478bd9Sstevel@tonic-gate 			THREAD_KPRI_REQUEST();
5997c478bd9Sstevel@tonic-gate 			pp->p_selock = SE_WRITER;
6007c478bd9Sstevel@tonic-gate 			mutex_exit(pse);
6017c478bd9Sstevel@tonic-gate 			return (1);
6027c478bd9Sstevel@tonic-gate 		}
6037c478bd9Sstevel@tonic-gate 	} else {
6047c478bd9Sstevel@tonic-gate 		if (pp->p_selock >= 0) {
6057c478bd9Sstevel@tonic-gate 			pp->p_selock += SE_READER;
6067c478bd9Sstevel@tonic-gate 			mutex_exit(pse);
6077c478bd9Sstevel@tonic-gate 			return (1);
6087c478bd9Sstevel@tonic-gate 		}
6097c478bd9Sstevel@tonic-gate 	}
6107c478bd9Sstevel@tonic-gate 	mutex_exit(pse);
6117c478bd9Sstevel@tonic-gate 	return (0);
6127c478bd9Sstevel@tonic-gate }
6137c478bd9Sstevel@tonic-gate 
614db874c57Selowe /*
615db874c57Selowe  * Variant of page_unlock() specifically for the page freelist
616db874c57Selowe  * code. The mere existence of this code is a vile hack that
617db874c57Selowe  * has resulted due to the backwards locking order of the page
618db874c57Selowe  * freelist manager; please don't call it.
619db874c57Selowe  */
620db874c57Selowe void
6218b464eb8Smec page_unlock_nocapture(page_t *pp)
622db874c57Selowe {
623db874c57Selowe 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
624db874c57Selowe 	selock_t old;
625db874c57Selowe 
626db874c57Selowe 	mutex_enter(pse);
627db874c57Selowe 
628db874c57Selowe 	old = pp->p_selock;
629db874c57Selowe 	if ((old & ~SE_EWANTED) == SE_READER) {
630db874c57Selowe 		pp->p_selock = old & ~SE_READER;
631db874c57Selowe 		if (CV_HAS_WAITERS(&pp->p_cv))
632db874c57Selowe 			cv_broadcast(&pp->p_cv);
633db874c57Selowe 	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
634903a11ebSrh 		panic("page_unlock_nocapture: page %p is deleted", (void *)pp);
635db874c57Selowe 	} else if (old < 0) {
636db874c57Selowe 		THREAD_KPRI_RELEASE();
637db874c57Selowe 		pp->p_selock &= SE_EWANTED;
638db874c57Selowe 		if (CV_HAS_WAITERS(&pp->p_cv))
639db874c57Selowe 			cv_broadcast(&pp->p_cv);
640db874c57Selowe 	} else if ((old & ~SE_EWANTED) > SE_READER) {
641db874c57Selowe 		pp->p_selock = old - SE_READER;
642db874c57Selowe 	} else {
643903a11ebSrh 		panic("page_unlock_nocapture: page %p is not locked",
644903a11ebSrh 		    (void *)pp);
645db874c57Selowe 	}
646db874c57Selowe 
647db874c57Selowe 	mutex_exit(pse);
648db874c57Selowe }
649db874c57Selowe 
6507c478bd9Sstevel@tonic-gate /*
6517c478bd9Sstevel@tonic-gate  * Release the page's "shared/exclusive" lock and wake up anyone
6527c478bd9Sstevel@tonic-gate  * who might be waiting for it.
6537c478bd9Sstevel@tonic-gate  */
6547c478bd9Sstevel@tonic-gate void
6557c478bd9Sstevel@tonic-gate page_unlock(page_t *pp)
6567c478bd9Sstevel@tonic-gate {
6577c478bd9Sstevel@tonic-gate 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
6587c478bd9Sstevel@tonic-gate 	selock_t old;
6597c478bd9Sstevel@tonic-gate 
6607c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
661db874c57Selowe 
6627c478bd9Sstevel@tonic-gate 	old = pp->p_selock;
6637c478bd9Sstevel@tonic-gate 	if ((old & ~SE_EWANTED) == SE_READER) {
6647c478bd9Sstevel@tonic-gate 		pp->p_selock = old & ~SE_READER;
6657c478bd9Sstevel@tonic-gate 		if (CV_HAS_WAITERS(&pp->p_cv))
6667c478bd9Sstevel@tonic-gate 			cv_broadcast(&pp->p_cv);
6677c478bd9Sstevel@tonic-gate 	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
668903a11ebSrh 		panic("page_unlock: page %p is deleted", (void *)pp);
6697c478bd9Sstevel@tonic-gate 	} else if (old < 0) {
6707c478bd9Sstevel@tonic-gate 		THREAD_KPRI_RELEASE();
6717c478bd9Sstevel@tonic-gate 		pp->p_selock &= SE_EWANTED;
6727c478bd9Sstevel@tonic-gate 		if (CV_HAS_WAITERS(&pp->p_cv))
6737c478bd9Sstevel@tonic-gate 			cv_broadcast(&pp->p_cv);
6747c478bd9Sstevel@tonic-gate 	} else if ((old & ~SE_EWANTED) > SE_READER) {
6757c478bd9Sstevel@tonic-gate 		pp->p_selock = old - SE_READER;
6767c478bd9Sstevel@tonic-gate 	} else {
677903a11ebSrh 		panic("page_unlock: page %p is not locked", (void *)pp);
6787c478bd9Sstevel@tonic-gate 	}
679db874c57Selowe 
6808b464eb8Smec 	if (pp->p_selock == 0) {
681db874c57Selowe 		/*
6828b464eb8Smec 		 * If the T_CAPTURING bit is set, that means that we should
6838b464eb8Smec 		 * not try and capture the page again as we could recurse
6848b464eb8Smec 		 * which could lead to a stack overflow panic or spending a
6858b464eb8Smec 		 * relatively long time in the kernel making no progress.
686db874c57Selowe 		 */
6878b464eb8Smec 		if ((pp->p_toxic & PR_CAPTURE) &&
6888b464eb8Smec 		    !(curthread->t_flag & T_CAPTURING) &&
6898b464eb8Smec 		    !PP_RETIRED(pp)) {
690db874c57Selowe 			THREAD_KPRI_REQUEST();
691db874c57Selowe 			pp->p_selock = SE_WRITER;
692db874c57Selowe 			mutex_exit(pse);
6938b464eb8Smec 			page_unlock_capture(pp);
694db874c57Selowe 		} else {
695db874c57Selowe 			mutex_exit(pse);
696db874c57Selowe 		}
697db874c57Selowe 	} else {
698db874c57Selowe 		mutex_exit(pse);
699db874c57Selowe 	}
7007c478bd9Sstevel@tonic-gate }
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate /*
7037c478bd9Sstevel@tonic-gate  * Try to upgrade the lock on the page from a "shared" to an
7047c478bd9Sstevel@tonic-gate  * "exclusive" lock.  Since this upgrade operation is done while
7057c478bd9Sstevel@tonic-gate  * holding the mutex protecting this page, no one else can acquire this page's
7067c478bd9Sstevel@tonic-gate  * lock and change the page. Thus, it is safe to drop the "shared"
7077c478bd9Sstevel@tonic-gate  * lock and attempt to acquire the "exclusive" lock.
7087c478bd9Sstevel@tonic-gate  *
7097c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
7107c478bd9Sstevel@tonic-gate  */
7117c478bd9Sstevel@tonic-gate int
7127c478bd9Sstevel@tonic-gate page_tryupgrade(page_t *pp)
7137c478bd9Sstevel@tonic-gate {
7147c478bd9Sstevel@tonic-gate 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
7157c478bd9Sstevel@tonic-gate 
7167c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
7177c478bd9Sstevel@tonic-gate 	if (!(pp->p_selock & SE_EWANTED)) {
7187c478bd9Sstevel@tonic-gate 		/* no threads want exclusive access, try upgrade */
7197c478bd9Sstevel@tonic-gate 		if (pp->p_selock == SE_READER) {
7207c478bd9Sstevel@tonic-gate 			THREAD_KPRI_REQUEST();
7217c478bd9Sstevel@tonic-gate 			/* convert to exclusive lock */
7227c478bd9Sstevel@tonic-gate 			pp->p_selock = SE_WRITER;
7237c478bd9Sstevel@tonic-gate 			mutex_exit(pse);
7247c478bd9Sstevel@tonic-gate 			return (1);
7257c478bd9Sstevel@tonic-gate 		}
7267c478bd9Sstevel@tonic-gate 	}
7277c478bd9Sstevel@tonic-gate 	mutex_exit(pse);
7287c478bd9Sstevel@tonic-gate 	return (0);
7297c478bd9Sstevel@tonic-gate }
7307c478bd9Sstevel@tonic-gate 
7317c478bd9Sstevel@tonic-gate /*
7327c478bd9Sstevel@tonic-gate  * Downgrade the "exclusive" lock on the page to a "shared" lock
7337c478bd9Sstevel@tonic-gate  * while holding the mutex protecting this page's p_selock field.
7347c478bd9Sstevel@tonic-gate  */
7357c478bd9Sstevel@tonic-gate void
7367c478bd9Sstevel@tonic-gate page_downgrade(page_t *pp)
7377c478bd9Sstevel@tonic-gate {
7387c478bd9Sstevel@tonic-gate 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
7397c478bd9Sstevel@tonic-gate 	int excl_waiting;
7407c478bd9Sstevel@tonic-gate 
7417c478bd9Sstevel@tonic-gate 	ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED);
7427c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_EXCL(pp));
7437c478bd9Sstevel@tonic-gate 
7447c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
7457c478bd9Sstevel@tonic-gate 	excl_waiting =  pp->p_selock & SE_EWANTED;
7467c478bd9Sstevel@tonic-gate 	THREAD_KPRI_RELEASE();
7477c478bd9Sstevel@tonic-gate 	pp->p_selock = SE_READER | excl_waiting;
7487c478bd9Sstevel@tonic-gate 	if (CV_HAS_WAITERS(&pp->p_cv))
7497c478bd9Sstevel@tonic-gate 		cv_broadcast(&pp->p_cv);
7507c478bd9Sstevel@tonic-gate 	mutex_exit(pse);
7517c478bd9Sstevel@tonic-gate }
7527c478bd9Sstevel@tonic-gate 
7537c478bd9Sstevel@tonic-gate void
7547c478bd9Sstevel@tonic-gate page_lock_delete(page_t *pp)
7557c478bd9Sstevel@tonic-gate {
7567c478bd9Sstevel@tonic-gate 	kmutex_t *pse = PAGE_SE_MUTEX(pp);
7577c478bd9Sstevel@tonic-gate 
7587c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_EXCL(pp));
7597c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_vnode == NULL);
7607c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_offset == (u_offset_t)-1);
7617c478bd9Sstevel@tonic-gate 	ASSERT(!PP_ISFREE(pp));
7627c478bd9Sstevel@tonic-gate 
7637c478bd9Sstevel@tonic-gate 	mutex_enter(pse);
7647c478bd9Sstevel@tonic-gate 	THREAD_KPRI_RELEASE();
7657c478bd9Sstevel@tonic-gate 	pp->p_selock = SE_DELETED;
7667c478bd9Sstevel@tonic-gate 	if (CV_HAS_WAITERS(&pp->p_cv))
7677c478bd9Sstevel@tonic-gate 		cv_broadcast(&pp->p_cv);
7687c478bd9Sstevel@tonic-gate 	mutex_exit(pse);
7697c478bd9Sstevel@tonic-gate }
7707c478bd9Sstevel@tonic-gate 
7718b464eb8Smec int
7728b464eb8Smec page_deleted(page_t *pp)
7738b464eb8Smec {
7748b464eb8Smec 	return (pp->p_selock == SE_DELETED);
7758b464eb8Smec }
7768b464eb8Smec 
7777c478bd9Sstevel@tonic-gate /*
7787c478bd9Sstevel@tonic-gate  * Implement the io lock for pages
7797c478bd9Sstevel@tonic-gate  */
7807c478bd9Sstevel@tonic-gate void
7817c478bd9Sstevel@tonic-gate page_iolock_init(page_t *pp)
7827c478bd9Sstevel@tonic-gate {
7837c478bd9Sstevel@tonic-gate 	pp->p_iolock_state = 0;
7847c478bd9Sstevel@tonic-gate 	cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL);
7857c478bd9Sstevel@tonic-gate }
7867c478bd9Sstevel@tonic-gate 
7877c478bd9Sstevel@tonic-gate /*
7887c478bd9Sstevel@tonic-gate  * Acquire the i/o lock on a page.
7897c478bd9Sstevel@tonic-gate  */
7907c478bd9Sstevel@tonic-gate void
7917c478bd9Sstevel@tonic-gate page_io_lock(page_t *pp)
7927c478bd9Sstevel@tonic-gate {
7937c478bd9Sstevel@tonic-gate 	kmutex_t *pio;
7947c478bd9Sstevel@tonic-gate 
7957c478bd9Sstevel@tonic-gate 	pio = PAGE_IO_MUTEX(pp);
7967c478bd9Sstevel@tonic-gate 	mutex_enter(pio);
7977c478bd9Sstevel@tonic-gate 	while (pp->p_iolock_state & PAGE_IO_INUSE) {
7987c478bd9Sstevel@tonic-gate 		cv_wait(&(pp->p_io_cv), pio);
7997c478bd9Sstevel@tonic-gate 	}
8007c478bd9Sstevel@tonic-gate 	pp->p_iolock_state |= PAGE_IO_INUSE;
8017c478bd9Sstevel@tonic-gate 	mutex_exit(pio);
8027c478bd9Sstevel@tonic-gate }
8037c478bd9Sstevel@tonic-gate 
8047c478bd9Sstevel@tonic-gate /*
8057c478bd9Sstevel@tonic-gate  * Release the i/o lock on a page.
8067c478bd9Sstevel@tonic-gate  */
8077c478bd9Sstevel@tonic-gate void
8087c478bd9Sstevel@tonic-gate page_io_unlock(page_t *pp)
8097c478bd9Sstevel@tonic-gate {
8107c478bd9Sstevel@tonic-gate 	kmutex_t *pio;
8117c478bd9Sstevel@tonic-gate 
8127c478bd9Sstevel@tonic-gate 	pio = PAGE_IO_MUTEX(pp);
8137c478bd9Sstevel@tonic-gate 	mutex_enter(pio);
814a71e32b6Sstans 	cv_broadcast(&pp->p_io_cv);
8157c478bd9Sstevel@tonic-gate 	pp->p_iolock_state &= ~PAGE_IO_INUSE;
8167c478bd9Sstevel@tonic-gate 	mutex_exit(pio);
8177c478bd9Sstevel@tonic-gate }
8187c478bd9Sstevel@tonic-gate 
8197c478bd9Sstevel@tonic-gate /*
8207c478bd9Sstevel@tonic-gate  * Try to acquire the i/o lock on a page without blocking.
8217c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
8227c478bd9Sstevel@tonic-gate  */
8237c478bd9Sstevel@tonic-gate int
8247c478bd9Sstevel@tonic-gate page_io_trylock(page_t *pp)
8257c478bd9Sstevel@tonic-gate {
8267c478bd9Sstevel@tonic-gate 	kmutex_t *pio;
8277c478bd9Sstevel@tonic-gate 
8287c478bd9Sstevel@tonic-gate 	if (pp->p_iolock_state & PAGE_IO_INUSE)
8297c478bd9Sstevel@tonic-gate 		return (0);
8307c478bd9Sstevel@tonic-gate 
8317c478bd9Sstevel@tonic-gate 	pio = PAGE_IO_MUTEX(pp);
8327c478bd9Sstevel@tonic-gate 	mutex_enter(pio);
8337c478bd9Sstevel@tonic-gate 
8347c478bd9Sstevel@tonic-gate 	if (pp->p_iolock_state & PAGE_IO_INUSE) {
8357c478bd9Sstevel@tonic-gate 		mutex_exit(pio);
8367c478bd9Sstevel@tonic-gate 		return (0);
8377c478bd9Sstevel@tonic-gate 	}
8387c478bd9Sstevel@tonic-gate 	pp->p_iolock_state |= PAGE_IO_INUSE;
8397c478bd9Sstevel@tonic-gate 	mutex_exit(pio);
8407c478bd9Sstevel@tonic-gate 
8417c478bd9Sstevel@tonic-gate 	return (1);
8427c478bd9Sstevel@tonic-gate }
8437c478bd9Sstevel@tonic-gate 
844a71e32b6Sstans /*
845a71e32b6Sstans  * Wait until the i/o lock is not held.
846a71e32b6Sstans  */
847a71e32b6Sstans void
848a71e32b6Sstans page_io_wait(page_t *pp)
849a71e32b6Sstans {
850a71e32b6Sstans 	kmutex_t *pio;
851a71e32b6Sstans 
852a71e32b6Sstans 	pio = PAGE_IO_MUTEX(pp);
853a71e32b6Sstans 	mutex_enter(pio);
854a71e32b6Sstans 	while (pp->p_iolock_state & PAGE_IO_INUSE) {
855a71e32b6Sstans 		cv_wait(&(pp->p_io_cv), pio);
856a71e32b6Sstans 	}
857a71e32b6Sstans 	mutex_exit(pio);
858a71e32b6Sstans }
859a71e32b6Sstans 
860a71e32b6Sstans /*
861a71e32b6Sstans  * Returns 1 on success, 0 on failure.
862a71e32b6Sstans  */
863a71e32b6Sstans int
864a71e32b6Sstans page_io_locked(page_t *pp)
865a71e32b6Sstans {
866a71e32b6Sstans 	return (pp->p_iolock_state & PAGE_IO_INUSE);
867a71e32b6Sstans }
868a71e32b6Sstans 
8697c478bd9Sstevel@tonic-gate /*
8707c478bd9Sstevel@tonic-gate  * Assert that the i/o lock on a page is held.
8717c478bd9Sstevel@tonic-gate  * Returns 1 on success, 0 on failure.
8727c478bd9Sstevel@tonic-gate  */
8737c478bd9Sstevel@tonic-gate int
8747c478bd9Sstevel@tonic-gate page_iolock_assert(page_t *pp)
8757c478bd9Sstevel@tonic-gate {
876a71e32b6Sstans 	return (page_io_locked(pp));
8777c478bd9Sstevel@tonic-gate }
8787c478bd9Sstevel@tonic-gate 
8797c478bd9Sstevel@tonic-gate /*
8807c478bd9Sstevel@tonic-gate  * Wrapper exported to kernel routines that are built
8817c478bd9Sstevel@tonic-gate  * platform-independent (the macro is platform-dependent;
8827c478bd9Sstevel@tonic-gate  * the size of vph_mutex[] is based on NCPU).
8837c478bd9Sstevel@tonic-gate  *
8847c478bd9Sstevel@tonic-gate  * Note that you can do stress testing on this by setting the
8857c478bd9Sstevel@tonic-gate  * variable page_vnode_mutex_stress to something other than
8867c478bd9Sstevel@tonic-gate  * zero in a DEBUG kernel in a debugger after loading the kernel.
8877c478bd9Sstevel@tonic-gate  * Setting it after the kernel is running may not work correctly.
8887c478bd9Sstevel@tonic-gate  */
8897c478bd9Sstevel@tonic-gate #ifdef DEBUG
8907c478bd9Sstevel@tonic-gate static int page_vnode_mutex_stress = 0;
8917c478bd9Sstevel@tonic-gate #endif
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate kmutex_t *
8947c478bd9Sstevel@tonic-gate page_vnode_mutex(vnode_t *vp)
8957c478bd9Sstevel@tonic-gate {
8967c478bd9Sstevel@tonic-gate 	if (vp == &kvp)
8977c478bd9Sstevel@tonic-gate 		return (&vph_mutex[VPH_TABLE_SIZE + 0]);
898ad23a2dbSjohansen 
899ad23a2dbSjohansen 	if (vp == &zvp)
900ad23a2dbSjohansen 		return (&vph_mutex[VPH_TABLE_SIZE + 1]);
9017c478bd9Sstevel@tonic-gate #ifdef DEBUG
9027c478bd9Sstevel@tonic-gate 	if (page_vnode_mutex_stress != 0)
9037c478bd9Sstevel@tonic-gate 		return (&vph_mutex[0]);
9047c478bd9Sstevel@tonic-gate #endif
9057c478bd9Sstevel@tonic-gate 
9067c478bd9Sstevel@tonic-gate 	return (&vph_mutex[VP_HASH_FUNC(vp)]);
9077c478bd9Sstevel@tonic-gate }
9087c478bd9Sstevel@tonic-gate 
9097c478bd9Sstevel@tonic-gate kmutex_t *
9107c478bd9Sstevel@tonic-gate page_se_mutex(page_t *pp)
9117c478bd9Sstevel@tonic-gate {
9127c478bd9Sstevel@tonic-gate 	return (PAGE_SE_MUTEX(pp));
9137c478bd9Sstevel@tonic-gate }
9147c478bd9Sstevel@tonic-gate 
9157c478bd9Sstevel@tonic-gate #ifdef VM_STATS
9167c478bd9Sstevel@tonic-gate uint_t pszclck_stat[4];
9177c478bd9Sstevel@tonic-gate #endif
9187c478bd9Sstevel@tonic-gate /*
9197c478bd9Sstevel@tonic-gate  * Find, take and return a mutex held by hat_page_demote().
9207c478bd9Sstevel@tonic-gate  * Called by page_demote_vp_pages() before hat_page_demote() call and by
9217c478bd9Sstevel@tonic-gate  * routines that want to block hat_page_demote() but can't do it
9227c478bd9Sstevel@tonic-gate  * via locking all constituent pages.
9237c478bd9Sstevel@tonic-gate  *
9247c478bd9Sstevel@tonic-gate  * Return NULL if p_szc is 0.
9257c478bd9Sstevel@tonic-gate  *
9267c478bd9Sstevel@tonic-gate  * It should only be used for pages that can be demoted by hat_page_demote()
9277c478bd9Sstevel@tonic-gate  * i.e. non swapfs file system pages.  The logic here is lifted from
9287c478bd9Sstevel@tonic-gate  * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
9297c478bd9Sstevel@tonic-gate  * since the page is locked and not free.
9307c478bd9Sstevel@tonic-gate  *
9317c478bd9Sstevel@tonic-gate  * Hash of the root page is used to find the lock.
9327c478bd9Sstevel@tonic-gate  * To find the root in the presense of hat_page_demote() chageing the location
9337c478bd9Sstevel@tonic-gate  * of the root this routine relies on the fact that hat_page_demote() changes
9347c478bd9Sstevel@tonic-gate  * root last.
9357c478bd9Sstevel@tonic-gate  *
9367c478bd9Sstevel@tonic-gate  * If NULL is returned pp's p_szc is guaranteed to be 0. If non NULL is
9377c478bd9Sstevel@tonic-gate  * returned pp's p_szc may be any value.
9387c478bd9Sstevel@tonic-gate  */
9397c478bd9Sstevel@tonic-gate kmutex_t *
9407c478bd9Sstevel@tonic-gate page_szc_lock(page_t *pp)
9417c478bd9Sstevel@tonic-gate {
9427c478bd9Sstevel@tonic-gate 	kmutex_t	*mtx;
9437c478bd9Sstevel@tonic-gate 	page_t		*rootpp;
9447c478bd9Sstevel@tonic-gate 	uint_t		szc;
9457c478bd9Sstevel@tonic-gate 	uint_t		rszc;
9467c478bd9Sstevel@tonic-gate 	uint_t		pszc = pp->p_szc;
9477c478bd9Sstevel@tonic-gate 
9487c478bd9Sstevel@tonic-gate 	ASSERT(pp != NULL);
9497c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(pp));
9507c478bd9Sstevel@tonic-gate 	ASSERT(!PP_ISFREE(pp));
9517c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_vnode != NULL);
9527c478bd9Sstevel@tonic-gate 	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
953ad23a2dbSjohansen 	ASSERT(!PP_ISKAS(pp));
9547c478bd9Sstevel@tonic-gate 
9557c478bd9Sstevel@tonic-gate again:
9567c478bd9Sstevel@tonic-gate 	if (pszc == 0) {
9577c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(pszclck_stat[0]);
9587c478bd9Sstevel@tonic-gate 		return (NULL);
9597c478bd9Sstevel@tonic-gate 	}
9607c478bd9Sstevel@tonic-gate 
9617c478bd9Sstevel@tonic-gate 	/* The lock lives in the root page */
9627c478bd9Sstevel@tonic-gate 
9637c478bd9Sstevel@tonic-gate 	rootpp = PP_GROUPLEADER(pp, pszc);
9647c478bd9Sstevel@tonic-gate 	mtx = PAGE_SZC_MUTEX(rootpp);
9657c478bd9Sstevel@tonic-gate 	mutex_enter(mtx);
9667c478bd9Sstevel@tonic-gate 
9677c478bd9Sstevel@tonic-gate 	/*
9687c478bd9Sstevel@tonic-gate 	 * since p_szc can only decrease if pp == rootpp
9697c478bd9Sstevel@tonic-gate 	 * rootpp will be always the same i.e we have the right root
9707c478bd9Sstevel@tonic-gate 	 * regardless of rootpp->p_szc.
9717c478bd9Sstevel@tonic-gate 	 * If location of pp's root didn't change after we took
9727c478bd9Sstevel@tonic-gate 	 * the lock we have the right root. return mutex hashed off it.
9737c478bd9Sstevel@tonic-gate 	 */
9747c478bd9Sstevel@tonic-gate 	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
9757c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(pszclck_stat[1]);
9767c478bd9Sstevel@tonic-gate 		return (mtx);
9777c478bd9Sstevel@tonic-gate 	}
9787c478bd9Sstevel@tonic-gate 
9797c478bd9Sstevel@tonic-gate 	/*
9807c478bd9Sstevel@tonic-gate 	 * root location changed because page got demoted.
9817c478bd9Sstevel@tonic-gate 	 * locate the new root.
9827c478bd9Sstevel@tonic-gate 	 */
9837c478bd9Sstevel@tonic-gate 	if (rszc < pszc) {
9847c478bd9Sstevel@tonic-gate 		szc = pp->p_szc;
9857c478bd9Sstevel@tonic-gate 		ASSERT(szc < pszc);
9867c478bd9Sstevel@tonic-gate 		mutex_exit(mtx);
9877c478bd9Sstevel@tonic-gate 		pszc = szc;
9887c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(pszclck_stat[2]);
9897c478bd9Sstevel@tonic-gate 		goto again;
9907c478bd9Sstevel@tonic-gate 	}
9917c478bd9Sstevel@tonic-gate 
9927c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pszclck_stat[3]);
9937c478bd9Sstevel@tonic-gate 	/*
9947c478bd9Sstevel@tonic-gate 	 * current hat_page_demote not done yet.
9957c478bd9Sstevel@tonic-gate 	 * wait for it to finish.
9967c478bd9Sstevel@tonic-gate 	 */
9977c478bd9Sstevel@tonic-gate 	mutex_exit(mtx);
9987c478bd9Sstevel@tonic-gate 	rootpp = PP_GROUPLEADER(rootpp, rszc);
9997c478bd9Sstevel@tonic-gate 	mtx = PAGE_SZC_MUTEX(rootpp);
10007c478bd9Sstevel@tonic-gate 	mutex_enter(mtx);
10017c478bd9Sstevel@tonic-gate 	mutex_exit(mtx);
10027c478bd9Sstevel@tonic-gate 	ASSERT(rootpp->p_szc < rszc);
10037c478bd9Sstevel@tonic-gate 	goto again;
10047c478bd9Sstevel@tonic-gate }
10057c478bd9Sstevel@tonic-gate 
10067c478bd9Sstevel@tonic-gate int
10077c478bd9Sstevel@tonic-gate page_szc_lock_assert(page_t *pp)
10087c478bd9Sstevel@tonic-gate {
10097c478bd9Sstevel@tonic-gate 	page_t *rootpp = PP_PAGEROOT(pp);
10107c478bd9Sstevel@tonic-gate 	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);
10117c478bd9Sstevel@tonic-gate 
10127c478bd9Sstevel@tonic-gate 	return (MUTEX_HELD(mtx));
10137c478bd9Sstevel@tonic-gate }
1014ae115bc7Smrj 
1015ae115bc7Smrj /*
1016ae115bc7Smrj  * memseg locking
1017ae115bc7Smrj  */
1018ae115bc7Smrj static krwlock_t memsegslock;
1019ae115bc7Smrj 
1020ae115bc7Smrj /*
1021ae115bc7Smrj  * memlist (phys_install, phys_avail) locking.
1022ae115bc7Smrj  */
1023ae115bc7Smrj static krwlock_t memlists_lock;
1024ae115bc7Smrj 
1025*af4c679fSSean McEnroe int
1026*af4c679fSSean McEnroe memsegs_trylock(int writer)
1027*af4c679fSSean McEnroe {
1028*af4c679fSSean McEnroe 	return (rw_tryenter(&memsegslock, writer ? RW_WRITER : RW_READER));
1029*af4c679fSSean McEnroe }
1030*af4c679fSSean McEnroe 
1031ae115bc7Smrj void
1032ae115bc7Smrj memsegs_lock(int writer)
1033ae115bc7Smrj {
1034ae115bc7Smrj 	rw_enter(&memsegslock, writer ? RW_WRITER : RW_READER);
1035ae115bc7Smrj }
1036ae115bc7Smrj 
1037ae115bc7Smrj /*ARGSUSED*/
1038ae115bc7Smrj void
1039ae115bc7Smrj memsegs_unlock(int writer)
1040ae115bc7Smrj {
1041ae115bc7Smrj 	rw_exit(&memsegslock);
1042ae115bc7Smrj }
1043ae115bc7Smrj 
1044ae115bc7Smrj int
1045ae115bc7Smrj memsegs_lock_held(void)
1046ae115bc7Smrj {
1047ae115bc7Smrj 	return (RW_LOCK_HELD(&memsegslock));
1048ae115bc7Smrj }
1049ae115bc7Smrj 
1050ae115bc7Smrj void
1051ae115bc7Smrj memlist_read_lock(void)
1052ae115bc7Smrj {
1053ae115bc7Smrj 	rw_enter(&memlists_lock, RW_READER);
1054ae115bc7Smrj }
1055ae115bc7Smrj 
1056ae115bc7Smrj void
1057ae115bc7Smrj memlist_read_unlock(void)
1058ae115bc7Smrj {
1059ae115bc7Smrj 	rw_exit(&memlists_lock);
1060ae115bc7Smrj }
1061ae115bc7Smrj 
1062ae115bc7Smrj void
1063ae115bc7Smrj memlist_write_lock(void)
1064ae115bc7Smrj {
1065ae115bc7Smrj 	rw_enter(&memlists_lock, RW_WRITER);
1066ae115bc7Smrj }
1067ae115bc7Smrj 
1068ae115bc7Smrj void
1069ae115bc7Smrj memlist_write_unlock(void)
1070ae115bc7Smrj {
1071ae115bc7Smrj 	rw_exit(&memlists_lock);
1072ae115bc7Smrj }
1073