1*7c478bd9Sstevel@tonic-gate /* 2*7c478bd9Sstevel@tonic-gate * CDDL HEADER START 3*7c478bd9Sstevel@tonic-gate * 4*7c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*7c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 6*7c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 7*7c478bd9Sstevel@tonic-gate * with the License. 8*7c478bd9Sstevel@tonic-gate * 9*7c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10*7c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 11*7c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 12*7c478bd9Sstevel@tonic-gate * and limitations under the License. 13*7c478bd9Sstevel@tonic-gate * 14*7c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 15*7c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16*7c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 17*7c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 18*7c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 19*7c478bd9Sstevel@tonic-gate * 20*7c478bd9Sstevel@tonic-gate * CDDL HEADER END 21*7c478bd9Sstevel@tonic-gate */ 22*7c478bd9Sstevel@tonic-gate /* 23*7c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24*7c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * VM - page locking primitives
 */
#include <sys/param.h>
#include <sys/t_lock.h>
#include <sys/vtrace.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>
#include <sys/vnode.h>
#include <sys/bitmap.h>
#include <sys/lockstat.h>
#include <sys/condvar_impl.h>
#include <vm/page.h>
#include <vm/seg_enum.h>
#include <vm/vm_dep.h>

/*
 * This global mutex is for logical page locking.
 * The following fields in the page structure are protected
 * by this lock:
 *
 *	p_lckcnt
 *	p_cowcnt
 */
kmutex_t page_llock;

/*
 * This is a global lock for the logical page free list.  The
 * logical free list, in this implementation, is maintained as two
 * separate physical lists - the cache list and the free list.
 */
kmutex_t page_freelock;

/*
 * The hash table, page_hash[], the p_selock fields, and the
 * list of pages associated with vnodes are protected by arrays of mutexes.
 *
 * Unless the hashes are changed radically, the table sizes must be
 * a power of two.  Also, we typically need more mutexes for the
 * vnodes since these locks are occasionally held for long periods.
 * And since there seem to be two special vnodes (kvp and swapvp),
 * we make room for private mutexes for them.
 *
 * The pse_mutex[] array holds the mutexes to protect the p_selock
 * fields of all page_t structures.
 *
 * PAGE_SE_MUTEX(pp) returns the address of the appropriate mutex
 * when given a pointer to a page_t.
 *
 * PSE_TABLE_SIZE must be a power of two.  One could argue that we
 * should go to the trouble of setting it up at run time and base it
 * on memory size rather than the number of compile time CPUs.
 *
 * XX64	We should be using physmem size to calculate PSE_TABLE_SIZE,
 *	PSE_SHIFT, PIO_SHIFT.
 *
 *	These might break in 64 bit world.
 */
#define	PSE_SHIFT	7		/* log2(PSE_TABLE_SIZE) */

#define	PSE_TABLE_SIZE	128		/* number of mutexes to have */

#define	PIO_SHIFT	PSE_SHIFT	/* next power of 2 bigger than page_t */
#define	PIO_TABLE_SIZE	PSE_TABLE_SIZE	/* number of io mutexes to have */

/* Mutex tables; each is indexed by hashing the page_t address (see below). */
pad_mutex_t	ph_mutex[PH_TABLE_SIZE];
pad_mutex_t	pse_mutex[PSE_TABLE_SIZE];
kmutex_t	pio_mutex[PIO_TABLE_SIZE];

/*
 * Map a page_t pointer to its p_selock guard mutex.  Two shifted
 * copies of the address are xor'ed so that more address bits
 * contribute to the index.
 */
#define	PAGE_SE_MUTEX(pp) \
	    &pse_mutex[((((uintptr_t)(pp) >> PSE_SHIFT) ^ \
		((uintptr_t)(pp) >> (PSE_SHIFT << 1))) & \
		(PSE_TABLE_SIZE - 1))].pad_mutex

/* Map a page_t pointer to its i/o mutex. */
#define	PAGE_IO_MUTEX(pp) \
	    &pio_mutex[(((uintptr_t)pp) >> PIO_SHIFT) & (PIO_TABLE_SIZE - 1)]

#define	PSZC_MTX_TABLE_SIZE	128
#define	PSZC_MTX_TABLE_SHIFT	7

static pad_mutex_t	pszc_mutex[PSZC_MTX_TABLE_SIZE];

/*
 * Map a page_t pointer to the mutex used when updating its p_szc field;
 * three shifted copies of the address are folded together.
 */
#define	PAGE_SZC_MUTEX(_pp) \
	    &pszc_mutex[((((uintptr_t)(_pp) >> PSZC_MTX_TABLE_SHIFT) ^ \
		((uintptr_t)(_pp) >> (PSZC_MTX_TABLE_SHIFT << 1)) ^ \
		((uintptr_t)(_pp) >> (3 * PSZC_MTX_TABLE_SHIFT))) & \
		(PSZC_MTX_TABLE_SIZE - 1))].pad_mutex

/*
 * The vph_mutex[] array holds the mutexes to protect the vnode chains,
 * (i.e., the list of pages anchored by v_pages and connected via p_vpprev
 * and p_vpnext).
 *
 * The page_vnode_mutex(vp) function returns the address of the appropriate
 * mutex from this array given a pointer to a vnode.  It is complicated
 * by the fact that the kernel's vnode and the swapfs vnode are referenced
 * frequently enough to warrant their own mutexes.
 *
 * The VP_HASH_FUNC returns the index into the vph_mutex array given
 * an address of a vnode.
 */

/*
 * XX64	VPH_TABLE_SIZE and VP_HASH_FUNC might break in 64 bit world.
 *	Need to review again.
 */
#define	VPH_TABLE_SIZE	(2 << VP_SHIFT)

#define	VP_HASH_FUNC(vp) \
	((((uintptr_t)(vp) >> 6) + \
	    ((uintptr_t)(vp) >> 8) + \
	    ((uintptr_t)(vp) >> 10) + \
	    ((uintptr_t)(vp) >> 12)) \
	    & (VPH_TABLE_SIZE - 1))

extern	struct vnode	kvp;

/* The two extra slots are the private mutexes for kvp and the swap vnode. */
kmutex_t	vph_mutex[VPH_TABLE_SIZE + 2];

/*
 * Initialize the locks used by the Virtual Memory Management system.
 */
void
page_lock_init()
{
	/* All lock tables are statically allocated; nothing to do here. */
}

/*
 * At present we only use page ownership to aid debugging, so it's
 * OK if the owner field isn't exact.  In the 32-bit world two thread ids
 * can map to the same owner because we just 'or' in 0x80000000 and
 * then clear the second highest bit, so that (for example) 0x2faced00
 * and 0xafaced00 both map to 0xafaced00.
 * In the 64-bit world, p_selock may not be large enough to hold a full
 * thread pointer.  If we ever need precise ownership (e.g. if we implement
 * priority inheritance for page locks) then p_selock should become a
 * uintptr_t and SE_WRITER should be -((uintptr_t)curthread >> 2).
 *
 * Note the sign convention used throughout this file: OR'ing in INT_MIN
 * makes every SE_WRITER value negative, while reader-held values are
 * positive multiples of SE_READER, so "p_selock < 0" tests for a writer.
 */
#define	SE_WRITER	(((selock_t)(ulong_t)curthread | INT_MIN) & ~SE_EWANTED)
#define	SE_READER	1

/*
 * A page that is deleted must be marked as such using the
 * page_lock_delete() function. The page must be exclusively locked.
 * The SE_DELETED marker is put in p_selock when this function is called.
 * SE_DELETED must be distinct from any SE_WRITER value.
 */
#define	SE_DELETED	(1 | INT_MIN)

/* Statistics counters, compiled in only when VM_STATS is defined. */
#ifdef VM_STATS
uint_t	vph_kvp_count;
uint_t	vph_swapfsvp_count;
uint_t	vph_other;
#endif /* VM_STATS */

#ifdef VM_STATS
uint_t	page_lock_count;
uint_t	page_lock_miss;
uint_t	page_lock_miss_lock;
uint_t	page_lock_reclaim;
uint_t	page_lock_bad_reclaim;
uint_t	page_lock_same_page;
uint_t	page_lock_upgrade;
uint_t	page_lock_upgrade_failed;
uint_t	page_lock_deleted;

uint_t	page_trylock_locked;
uint_t	page_trylock_missed;

uint_t	page_try_reclaim_upgrade;
#endif /* VM_STATS */


/*
 * Acquire the "shared/exclusive" lock on a page.
 *
 * Returns 1 on success and locks the page appropriately.
 *	0 on failure and does not lock the page.
 *
 * If `lock' is non-NULL, it will be dropped and reacquired in the
 * failure case.
 * This routine can block, and if it does
 * it will always return a failure since the page identity [vp, off]
 * or state may have changed.
 */

int
page_lock(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim)
{
	/* Simple wrapper: es == 0 means no special exclusive-wanted flags. */
	return (page_lock_es(pp, se, lock, reclaim, 0));
}

/*
 * With the addition of reader-writer lock semantics to page_lock_es,
 * callers wanting an exclusive (writer) lock may prevent shared-lock
 * (reader) starvation by setting the es parameter to SE_EXCL_WANTED.
 * In this case, when an exclusive lock cannot be acquired, p_selock's
 * SE_EWANTED bit is set.
226*7c478bd9Sstevel@tonic-gate * This bit, along with the se and es parameters, are used to decide 227*7c478bd9Sstevel@tonic-gate * if the requested lock should be granted: 228*7c478bd9Sstevel@tonic-gate * 229*7c478bd9Sstevel@tonic-gate * Lock wanted SE_EXCL_WANTED p_selock/SE_EWANTED Action 230*7c478bd9Sstevel@tonic-gate * ---------- -------------- ------------------- --------- 231*7c478bd9Sstevel@tonic-gate * SE_EXCL no dont-care/1 deny lock 232*7c478bd9Sstevel@tonic-gate * SE_EXCL any(see note) unlocked/any grant lock, clear SE_EWANTED 233*7c478bd9Sstevel@tonic-gate * SE_EXCL yes any lock/any deny, set SE_EWANTED 234*7c478bd9Sstevel@tonic-gate * SE_EXCL no any lock/any deny 235*7c478bd9Sstevel@tonic-gate * SE_SHARED not applicable shared/0 grant 236*7c478bd9Sstevel@tonic-gate * SE_SHARED not applicable unlocked/0 grant 237*7c478bd9Sstevel@tonic-gate * SE_SHARED not applicable shared/1 deny 238*7c478bd9Sstevel@tonic-gate * SE_SHARED not applicable unlocked/1 deny 239*7c478bd9Sstevel@tonic-gate * SE_SHARED not applicable excl/any deny 240*7c478bd9Sstevel@tonic-gate * 241*7c478bd9Sstevel@tonic-gate * Note: the code grants an exclusive lock to the caller and clears 242*7c478bd9Sstevel@tonic-gate * SE_EWANTED whenever p_selock is unlocked, regardless of the SE_EWANTED 243*7c478bd9Sstevel@tonic-gate * bit's value. This was deemed acceptable as we are not concerned about 244*7c478bd9Sstevel@tonic-gate * exclusive-lock starvation. If this ever becomes an issue, a priority or 245*7c478bd9Sstevel@tonic-gate * fifo mechanism should also be implemented. 
 */
int
page_lock_es(page_t *pp, se_t se, kmutex_t *lock, reclaim_t reclaim, int es)
{
	int		retval;
	kmutex_t	*pse = PAGE_SE_MUTEX(pp);
	int		upgraded;
	int		reclaim_it;

	/* If the caller passed a mutex, it must already hold it. */
	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);

	VM_STAT_ADD(page_lock_count);

	upgraded = 0;
	reclaim_it = 0;

	mutex_enter(pse);

	/*
	 * Current uses of 'es':
	 * es == 1		page_lookup_create will attempt page relocation
	 * es == SE_EXCL_WANTED	caller wants SE_EWANTED set (eg. delete
	 * memory thread); this prevents reader-starvation of waiting
	 * writer thread(s).
	 */


	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es == SE_EXCL_WANTED) && (se == SE_EXCL)));

	/*
	 * es == 1: if the page is currently unlocked, opportunistically
	 * take it exclusively instead of shared (relocation may need it).
	 */
	if (se == SE_SHARED && es == 1 && pp->p_selock == 0) {
		se = SE_EXCL;
	}

	if ((reclaim == P_RECLAIM) && (PP_ISFREE(pp))) {

		reclaim_it = 1;
		if (se == SE_SHARED) {
			/*
			 * This is an interesting situation.
			 *
			 * Remember that p_free can only change if
			 * p_selock < 0.
			 * p_free does not depend on our holding `pse'.
			 * And, since we hold `pse', p_selock can not change.
			 * So, if p_free changes on us, the page is already
			 * exclusively held, and we would fail to get p_selock
			 * regardless.
			 *
			 * We want to avoid getting the share
			 * lock on a free page that needs to be reclaimed.
			 * It is possible that some other thread has the share
			 * lock and has left the free page on the cache list.
			 * pvn_vplist_dirty() does this for brief periods.
			 * If the se_share is currently SE_EXCL, we will fail
			 * to acquire p_selock anyway.  Blocking is the
			 * right thing to do.
			 * If we need to reclaim this page, we must get
			 * exclusive access to it, force the upgrade now.
			 * Again, we will fail to acquire p_selock if the
			 * page is not free and block.
			 */
			upgraded = 1;
			se = SE_EXCL;
			VM_STAT_ADD(page_lock_upgrade);
		}
	}

	if (se == SE_EXCL) {
		if ((es != SE_EXCL_WANTED) && (pp->p_selock & SE_EWANTED)) {
			/*
			 * if the caller wants a writer lock (but did not
			 * specify exclusive access), and there is a pending
			 * writer that wants exclusive access, return failure
			 */
			retval = 0;
		} else if ((pp->p_selock & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			retval = 1;
		} else {
			/* page is locked */
			if (es == SE_EXCL_WANTED) {
				/* set the SE_EWANTED bit */
				pp->p_selock |= SE_EWANTED;
			}
			retval = 0;
		}
	} else {
		retval = 0;
		if (pp->p_selock >= 0) {
			/* readers are not allowed when excl wanted */
			if (!(pp->p_selock & SE_EWANTED)) {
				pp->p_selock += SE_READER;
				retval = 1;
			}
		}
	}

	if (retval == 0) {
		/* A deleted page can never be locked again; fail fast. */
		if ((pp->p_selock & ~SE_EWANTED) == SE_DELETED) {
			VM_STAT_ADD(page_lock_deleted);
			mutex_exit(pse);
			return (retval);
		}

#ifdef VM_STATS
		VM_STAT_ADD(page_lock_miss);
		if (upgraded) {
			VM_STAT_ADD(page_lock_upgrade_failed);
		}
#endif
		if (lock) {
			VM_STAT_ADD(page_lock_miss_lock);
			mutex_exit(lock);
		}

		/*
		 * Now, wait for the page to be unlocked and
		 * release the lock protecting p_cv and p_selock.
		 */
		cv_wait(&pp->p_cv, pse);
		mutex_exit(pse);

		/*
		 * The page identity may have changed while we were
		 * blocked.  If we are willing to depend on "pp"
		 * still pointing to a valid page structure (i.e.,
		 * assuming page structures are not dynamically allocated
		 * or freed), we could try to lock the page if its
		 * identity hasn't changed.
		 *
		 * This needs to be measured, since we come back from
		 * cv_wait holding pse (the expensive part of this
		 * operation) we might as well try the cheap part.
		 * Though we would also have to confirm that dropping
		 * `lock' did not cause any grief to the callers.
		 */
		if (lock) {
			mutex_enter(lock);
		}
	} else {
		/*
		 * We have the page lock.
		 * If we needed to reclaim the page, and the page
		 * needed reclaiming (ie, it was free), then we
		 * have the page exclusively locked.  We may need
		 * to downgrade the page.
		 */
		ASSERT((upgraded) ?
		    ((PP_ISFREE(pp)) && PAGE_EXCL(pp)) : 1);
		mutex_exit(pse);

		/*
		 * We now hold this page's lock, either shared or
		 * exclusive.  This will prevent its identity from changing.
		 * The page, however, may or may not be free.  If the caller
		 * requested, and it is free, go reclaim it from the
		 * free list.  If the page can't be reclaimed, return failure
		 * so that the caller can start all over again.
		 *
		 * NOTE:page_reclaim() releases the page lock (p_selock)
		 *	if it can't be reclaimed.
		 */
		if (reclaim_it) {
			if (!page_reclaim(pp, lock)) {
				VM_STAT_ADD(page_lock_bad_reclaim);
				retval = 0;
			} else {
				VM_STAT_ADD(page_lock_reclaim);
				if (upgraded) {
					/* caller asked for shared; restore */
					page_downgrade(pp);
				}
			}
		}
	}
	return (retval);
}

/*
 * Clear the SE_EWANTED bit from p_selock.  This function allows
 * callers of page_lock_es and page_try_reclaim_lock to clear
 * their setting of this bit if they decide they no longer wish
 * to gain exclusive access to the page.  Currently only
 * delete_memory_thread uses this when the delete memory
 * operation is cancelled.
434*7c478bd9Sstevel@tonic-gate */ 435*7c478bd9Sstevel@tonic-gate void 436*7c478bd9Sstevel@tonic-gate page_lock_clr_exclwanted(page_t *pp) 437*7c478bd9Sstevel@tonic-gate { 438*7c478bd9Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp); 439*7c478bd9Sstevel@tonic-gate 440*7c478bd9Sstevel@tonic-gate mutex_enter(pse); 441*7c478bd9Sstevel@tonic-gate pp->p_selock &= ~SE_EWANTED; 442*7c478bd9Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv)) 443*7c478bd9Sstevel@tonic-gate cv_broadcast(&pp->p_cv); 444*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 445*7c478bd9Sstevel@tonic-gate } 446*7c478bd9Sstevel@tonic-gate 447*7c478bd9Sstevel@tonic-gate /* 448*7c478bd9Sstevel@tonic-gate * Read the comments inside of page_lock_es() carefully. 449*7c478bd9Sstevel@tonic-gate * 450*7c478bd9Sstevel@tonic-gate * SE_EXCL callers specifying es == SE_EXCL_WANTED will cause the 451*7c478bd9Sstevel@tonic-gate * SE_EWANTED bit of p_selock to be set when the lock cannot be obtained. 452*7c478bd9Sstevel@tonic-gate * This is used by threads subject to reader-starvation (eg. memory delete). 453*7c478bd9Sstevel@tonic-gate * 454*7c478bd9Sstevel@tonic-gate * When a thread using SE_EXCL_WANTED does not obtain the SE_EXCL lock, 455*7c478bd9Sstevel@tonic-gate * it is expected that it will retry at a later time. Threads that will 456*7c478bd9Sstevel@tonic-gate * not retry the lock *must* call page_lock_clr_exclwanted to clear the 457*7c478bd9Sstevel@tonic-gate * SE_EWANTED bit. (When a thread using SE_EXCL_WANTED obtains the lock, 458*7c478bd9Sstevel@tonic-gate * the bit is cleared.) 
 */
int
page_try_reclaim_lock(page_t *pp, se_t se, int es)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);

	/* snapshot of the lock word; pse prevents it from changing under us */
	old = pp->p_selock;

	ASSERT(((es & SE_EXCL_WANTED) == 0) ||
	    ((es == SE_EXCL_WANTED) && (se == SE_EXCL)));

	/* es == 1: take an unlocked page exclusively (see page_lock_es) */
	if (se == SE_SHARED && es == 1 && old == 0) {
		se = SE_EXCL;
	}

	if (se == SE_SHARED) {
		if (!PP_ISFREE(pp)) {
			if (old >= 0) {
				/* readers are not allowed when excl wanted */
				if (!(old & SE_EWANTED)) {
					pp->p_selock = old + SE_READER;
					mutex_exit(pse);
					return (1);
				}
			}
			mutex_exit(pse);
			return (0);
		}
		/*
		 * The page is free, so we really want SE_EXCL (below)
		 */
		VM_STAT_ADD(page_try_reclaim_upgrade);
	}

	/*
	 * The caller wants a writer lock.  We try for it only if
	 * SE_EWANTED is not set, or if the caller specified
	 * SE_EXCL_WANTED.
	 */
	if (!(old & SE_EWANTED) || (es == SE_EXCL_WANTED)) {
		if ((old & ~SE_EWANTED) == 0) {
			/* no reader/writer lock held */
			THREAD_KPRI_REQUEST();
			/* this clears out our setting of the SE_EWANTED bit */
			pp->p_selock = SE_WRITER;
			mutex_exit(pse);
			return (1);
		}
	}
	if (es == SE_EXCL_WANTED) {
		/* page is locked, set the SE_EWANTED bit */
		pp->p_selock |= SE_EWANTED;
	}
	mutex_exit(pse);
	return (0);
}

/*
 * Acquire a page's "shared/exclusive" lock, but never block.
 * Returns 1 on success, 0 on failure.
522*7c478bd9Sstevel@tonic-gate */ 523*7c478bd9Sstevel@tonic-gate int 524*7c478bd9Sstevel@tonic-gate page_trylock(page_t *pp, se_t se) 525*7c478bd9Sstevel@tonic-gate { 526*7c478bd9Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp); 527*7c478bd9Sstevel@tonic-gate 528*7c478bd9Sstevel@tonic-gate mutex_enter(pse); 529*7c478bd9Sstevel@tonic-gate if (pp->p_selock & SE_EWANTED) { 530*7c478bd9Sstevel@tonic-gate /* fail if a thread wants exclusive access */ 531*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 532*7c478bd9Sstevel@tonic-gate return (0); 533*7c478bd9Sstevel@tonic-gate } 534*7c478bd9Sstevel@tonic-gate 535*7c478bd9Sstevel@tonic-gate if (se == SE_EXCL) { 536*7c478bd9Sstevel@tonic-gate if (pp->p_selock == 0) { 537*7c478bd9Sstevel@tonic-gate THREAD_KPRI_REQUEST(); 538*7c478bd9Sstevel@tonic-gate pp->p_selock = SE_WRITER; 539*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 540*7c478bd9Sstevel@tonic-gate return (1); 541*7c478bd9Sstevel@tonic-gate } 542*7c478bd9Sstevel@tonic-gate } else { 543*7c478bd9Sstevel@tonic-gate if (pp->p_selock >= 0) { 544*7c478bd9Sstevel@tonic-gate pp->p_selock += SE_READER; 545*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 546*7c478bd9Sstevel@tonic-gate return (1); 547*7c478bd9Sstevel@tonic-gate } 548*7c478bd9Sstevel@tonic-gate } 549*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 550*7c478bd9Sstevel@tonic-gate return (0); 551*7c478bd9Sstevel@tonic-gate } 552*7c478bd9Sstevel@tonic-gate 553*7c478bd9Sstevel@tonic-gate /* 554*7c478bd9Sstevel@tonic-gate * Release the page's "shared/exclusive" lock and wake up anyone 555*7c478bd9Sstevel@tonic-gate * who might be waiting for it. 
 */
void
page_unlock(page_t *pp)
{
	kmutex_t *pse = PAGE_SE_MUTEX(pp);
	selock_t old;

	mutex_enter(pse);
	/* snapshot the state once; all cases below key off this value */
	old = pp->p_selock;
	if ((old & ~SE_EWANTED) == SE_READER) {
		/*
		 * Exactly one shared holder (us): drop to unlocked while
		 * preserving any SE_EWANTED bit, then wake waiters so a
		 * would-be writer can try for the lock.
		 */
		pp->p_selock = old & ~SE_READER;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) == SE_DELETED) {
		/*
		 * Deleted pages must be ruled out before the generic
		 * negative (writer) case below is considered.
		 */
		panic("page_unlock: page %p is deleted", pp);
	} else if (old < 0) {
		/* negative selock means exclusively held (SE_WRITER) */
		THREAD_KPRI_RELEASE();
		/* keep only the SE_EWANTED bit; everything else clears */
		pp->p_selock &= SE_EWANTED;
		if (CV_HAS_WAITERS(&pp->p_cv))
			cv_broadcast(&pp->p_cv);
	} else if ((old & ~SE_EWANTED) > SE_READER) {
		/*
		 * Multiple shared holders remain: just drop our reference.
		 * No wakeup needed — the lock is still held shared.
		 */
		pp->p_selock = old - SE_READER;
	} else {
		/* unlocked (or only SE_EWANTED set): caller's bug */
		panic("page_unlock: page %p is not locked", pp);
	}
	mutex_exit(pse);
}

/*
 * Try to upgrade the lock on the page from a "shared" to an
 * "exclusive" lock.  Since this upgrade operation is done while
 * holding the mutex protecting this page, no one else can acquire this page's
 * lock and change the page.
Thus, it is safe to drop the "shared" 589*7c478bd9Sstevel@tonic-gate * lock and attempt to acquire the "exclusive" lock. 590*7c478bd9Sstevel@tonic-gate * 591*7c478bd9Sstevel@tonic-gate * Returns 1 on success, 0 on failure. 592*7c478bd9Sstevel@tonic-gate */ 593*7c478bd9Sstevel@tonic-gate int 594*7c478bd9Sstevel@tonic-gate page_tryupgrade(page_t *pp) 595*7c478bd9Sstevel@tonic-gate { 596*7c478bd9Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp); 597*7c478bd9Sstevel@tonic-gate 598*7c478bd9Sstevel@tonic-gate mutex_enter(pse); 599*7c478bd9Sstevel@tonic-gate if (!(pp->p_selock & SE_EWANTED)) { 600*7c478bd9Sstevel@tonic-gate /* no threads want exclusive access, try upgrade */ 601*7c478bd9Sstevel@tonic-gate if (pp->p_selock == SE_READER) { 602*7c478bd9Sstevel@tonic-gate THREAD_KPRI_REQUEST(); 603*7c478bd9Sstevel@tonic-gate /* convert to exclusive lock */ 604*7c478bd9Sstevel@tonic-gate pp->p_selock = SE_WRITER; 605*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 606*7c478bd9Sstevel@tonic-gate return (1); 607*7c478bd9Sstevel@tonic-gate } 608*7c478bd9Sstevel@tonic-gate } 609*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 610*7c478bd9Sstevel@tonic-gate return (0); 611*7c478bd9Sstevel@tonic-gate } 612*7c478bd9Sstevel@tonic-gate 613*7c478bd9Sstevel@tonic-gate /* 614*7c478bd9Sstevel@tonic-gate * Downgrade the "exclusive" lock on the page to a "shared" lock 615*7c478bd9Sstevel@tonic-gate * while holding the mutex protecting this page's p_selock field. 
616*7c478bd9Sstevel@tonic-gate */ 617*7c478bd9Sstevel@tonic-gate void 618*7c478bd9Sstevel@tonic-gate page_downgrade(page_t *pp) 619*7c478bd9Sstevel@tonic-gate { 620*7c478bd9Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp); 621*7c478bd9Sstevel@tonic-gate int excl_waiting; 622*7c478bd9Sstevel@tonic-gate 623*7c478bd9Sstevel@tonic-gate ASSERT((pp->p_selock & ~SE_EWANTED) != SE_DELETED); 624*7c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 625*7c478bd9Sstevel@tonic-gate 626*7c478bd9Sstevel@tonic-gate mutex_enter(pse); 627*7c478bd9Sstevel@tonic-gate excl_waiting = pp->p_selock & SE_EWANTED; 628*7c478bd9Sstevel@tonic-gate THREAD_KPRI_RELEASE(); 629*7c478bd9Sstevel@tonic-gate pp->p_selock = SE_READER | excl_waiting; 630*7c478bd9Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv)) 631*7c478bd9Sstevel@tonic-gate cv_broadcast(&pp->p_cv); 632*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 633*7c478bd9Sstevel@tonic-gate } 634*7c478bd9Sstevel@tonic-gate 635*7c478bd9Sstevel@tonic-gate void 636*7c478bd9Sstevel@tonic-gate page_lock_delete(page_t *pp) 637*7c478bd9Sstevel@tonic-gate { 638*7c478bd9Sstevel@tonic-gate kmutex_t *pse = PAGE_SE_MUTEX(pp); 639*7c478bd9Sstevel@tonic-gate 640*7c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 641*7c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL); 642*7c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1); 643*7c478bd9Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp)); 644*7c478bd9Sstevel@tonic-gate 645*7c478bd9Sstevel@tonic-gate mutex_enter(pse); 646*7c478bd9Sstevel@tonic-gate THREAD_KPRI_RELEASE(); 647*7c478bd9Sstevel@tonic-gate pp->p_selock = SE_DELETED; 648*7c478bd9Sstevel@tonic-gate if (CV_HAS_WAITERS(&pp->p_cv)) 649*7c478bd9Sstevel@tonic-gate cv_broadcast(&pp->p_cv); 650*7c478bd9Sstevel@tonic-gate mutex_exit(pse); 651*7c478bd9Sstevel@tonic-gate } 652*7c478bd9Sstevel@tonic-gate 653*7c478bd9Sstevel@tonic-gate /* 654*7c478bd9Sstevel@tonic-gate * Implement the io lock for pages 655*7c478bd9Sstevel@tonic-gate */ 
656*7c478bd9Sstevel@tonic-gate void 657*7c478bd9Sstevel@tonic-gate page_iolock_init(page_t *pp) 658*7c478bd9Sstevel@tonic-gate { 659*7c478bd9Sstevel@tonic-gate pp->p_iolock_state = 0; 660*7c478bd9Sstevel@tonic-gate cv_init(&pp->p_io_cv, NULL, CV_DEFAULT, NULL); 661*7c478bd9Sstevel@tonic-gate } 662*7c478bd9Sstevel@tonic-gate 663*7c478bd9Sstevel@tonic-gate /* 664*7c478bd9Sstevel@tonic-gate * Acquire the i/o lock on a page. 665*7c478bd9Sstevel@tonic-gate */ 666*7c478bd9Sstevel@tonic-gate void 667*7c478bd9Sstevel@tonic-gate page_io_lock(page_t *pp) 668*7c478bd9Sstevel@tonic-gate { 669*7c478bd9Sstevel@tonic-gate kmutex_t *pio; 670*7c478bd9Sstevel@tonic-gate 671*7c478bd9Sstevel@tonic-gate pio = PAGE_IO_MUTEX(pp); 672*7c478bd9Sstevel@tonic-gate mutex_enter(pio); 673*7c478bd9Sstevel@tonic-gate while (pp->p_iolock_state & PAGE_IO_INUSE) { 674*7c478bd9Sstevel@tonic-gate cv_wait(&(pp->p_io_cv), pio); 675*7c478bd9Sstevel@tonic-gate } 676*7c478bd9Sstevel@tonic-gate pp->p_iolock_state |= PAGE_IO_INUSE; 677*7c478bd9Sstevel@tonic-gate mutex_exit(pio); 678*7c478bd9Sstevel@tonic-gate } 679*7c478bd9Sstevel@tonic-gate 680*7c478bd9Sstevel@tonic-gate /* 681*7c478bd9Sstevel@tonic-gate * Release the i/o lock on a page. 682*7c478bd9Sstevel@tonic-gate */ 683*7c478bd9Sstevel@tonic-gate void 684*7c478bd9Sstevel@tonic-gate page_io_unlock(page_t *pp) 685*7c478bd9Sstevel@tonic-gate { 686*7c478bd9Sstevel@tonic-gate kmutex_t *pio; 687*7c478bd9Sstevel@tonic-gate 688*7c478bd9Sstevel@tonic-gate pio = PAGE_IO_MUTEX(pp); 689*7c478bd9Sstevel@tonic-gate mutex_enter(pio); 690*7c478bd9Sstevel@tonic-gate cv_signal(&pp->p_io_cv); 691*7c478bd9Sstevel@tonic-gate pp->p_iolock_state &= ~PAGE_IO_INUSE; 692*7c478bd9Sstevel@tonic-gate mutex_exit(pio); 693*7c478bd9Sstevel@tonic-gate } 694*7c478bd9Sstevel@tonic-gate 695*7c478bd9Sstevel@tonic-gate /* 696*7c478bd9Sstevel@tonic-gate * Try to acquire the i/o lock on a page without blocking. 697*7c478bd9Sstevel@tonic-gate * Returns 1 on success, 0 on failure. 
698*7c478bd9Sstevel@tonic-gate */ 699*7c478bd9Sstevel@tonic-gate int 700*7c478bd9Sstevel@tonic-gate page_io_trylock(page_t *pp) 701*7c478bd9Sstevel@tonic-gate { 702*7c478bd9Sstevel@tonic-gate kmutex_t *pio; 703*7c478bd9Sstevel@tonic-gate 704*7c478bd9Sstevel@tonic-gate if (pp->p_iolock_state & PAGE_IO_INUSE) 705*7c478bd9Sstevel@tonic-gate return (0); 706*7c478bd9Sstevel@tonic-gate 707*7c478bd9Sstevel@tonic-gate pio = PAGE_IO_MUTEX(pp); 708*7c478bd9Sstevel@tonic-gate mutex_enter(pio); 709*7c478bd9Sstevel@tonic-gate 710*7c478bd9Sstevel@tonic-gate if (pp->p_iolock_state & PAGE_IO_INUSE) { 711*7c478bd9Sstevel@tonic-gate mutex_exit(pio); 712*7c478bd9Sstevel@tonic-gate return (0); 713*7c478bd9Sstevel@tonic-gate } 714*7c478bd9Sstevel@tonic-gate pp->p_iolock_state |= PAGE_IO_INUSE; 715*7c478bd9Sstevel@tonic-gate mutex_exit(pio); 716*7c478bd9Sstevel@tonic-gate 717*7c478bd9Sstevel@tonic-gate return (1); 718*7c478bd9Sstevel@tonic-gate } 719*7c478bd9Sstevel@tonic-gate 720*7c478bd9Sstevel@tonic-gate /* 721*7c478bd9Sstevel@tonic-gate * Assert that the i/o lock on a page is held. 722*7c478bd9Sstevel@tonic-gate * Returns 1 on success, 0 on failure. 723*7c478bd9Sstevel@tonic-gate */ 724*7c478bd9Sstevel@tonic-gate int 725*7c478bd9Sstevel@tonic-gate page_iolock_assert(page_t *pp) 726*7c478bd9Sstevel@tonic-gate { 727*7c478bd9Sstevel@tonic-gate return (pp->p_iolock_state & PAGE_IO_INUSE); 728*7c478bd9Sstevel@tonic-gate } 729*7c478bd9Sstevel@tonic-gate 730*7c478bd9Sstevel@tonic-gate /* 731*7c478bd9Sstevel@tonic-gate * Wrapper exported to kernel routines that are built 732*7c478bd9Sstevel@tonic-gate * platform-independent (the macro is platform-dependent; 733*7c478bd9Sstevel@tonic-gate * the size of vph_mutex[] is based on NCPU). 
 *
 * Note that you can do stress testing on this by setting the
 * variable page_vnode_mutex_stress to something other than
 * zero in a DEBUG kernel in a debugger after loading the kernel.
 * Setting it after the kernel is running may not work correctly.
 */
#ifdef DEBUG
static int page_vnode_mutex_stress = 0;
#endif

/*
 * Return the mutex protecting vp's page list.  The kernel vnode
 * (kvp) gets the dedicated slot at index VPH_TABLE_SIZE; all other
 * vnodes hash into the table via VP_HASH_FUNC().
 */
kmutex_t *
page_vnode_mutex(vnode_t *vp)
{
	if (vp == &kvp)
		return (&vph_mutex[VPH_TABLE_SIZE + 0]);
#ifdef DEBUG
	/* stress mode: funnel every vnode onto a single mutex */
	if (page_vnode_mutex_stress != 0)
		return (&vph_mutex[0]);
#endif

	return (&vph_mutex[VP_HASH_FUNC(vp)]);
}

/*
 * Function wrapper around the platform-dependent PAGE_SE_MUTEX()
 * macro, for callers built platform-independent.
 */
kmutex_t *
page_se_mutex(page_t *pp)
{
	return (PAGE_SE_MUTEX(pp));
}

#ifdef VM_STATS
/* hit counters for the four page_szc_lock() outcomes below */
uint_t pszclck_stat[4];
#endif
/*
 * Find, take and return a mutex held by hat_page_demote().
 * Called by page_demote_vp_pages() before the hat_page_demote() call and by
 * routines that want to block hat_page_demote() but can't do it
 * via locking all constituent pages.
 *
 * Return NULL if p_szc is 0.
 *
 * It should only be used for pages that can be demoted by hat_page_demote(),
 * i.e. non-swapfs file system pages.  The logic here is lifted from
 * sfmmu_mlspl_enter() except there's no need to worry about p_szc increase
 * since the page is locked and not free.
 *
 * Hash of the root page is used to find the lock.
 * To find the root in the presence of hat_page_demote() changing the location
 * of the root, this routine relies on the fact that hat_page_demote() changes
 * the root last.
 *
 * If NULL is returned, pp's p_szc is guaranteed to be 0.  If non-NULL is
 * returned, pp's p_szc may be any value.
 */
kmutex_t *
page_szc_lock(page_t *pp)
{
	kmutex_t *mtx;
	page_t *rootpp;
	uint_t szc;
	uint_t rszc;
	uint_t pszc = pp->p_szc;	/* snapshot; may be stale vs. demote */

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(pp->p_vnode != &kvp);

again:
	if (pszc == 0) {
		/* small page: no large-page root, hence no lock to take */
		VM_STAT_ADD(pszclck_stat[0]);
		return (NULL);
	}

	/* The lock lives in the root page */

	rootpp = PP_GROUPLEADER(pp, pszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	mutex_enter(mtx);

	/*
	 * since p_szc can only decrease if pp == rootpp
	 * rootpp will be always the same i.e we have the right root
	 * regardless of rootpp->p_szc.
	 * If location of pp's root didn't change after we took
	 * the lock we have the right root. return mutex hashed off it.
	 */
	if (pp == rootpp || (rszc = rootpp->p_szc) == pszc) {
		VM_STAT_ADD(pszclck_stat[1]);
		return (mtx);
	}

	/*
	 * root location changed because page got demoted.
	 * locate the new root.
	 */
	if (rszc < pszc) {
		/* pp's own p_szc has shrunk; retry with the smaller size */
		szc = pp->p_szc;
		ASSERT(szc < pszc);
		mutex_exit(mtx);
		pszc = szc;
		VM_STAT_ADD(pszclck_stat[2]);
		goto again;
	}

	VM_STAT_ADD(pszclck_stat[3]);
	/*
	 * current hat_page_demote not done yet.
	 * wait for it to finish.
	 */
	mutex_exit(mtx);
	rootpp = PP_GROUPLEADER(rootpp, rszc);
	mtx = PAGE_SZC_MUTEX(rootpp);
	/* enter/exit pair: block until the active demotion drops the lock */
	mutex_enter(mtx);
	mutex_exit(mtx);
	ASSERT(rootpp->p_szc < rszc);
	goto again;
}

/*
 * Return nonzero iff the current thread holds the size-code mutex
 * hashed off pp's root page.  For use in ASSERT()s.
 */
int
page_szc_lock_assert(page_t *pp)
{
	page_t *rootpp = PP_PAGEROOT(pp);
	kmutex_t *mtx = PAGE_SZC_MUTEX(rootpp);

	return (MUTEX_HELD(mtx));
}