xref: /illumos-gate/usr/src/uts/common/vm/vm_anon.c (revision 2570281c)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5f5164d87Scwb  * Common Development and Distribution License (the "License").
6f5164d87Scwb  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22b942e89bSDavid Valin  * Copyright (c) 1986, 2010, Oracle and/or its affiliates. All rights reserved.
238905f42cSBryan Cantrill  * Copyright (c) 2015, Joyent, Inc. All rights reserved.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
277e897d1fSToomas Soome /*	  All Rights Reserved	*/
287c478bd9Sstevel@tonic-gate 
297c478bd9Sstevel@tonic-gate /*
307c478bd9Sstevel@tonic-gate  * University Copyright- Copyright (c) 1982, 1986, 1988
317c478bd9Sstevel@tonic-gate  * The Regents of the University of California
327c478bd9Sstevel@tonic-gate  * All Rights Reserved
337c478bd9Sstevel@tonic-gate  *
347c478bd9Sstevel@tonic-gate  * University Acknowledgment- Portions of this document are derived from
357c478bd9Sstevel@tonic-gate  * software developed by the University of California, Berkeley, and its
367c478bd9Sstevel@tonic-gate  * contributors.
377c478bd9Sstevel@tonic-gate  */
387c478bd9Sstevel@tonic-gate 
397c478bd9Sstevel@tonic-gate /*
407c478bd9Sstevel@tonic-gate  * VM - anonymous pages.
417c478bd9Sstevel@tonic-gate  *
427c478bd9Sstevel@tonic-gate  * This layer sits immediately above the vm_swap layer.  It manages
437c478bd9Sstevel@tonic-gate  * physical pages that have no permanent identity in the file system
447c478bd9Sstevel@tonic-gate  * name space, using the services of the vm_swap layer to allocate
457c478bd9Sstevel@tonic-gate  * backing storage for these pages.  Since these pages have no external
467c478bd9Sstevel@tonic-gate  * identity, they are discarded when the last reference is removed.
477c478bd9Sstevel@tonic-gate  *
487c478bd9Sstevel@tonic-gate  * An important function of this layer is to manage low-level sharing
497c478bd9Sstevel@tonic-gate  * of pages that are logically distinct but that happen to be
507c478bd9Sstevel@tonic-gate  * physically identical (e.g., the corresponding pages of the processes
517c478bd9Sstevel@tonic-gate  * resulting from a fork before one process or the other changes their
527c478bd9Sstevel@tonic-gate  * contents).  This pseudo-sharing is present only as an optimization
537c478bd9Sstevel@tonic-gate  * and is not to be confused with true sharing in which multiple
547c478bd9Sstevel@tonic-gate  * address spaces deliberately contain references to the same object;
557c478bd9Sstevel@tonic-gate  * such sharing is managed at a higher level.
567c478bd9Sstevel@tonic-gate  *
577c478bd9Sstevel@tonic-gate  * The key data structure here is the anon struct, which contains a
587c478bd9Sstevel@tonic-gate  * reference count for its associated physical page and a hint about
597c478bd9Sstevel@tonic-gate  * the identity of that page.  Anon structs typically live in arrays,
607c478bd9Sstevel@tonic-gate  * with an instance's position in its array determining where the
617c478bd9Sstevel@tonic-gate  * corresponding backing storage is allocated; however, the swap_xlate()
627c478bd9Sstevel@tonic-gate  * routine abstracts away this representation information so that the
637c478bd9Sstevel@tonic-gate  * rest of the anon layer need not know it.  (See the swap layer for
647c478bd9Sstevel@tonic-gate  * more details on anon struct layout.)
657c478bd9Sstevel@tonic-gate  *
 * In future versions of the system, the association between an
 * anon struct and its position on backing store will change so that
 * we don't require backing store for all anonymous pages in the system.
 * This is an important consideration for large memory systems.
707c478bd9Sstevel@tonic-gate  * We can also use this technique to delay binding physical locations
717c478bd9Sstevel@tonic-gate  * to anonymous pages until pageout/swapout time where we can make
727c478bd9Sstevel@tonic-gate  * smarter allocation decisions to improve anonymous klustering.
737c478bd9Sstevel@tonic-gate  *
747c478bd9Sstevel@tonic-gate  * Many of the routines defined here take a (struct anon **) argument,
757c478bd9Sstevel@tonic-gate  * which allows the code at this level to manage anon pages directly,
767c478bd9Sstevel@tonic-gate  * so that callers can regard anon structs as opaque objects and not be
777c478bd9Sstevel@tonic-gate  * concerned with assigning or inspecting their contents.
787c478bd9Sstevel@tonic-gate  *
797c478bd9Sstevel@tonic-gate  * Clients of this layer refer to anon pages indirectly.  That is, they
807c478bd9Sstevel@tonic-gate  * maintain arrays of pointers to anon structs rather than maintaining
817c478bd9Sstevel@tonic-gate  * anon structs themselves.  The (struct anon **) arguments mentioned
827c478bd9Sstevel@tonic-gate  * above are pointers to entries in these arrays.  It is these arrays
837c478bd9Sstevel@tonic-gate  * that capture the mapping between offsets within a given segment and
847c478bd9Sstevel@tonic-gate  * the corresponding anonymous backing storage address.
857c478bd9Sstevel@tonic-gate  */
867c478bd9Sstevel@tonic-gate 
877c478bd9Sstevel@tonic-gate #ifdef DEBUG
887c478bd9Sstevel@tonic-gate #define	ANON_DEBUG
897c478bd9Sstevel@tonic-gate #endif
907c478bd9Sstevel@tonic-gate 
917c478bd9Sstevel@tonic-gate #include <sys/types.h>
927c478bd9Sstevel@tonic-gate #include <sys/t_lock.h>
937c478bd9Sstevel@tonic-gate #include <sys/param.h>
947c478bd9Sstevel@tonic-gate #include <sys/systm.h>
957c478bd9Sstevel@tonic-gate #include <sys/mman.h>
967c478bd9Sstevel@tonic-gate #include <sys/cred.h>
977c478bd9Sstevel@tonic-gate #include <sys/thread.h>
987c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
997c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
1007c478bd9Sstevel@tonic-gate #include <sys/swap.h>
1017c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
1027c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
1037c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
1047c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
1057c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
1067c478bd9Sstevel@tonic-gate #include <sys/vmsystm.h>
107a98e9dbfSaguzovsk #include <sys/tuneable.h>
1087c478bd9Sstevel@tonic-gate #include <sys/debug.h>
109e44bd21cSsusans #include <sys/fs/swapnode.h>
1107c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
1117c478bd9Sstevel@tonic-gate #include <sys/policy.h>
1127c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h>
1137c478bd9Sstevel@tonic-gate #include <sys/mutex_impl.h>
1140209230bSgjelinek #include <sys/rctl.h>
1157c478bd9Sstevel@tonic-gate 
1167c478bd9Sstevel@tonic-gate #include <vm/as.h>
1177c478bd9Sstevel@tonic-gate #include <vm/hat.h>
1187c478bd9Sstevel@tonic-gate #include <vm/anon.h>
1197c478bd9Sstevel@tonic-gate #include <vm/page.h>
1207c478bd9Sstevel@tonic-gate #include <vm/vpage.h>
1217c478bd9Sstevel@tonic-gate #include <vm/seg.h>
1227c478bd9Sstevel@tonic-gate #include <vm/rm.h>
1237c478bd9Sstevel@tonic-gate 
1247c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h>
1257c478bd9Sstevel@tonic-gate 
126e44bd21cSsusans struct vnode *anon_vp;
127e44bd21cSsusans 
1287c478bd9Sstevel@tonic-gate int anon_debug;
1297c478bd9Sstevel@tonic-gate 
1307c478bd9Sstevel@tonic-gate kmutex_t	anoninfo_lock;
1317c478bd9Sstevel@tonic-gate struct		k_anoninfo k_anoninfo;
132b52a336eSPavel Tatashin ani_free_t	*ani_free_pool;
1337c478bd9Sstevel@tonic-gate pad_mutex_t	anon_array_lock[ANON_LOCKSIZE];
1347c478bd9Sstevel@tonic-gate kcondvar_t	anon_array_cv[ANON_LOCKSIZE];
1357c478bd9Sstevel@tonic-gate 
1367c478bd9Sstevel@tonic-gate /*
1377c478bd9Sstevel@tonic-gate  * Global hash table for (vp, off) -> anon slot
1387c478bd9Sstevel@tonic-gate  */
1397c478bd9Sstevel@tonic-gate extern	int swap_maxcontig;
1407c478bd9Sstevel@tonic-gate size_t	anon_hash_size;
141cb15d5d9SPeter Rival unsigned int anon_hash_shift;
1427c478bd9Sstevel@tonic-gate struct anon **anon_hash;
1437c478bd9Sstevel@tonic-gate 
1447c478bd9Sstevel@tonic-gate static struct kmem_cache *anon_cache;
1457c478bd9Sstevel@tonic-gate static struct kmem_cache *anonmap_cache;
1467c478bd9Sstevel@tonic-gate 
14723d9e5acSMichael Corcoran pad_mutex_t	*anonhash_lock;
14823d9e5acSMichael Corcoran 
14923d9e5acSMichael Corcoran /*
15023d9e5acSMichael Corcoran  * Used to make the increment of all refcnts of all anon slots of a large
15123d9e5acSMichael Corcoran  * page appear to be atomic.  The lock is grabbed for the first anon slot of
15223d9e5acSMichael Corcoran  * a large page.
15323d9e5acSMichael Corcoran  */
15423d9e5acSMichael Corcoran pad_mutex_t	*anonpages_hash_lock;
15523d9e5acSMichael Corcoran 
15623d9e5acSMichael Corcoran #define	APH_MUTEX(vp, off)				\
15723d9e5acSMichael Corcoran 	(&anonpages_hash_lock[(ANON_HASH((vp), (off)) &	\
15823d9e5acSMichael Corcoran 	    (AH_LOCK_SIZE - 1))].pad_mutex)
15923d9e5acSMichael Corcoran 
1607c478bd9Sstevel@tonic-gate #ifdef VM_STATS
1617c478bd9Sstevel@tonic-gate static struct anonvmstats_str {
1627c478bd9Sstevel@tonic-gate 	ulong_t getpages[30];
1637c478bd9Sstevel@tonic-gate 	ulong_t privatepages[10];
1647c478bd9Sstevel@tonic-gate 	ulong_t demotepages[9];
1657c478bd9Sstevel@tonic-gate 	ulong_t decrefpages[9];
1667c478bd9Sstevel@tonic-gate 	ulong_t	dupfillholes[4];
1677c478bd9Sstevel@tonic-gate 	ulong_t freepages[1];
1687c478bd9Sstevel@tonic-gate } anonvmstats;
1697c478bd9Sstevel@tonic-gate #endif /* VM_STATS */
1707c478bd9Sstevel@tonic-gate 
1717c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1727c478bd9Sstevel@tonic-gate static int
anonmap_cache_constructor(void * buf,void * cdrarg,int kmflags)1737c478bd9Sstevel@tonic-gate anonmap_cache_constructor(void *buf, void *cdrarg, int kmflags)
1747c478bd9Sstevel@tonic-gate {
1757c478bd9Sstevel@tonic-gate 	struct anon_map *amp = buf;
1767c478bd9Sstevel@tonic-gate 
1777c478bd9Sstevel@tonic-gate 	rw_init(&amp->a_rwlock, NULL, RW_DEFAULT, NULL);
178a98e9dbfSaguzovsk 	cv_init(&amp->a_purgecv, NULL, CV_DEFAULT, NULL);
179a98e9dbfSaguzovsk 	mutex_init(&amp->a_pmtx, NULL, MUTEX_DEFAULT, NULL);
180a98e9dbfSaguzovsk 	mutex_init(&amp->a_purgemtx, NULL, MUTEX_DEFAULT, NULL);
1817c478bd9Sstevel@tonic-gate 	return (0);
1827c478bd9Sstevel@tonic-gate }
1837c478bd9Sstevel@tonic-gate 
1847c478bd9Sstevel@tonic-gate /*ARGSUSED1*/
1857c478bd9Sstevel@tonic-gate static void
anonmap_cache_destructor(void * buf,void * cdrarg)1867c478bd9Sstevel@tonic-gate anonmap_cache_destructor(void *buf, void *cdrarg)
1877c478bd9Sstevel@tonic-gate {
1887c478bd9Sstevel@tonic-gate 	struct anon_map *amp = buf;
1897c478bd9Sstevel@tonic-gate 
1907c478bd9Sstevel@tonic-gate 	rw_destroy(&amp->a_rwlock);
191a98e9dbfSaguzovsk 	cv_destroy(&amp->a_purgecv);
192a98e9dbfSaguzovsk 	mutex_destroy(&amp->a_pmtx);
193a98e9dbfSaguzovsk 	mutex_destroy(&amp->a_purgemtx);
1947c478bd9Sstevel@tonic-gate }
1957c478bd9Sstevel@tonic-gate 
/*
 * One-time initialization of the anon layer, called at boot.  Sizes and
 * allocates the global (vp, off) -> anon hash table and its padded lock
 * arrays, creates the anon/anon_map kmem caches, sets up the per-CPU-style
 * free-count pool, and allocates the single vnode that backs all
 * anonymous pages.
 */
void
anon_init(void)
{
	int i;
	pad_mutex_t *tmp;

	/* These both need to be powers of 2 so round up to the next power */
	anon_hash_shift = highbit((physmem / ANON_HASHAVELEN) - 1);
	anon_hash_size = 1L << anon_hash_shift;

	/*
	 * We need to align the anonhash_lock and anonpages_hash_lock arrays
	 * to a 64B boundary to avoid false sharing.  We add 63B to our
	 * allocation so that we can get a 64B aligned address to use.
	 * We allocate both of these together to avoid wasting an additional
	 * 63B.
	 */
	tmp = kmem_zalloc((2 * AH_LOCK_SIZE * sizeof (pad_mutex_t)) + 63,
	    KM_SLEEP);
	anonhash_lock = (pad_mutex_t *)P2ROUNDUP((uintptr_t)tmp, 64);
	anonpages_hash_lock = anonhash_lock + AH_LOCK_SIZE;

	/* Both lock arrays share the single 64B-aligned allocation above. */
	for (i = 0; i < AH_LOCK_SIZE; i++) {
		mutex_init(&anonhash_lock[i].pad_mutex, NULL, MUTEX_DEFAULT,
		    NULL);
		mutex_init(&anonpages_hash_lock[i].pad_mutex, NULL,
		    MUTEX_DEFAULT, NULL);
	}

	/* Locks/CVs serializing access to anon array slots (ANON_ISBUSY). */
	for (i = 0; i < ANON_LOCKSIZE; i++) {
		mutex_init(&anon_array_lock[i].pad_mutex, NULL,
		    MUTEX_DEFAULT, NULL);
		cv_init(&anon_array_cv[i], NULL, CV_DEFAULT, NULL);
	}

	anon_hash = (struct anon **)
	    kmem_zalloc(sizeof (struct anon *) * anon_hash_size, KM_SLEEP);
	anon_cache = kmem_cache_create("anon_cache", sizeof (struct anon),
	    AN_CACHE_ALIGN, NULL, NULL, NULL, NULL, NULL, KMC_PREFILL);
	anonmap_cache = kmem_cache_create("anonmap_cache",
	    sizeof (struct anon_map), 0,
	    anonmap_cache_constructor, anonmap_cache_destructor, NULL,
	    NULL, NULL, 0);
	swap_maxcontig = (1024 * 1024) >> PAGESHIFT;	/* 1MB of pages */

	tmp = kmem_zalloc((ANI_MAX_POOL * sizeof (ani_free_t)) + 63, KM_SLEEP);
	/* Round ani_free_pool to cacheline boundary to avoid false sharing. */
	ani_free_pool = (ani_free_t *)P2ROUNDUP((uintptr_t)tmp, 64);

	/* The one vnode under which all anonymous pages are hashed. */
	anon_vp = vn_alloc(KM_SLEEP);
	vn_setops(anon_vp, swap_vnodeops);
	anon_vp->v_type = VREG;
	anon_vp->v_flag |= (VISSWAP|VISSWAPFS);
}
2507c478bd9Sstevel@tonic-gate 
2517c478bd9Sstevel@tonic-gate /*
2527c478bd9Sstevel@tonic-gate  * Global anon slot hash table manipulation.
2537c478bd9Sstevel@tonic-gate  */
2547c478bd9Sstevel@tonic-gate 
2557c478bd9Sstevel@tonic-gate static void
anon_addhash(struct anon * ap)2567c478bd9Sstevel@tonic-gate anon_addhash(struct anon *ap)
2577c478bd9Sstevel@tonic-gate {
2587c478bd9Sstevel@tonic-gate 	int index;
2597c478bd9Sstevel@tonic-gate 
26023d9e5acSMichael Corcoran 	ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
2617c478bd9Sstevel@tonic-gate 	index = ANON_HASH(ap->an_vp, ap->an_off);
2627c478bd9Sstevel@tonic-gate 	ap->an_hash = anon_hash[index];
2637c478bd9Sstevel@tonic-gate 	anon_hash[index] = ap;
2647c478bd9Sstevel@tonic-gate }
2657c478bd9Sstevel@tonic-gate 
2667c478bd9Sstevel@tonic-gate static void
anon_rmhash(struct anon * ap)2677c478bd9Sstevel@tonic-gate anon_rmhash(struct anon *ap)
2687c478bd9Sstevel@tonic-gate {
2697c478bd9Sstevel@tonic-gate 	struct anon **app;
2707c478bd9Sstevel@tonic-gate 
27123d9e5acSMichael Corcoran 	ASSERT(MUTEX_HELD(AH_MUTEX(ap->an_vp, ap->an_off)));
2727c478bd9Sstevel@tonic-gate 
2737c478bd9Sstevel@tonic-gate 	for (app = &anon_hash[ANON_HASH(ap->an_vp, ap->an_off)];
2747c478bd9Sstevel@tonic-gate 	    *app; app = &((*app)->an_hash)) {
2757c478bd9Sstevel@tonic-gate 		if (*app == ap) {
2767c478bd9Sstevel@tonic-gate 			*app = ap->an_hash;
2777c478bd9Sstevel@tonic-gate 			break;
2787c478bd9Sstevel@tonic-gate 		}
2797c478bd9Sstevel@tonic-gate 	}
2807c478bd9Sstevel@tonic-gate }
2817c478bd9Sstevel@tonic-gate 
2827c478bd9Sstevel@tonic-gate /*
2837c478bd9Sstevel@tonic-gate  * The anon array interfaces. Functions allocating,
2847c478bd9Sstevel@tonic-gate  * freeing array of pointers, and returning/setting
2857c478bd9Sstevel@tonic-gate  * entries in the array of pointers for a given offset.
2867c478bd9Sstevel@tonic-gate  *
2877c478bd9Sstevel@tonic-gate  * Create the list of pointers
2887c478bd9Sstevel@tonic-gate  */
2897c478bd9Sstevel@tonic-gate struct anon_hdr *
anon_create(pgcnt_t npages,int flags)2907c478bd9Sstevel@tonic-gate anon_create(pgcnt_t npages, int flags)
2917c478bd9Sstevel@tonic-gate {
2927c478bd9Sstevel@tonic-gate 	struct anon_hdr *ahp;
2937c478bd9Sstevel@tonic-gate 	ulong_t nchunks;
2947c478bd9Sstevel@tonic-gate 	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
2957c478bd9Sstevel@tonic-gate 
2967c478bd9Sstevel@tonic-gate 	if ((ahp = kmem_zalloc(sizeof (struct anon_hdr), kmemflags)) == NULL) {
2977c478bd9Sstevel@tonic-gate 		return (NULL);
2987c478bd9Sstevel@tonic-gate 	}
2997c478bd9Sstevel@tonic-gate 
3007c478bd9Sstevel@tonic-gate 	mutex_init(&ahp->serial_lock, NULL, MUTEX_DEFAULT, NULL);
3017c478bd9Sstevel@tonic-gate 	/*
3027c478bd9Sstevel@tonic-gate 	 * Single level case.
3037c478bd9Sstevel@tonic-gate 	 */
3047c478bd9Sstevel@tonic-gate 	ahp->size = npages;
3057c478bd9Sstevel@tonic-gate 	if (npages <= ANON_CHUNK_SIZE || (flags & ANON_ALLOC_FORCE)) {
3067c478bd9Sstevel@tonic-gate 
3077c478bd9Sstevel@tonic-gate 		if (flags & ANON_ALLOC_FORCE)
3087c478bd9Sstevel@tonic-gate 			ahp->flags |= ANON_ALLOC_FORCE;
3097c478bd9Sstevel@tonic-gate 
3107c478bd9Sstevel@tonic-gate 		ahp->array_chunk = kmem_zalloc(
3117c478bd9Sstevel@tonic-gate 		    ahp->size * sizeof (struct anon *), kmemflags);
3127c478bd9Sstevel@tonic-gate 
3137c478bd9Sstevel@tonic-gate 		if (ahp->array_chunk == NULL) {
3147c478bd9Sstevel@tonic-gate 			kmem_free(ahp, sizeof (struct anon_hdr));
3157c478bd9Sstevel@tonic-gate 			return (NULL);
3167c478bd9Sstevel@tonic-gate 		}
3177c478bd9Sstevel@tonic-gate 	} else {
3187c478bd9Sstevel@tonic-gate 		/*
3197c478bd9Sstevel@tonic-gate 		 * 2 Level case.
320e77f3106Sudpa 		 * anon hdr size needs to be rounded off  to be a multiple
321e77f3106Sudpa 		 * of ANON_CHUNK_SIZE. This is important as various anon
322e77f3106Sudpa 		 * related functions depend on this.
323e77f3106Sudpa 		 * NOTE -
324e77f3106Sudpa 		 * anon_grow()  makes anon hdr size a multiple of
325e77f3106Sudpa 		 * ANON_CHUNK_SIZE.
326e77f3106Sudpa 		 * amp size is <= anon hdr size.
327e77f3106Sudpa 		 * anon_index + seg_pgs <= anon hdr size.
3287c478bd9Sstevel@tonic-gate 		 */
329e77f3106Sudpa 		ahp->size = P2ROUNDUP(npages, ANON_CHUNK_SIZE);
330e77f3106Sudpa 		nchunks = ahp->size >> ANON_CHUNK_SHIFT;
3317c478bd9Sstevel@tonic-gate 
3327c478bd9Sstevel@tonic-gate 		ahp->array_chunk = kmem_zalloc(nchunks * sizeof (ulong_t *),
3337c478bd9Sstevel@tonic-gate 		    kmemflags);
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate 		if (ahp->array_chunk == NULL) {
3367c478bd9Sstevel@tonic-gate 			kmem_free(ahp, sizeof (struct anon_hdr));
3377c478bd9Sstevel@tonic-gate 			return (NULL);
3387c478bd9Sstevel@tonic-gate 		}
3397c478bd9Sstevel@tonic-gate 	}
3407c478bd9Sstevel@tonic-gate 	return (ahp);
3417c478bd9Sstevel@tonic-gate }
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate /*
3447c478bd9Sstevel@tonic-gate  * Free the array of pointers
3457c478bd9Sstevel@tonic-gate  */
3467c478bd9Sstevel@tonic-gate void
anon_release(struct anon_hdr * ahp,pgcnt_t npages)3477c478bd9Sstevel@tonic-gate anon_release(struct anon_hdr *ahp, pgcnt_t npages)
3487c478bd9Sstevel@tonic-gate {
3497c478bd9Sstevel@tonic-gate 	ulong_t i;
3507c478bd9Sstevel@tonic-gate 	void **ppp;
3517c478bd9Sstevel@tonic-gate 	ulong_t nchunks;
3527c478bd9Sstevel@tonic-gate 
353e77f3106Sudpa 	ASSERT(npages <= ahp->size);
3547c478bd9Sstevel@tonic-gate 
3557c478bd9Sstevel@tonic-gate 	/*
3567c478bd9Sstevel@tonic-gate 	 * Single level case.
3577c478bd9Sstevel@tonic-gate 	 */
3587c478bd9Sstevel@tonic-gate 	if (npages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
3597c478bd9Sstevel@tonic-gate 		kmem_free(ahp->array_chunk, ahp->size * sizeof (struct anon *));
3607c478bd9Sstevel@tonic-gate 	} else {
3617c478bd9Sstevel@tonic-gate 		/*
3627c478bd9Sstevel@tonic-gate 		 * 2 level case.
3637c478bd9Sstevel@tonic-gate 		 */
364e77f3106Sudpa 		nchunks = ahp->size >> ANON_CHUNK_SHIFT;
3657c478bd9Sstevel@tonic-gate 		for (i = 0; i < nchunks; i++) {
3667c478bd9Sstevel@tonic-gate 			ppp = &ahp->array_chunk[i];
3677c478bd9Sstevel@tonic-gate 			if (*ppp != NULL)
3687c478bd9Sstevel@tonic-gate 				kmem_free(*ppp, PAGESIZE);
3697c478bd9Sstevel@tonic-gate 		}
3707c478bd9Sstevel@tonic-gate 		kmem_free(ahp->array_chunk, nchunks * sizeof (ulong_t *));
3717c478bd9Sstevel@tonic-gate 	}
3727c478bd9Sstevel@tonic-gate 	mutex_destroy(&ahp->serial_lock);
3737c478bd9Sstevel@tonic-gate 	kmem_free(ahp, sizeof (struct anon_hdr));
3747c478bd9Sstevel@tonic-gate }
3757c478bd9Sstevel@tonic-gate 
3767c478bd9Sstevel@tonic-gate /*
3777c478bd9Sstevel@tonic-gate  * Return the pointer from the list for a
3787c478bd9Sstevel@tonic-gate  * specified anon index.
3797c478bd9Sstevel@tonic-gate  */
3807c478bd9Sstevel@tonic-gate struct anon *
anon_get_ptr(struct anon_hdr * ahp,ulong_t an_idx)3817c478bd9Sstevel@tonic-gate anon_get_ptr(struct anon_hdr *ahp, ulong_t an_idx)
3827c478bd9Sstevel@tonic-gate {
3837c478bd9Sstevel@tonic-gate 	struct anon **app;
3847c478bd9Sstevel@tonic-gate 
3857c478bd9Sstevel@tonic-gate 	ASSERT(an_idx < ahp->size);
3867c478bd9Sstevel@tonic-gate 
3877c478bd9Sstevel@tonic-gate 	/*
3887c478bd9Sstevel@tonic-gate 	 * Single level case.
3897c478bd9Sstevel@tonic-gate 	 */
3907c478bd9Sstevel@tonic-gate 	if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
3917c478bd9Sstevel@tonic-gate 		return ((struct anon *)
39278b03d3aSkchow 		    ((uintptr_t)ahp->array_chunk[an_idx] & ANON_PTRMASK));
3937c478bd9Sstevel@tonic-gate 	} else {
3947c478bd9Sstevel@tonic-gate 
3957c478bd9Sstevel@tonic-gate 		/*
3967c478bd9Sstevel@tonic-gate 		 * 2 level case.
3977c478bd9Sstevel@tonic-gate 		 */
3987c478bd9Sstevel@tonic-gate 		app = ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
3997c478bd9Sstevel@tonic-gate 		if (app) {
4007c478bd9Sstevel@tonic-gate 			return ((struct anon *)
40178b03d3aSkchow 			    ((uintptr_t)app[an_idx & ANON_CHUNK_OFF] &
40278b03d3aSkchow 			    ANON_PTRMASK));
4037c478bd9Sstevel@tonic-gate 		} else {
4047c478bd9Sstevel@tonic-gate 			return (NULL);
4057c478bd9Sstevel@tonic-gate 		}
4067c478bd9Sstevel@tonic-gate 	}
4077c478bd9Sstevel@tonic-gate }
4087c478bd9Sstevel@tonic-gate 
4097c478bd9Sstevel@tonic-gate /*
4107c478bd9Sstevel@tonic-gate  * Return the anon pointer for the first valid entry in the anon list,
4117c478bd9Sstevel@tonic-gate  * starting from the given index.
4127c478bd9Sstevel@tonic-gate  */
struct anon *
anon_get_next_ptr(struct anon_hdr *ahp, ulong_t *index)
{
	struct anon *ap;
	struct anon **app;
	ulong_t chunkoff;
	ulong_t i;
	ulong_t j;
	pgcnt_t size;

	i = *index;
	size = ahp->size;

	ASSERT(i < size);

	if ((size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
		/*
		 * 1 level case: linear scan of the flat array for the
		 * first non-NULL entry at or after *index.
		 */
		while (i < size) {
			ap = (struct anon *)
			    ((uintptr_t)ahp->array_chunk[i] & ANON_PTRMASK);
			if (ap) {
				*index = i;
				return (ap);
			}
			i++;
		}
	} else {
		/*
		 * 2 level case: scan chunk by chunk.  An unallocated
		 * chunk (NULL leaf pointer) is all-empty and is skipped
		 * wholesale.
		 */
		chunkoff = i & ANON_CHUNK_OFF;
		while (i < size) {
			app = ahp->array_chunk[i >> ANON_CHUNK_SHIFT];
			if (app)
				for (j = chunkoff; j < ANON_CHUNK_SIZE; j++) {
					ap = (struct anon *)
					    ((uintptr_t)app[j] & ANON_PTRMASK);
					if (ap) {
						/*
						 * i is still chunk-relative
						 * to where we started; add
						 * the distance scanned.
						 */
						*index = i + (j - chunkoff);
						return (ap);
					}
				}
			/*
			 * Only the first chunk is entered mid-way; every
			 * later chunk is scanned from its beginning.
			 */
			chunkoff = 0;
			i = (i + ANON_CHUNK_SIZE) & ~ANON_CHUNK_OFF;
		}
	}
	/* No valid entry remains; park *index at the end. */
	*index = size;
	return (NULL);
}
4647c478bd9Sstevel@tonic-gate 
4657c478bd9Sstevel@tonic-gate /*
4667c478bd9Sstevel@tonic-gate  * Set list entry with a given pointer for a specified offset
4677c478bd9Sstevel@tonic-gate  */
int
anon_set_ptr(struct anon_hdr *ahp, ulong_t an_idx, struct anon *ap, int flags)
{
	void		**ppp;
	struct anon	**app;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	uintptr_t	*ap_addr;

	ASSERT(an_idx < ahp->size);

	/*
	 * Single level case.
	 */
	if (ahp->size <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		ap_addr = (uintptr_t *)&ahp->array_chunk[an_idx];
	} else {

		/*
		 * 2 level case: the leaf chunk may need to be allocated
		 * first.  serial_lock serializes concurrent allocators;
		 * the pointer is re-checked under the lock so only one
		 * thread installs the chunk (double-checked allocation).
		 */
		ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];

		ASSERT(ppp != NULL);
		if (*ppp == NULL) {
			mutex_enter(&ahp->serial_lock);
			ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
			if (*ppp == NULL) {
				*ppp = kmem_zalloc(PAGESIZE, kmemflags);
				if (*ppp == NULL) {
					/* ANON_NOSLEEP allocation failed. */
					mutex_exit(&ahp->serial_lock);
					return (ENOMEM);
				}
			}
			mutex_exit(&ahp->serial_lock);
		}
		app = *ppp;
		ap_addr = (uintptr_t *)&app[an_idx & ANON_CHUNK_OFF];
	}
	/* Store the pointer while preserving the slot's low flag bits. */
	*ap_addr = (*ap_addr & ~ANON_PTRMASK) | (uintptr_t)ap;
	return (0);
}
5097c478bd9Sstevel@tonic-gate 
5107c478bd9Sstevel@tonic-gate /*
5117c478bd9Sstevel@tonic-gate  * Copy anon array into a given new anon array
5127c478bd9Sstevel@tonic-gate  */
int
anon_copy_ptr(struct anon_hdr *sahp, ulong_t s_idx, struct anon_hdr *dahp,
    ulong_t d_idx, pgcnt_t npages, int flags)
{
	void **sapp, **dapp;
	void *ap;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;

	ASSERT((s_idx < sahp->size) && (d_idx < dahp->size));
	ASSERT((npages <= sahp->size) && (npages <= dahp->size));

	/*
	 * Both arrays are 1 level: a single bcopy moves the whole range.
	 */
	if (((sahp->size <= ANON_CHUNK_SIZE) &&
	    (dahp->size <= ANON_CHUNK_SIZE)) ||
	    ((sahp->flags & ANON_ALLOC_FORCE) &&
	    (dahp->flags & ANON_ALLOC_FORCE))) {

		bcopy(&sahp->array_chunk[s_idx], &dahp->array_chunk[d_idx],
		    npages * sizeof (struct anon *));
		return (0);
	}

	/*
	 * Both arrays are 2 levels: copy chunk-sized (or smaller) runs,
	 * bounded each iteration by whichever side hits a chunk boundary
	 * first.  Unallocated source chunks are all-NULL and are skipped;
	 * destination chunks are allocated on demand.
	 */
	if (sahp->size > ANON_CHUNK_SIZE &&
	    dahp->size > ANON_CHUNK_SIZE &&
	    ((sahp->flags & ANON_ALLOC_FORCE) == 0) &&
	    ((dahp->flags & ANON_ALLOC_FORCE) == 0)) {

		ulong_t sapidx, dapidx;
		ulong_t *sap, *dap;
		ulong_t chknp;

		while (npages != 0) {

			/* Offsets within the current src/dst chunks. */
			sapidx = s_idx & ANON_CHUNK_OFF;
			dapidx = d_idx & ANON_CHUNK_OFF;
			chknp = ANON_CHUNK_SIZE - MAX(sapidx, dapidx);
			if (chknp > npages)
				chknp = npages;

			sapp = &sahp->array_chunk[s_idx >> ANON_CHUNK_SHIFT];
			if ((sap = *sapp) != NULL) {
				dapp = &dahp->array_chunk[d_idx
				    >> ANON_CHUNK_SHIFT];
				if ((dap = *dapp) == NULL) {
					*dapp = kmem_zalloc(PAGESIZE,
					    kmemflags);
					if ((dap = *dapp) == NULL)
						return (ENOMEM);
				}
				bcopy((sap + sapidx), (dap + dapidx),
				    chknp << ANON_PTRSHIFT);
			}
			s_idx += chknp;
			d_idx += chknp;
			npages -= chknp;
		}
		return (0);
	}

	/*
	 * At least one of the arrays is 2 level (mixed shapes): fall back
	 * to copying one slot at a time through the generic accessors.
	 */
	while (npages--) {
		if ((ap = anon_get_ptr(sahp, s_idx)) != NULL) {
			ASSERT(!ANON_ISBUSY(anon_get_slot(sahp, s_idx)));
			if (anon_set_ptr(dahp, d_idx, ap, flags) == ENOMEM)
					return (ENOMEM);
		}
		s_idx++;
		d_idx++;
	}
	return (0);
}
5917c478bd9Sstevel@tonic-gate 
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate /*
5947c478bd9Sstevel@tonic-gate  * ANON_INITBUF is a convenience macro for anon_grow() below. It
5957c478bd9Sstevel@tonic-gate  * takes a buffer dst, which is at least as large as buffer src. It
5967c478bd9Sstevel@tonic-gate  * does a bcopy from src into dst, and then bzeros the extra bytes
5977c478bd9Sstevel@tonic-gate  * of dst. If tail is set, the data in src is tail aligned within
5987c478bd9Sstevel@tonic-gate  * dst instead of head aligned.
5997c478bd9Sstevel@tonic-gate  */
6007c478bd9Sstevel@tonic-gate 
#define	ANON_INITBUF(src, srclen, dst, dstsize, tail)			      \
	if (tail) {							      \
		bzero((dst), (dstsize) - (srclen));			      \
		bcopy((src), (char *)(dst) + (dstsize) - (srclen), (srclen)); \
	} else {							      \
		bcopy((src), (dst), (srclen));				      \
		bzero((char *)(dst) + (srclen), (dstsize) - (srclen));	      \
	}

/*
 * Rounding granularities (in pages) used by anon_grow() when sizing a
 * new anon array: a 1-level array is rounded up to a multiple of
 * ANON_1_LEVEL_INC pages, a 2-level array to a multiple of
 * ANON_2_LEVEL_INC pages (a whole number of level-2 chunks).
 */
#define	ANON_1_LEVEL_INC	(ANON_CHUNK_SIZE / 8)
#define	ANON_2_LEVEL_INC	(ANON_1_LEVEL_INC * ANON_CHUNK_SIZE)
6127c478bd9Sstevel@tonic-gate 
/*
 * anon_grow() is used to efficiently extend an existing anon array.
 * startidx_p points to the index into the anon array of the first page
 * that is in use. oldseg_pgs is the number of pages in use, starting at
 * *startidx_p. newseg_pgs is the number of additional pages desired.
 *
 * If startidx_p == NULL, startidx is taken to be 0 and cannot be changed.
 *
 * The growth is done by creating a new top level of the anon array,
 * and (if the array is 2-level) reusing the existing second level arrays.
 *
 * flags can be used to specify ANON_NOSLEEP and ANON_GROWDOWN.
 *
 * Returns the new number of pages in the anon array, or 0 if an
 * allocation failed (only possible with ANON_NOSLEEP).  When growing
 * down, *startidx_p is updated to the new index of the first in-use page.
 */
pgcnt_t
anon_grow(struct anon_hdr *ahp, ulong_t *startidx_p, pgcnt_t oldseg_pgs,
    pgcnt_t newseg_pgs, int flags)
{
	ulong_t startidx = startidx_p ? *startidx_p : 0;
	pgcnt_t oldamp_pgs = ahp->size, newamp_pgs;
	pgcnt_t oelems, nelems, totpages;
	void **level1;
	int kmemflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
	int growdown = (flags & ANON_GROWDOWN);
	size_t newarrsz, oldarrsz;
	void *level2;

	/* Growing down must be able to report the new start index. */
	ASSERT(!(startidx_p == NULL && growdown));
	ASSERT(startidx + oldseg_pgs <= ahp->size);

	/*
	 * Determine the total number of pages needed in the new
	 * anon array. If growing down, totpages is all pages from
	 * startidx through the end of the array, plus <newseg_pgs>
	 * pages. If growing up, keep all pages from page 0 through
	 * the last page currently in use, plus <newseg_pgs> pages.
	 */
	if (growdown)
		totpages = oldamp_pgs - startidx + newseg_pgs;
	else
		totpages = startidx + oldseg_pgs + newseg_pgs;

	/* If the array is already large enough, just return. */

	if (oldamp_pgs >= totpages) {
		if (growdown)
			*startidx_p = oldamp_pgs - totpages;
		return (oldamp_pgs);
	}

	/*
	 * oldamp_pgs/newamp_pgs are the total numbers of pages represented
	 * by the corresponding arrays.
	 * oelems/nelems are the number of pointers in the top level arrays
	 * which may be either level 1 or level 2.
	 * Will the new anon array be one level or two levels?
	 */
	if (totpages <= ANON_CHUNK_SIZE || (ahp->flags & ANON_ALLOC_FORCE)) {
		newamp_pgs = P2ROUNDUP(totpages, ANON_1_LEVEL_INC);
		oelems = oldamp_pgs;
		nelems = newamp_pgs;
	} else {
		newamp_pgs = P2ROUNDUP(totpages, ANON_2_LEVEL_INC);
		oelems = (oldamp_pgs + ANON_CHUNK_OFF) >> ANON_CHUNK_SHIFT;
		nelems = newamp_pgs >> ANON_CHUNK_SHIFT;
	}

	/* Allocate the new top-level array. */
	newarrsz = nelems * sizeof (void *);
	level1 = kmem_alloc(newarrsz, kmemflags);
	if (level1 == NULL)
		return (0);

	/* Are we converting from a one level to a two level anon array? */

	if (newamp_pgs > ANON_CHUNK_SIZE && oldamp_pgs <= ANON_CHUNK_SIZE &&
	    !(ahp->flags & ANON_ALLOC_FORCE)) {

		/*
		 * Yes, we're converting to a two level. Reuse old level 1
		 * as new level 2 if it is exactly PAGESIZE. Otherwise
		 * alloc a new level 2 and copy the old level 1 data into it.
		 */
		if (oldamp_pgs == ANON_CHUNK_SIZE) {
			level2 = (void *)ahp->array_chunk;
		} else {
			level2 = kmem_alloc(PAGESIZE, kmemflags);
			if (level2 == NULL) {
				/* Unwind the level-1 allocation above. */
				kmem_free(level1, newarrsz);
				return (0);
			}
			oldarrsz = oldamp_pgs * sizeof (void *);

			/* Tail-align the old slots when growing down. */
			ANON_INITBUF(ahp->array_chunk, oldarrsz,
			    level2, PAGESIZE, growdown);
			kmem_free(ahp->array_chunk, oldarrsz);
		}
		bzero(level1, newarrsz);
		if (growdown)
			level1[nelems - 1] = level2;
		else
			level1[0] = level2;
	} else {
		/*
		 * Same number of levels as before; copy the old top-level
		 * pointers into the new (larger) top-level array.
		 */
		oldarrsz = oelems * sizeof (void *);

		ANON_INITBUF(ahp->array_chunk, oldarrsz,
		    level1, newarrsz, growdown);
		kmem_free(ahp->array_chunk, oldarrsz);
	}

	ahp->array_chunk = level1;
	ahp->size = newamp_pgs;
	if (growdown)
		*startidx_p = newamp_pgs - totpages;

	return (newamp_pgs);
}
7307c478bd9Sstevel@tonic-gate 
73152b2f68aSstans 
7327c478bd9Sstevel@tonic-gate /*
733b52a336eSPavel Tatashin  * Called to sync ani_free value.
7347c478bd9Sstevel@tonic-gate  */
7357c478bd9Sstevel@tonic-gate 
7367c478bd9Sstevel@tonic-gate void
set_anoninfo(void)7377c478bd9Sstevel@tonic-gate set_anoninfo(void)
7387c478bd9Sstevel@tonic-gate {
739b52a336eSPavel Tatashin 	processorid_t	ix, max_seqid;
740b52a336eSPavel Tatashin 	pgcnt_t		total = 0;
741b52a336eSPavel Tatashin 	static clock_t	last_time;
742b52a336eSPavel Tatashin 	clock_t		new_time;
743b52a336eSPavel Tatashin 
744b52a336eSPavel Tatashin 	if (ani_free_pool == NULL)
745b52a336eSPavel Tatashin 		return;
746b52a336eSPavel Tatashin 
747b52a336eSPavel Tatashin 	/*
748b52a336eSPavel Tatashin 	 * Recompute ani_free at most once per tick. Use max_cpu_seqid_ever to
749b52a336eSPavel Tatashin 	 * identify the maximum number of CPUs were ever online.
750b52a336eSPavel Tatashin 	 */
751b52a336eSPavel Tatashin 	new_time = ddi_get_lbolt();
752b52a336eSPavel Tatashin 	if (new_time > last_time) {
753b52a336eSPavel Tatashin 
754b52a336eSPavel Tatashin 		max_seqid = max_cpu_seqid_ever;
755b52a336eSPavel Tatashin 		ASSERT(ANI_MAX_POOL > max_seqid);
756b52a336eSPavel Tatashin 		for (ix = 0; ix <= max_seqid; ix++)
757b52a336eSPavel Tatashin 			total += ani_free_pool[ix].ani_count;
7587c478bd9Sstevel@tonic-gate 
759b52a336eSPavel Tatashin 		last_time = new_time;
760b52a336eSPavel Tatashin 		k_anoninfo.ani_free = total;
7617c478bd9Sstevel@tonic-gate 	}
7627c478bd9Sstevel@tonic-gate }
7637c478bd9Sstevel@tonic-gate 
/*
 * Reserve anon space.
 *
 * It's no longer simply a matter of incrementing ani_resv to
 * reserve swap space, we need to check memory-based as well
 * as disk-backed (physical) swap.  The following algorithm
 * is used:
 *	Check the space on physical swap
 *		i.e. amount needed < ani_max - ani_phys_resv
 *	If we are swapping on swapfs check
 *		amount needed < (availrmem - swapfs_minfree)
 * Since the algorithm to check for the quantity of swap space is
 * almost the same as that for reserving it, we'll just use anon_resvmem
 * with a flag to decrement availrmem.
 *
 * size	   - number of bytes to reserve (rounded up to whole pages)
 * takemem - B_TRUE to actually take the reservation, B_FALSE to only
 *	     check that the space is available
 * zone	   - if non-NULL, charge the reservation against the zone's
 *	     zone.max-swap resource control
 * tryhard - if set, attempt page_reclaim_mem() before the availrmem test
 *
 * Return non-zero on success.
 */
int
anon_resvmem(size_t size, boolean_t takemem, zone_t *zone, int tryhard)
{
	pgcnt_t npages = btopr(size);
	pgcnt_t mswap_pages = 0;
	pgcnt_t pswap_pages = 0;
	proc_t *p = curproc;

	if (zone != NULL) {
		/* test zone.max-swap resource control */
		mutex_enter(&p->p_lock);
		if (rctl_incr_swap(p, zone, ptob(npages)) != 0) {
			mutex_exit(&p->p_lock);

			if (takemem)
				atomic_add_64(&zone->zone_anon_alloc_fail, 1);

			return (0);
		}

		/* A check-only call must not leave the zone charged. */
		if (!takemem)
			rctl_decr_swap(zone, ptob(npages));

		mutex_exit(&p->p_lock);
	}
	mutex_enter(&anoninfo_lock);

	/*
	 * pswap_pages is the number of pages we can take from
	 * physical (i.e. disk-backed) swap.
	 */
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
	pswap_pages = k_anoninfo.ani_max - k_anoninfo.ani_phys_resv;

	ANON_PRINT(A_RESV,
	    ("anon_resvmem: npages %lu takemem %u pswap %lu caller %p\n",
	    npages, takemem, pswap_pages, (void *)caller()));

	if (npages <= pswap_pages) {
		/*
		 * we have enough space on a physical swap
		 */
		if (takemem)
			k_anoninfo.ani_phys_resv += npages;
		mutex_exit(&anoninfo_lock);
		return (1);
	} else if (pswap_pages != 0) {
		/*
		 * we have some space on a physical swap
		 */
		if (takemem) {
			/*
			 * use up remainder of phys swap
			 */
			k_anoninfo.ani_phys_resv += pswap_pages;
			ASSERT(k_anoninfo.ani_phys_resv == k_anoninfo.ani_max);
		}
	}
	/*
	 * since (npages > pswap_pages) we need mem swap
	 * mswap_pages is the number of pages needed from availrmem
	 */
	ASSERT(npages > pswap_pages);
	mswap_pages = npages - pswap_pages;

	ANON_PRINT(A_RESV, ("anon_resvmem: need %ld pages from memory\n",
	    mswap_pages));

	/*
	 * priv processes can reserve memory as swap as long as availrmem
	 * remains greater than swapfs_minfree; in the case of non-priv
	 * processes, memory can be reserved as swap only if availrmem
	 * doesn't fall below (swapfs_minfree + swapfs_reserve). Thus,
	 * swapfs_reserve amount of memswap is not available to non-priv
	 * processes. This protects daemons such as automounter dying
	 * as a result of application processes eating away almost entire
	 * membased swap. This safeguard becomes useless if apps are run
	 * with root access.
	 *
	 * swapfs_reserve is minimum of 4Mb or 1/16 of physmem.
	 *
	 */
	if (tryhard) {
		pgcnt_t floor_pages;

		if (secpolicy_resource_anon_mem(CRED())) {
			floor_pages = swapfs_minfree;
		} else {
			floor_pages = swapfs_minfree + swapfs_reserve;
		}

		/*
		 * Drop anoninfo_lock around the reclaim: page_reclaim_mem()
		 * may block, and the state it affects (availrmem) is
		 * re-examined under freemem_lock below.
		 */
		mutex_exit(&anoninfo_lock);
		(void) page_reclaim_mem(mswap_pages, floor_pages, 0);
		mutex_enter(&anoninfo_lock);
	}

	mutex_enter(&freemem_lock);
	if (availrmem > (swapfs_minfree + swapfs_reserve + mswap_pages) ||
	    (availrmem > (swapfs_minfree + mswap_pages) &&
	    secpolicy_resource(CRED()) == 0)) {

		if (takemem) {
			/*
			 * Take the memory from the rest of the system.
			 */
			availrmem -= mswap_pages;
			mutex_exit(&freemem_lock);
			k_anoninfo.ani_mem_resv += mswap_pages;
			ANI_ADD(mswap_pages);
			ANON_PRINT((A_RESV | A_MRESV),
			    ("anon_resvmem: took %ld pages of availrmem\n",
			    mswap_pages));
		} else {
			mutex_exit(&freemem_lock);
		}

		ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);
		mutex_exit(&anoninfo_lock);
		return (1);
	} else {
		/*
		 * Fail if not enough memory
		 */
		if (takemem) {
			/* Back out the phys-swap taken above. */
			k_anoninfo.ani_phys_resv -= pswap_pages;
		}

		mutex_exit(&freemem_lock);
		mutex_exit(&anoninfo_lock);
		ANON_PRINT(A_RESV,
		    ("anon_resvmem: not enough space from swapfs\n"));
		/* Undo the zone rctl charge taken at entry. */
		if (zone != NULL && takemem)
			rctl_decr_swap(zone, ptob(npages));
		return (0);
	}
}
9177c478bd9Sstevel@tonic-gate 
/*
 * Give back an anon reservation.
 *
 * size is in bytes (rounded up to whole pages); zone, if non-NULL, is
 * credited back against its zone.max-swap resource control.  Memory-based
 * reservation (minus any portion locked via segspt) is returned to
 * availrmem first; the remainder is returned to physical swap.
 */
void
anon_unresvmem(size_t size, zone_t *zone)
{
	pgcnt_t npages = btopr(size);
	spgcnt_t mem_free_pages = 0;
	pgcnt_t phys_free_slots;
#ifdef	ANON_DEBUG
	pgcnt_t mem_resv;
#endif
	if (zone != NULL)
		rctl_decr_swap(zone, ptob(npages));

	mutex_enter(&anoninfo_lock);

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);

	/*
	 * If some of this reservation belonged to swapfs
	 * give it back to availrmem.
	 * ani_mem_resv is the amount of availrmem swapfs has reserved.
	 * but some of that memory could be locked by segspt so we can only
	 * return non locked ani_mem_resv back to availrmem
	 */
	if (k_anoninfo.ani_mem_resv > k_anoninfo.ani_locked_swap) {
		ANON_PRINT((A_RESV | A_MRESV),
		    ("anon_unresv: growing availrmem by %ld pages\n",
		    MIN(k_anoninfo.ani_mem_resv, npages)));

		mem_free_pages = MIN((spgcnt_t)(k_anoninfo.ani_mem_resv -
		    k_anoninfo.ani_locked_swap), npages);
		mutex_enter(&freemem_lock);
		availrmem += mem_free_pages;
		mutex_exit(&freemem_lock);
		k_anoninfo.ani_mem_resv -= mem_free_pages;

		ANI_ADD(-mem_free_pages);
	}
	/*
	 * The remainder of the pages is returned to phys swap
	 */
	ASSERT(npages >= mem_free_pages);
	phys_free_slots = npages - mem_free_pages;

	if (phys_free_slots) {
		k_anoninfo.ani_phys_resv -= phys_free_slots;
	}

#ifdef	ANON_DEBUG
	/* mem_resv is only consumed by the ANON_PRINT below. */
	mem_resv = k_anoninfo.ani_mem_resv;
#endif

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

	mutex_exit(&anoninfo_lock);

	ANON_PRINT(A_RESV, ("anon_unresv: %lu, tot %lu, caller %p\n",
	    npages, mem_resv, (void *)caller()));
}
9807c478bd9Sstevel@tonic-gate 
/*
 * Allocate an anon slot, initialize it and add it to the anon hash.
 * If vp is NULL the slot's identity (an_vp/an_off) is assigned by
 * swap_alloc(); otherwise it is named by the given (vp, off) pair.
 * The hash mutex is dropped again before returning the new slot.
 */
struct anon *
anon_alloc(struct vnode *vp, anoff_t off)
{
	struct anon	*ap;
	kmutex_t	*ahm;

	/* KM_SLEEP: this allocation waits rather than fail. */
	ap = kmem_cache_alloc(anon_cache, KM_SLEEP);
	if (vp == NULL) {
		swap_alloc(ap);
	} else {
		ap->an_vp = vp;
		ap->an_off = off;
	}
	ap->an_refcnt = 1;
	ap->an_pvp = NULL;
	ap->an_poff = 0;
	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);
	anon_addhash(ap);
	mutex_exit(ahm);
	/* One fewer anon slot available system-wide. */
	ANI_ADD(-1);
	ANON_PRINT(A_ANON, ("anon_alloc: returning ap %p, vp %p\n",
	    (void *)ap, (ap ? (void *)ap->an_vp : NULL)));
	return (ap);
}
10097c478bd9Sstevel@tonic-gate 
/*
 * Called for pages locked in memory via softlock/pagelock/mlock to make sure
 * such pages don't consume any physical swap resources needed for swapping
 * unlocked pages.
 *
 * If the slot has physical swap backing (an_pvp), release it and mark the
 * page modified so its contents aren't lost when it is eventually paged out.
 */
void
anon_swap_free(struct anon *ap, page_t *pp)
{
	kmutex_t *ahm;

	ASSERT(ap != NULL);
	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(IS_SWAPFSVP(pp->p_vnode));
	ASSERT(ap->an_refcnt != 0);
	ASSERT(pp->p_vnode == ap->an_vp);
	ASSERT(pp->p_offset == ap->an_off);

	/* Fast path: no physical swap to free (unlocked check). */
	if (ap->an_pvp == NULL)
		return;

	page_io_lock(pp);
	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);

	ASSERT(ap->an_refcnt != 0);
	ASSERT(pp->p_vnode == ap->an_vp);
	ASSERT(pp->p_offset == ap->an_off);

	/* Re-check under the hash mutex; another thread may have freed it. */
	if (ap->an_pvp != NULL) {
		swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
		ap->an_pvp = NULL;
		ap->an_poff = 0;
		mutex_exit(ahm);
		/*
		 * The swap copy is gone; mark the page dirty so it will
		 * be written out again if it is later evicted.
		 */
		hat_setmod(pp);
	} else {
		mutex_exit(ahm);
	}
	page_io_unlock(pp);
}
1051a98e9dbfSaguzovsk 
/*
 * Decrement the reference count of an anon page.
 * If reference count goes to zero, free it and
 * its associated page (if any).
 */
void
anon_decref(struct anon *ap)
{
	page_t *pp;
	struct vnode *vp;
	anoff_t off;
	kmutex_t *ahm;

	ahm = AH_MUTEX(ap->an_vp, ap->an_off);
	mutex_enter(ahm);
	ASSERT(ap->an_refcnt != 0);
	if (ap->an_refcnt == 0)
		panic("anon_decref: slot count 0");
	if (--ap->an_refcnt == 0) {
		/* Capture the slot's identity before tearing it down. */
		swap_xlate(ap, &vp, &off);
		anon_rmhash(ap);
		if (ap->an_pvp != NULL)
			swap_phys_free(ap->an_pvp, ap->an_poff, PAGESIZE);
		mutex_exit(ahm);

		/*
		 * If there is a page for this anon slot we will need to
		 * call VN_DISPOSE to get rid of the vp association and
		 * put the page back on the free list as really free.
		 * Acquire the "exclusive" lock to ensure that any
		 * pending i/o always completes before the swap slot
		 * is freed.
		 */
		pp = page_lookup(vp, (u_offset_t)off, SE_EXCL);
		if (pp != NULL) {
			/*LINTED: constant in conditional context */
			VN_DISPOSE(pp, B_INVAL, 0, kcred);
		}
		ANON_PRINT(A_ANON, ("anon_decref: free ap %p, vp %p\n",
		    (void *)ap, (void *)ap->an_vp));

		kmem_cache_free(anon_cache, ap);

		/* One more anon slot available system-wide. */
		ANI_ADD(1);
	} else {
		mutex_exit(ahm);
	}
}
11007c478bd9Sstevel@tonic-gate 
110178b03d3aSkchow 
110278b03d3aSkchow /*
110378b03d3aSkchow  * check an_refcnt of the root anon slot (anon_index argument is aligned at
110478b03d3aSkchow  * seg->s_szc level) to determine whether COW processing is required.
110578b03d3aSkchow  * anonpages_hash_lock[] held on the root ap ensures that if root's
110678b03d3aSkchow  * refcnt is 1 all other refcnt's are 1 as well (and they can't increase
110778b03d3aSkchow  * later since this process can't fork while its AS lock is held).
110878b03d3aSkchow  *
110978b03d3aSkchow  * returns 1 if the root anon slot has a refcnt > 1 otherwise returns 0.
111078b03d3aSkchow  */
111178b03d3aSkchow int
anon_szcshare(struct anon_hdr * ahp,ulong_t anon_index)111278b03d3aSkchow anon_szcshare(struct anon_hdr *ahp, ulong_t anon_index)
111378b03d3aSkchow {
111478b03d3aSkchow 	struct anon	*ap;
111578b03d3aSkchow 	kmutex_t	*ahmpages = NULL;
111678b03d3aSkchow 
111778b03d3aSkchow 	ap = anon_get_ptr(ahp, anon_index);
111878b03d3aSkchow 	if (ap == NULL)
111978b03d3aSkchow 		return (0);
112078b03d3aSkchow 
112123d9e5acSMichael Corcoran 	ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
112278b03d3aSkchow 	mutex_enter(ahmpages);
112378b03d3aSkchow 	ASSERT(ap->an_refcnt >= 1);
112478b03d3aSkchow 	if (ap->an_refcnt == 1) {
112578b03d3aSkchow 		mutex_exit(ahmpages);
112678b03d3aSkchow 		return (0);
112778b03d3aSkchow 	}
112878b03d3aSkchow 	mutex_exit(ahmpages);
112978b03d3aSkchow 	return (1);
113078b03d3aSkchow }
113178b03d3aSkchow /*
113278b03d3aSkchow  * Check 'nslots' anon slots for refcnt > 1.
113378b03d3aSkchow  *
113478b03d3aSkchow  * returns 1 if any of the 'nslots' anon slots has a refcnt > 1 otherwise
113578b03d3aSkchow  * returns 0.
113678b03d3aSkchow  */
11377c478bd9Sstevel@tonic-gate static int
anon_share(struct anon_hdr * ahp,ulong_t anon_index,pgcnt_t nslots)11387c478bd9Sstevel@tonic-gate anon_share(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots)
11397c478bd9Sstevel@tonic-gate {
11407c478bd9Sstevel@tonic-gate 	struct anon *ap;
11417c478bd9Sstevel@tonic-gate 
11427c478bd9Sstevel@tonic-gate 	while (nslots-- > 0) {
11437c478bd9Sstevel@tonic-gate 		if ((ap = anon_get_ptr(ahp, anon_index)) != NULL &&
11447c478bd9Sstevel@tonic-gate 		    ap->an_refcnt > 1)
11457c478bd9Sstevel@tonic-gate 			return (1);
11467c478bd9Sstevel@tonic-gate 		anon_index++;
11477c478bd9Sstevel@tonic-gate 	}
11487c478bd9Sstevel@tonic-gate 
11497c478bd9Sstevel@tonic-gate 	return (0);
11507c478bd9Sstevel@tonic-gate }
11517c478bd9Sstevel@tonic-gate 
/*
 * Drop one reference on every anon slot of the large page region of
 * page_get_pagecnt(szc) slots starting at an_idx.  All populated slots of
 * the region carry the same refcnt as the root slot (asserted below).  If
 * the root refcnt is 1 this is the last reference, so the slots are torn
 * down completely: pages are destroyed/disposed and swap space is freed.
 * If the refcnt is greater than 1 the region is still shared and only the
 * refcnts are decremented, with the region's APH_MUTEX held throughout so
 * the group decrement appears atomic to other threads.
 */
static void
anon_decref_pages(
	struct anon_hdr *ahp,
	ulong_t an_idx,
	uint_t szc)
{
	struct anon *ap = anon_get_ptr(ahp, an_idx);
	kmutex_t *ahmpages = NULL;
	page_t *pp;
	pgcnt_t pgcnt = page_get_pagecnt(szc);
	pgcnt_t i;
	struct vnode *vp;
	anoff_t   off;
	kmutex_t *ahm;
#ifdef DEBUG
	int refcnt = 1;
#endif

	ASSERT(szc != 0);
	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
	ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
	ASSERT(an_idx < ahp->size);

	if (ahp->size - an_idx < pgcnt) {
		/*
		 * In case of shared mappings total anon map size may not be
		 * the largest page size aligned.
		 */
		pgcnt = ahp->size - an_idx;
	}

	VM_STAT_ADD(anonvmstats.decrefpages[0]);

	/*
	 * If the region is still shared (root refcnt > 1), keep the
	 * APH_MUTEX held for the whole walk below; if this is the last
	 * reference, drop it immediately and take the teardown paths.
	 */
	if (ap != NULL) {
		ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
		mutex_enter(ahmpages);
		ASSERT((refcnt = ap->an_refcnt) != 0);
		VM_STAT_ADD(anonvmstats.decrefpages[1]);
		if (ap->an_refcnt == 1) {
			VM_STAT_ADD(anonvmstats.decrefpages[2]);
			ASSERT(!anon_share(ahp, an_idx, pgcnt));
			mutex_exit(ahmpages);
			ahmpages = NULL;
		}
	}

	i = 0;
	while (i < pgcnt) {
		if ((ap = anon_get_ptr(ahp, an_idx + i)) == NULL) {
			/* Hole: only legal when the region is unshared. */
			ASSERT(refcnt == 1 && ahmpages == NULL);
			i++;
			continue;
		}
		/* Every populated slot matches the root's refcnt. */
		ASSERT(ap->an_refcnt == refcnt);
		ASSERT(ahmpages != NULL || ap->an_refcnt == 1);
		ASSERT(ahmpages == NULL || ap->an_refcnt > 1);

		if (ahmpages == NULL) {
			/* Last reference: free the slot and its backing. */
			swap_xlate(ap, &vp, &off);
			pp = page_lookup(vp, (u_offset_t)off, SE_EXCL);
			if (pp == NULL || pp->p_szc == 0) {
				/*
				 * Small page (or none in core): clear the
				 * slot, release its swap slot, and dispose
				 * of the page if present.
				 */
				VM_STAT_ADD(anonvmstats.decrefpages[3]);
				ahm = AH_MUTEX(ap->an_vp, ap->an_off);
				(void) anon_set_ptr(ahp, an_idx + i, NULL,
				    ANON_SLEEP);
				mutex_enter(ahm);
				ap->an_refcnt--;
				ASSERT(ap->an_refcnt == 0);
				anon_rmhash(ap);
				if (ap->an_pvp)
					swap_phys_free(ap->an_pvp, ap->an_poff,
					    PAGESIZE);
				mutex_exit(ahm);
				if (pp == NULL) {
					/*
					 * Look again after releasing the
					 * swap slot — presumably to catch a
					 * page instantiated since the first
					 * lookup (TODO confirm).
					 */
					pp = page_lookup(vp, (u_offset_t)off,
					    SE_EXCL);
					ASSERT(pp == NULL || pp->p_szc == 0);
				}
				if (pp != NULL) {
					VM_STAT_ADD(anonvmstats.decrefpages[4]);
					/*LINTED*/
					VN_DISPOSE(pp, B_INVAL, 0, kcred);
				}
				kmem_cache_free(anon_cache, ap);
				ANI_ADD(1);
				i++;
			} else {
				/*
				 * Large page: gather all curpgcnt
				 * constituent pages (locked SE_EXCL and
				 * verified physically contiguous) before
				 * freeing their slots, then destroy them as
				 * a group.
				 */
				pgcnt_t j;
				pgcnt_t curpgcnt =
				    page_get_pagecnt(pp->p_szc);
				size_t ppasize = curpgcnt * sizeof (page_t *);
				page_t **ppa = kmem_alloc(ppasize, KM_SLEEP);
				int dispose = 0;

				VM_STAT_ADD(anonvmstats.decrefpages[5]);

				ASSERT(pp->p_szc <= szc);
				ASSERT(IS_P2ALIGNED(curpgcnt, curpgcnt));
				ASSERT(IS_P2ALIGNED(i, curpgcnt));
				ASSERT(i + curpgcnt <= pgcnt);
				ASSERT(!(page_pptonum(pp) & (curpgcnt - 1)));
				ppa[0] = pp;
				for (j = i + 1; j < i + curpgcnt; j++) {
					ap = anon_get_ptr(ahp, an_idx + j);
					ASSERT(ap != NULL &&
					    ap->an_refcnt == 1);
					swap_xlate(ap, &vp, &off);
					pp = page_lookup(vp, (u_offset_t)off,
					    SE_EXCL);
					if (pp == NULL)
						panic("anon_decref_pages: "
						    "no page");

					(void) hat_pageunload(pp,
					    HAT_FORCE_PGUNLOAD);
					ASSERT(pp->p_szc == ppa[0]->p_szc);
					ASSERT(page_pptonum(pp) - 1 ==
					    page_pptonum(ppa[j - i - 1]));
					ppa[j - i] = pp;
					/*
					 * If any swap vnode uses a
					 * non-default dispose op we must
					 * dispose each page individually
					 * rather than page_destroy_pages().
					 */
					if (ap->an_pvp != NULL &&
					    !vn_matchopval(ap->an_pvp,
					    VOPNAME_DISPOSE,
					    (fs_generic_func_p)(uintptr_t)
					    fs_dispose))
						dispose = 1;
				}
				/* Now free the anon slots and swap space. */
				for (j = i; j < i + curpgcnt; j++) {
					ap = anon_get_ptr(ahp, an_idx + j);
					ASSERT(ap != NULL &&
					    ap->an_refcnt == 1);
					ahm = AH_MUTEX(ap->an_vp, ap->an_off);
					(void) anon_set_ptr(ahp, an_idx + j,
					    NULL, ANON_SLEEP);
					mutex_enter(ahm);
					ap->an_refcnt--;
					ASSERT(ap->an_refcnt == 0);
					anon_rmhash(ap);
					if (ap->an_pvp)
						swap_phys_free(ap->an_pvp,
						    ap->an_poff, PAGESIZE);
					mutex_exit(ahm);
					kmem_cache_free(anon_cache, ap);
					ANI_ADD(1);
				}
				if (!dispose) {
					VM_STAT_ADD(anonvmstats.decrefpages[6]);
					page_destroy_pages(ppa[0]);
				} else {
					/*
					 * Demote each constituent to a base
					 * page, then dispose individually.
					 */
					VM_STAT_ADD(anonvmstats.decrefpages[7]);
					for (j = 0; j < curpgcnt; j++) {
						ASSERT(PAGE_EXCL(ppa[j]));
						ppa[j]->p_szc = 0;
					}
					for (j = 0; j < curpgcnt; j++) {
						ASSERT(!hat_page_is_mapped(
						    ppa[j]));
						/*LINTED*/
						VN_DISPOSE(ppa[j], B_INVAL, 0,
						    kcred);
					}
				}
				kmem_free(ppa, ppasize);
				i += curpgcnt;
			}
		} else {
			/* Still shared: just drop this slot's reference. */
			VM_STAT_ADD(anonvmstats.decrefpages[8]);
			(void) anon_set_ptr(ahp, an_idx + i, NULL, ANON_SLEEP);
			ahm = AH_MUTEX(ap->an_vp, ap->an_off);
			mutex_enter(ahm);
			ap->an_refcnt--;
			mutex_exit(ahm);
			i++;
		}
	}

	if (ahmpages != NULL) {
		mutex_exit(ahmpages);
	}
}
13317c478bd9Sstevel@tonic-gate 
13327c478bd9Sstevel@tonic-gate /*
13337c478bd9Sstevel@tonic-gate  * Duplicate references to size bytes worth of anon pages.
13347c478bd9Sstevel@tonic-gate  * Used when duplicating a segment that contains private anon pages.
 * This code assumes that the procedure calling this one has already used
 * hat_chgprot() to disable write access to the range of addresses
 * that *old actually refers to.
13387c478bd9Sstevel@tonic-gate  */
13397c478bd9Sstevel@tonic-gate void
anon_dup(struct anon_hdr * old,ulong_t old_idx,struct anon_hdr * new,ulong_t new_idx,size_t size)13407c478bd9Sstevel@tonic-gate anon_dup(struct anon_hdr *old, ulong_t old_idx, struct anon_hdr *new,
13417e897d1fSToomas Soome     ulong_t new_idx, size_t size)
13427c478bd9Sstevel@tonic-gate {
13437c478bd9Sstevel@tonic-gate 	spgcnt_t npages;
13447c478bd9Sstevel@tonic-gate 	kmutex_t *ahm;
13457c478bd9Sstevel@tonic-gate 	struct anon *ap;
13467c478bd9Sstevel@tonic-gate 	ulong_t off;
13477c478bd9Sstevel@tonic-gate 	ulong_t index;
13487c478bd9Sstevel@tonic-gate 
13497c478bd9Sstevel@tonic-gate 	npages = btopr(size);
13507c478bd9Sstevel@tonic-gate 	while (npages > 0) {
13517c478bd9Sstevel@tonic-gate 		index = old_idx;
13527c478bd9Sstevel@tonic-gate 		if ((ap = anon_get_next_ptr(old, &index)) == NULL)
13537c478bd9Sstevel@tonic-gate 			break;
13547c478bd9Sstevel@tonic-gate 
13557c478bd9Sstevel@tonic-gate 		ASSERT(!ANON_ISBUSY(anon_get_slot(old, index)));
13567c478bd9Sstevel@tonic-gate 		off = index - old_idx;
13577c478bd9Sstevel@tonic-gate 		npages -= off;
13587c478bd9Sstevel@tonic-gate 		if (npages <= 0)
13597c478bd9Sstevel@tonic-gate 			break;
13607c478bd9Sstevel@tonic-gate 
13617c478bd9Sstevel@tonic-gate 		(void) anon_set_ptr(new, new_idx + off, ap, ANON_SLEEP);
136223d9e5acSMichael Corcoran 		ahm = AH_MUTEX(ap->an_vp, ap->an_off);
13637c478bd9Sstevel@tonic-gate 
13647c478bd9Sstevel@tonic-gate 		mutex_enter(ahm);
13657c478bd9Sstevel@tonic-gate 		ap->an_refcnt++;
13667c478bd9Sstevel@tonic-gate 		mutex_exit(ahm);
13677c478bd9Sstevel@tonic-gate 
13687c478bd9Sstevel@tonic-gate 		off++;
13697c478bd9Sstevel@tonic-gate 		new_idx += off;
13707c478bd9Sstevel@tonic-gate 		old_idx += off;
13717c478bd9Sstevel@tonic-gate 		npages--;
13727c478bd9Sstevel@tonic-gate 	}
13737c478bd9Sstevel@tonic-gate }
13747c478bd9Sstevel@tonic-gate 
13757c478bd9Sstevel@tonic-gate /*
13767c478bd9Sstevel@tonic-gate  * Just like anon_dup but also guarantees there are no holes (unallocated anon
13777c478bd9Sstevel@tonic-gate  * slots) within any large page region. That means if a large page region is
13787c478bd9Sstevel@tonic-gate  * empty in the old array it will skip it. If there are 1 or more valid slots
13797c478bd9Sstevel@tonic-gate  * in the large page region of the old array it will make sure to fill in any
13807c478bd9Sstevel@tonic-gate  * unallocated ones and also copy them to the new array. If noalloc is 1 large
13817c478bd9Sstevel@tonic-gate  * page region should either have no valid anon slots or all slots should be
13827c478bd9Sstevel@tonic-gate  * valid.
13837c478bd9Sstevel@tonic-gate  */
/*
 * Walk btopr(size) slots of 'old' starting at old_idx, one large page
 * region (pgcnt slots) at a time.  Regions that are entirely empty are
 * skipped; partially-populated regions have their holes filled with
 * freshly allocated slots (panic if noalloc), and every slot of the
 * region is shared into 'new' with its refcnt incremented.
 */
void
anon_dup_fill_holes(
	struct anon_hdr *old,
	ulong_t old_idx,
	struct anon_hdr *new,
	ulong_t new_idx,
	size_t size,
	uint_t szc,
	int noalloc)
{
	struct anon	*ap;
	spgcnt_t	npages;
	kmutex_t	*ahm, *ahmpages = NULL;
	pgcnt_t		pgcnt, i;
	ulong_t		index, off;
#ifdef DEBUG
	int		refcnt;
#endif

	ASSERT(szc != 0);
	pgcnt = page_get_pagecnt(szc);
	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
	npages = btopr(size);
	ASSERT(IS_P2ALIGNED(npages, pgcnt));
	ASSERT(IS_P2ALIGNED(old_idx, pgcnt));

	VM_STAT_ADD(anonvmstats.dupfillholes[0]);

	while (npages > 0) {
		index = old_idx;

		/*
		 * Find the next valid slot.
		 */
		if (anon_get_next_ptr(old, &index) == NULL)
			break;

		ASSERT(!ANON_ISBUSY(anon_get_slot(old, index)));
		/*
		 * Now backup index to the beginning of the
		 * current large page region of the old array.
		 */
		index = P2ALIGN(index, pgcnt);
		off = index - old_idx;
		ASSERT(IS_P2ALIGNED(off, pgcnt));
		npages -= off;
		if (npages <= 0)
			break;

		/*
		 * Fill and copy a large page regions worth
		 * of anon slots.
		 */
		for (i = 0; i < pgcnt; i++) {
			if ((ap = anon_get_ptr(old, index + i)) == NULL) {
				if (noalloc) {
					panic("anon_dup_fill_holes: "
					    "empty anon slot\n");
				}
				/* Fill the hole with a fresh anon slot. */
				VM_STAT_ADD(anonvmstats.dupfillholes[1]);
				ap = anon_alloc(NULL, 0);
				(void) anon_set_ptr(old, index + i, ap,
				    ANON_SLEEP);
			} else if (i == 0) {
				/*
				 * make the increment of all refcnts of all
				 * anon slots of a large page appear atomic by
				 * getting an anonpages_hash_lock for the
				 * first anon slot of a large page.
				 */
				VM_STAT_ADD(anonvmstats.dupfillholes[2]);

				ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
				mutex_enter(ahmpages);
				/*LINTED*/
				ASSERT(refcnt = ap->an_refcnt);

				VM_STAT_COND_ADD(ap->an_refcnt > 1,
				    anonvmstats.dupfillholes[3]);
			}
			/* Share this slot into the new array. */
			(void) anon_set_ptr(new, new_idx + off + i, ap,
			    ANON_SLEEP);
			ahm = AH_MUTEX(ap->an_vp, ap->an_off);
			mutex_enter(ahm);
			ASSERT(ahmpages != NULL || ap->an_refcnt == 1);
			ASSERT(i == 0 || ahmpages == NULL ||
			    refcnt == ap->an_refcnt);
			ap->an_refcnt++;
			mutex_exit(ahm);
		}
		if (ahmpages != NULL) {
			mutex_exit(ahmpages);
			ahmpages = NULL;
		}
		/*
		 * Advance past the skipped empty slots (off) plus the
		 * region just processed (pgcnt).
		 */
		off += pgcnt;
		new_idx += off;
		old_idx += off;
		npages -= pgcnt;
	}
}
14847c478bd9Sstevel@tonic-gate 
14857c478bd9Sstevel@tonic-gate /*
 * Used when a segment with a vnode changes szc.  Similarly to
 * anon_dup_fill_holes() it makes sure each large page region either has no
 * anon slots or all of them, but new slots are created by COWing the file
 * pages.  On entrance no anon slots should be shared.
14907c478bd9Sstevel@tonic-gate  */
/*
 * Returns 0 on success or an errno from VOP_GETPAGE()/anon_private()
 * (ENOMEM when anon_private() fails to produce a page).
 */
int
anon_fill_cow_holes(
	struct seg *seg,
	caddr_t addr,
	struct anon_hdr *ahp,
	ulong_t an_idx,
	struct vnode *vp,
	u_offset_t vp_off,
	size_t size,
	uint_t szc,
	uint_t prot,
	struct vpage vpage[],
	struct cred *cred)
{
	struct anon	*ap;
	spgcnt_t	npages;
	pgcnt_t		pgcnt, i;
	ulong_t		index, off;
	int		err = 0;
	int		pageflags = 0;

	ASSERT(szc != 0);
	pgcnt = page_get_pagecnt(szc);
	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
	npages = btopr(size);
	ASSERT(IS_P2ALIGNED(npages, pgcnt));
	ASSERT(IS_P2ALIGNED(an_idx, pgcnt));

	while (npages > 0) {
		index = an_idx;

		/*
		 * Find the next valid slot.
		 */
		if (anon_get_next_ptr(ahp, &index) == NULL) {
			break;
		}

		ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index)));
		/*
		 * Now backup index to the beginning of the
		 * current large page region of the anon array.
		 */
		index = P2ALIGN(index, pgcnt);
		off = index - an_idx;
		ASSERT(IS_P2ALIGNED(off, pgcnt));
		npages -= off;
		if (npages <= 0)
			break;
		/* Advance all cursors past the skipped empty slots. */
		an_idx += off;
		vp_off += ptob(off);
		addr += ptob(off);
		if (vpage != NULL) {
			vpage += off;
		}

		for (i = 0; i < pgcnt; i++, an_idx++, vp_off += PAGESIZE) {
			if ((ap = anon_get_ptr(ahp, an_idx)) == NULL) {
				page_t *pl[1 + 1];
				page_t *pp;

				/*
				 * Hole: read the file page and COW it into
				 * a new private anon page for this slot.
				 */
				err = VOP_GETPAGE(vp, vp_off, PAGESIZE, NULL,
				    pl, PAGESIZE, seg, addr, S_READ, cred,
				    NULL);
				if (err) {
					break;
				}
				if (vpage != NULL) {
					prot = VPP_PROT(vpage);
					pageflags = VPP_ISPPLOCK(vpage) ?
					    LOCK_PAGE : 0;
				}
				pp = anon_private(&ap, seg, addr, prot, pl[0],
				    pageflags, cred);
				if (pp == NULL) {
					err = ENOMEM;
					break;
				}
				(void) anon_set_ptr(ahp, an_idx, ap,
				    ANON_SLEEP);
				page_unlock(pp);
			}
			ASSERT(ap->an_refcnt == 1);
			addr += PAGESIZE;
			if (vpage != NULL) {
				vpage++;
			}
		}
		/*
		 * NOTE(review): after an error breaks out of the inner loop
		 * the outer loop still continues with the remaining regions;
		 * the last err value is what gets returned — confirm this is
		 * intended.
		 */
		npages -= pgcnt;
	}

	return (err);
}
15847c478bd9Sstevel@tonic-gate 
15857c478bd9Sstevel@tonic-gate /*
15867c478bd9Sstevel@tonic-gate  * Free a group of "size" anon pages, size in bytes,
15877c478bd9Sstevel@tonic-gate  * and clear out the pointers to the anon entries.
15887c478bd9Sstevel@tonic-gate  */
15897c478bd9Sstevel@tonic-gate void
anon_free(struct anon_hdr * ahp,ulong_t index,size_t size)15907c478bd9Sstevel@tonic-gate anon_free(struct anon_hdr *ahp, ulong_t index, size_t size)
15917c478bd9Sstevel@tonic-gate {
15927c478bd9Sstevel@tonic-gate 	spgcnt_t npages;
15937c478bd9Sstevel@tonic-gate 	struct anon *ap;
15947c478bd9Sstevel@tonic-gate 	ulong_t old;
15957c478bd9Sstevel@tonic-gate 
15967c478bd9Sstevel@tonic-gate 	npages = btopr(size);
15977c478bd9Sstevel@tonic-gate 
15987c478bd9Sstevel@tonic-gate 	while (npages > 0) {
15997c478bd9Sstevel@tonic-gate 		old = index;
16007c478bd9Sstevel@tonic-gate 		if ((ap = anon_get_next_ptr(ahp, &index)) == NULL)
16017c478bd9Sstevel@tonic-gate 			break;
16027c478bd9Sstevel@tonic-gate 
16037c478bd9Sstevel@tonic-gate 		ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index)));
16047c478bd9Sstevel@tonic-gate 		npages -= index - old;
16057c478bd9Sstevel@tonic-gate 		if (npages <= 0)
16067c478bd9Sstevel@tonic-gate 			break;
16077c478bd9Sstevel@tonic-gate 
16087c478bd9Sstevel@tonic-gate 		(void) anon_set_ptr(ahp, index, NULL, ANON_SLEEP);
16097c478bd9Sstevel@tonic-gate 		anon_decref(ap);
16107c478bd9Sstevel@tonic-gate 		/*
16117c478bd9Sstevel@tonic-gate 		 * Bump index and decrement page count
16127c478bd9Sstevel@tonic-gate 		 */
16137c478bd9Sstevel@tonic-gate 		index++;
16147c478bd9Sstevel@tonic-gate 		npages--;
16157c478bd9Sstevel@tonic-gate 	}
16167c478bd9Sstevel@tonic-gate }
16177c478bd9Sstevel@tonic-gate 
16187c478bd9Sstevel@tonic-gate void
anon_free_pages(struct anon_hdr * ahp,ulong_t an_idx,size_t size,uint_t szc)16197c478bd9Sstevel@tonic-gate anon_free_pages(
16207c478bd9Sstevel@tonic-gate 	struct anon_hdr *ahp,
16217c478bd9Sstevel@tonic-gate 	ulong_t an_idx,
16227c478bd9Sstevel@tonic-gate 	size_t size,
16237c478bd9Sstevel@tonic-gate 	uint_t szc)
16247c478bd9Sstevel@tonic-gate {
16257c478bd9Sstevel@tonic-gate 	spgcnt_t	npages;
16267c478bd9Sstevel@tonic-gate 	pgcnt_t		pgcnt;
16277c478bd9Sstevel@tonic-gate 	ulong_t		index, off;
16287c478bd9Sstevel@tonic-gate 
16297c478bd9Sstevel@tonic-gate 	ASSERT(szc != 0);
16307c478bd9Sstevel@tonic-gate 	pgcnt = page_get_pagecnt(szc);
16317c478bd9Sstevel@tonic-gate 	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
16327c478bd9Sstevel@tonic-gate 	npages = btopr(size);
16337c478bd9Sstevel@tonic-gate 	ASSERT(IS_P2ALIGNED(npages, pgcnt));
16347c478bd9Sstevel@tonic-gate 	ASSERT(IS_P2ALIGNED(an_idx, pgcnt));
163507b65a64Saguzovsk 	ASSERT(an_idx < ahp->size);
16367c478bd9Sstevel@tonic-gate 
16377c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(anonvmstats.freepages[0]);
16387c478bd9Sstevel@tonic-gate 
16397c478bd9Sstevel@tonic-gate 	while (npages > 0) {
16407c478bd9Sstevel@tonic-gate 		index = an_idx;
16417c478bd9Sstevel@tonic-gate 
16427c478bd9Sstevel@tonic-gate 		/*
16437c478bd9Sstevel@tonic-gate 		 * Find the next valid slot.
16447c478bd9Sstevel@tonic-gate 		 */
16457c478bd9Sstevel@tonic-gate 		if (anon_get_next_ptr(ahp, &index) == NULL)
16467c478bd9Sstevel@tonic-gate 			break;
16477c478bd9Sstevel@tonic-gate 
16487c478bd9Sstevel@tonic-gate 		ASSERT(!ANON_ISBUSY(anon_get_slot(ahp, index)));
16497c478bd9Sstevel@tonic-gate 		/*
16507c478bd9Sstevel@tonic-gate 		 * Now backup index to the beginning of the
16517c478bd9Sstevel@tonic-gate 		 * current large page region of the old array.
16527c478bd9Sstevel@tonic-gate 		 */
16537c478bd9Sstevel@tonic-gate 		index = P2ALIGN(index, pgcnt);
16547c478bd9Sstevel@tonic-gate 		off = index - an_idx;
16557c478bd9Sstevel@tonic-gate 		ASSERT(IS_P2ALIGNED(off, pgcnt));
16567c478bd9Sstevel@tonic-gate 		npages -= off;
16577c478bd9Sstevel@tonic-gate 		if (npages <= 0)
16587c478bd9Sstevel@tonic-gate 			break;
16597c478bd9Sstevel@tonic-gate 
16607c478bd9Sstevel@tonic-gate 		anon_decref_pages(ahp, index, szc);
16617c478bd9Sstevel@tonic-gate 
16627c478bd9Sstevel@tonic-gate 		off += pgcnt;
16637c478bd9Sstevel@tonic-gate 		an_idx += off;
16647c478bd9Sstevel@tonic-gate 		npages -= pgcnt;
16657c478bd9Sstevel@tonic-gate 	}
16667c478bd9Sstevel@tonic-gate }
16677c478bd9Sstevel@tonic-gate 
16687c478bd9Sstevel@tonic-gate /*
16697c478bd9Sstevel@tonic-gate  * Make anonymous pages discardable
16707c478bd9Sstevel@tonic-gate  */
16718905f42cSBryan Cantrill int
anon_disclaim(struct anon_map * amp,ulong_t index,size_t size,uint_t behav,pgcnt_t * purged)16728905f42cSBryan Cantrill anon_disclaim(struct anon_map *amp, ulong_t index, size_t size,
16738905f42cSBryan Cantrill     uint_t behav, pgcnt_t *purged)
16747c478bd9Sstevel@tonic-gate {
16757c478bd9Sstevel@tonic-gate 	spgcnt_t npages = btopr(size);
16767c478bd9Sstevel@tonic-gate 	struct anon *ap;
16777c478bd9Sstevel@tonic-gate 	struct vnode *vp;
16787c478bd9Sstevel@tonic-gate 	anoff_t off;
16797c478bd9Sstevel@tonic-gate 	page_t *pp, *root_pp;
16807c478bd9Sstevel@tonic-gate 	kmutex_t *ahm;
16818905f42cSBryan Cantrill 	pgcnt_t pgcnt, npurged = 0;
16827c478bd9Sstevel@tonic-gate 	ulong_t old_idx, idx, i;
16837c478bd9Sstevel@tonic-gate 	struct anon_hdr *ahp = amp->ahp;
16847c478bd9Sstevel@tonic-gate 	anon_sync_obj_t cookie;
16858905f42cSBryan Cantrill 	int err = 0;
16867c478bd9Sstevel@tonic-gate 
16878905f42cSBryan Cantrill 	VERIFY(behav == MADV_FREE || behav == MADV_PURGE);
16887c478bd9Sstevel@tonic-gate 	ASSERT(RW_READ_HELD(&amp->a_rwlock));
16897c478bd9Sstevel@tonic-gate 	pgcnt = 1;
16902ba723d8Smec 	for (; npages > 0; index = (pgcnt == 1) ? index + 1 :
16912ba723d8Smec 	    P2ROUNDUP(index + 1, pgcnt), npages -= pgcnt) {
16927c478bd9Sstevel@tonic-gate 
16937c478bd9Sstevel@tonic-gate 		/*
16947c478bd9Sstevel@tonic-gate 		 * get anon pointer and index for the first valid entry
16957c478bd9Sstevel@tonic-gate 		 * in the anon list, starting from "index"
16967c478bd9Sstevel@tonic-gate 		 */
16977c478bd9Sstevel@tonic-gate 		old_idx = index;
16987c478bd9Sstevel@tonic-gate 		if ((ap = anon_get_next_ptr(ahp, &index)) == NULL)
16997c478bd9Sstevel@tonic-gate 			break;
17007c478bd9Sstevel@tonic-gate 
17017c478bd9Sstevel@tonic-gate 		/*
17027c478bd9Sstevel@tonic-gate 		 * decrement npages by number of NULL anon slots we skipped
17037c478bd9Sstevel@tonic-gate 		 */
17047c478bd9Sstevel@tonic-gate 		npages -= index - old_idx;
17057c478bd9Sstevel@tonic-gate 		if (npages <= 0)
17067c478bd9Sstevel@tonic-gate 			break;
17077c478bd9Sstevel@tonic-gate 
17087c478bd9Sstevel@tonic-gate 		anon_array_enter(amp, index, &cookie);
17097c478bd9Sstevel@tonic-gate 		ap = anon_get_ptr(ahp, index);
17107c478bd9Sstevel@tonic-gate 		ASSERT(ap != NULL);
17117c478bd9Sstevel@tonic-gate 
17127c478bd9Sstevel@tonic-gate 		/*
17137c478bd9Sstevel@tonic-gate 		 * Get anonymous page and try to lock it SE_EXCL;
17142ba723d8Smec 		 * if we couldn't grab the lock we skip to next page.
17157c478bd9Sstevel@tonic-gate 		 */
17167c478bd9Sstevel@tonic-gate 		swap_xlate(ap, &vp, &off);
17172ba723d8Smec 		pp = page_lookup_nowait(vp, (u_offset_t)off, SE_EXCL);
17187c478bd9Sstevel@tonic-gate 		if (pp == NULL) {
17197c478bd9Sstevel@tonic-gate 			segadvstat.MADV_FREE_miss.value.ul++;
17207c478bd9Sstevel@tonic-gate 			pgcnt = 1;
17217c478bd9Sstevel@tonic-gate 			anon_array_exit(&cookie);
17227c478bd9Sstevel@tonic-gate 			continue;
17237c478bd9Sstevel@tonic-gate 		}
17247c478bd9Sstevel@tonic-gate 		pgcnt = page_get_pagecnt(pp->p_szc);
17257c478bd9Sstevel@tonic-gate 
17267c478bd9Sstevel@tonic-gate 		/*
17277c478bd9Sstevel@tonic-gate 		 * we cannot free a page which is permanently locked.
17287c478bd9Sstevel@tonic-gate 		 * The page_struct_lock need not be acquired to examine
17297c478bd9Sstevel@tonic-gate 		 * these fields since the page has an "exclusive" lock.
17307c478bd9Sstevel@tonic-gate 		 */
17317c478bd9Sstevel@tonic-gate 		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
17327c478bd9Sstevel@tonic-gate 			page_unlock(pp);
17337c478bd9Sstevel@tonic-gate 			segadvstat.MADV_FREE_miss.value.ul++;
17347c478bd9Sstevel@tonic-gate 			anon_array_exit(&cookie);
17358905f42cSBryan Cantrill 			err = EBUSY;
17367c478bd9Sstevel@tonic-gate 			continue;
17377c478bd9Sstevel@tonic-gate 		}
17387c478bd9Sstevel@tonic-gate 
173923d9e5acSMichael Corcoran 		ahm = AH_MUTEX(vp, off);
17407c478bd9Sstevel@tonic-gate 		mutex_enter(ahm);
17417c478bd9Sstevel@tonic-gate 		ASSERT(ap->an_refcnt != 0);
17427c478bd9Sstevel@tonic-gate 		/*
17437c478bd9Sstevel@tonic-gate 		 * skip this one if copy-on-write is not yet broken.
17447c478bd9Sstevel@tonic-gate 		 */
17457c478bd9Sstevel@tonic-gate 		if (ap->an_refcnt > 1) {
17467c478bd9Sstevel@tonic-gate 			mutex_exit(ahm);
17477c478bd9Sstevel@tonic-gate 			page_unlock(pp);
17487c478bd9Sstevel@tonic-gate 			segadvstat.MADV_FREE_miss.value.ul++;
17497c478bd9Sstevel@tonic-gate 			anon_array_exit(&cookie);
17507c478bd9Sstevel@tonic-gate 			continue;
17517c478bd9Sstevel@tonic-gate 		}
17527c478bd9Sstevel@tonic-gate 
17538905f42cSBryan Cantrill 		if (behav == MADV_PURGE && pp->p_szc != 0) {
17548905f42cSBryan Cantrill 			/*
17558905f42cSBryan Cantrill 			 * If we're purging and we have a large page, simplify
17568905f42cSBryan Cantrill 			 * things a bit by demoting ourselves into the base
17578905f42cSBryan Cantrill 			 * page case.
17588905f42cSBryan Cantrill 			 */
17598905f42cSBryan Cantrill 			(void) page_try_demote_pages(pp);
17608905f42cSBryan Cantrill 		}
17618905f42cSBryan Cantrill 
17627c478bd9Sstevel@tonic-gate 		if (pp->p_szc == 0) {
17637c478bd9Sstevel@tonic-gate 			pgcnt = 1;
17647c478bd9Sstevel@tonic-gate 
17657c478bd9Sstevel@tonic-gate 			/*
17667c478bd9Sstevel@tonic-gate 			 * free swap slot;
17677c478bd9Sstevel@tonic-gate 			 */
17687c478bd9Sstevel@tonic-gate 			if (ap->an_pvp) {
17697c478bd9Sstevel@tonic-gate 				swap_phys_free(ap->an_pvp, ap->an_poff,
17707c478bd9Sstevel@tonic-gate 				    PAGESIZE);
17717c478bd9Sstevel@tonic-gate 				ap->an_pvp = NULL;
17727c478bd9Sstevel@tonic-gate 				ap->an_poff = 0;
17737c478bd9Sstevel@tonic-gate 			}
17748905f42cSBryan Cantrill 
17758905f42cSBryan Cantrill 			if (behav == MADV_PURGE) {
17768905f42cSBryan Cantrill 				/*
17778905f42cSBryan Cantrill 				 * If we're purging (instead of merely freeing),
17788905f42cSBryan Cantrill 				 * rip out this anon structure entirely to
17798905f42cSBryan Cantrill 				 * assure that any subsequent fault pulls from
17808905f42cSBryan Cantrill 				 * the backing vnode (if any).
17818905f42cSBryan Cantrill 				 */
17828905f42cSBryan Cantrill 				if (--ap->an_refcnt == 0)
17838905f42cSBryan Cantrill 					anon_rmhash(ap);
17848905f42cSBryan Cantrill 
17858905f42cSBryan Cantrill 				mutex_exit(ahm);
17868905f42cSBryan Cantrill 				(void) anon_set_ptr(ahp, index,
17878905f42cSBryan Cantrill 				    NULL, ANON_SLEEP);
17888905f42cSBryan Cantrill 				npurged++;
17898905f42cSBryan Cantrill 				ANI_ADD(1);
17908905f42cSBryan Cantrill 				kmem_cache_free(anon_cache, ap);
17918905f42cSBryan Cantrill 			} else {
17928905f42cSBryan Cantrill 				mutex_exit(ahm);
17938905f42cSBryan Cantrill 			}
17948905f42cSBryan Cantrill 
17957c478bd9Sstevel@tonic-gate 			segadvstat.MADV_FREE_hit.value.ul++;
17967c478bd9Sstevel@tonic-gate 
17977c478bd9Sstevel@tonic-gate 			/*
17987c478bd9Sstevel@tonic-gate 			 * while we are at it, unload all the translations
17997c478bd9Sstevel@tonic-gate 			 * and attempt to free the page.
18007c478bd9Sstevel@tonic-gate 			 */
18017c478bd9Sstevel@tonic-gate 			(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
18027c478bd9Sstevel@tonic-gate 			/*LINTED: constant in conditional context */
18038905f42cSBryan Cantrill 			VN_DISPOSE(pp,
18048905f42cSBryan Cantrill 			    behav == MADV_FREE ? B_FREE : B_INVAL, 0, kcred);
18058905f42cSBryan Cantrill 
18067c478bd9Sstevel@tonic-gate 			anon_array_exit(&cookie);
18077c478bd9Sstevel@tonic-gate 			continue;
18087c478bd9Sstevel@tonic-gate 		}
18097c478bd9Sstevel@tonic-gate 
18107c478bd9Sstevel@tonic-gate 		pgcnt = page_get_pagecnt(pp->p_szc);
181107b65a64Saguzovsk 		if (!IS_P2ALIGNED(index, pgcnt) || npages < pgcnt) {
18127c478bd9Sstevel@tonic-gate 			if (!page_try_demote_pages(pp)) {
18137c478bd9Sstevel@tonic-gate 				mutex_exit(ahm);
18147c478bd9Sstevel@tonic-gate 				page_unlock(pp);
18157c478bd9Sstevel@tonic-gate 				segadvstat.MADV_FREE_miss.value.ul++;
18167c478bd9Sstevel@tonic-gate 				anon_array_exit(&cookie);
18178905f42cSBryan Cantrill 				err = EBUSY;
18187c478bd9Sstevel@tonic-gate 				continue;
18197c478bd9Sstevel@tonic-gate 			} else {
18207c478bd9Sstevel@tonic-gate 				pgcnt = 1;
18217c478bd9Sstevel@tonic-gate 				if (ap->an_pvp) {
18227c478bd9Sstevel@tonic-gate 					swap_phys_free(ap->an_pvp,
18237c478bd9Sstevel@tonic-gate 					    ap->an_poff, PAGESIZE);
18242ba723d8Smec 					ap->an_pvp = NULL;
18252ba723d8Smec 					ap->an_poff = 0;
18267c478bd9Sstevel@tonic-gate 				}
18277c478bd9Sstevel@tonic-gate 				mutex_exit(ahm);
18287c478bd9Sstevel@tonic-gate 				(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
18297c478bd9Sstevel@tonic-gate 				/*LINTED*/
18307c478bd9Sstevel@tonic-gate 				VN_DISPOSE(pp, B_FREE, 0, kcred);
18317c478bd9Sstevel@tonic-gate 				segadvstat.MADV_FREE_hit.value.ul++;
18327c478bd9Sstevel@tonic-gate 				anon_array_exit(&cookie);
18337c478bd9Sstevel@tonic-gate 				continue;
18347c478bd9Sstevel@tonic-gate 			}
18357c478bd9Sstevel@tonic-gate 		}
18367c478bd9Sstevel@tonic-gate 		mutex_exit(ahm);
18377c478bd9Sstevel@tonic-gate 		root_pp = pp;
18387c478bd9Sstevel@tonic-gate 
18397c478bd9Sstevel@tonic-gate 		/*
18407c478bd9Sstevel@tonic-gate 		 * try to lock remaining pages
18417c478bd9Sstevel@tonic-gate 		 */
18427c478bd9Sstevel@tonic-gate 		for (idx = 1; idx < pgcnt; idx++) {
1843affbd3ccSkchow 			pp++;
18447c478bd9Sstevel@tonic-gate 			if (!page_trylock(pp, SE_EXCL))
18457c478bd9Sstevel@tonic-gate 				break;
18467c478bd9Sstevel@tonic-gate 			if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
18477c478bd9Sstevel@tonic-gate 				page_unlock(pp);
18487c478bd9Sstevel@tonic-gate 				break;
18497c478bd9Sstevel@tonic-gate 			}
18507c478bd9Sstevel@tonic-gate 		}
18517c478bd9Sstevel@tonic-gate 
18527c478bd9Sstevel@tonic-gate 		if (idx == pgcnt) {
18537c478bd9Sstevel@tonic-gate 			for (i = 0; i < pgcnt; i++) {
18547c478bd9Sstevel@tonic-gate 				ap = anon_get_ptr(ahp, index + i);
18557c478bd9Sstevel@tonic-gate 				if (ap == NULL)
18567c478bd9Sstevel@tonic-gate 					break;
18577c478bd9Sstevel@tonic-gate 				swap_xlate(ap, &vp, &off);
185823d9e5acSMichael Corcoran 				ahm = AH_MUTEX(vp, off);
18597c478bd9Sstevel@tonic-gate 				mutex_enter(ahm);
18607c478bd9Sstevel@tonic-gate 				ASSERT(ap->an_refcnt != 0);
18617c478bd9Sstevel@tonic-gate 
18627c478bd9Sstevel@tonic-gate 				/*
18637c478bd9Sstevel@tonic-gate 				 * skip this one if copy-on-write
18647c478bd9Sstevel@tonic-gate 				 * is not yet broken.
18657c478bd9Sstevel@tonic-gate 				 */
18667c478bd9Sstevel@tonic-gate 				if (ap->an_refcnt > 1) {
18677c478bd9Sstevel@tonic-gate 					mutex_exit(ahm);
18687c478bd9Sstevel@tonic-gate 					goto skiplp;
18697c478bd9Sstevel@tonic-gate 				}
18707c478bd9Sstevel@tonic-gate 				if (ap->an_pvp) {
18717c478bd9Sstevel@tonic-gate 					swap_phys_free(ap->an_pvp,
18727c478bd9Sstevel@tonic-gate 					    ap->an_poff, PAGESIZE);
18732ba723d8Smec 					ap->an_pvp = NULL;
18742ba723d8Smec 					ap->an_poff = 0;
18757c478bd9Sstevel@tonic-gate 				}
18767c478bd9Sstevel@tonic-gate 				mutex_exit(ahm);
18777c478bd9Sstevel@tonic-gate 			}
18787c478bd9Sstevel@tonic-gate 			page_destroy_pages(root_pp);
18797c478bd9Sstevel@tonic-gate 			segadvstat.MADV_FREE_hit.value.ul += pgcnt;
18807c478bd9Sstevel@tonic-gate 			anon_array_exit(&cookie);
18817c478bd9Sstevel@tonic-gate 			continue;
18827c478bd9Sstevel@tonic-gate 		}
18837c478bd9Sstevel@tonic-gate skiplp:
18847c478bd9Sstevel@tonic-gate 		segadvstat.MADV_FREE_miss.value.ul += pgcnt;
1885affbd3ccSkchow 		for (i = 0, pp = root_pp; i < idx; pp++, i++)
18867c478bd9Sstevel@tonic-gate 			page_unlock(pp);
18877c478bd9Sstevel@tonic-gate 		anon_array_exit(&cookie);
18887c478bd9Sstevel@tonic-gate 	}
18898905f42cSBryan Cantrill 
18908905f42cSBryan Cantrill 	if (purged != NULL)
18918905f42cSBryan Cantrill 		*purged = npurged;
18928905f42cSBryan Cantrill 
18938905f42cSBryan Cantrill 	return (err);
18947c478bd9Sstevel@tonic-gate }
18957c478bd9Sstevel@tonic-gate 
18967c478bd9Sstevel@tonic-gate /*
18977c478bd9Sstevel@tonic-gate  * Return the kept page(s) and protections back to the segment driver.
18987c478bd9Sstevel@tonic-gate  */
18997c478bd9Sstevel@tonic-gate int
anon_getpage(struct anon ** app,uint_t * protp,page_t * pl[],size_t plsz,struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cred)19007c478bd9Sstevel@tonic-gate anon_getpage(
19017c478bd9Sstevel@tonic-gate 	struct anon **app,
19027c478bd9Sstevel@tonic-gate 	uint_t *protp,
19037c478bd9Sstevel@tonic-gate 	page_t *pl[],
19047c478bd9Sstevel@tonic-gate 	size_t plsz,
19057c478bd9Sstevel@tonic-gate 	struct seg *seg,
19067c478bd9Sstevel@tonic-gate 	caddr_t addr,
19077c478bd9Sstevel@tonic-gate 	enum seg_rw rw,
19087c478bd9Sstevel@tonic-gate 	struct cred *cred)
19097c478bd9Sstevel@tonic-gate {
19107c478bd9Sstevel@tonic-gate 	page_t *pp;
19117c478bd9Sstevel@tonic-gate 	struct anon *ap = *app;
19127c478bd9Sstevel@tonic-gate 	struct vnode *vp;
19137c478bd9Sstevel@tonic-gate 	anoff_t off;
19147c478bd9Sstevel@tonic-gate 	int err;
19157c478bd9Sstevel@tonic-gate 	kmutex_t *ahm;
19167c478bd9Sstevel@tonic-gate 
19177c478bd9Sstevel@tonic-gate 	swap_xlate(ap, &vp, &off);
19187c478bd9Sstevel@tonic-gate 
19197c478bd9Sstevel@tonic-gate 	/*
19207c478bd9Sstevel@tonic-gate 	 * Lookup the page. If page is being paged in,
19217c478bd9Sstevel@tonic-gate 	 * wait for it to finish as we must return a list of
19227c478bd9Sstevel@tonic-gate 	 * pages since this routine acts like the VOP_GETPAGE
19237c478bd9Sstevel@tonic-gate 	 * routine does.
19247c478bd9Sstevel@tonic-gate 	 */
19257c478bd9Sstevel@tonic-gate 	if (pl != NULL && (pp = page_lookup(vp, (u_offset_t)off, SE_SHARED))) {
192623d9e5acSMichael Corcoran 		ahm = AH_MUTEX(ap->an_vp, ap->an_off);
19277c478bd9Sstevel@tonic-gate 		mutex_enter(ahm);
19287c478bd9Sstevel@tonic-gate 		if (ap->an_refcnt == 1)
19297c478bd9Sstevel@tonic-gate 			*protp = PROT_ALL;
19307c478bd9Sstevel@tonic-gate 		else
19317c478bd9Sstevel@tonic-gate 			*protp = PROT_ALL & ~PROT_WRITE;
19327c478bd9Sstevel@tonic-gate 		mutex_exit(ahm);
19337c478bd9Sstevel@tonic-gate 		pl[0] = pp;
19347c478bd9Sstevel@tonic-gate 		pl[1] = NULL;
19357c478bd9Sstevel@tonic-gate 		return (0);
19367c478bd9Sstevel@tonic-gate 	}
19377c478bd9Sstevel@tonic-gate 
19387c478bd9Sstevel@tonic-gate 	/*
19397c478bd9Sstevel@tonic-gate 	 * Simply treat it as a vnode fault on the anon vp.
19407c478bd9Sstevel@tonic-gate 	 */
19417c478bd9Sstevel@tonic-gate 
19427c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_VM, TR_ANON_GETPAGE,
194378b03d3aSkchow 	    "anon_getpage:seg %x addr %x vp %x",
194478b03d3aSkchow 	    seg, addr, vp);
19457c478bd9Sstevel@tonic-gate 
19467c478bd9Sstevel@tonic-gate 	err = VOP_GETPAGE(vp, (u_offset_t)off, PAGESIZE, protp, pl, plsz,
1947da6c28aaSamw 	    seg, addr, rw, cred, NULL);
19487c478bd9Sstevel@tonic-gate 
19497c478bd9Sstevel@tonic-gate 	if (err == 0 && pl != NULL) {
195023d9e5acSMichael Corcoran 		ahm = AH_MUTEX(ap->an_vp, ap->an_off);
19517c478bd9Sstevel@tonic-gate 		mutex_enter(ahm);
19527c478bd9Sstevel@tonic-gate 		if (ap->an_refcnt != 1)
19537c478bd9Sstevel@tonic-gate 			*protp &= ~PROT_WRITE;	/* make read-only */
19547c478bd9Sstevel@tonic-gate 		mutex_exit(ahm);
19557c478bd9Sstevel@tonic-gate 	}
19567c478bd9Sstevel@tonic-gate 	return (err);
19577c478bd9Sstevel@tonic-gate }
19587c478bd9Sstevel@tonic-gate 
19597c478bd9Sstevel@tonic-gate /*
19607c478bd9Sstevel@tonic-gate  * Creates or returns kept pages to the segment driver.  returns -1 if a large
19617c478bd9Sstevel@tonic-gate  * page cannot be allocated. returns -2 if some other process has allocated a
19627c478bd9Sstevel@tonic-gate  * larger page.
19637c478bd9Sstevel@tonic-gate  *
1964da6c28aaSamw  * For cowfault it will allocate any size pages to fill the requested area to
1965da6c28aaSamw  * avoid partially overwriting anon slots (i.e. sharing only some of the anon
19667c478bd9Sstevel@tonic-gate  * slots within a large page with other processes). This policy greatly
19677c478bd9Sstevel@tonic-gate  * simplifies large page freeing (which is only freed when all anon slot
19687c478bd9Sstevel@tonic-gate  * refcnts are 0).
19697c478bd9Sstevel@tonic-gate  */
19707c478bd9Sstevel@tonic-gate int
anon_map_getpages(struct anon_map * amp,ulong_t start_idx,uint_t szc,struct seg * seg,caddr_t addr,uint_t prot,uint_t * protp,page_t * ppa[],uint_t * ppa_szc,struct vpage vpage[],enum seg_rw rw,int brkcow,int anypgsz,int pgflags,struct cred * cred)19717c478bd9Sstevel@tonic-gate anon_map_getpages(
19727c478bd9Sstevel@tonic-gate 	struct anon_map *amp,
19737c478bd9Sstevel@tonic-gate 	ulong_t	start_idx,
19747c478bd9Sstevel@tonic-gate 	uint_t	szc,
19757c478bd9Sstevel@tonic-gate 	struct seg *seg,
19767c478bd9Sstevel@tonic-gate 	caddr_t	addr,
19777c478bd9Sstevel@tonic-gate 	uint_t prot,
19787c478bd9Sstevel@tonic-gate 	uint_t *protp,
19797c478bd9Sstevel@tonic-gate 	page_t	*ppa[],
19807c478bd9Sstevel@tonic-gate 	uint_t	*ppa_szc,
19817c478bd9Sstevel@tonic-gate 	struct vpage vpage[],
19827c478bd9Sstevel@tonic-gate 	enum seg_rw rw,
19837c478bd9Sstevel@tonic-gate 	int brkcow,
19847c478bd9Sstevel@tonic-gate 	int anypgsz,
19852cb27123Saguzovsk 	int pgflags,
19867c478bd9Sstevel@tonic-gate 	struct cred *cred)
19877c478bd9Sstevel@tonic-gate {
19887c478bd9Sstevel@tonic-gate 	pgcnt_t		pgcnt;
19897c478bd9Sstevel@tonic-gate 	struct anon	*ap;
19907c478bd9Sstevel@tonic-gate 	struct vnode	*vp;
19917c478bd9Sstevel@tonic-gate 	anoff_t		off;
19927c478bd9Sstevel@tonic-gate 	page_t		*pp, *pl[2], *conpp = NULL;
19937c478bd9Sstevel@tonic-gate 	caddr_t		vaddr;
19947c478bd9Sstevel@tonic-gate 	ulong_t		pg_idx, an_idx, i;
19957c478bd9Sstevel@tonic-gate 	spgcnt_t	nreloc = 0;
19967c478bd9Sstevel@tonic-gate 	int		prealloc = 1;
19977c478bd9Sstevel@tonic-gate 	int		err, slotcreate;
19987c478bd9Sstevel@tonic-gate 	uint_t		vpprot;
199907b65a64Saguzovsk 	int		upsize = (szc < seg->s_szc);
20007c478bd9Sstevel@tonic-gate 
2001*86ef0a63SRichard Lowe #if !defined(__x86)
20027c478bd9Sstevel@tonic-gate 	ASSERT(seg->s_szc != 0);
20037c478bd9Sstevel@tonic-gate #endif
20047c478bd9Sstevel@tonic-gate 	ASSERT(szc <= seg->s_szc);
20057c478bd9Sstevel@tonic-gate 	ASSERT(ppa_szc != NULL);
20067c478bd9Sstevel@tonic-gate 	ASSERT(rw != S_CREATE);
20077c478bd9Sstevel@tonic-gate 
20087c478bd9Sstevel@tonic-gate 	*protp = PROT_ALL;
20097c478bd9Sstevel@tonic-gate 
20107c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(anonvmstats.getpages[0]);
20117c478bd9Sstevel@tonic-gate 
20127c478bd9Sstevel@tonic-gate 	if (szc == 0) {
20137c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(anonvmstats.getpages[1]);
20147c478bd9Sstevel@tonic-gate 		if ((ap = anon_get_ptr(amp->ahp, start_idx)) != NULL) {
20157c478bd9Sstevel@tonic-gate 			err = anon_getpage(&ap, protp, pl, PAGESIZE, seg,
20167c478bd9Sstevel@tonic-gate 			    addr, rw, cred);
20177c478bd9Sstevel@tonic-gate 			if (err)
20187c478bd9Sstevel@tonic-gate 				return (err);
20197c478bd9Sstevel@tonic-gate 			ppa[0] = pl[0];
20207c478bd9Sstevel@tonic-gate 			if (brkcow == 0 || (*protp & PROT_WRITE)) {
20217c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(anonvmstats.getpages[2]);
202207b65a64Saguzovsk 				if (ppa[0]->p_szc != 0 && upsize) {
20237c478bd9Sstevel@tonic-gate 					VM_STAT_ADD(anonvmstats.getpages[3]);
202407b65a64Saguzovsk 					*ppa_szc = MIN(ppa[0]->p_szc,
202507b65a64Saguzovsk 					    seg->s_szc);
20267c478bd9Sstevel@tonic-gate 					page_unlock(ppa[0]);
20277c478bd9Sstevel@tonic-gate 					return (-2);
20287c478bd9Sstevel@tonic-gate 				}
20297c478bd9Sstevel@tonic-gate 				return (0);
20307c478bd9Sstevel@tonic-gate 			}
20317c478bd9Sstevel@tonic-gate 			panic("anon_map_getpages: cowfault for szc 0");
20327c478bd9Sstevel@tonic-gate 		} else {
20337c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[4]);
20347c478bd9Sstevel@tonic-gate 			ppa[0] = anon_zero(seg, addr, &ap, cred);
20357c478bd9Sstevel@tonic-gate 			if (ppa[0] == NULL)
20367c478bd9Sstevel@tonic-gate 				return (ENOMEM);
20377c478bd9Sstevel@tonic-gate 			(void) anon_set_ptr(amp->ahp, start_idx, ap,
20387c478bd9Sstevel@tonic-gate 			    ANON_SLEEP);
20397c478bd9Sstevel@tonic-gate 			return (0);
20407c478bd9Sstevel@tonic-gate 		}
20417c478bd9Sstevel@tonic-gate 	}
20427c478bd9Sstevel@tonic-gate 
20437c478bd9Sstevel@tonic-gate 	pgcnt = page_get_pagecnt(szc);
20447c478bd9Sstevel@tonic-gate 	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
20457c478bd9Sstevel@tonic-gate 	ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
20467c478bd9Sstevel@tonic-gate 
20477c478bd9Sstevel@tonic-gate 	/*
20487c478bd9Sstevel@tonic-gate 	 * First we check for the case that the requested large
20497c478bd9Sstevel@tonic-gate 	 * page or larger page already exists in the system.
20507c478bd9Sstevel@tonic-gate 	 * Actually we only check if the first constituent page
20517c478bd9Sstevel@tonic-gate 	 * exists and only preallocate if it's not found.
20527c478bd9Sstevel@tonic-gate 	 */
20537c478bd9Sstevel@tonic-gate 	ap = anon_get_ptr(amp->ahp, start_idx);
20547c478bd9Sstevel@tonic-gate 	if (ap) {
20557c478bd9Sstevel@tonic-gate 		uint_t pszc;
20567c478bd9Sstevel@tonic-gate 		swap_xlate(ap, &vp, &off);
20577c478bd9Sstevel@tonic-gate 		if (page_exists_forreal(vp, (u_offset_t)off, &pszc)) {
205807b65a64Saguzovsk 			if (pszc > szc && upsize) {
205907b65a64Saguzovsk 				*ppa_szc = MIN(pszc, seg->s_szc);
20607c478bd9Sstevel@tonic-gate 				return (-2);
20617c478bd9Sstevel@tonic-gate 			}
206207b65a64Saguzovsk 			if (pszc >= szc) {
20637c478bd9Sstevel@tonic-gate 				prealloc = 0;
20647c478bd9Sstevel@tonic-gate 			}
20657c478bd9Sstevel@tonic-gate 		}
20667c478bd9Sstevel@tonic-gate 	}
20677c478bd9Sstevel@tonic-gate 
20687c478bd9Sstevel@tonic-gate 	VM_STAT_COND_ADD(prealloc == 0, anonvmstats.getpages[5]);
20697c478bd9Sstevel@tonic-gate 	VM_STAT_COND_ADD(prealloc != 0, anonvmstats.getpages[6]);
20707c478bd9Sstevel@tonic-gate 
20717c478bd9Sstevel@tonic-gate top:
20727c478bd9Sstevel@tonic-gate 	/*
20737c478bd9Sstevel@tonic-gate 	 * If a smaller page or no page at all was found,
20747c478bd9Sstevel@tonic-gate 	 * grab a large page off the freelist.
20757c478bd9Sstevel@tonic-gate 	 */
20767c478bd9Sstevel@tonic-gate 	if (prealloc) {
20777c478bd9Sstevel@tonic-gate 		ASSERT(conpp == NULL);
2078e44bd21cSsusans 		if (page_alloc_pages(anon_vp, seg, addr, NULL, ppa,
20792cb27123Saguzovsk 		    szc, 0, pgflags) != 0) {
20807c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[7]);
208178b03d3aSkchow 			if (brkcow == 0 || szc < seg->s_szc ||
208278b03d3aSkchow 			    !anon_szcshare(amp->ahp, start_idx)) {
20837c478bd9Sstevel@tonic-gate 				/*
20847c478bd9Sstevel@tonic-gate 				 * If the refcnt's of all anon slots are <= 1
20857c478bd9Sstevel@tonic-gate 				 * they can't increase since we are holding
20867c478bd9Sstevel@tonic-gate 				 * the address space's lock. So segvn can
20877c478bd9Sstevel@tonic-gate 				 * safely decrease szc without risking to
20887c478bd9Sstevel@tonic-gate 				 * generate a cow fault for the region smaller
20897c478bd9Sstevel@tonic-gate 				 * than the segment's largest page size.
20907c478bd9Sstevel@tonic-gate 				 */
20917c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(anonvmstats.getpages[8]);
20927c478bd9Sstevel@tonic-gate 				return (-1);
20937c478bd9Sstevel@tonic-gate 			}
20947c478bd9Sstevel@tonic-gate 		docow:
20957c478bd9Sstevel@tonic-gate 			/*
20967c478bd9Sstevel@tonic-gate 			 * This is a cow fault. Copy away the entire 1 large
20977c478bd9Sstevel@tonic-gate 			 * page region of this segment.
20987c478bd9Sstevel@tonic-gate 			 */
20997c478bd9Sstevel@tonic-gate 			if (szc != seg->s_szc)
21007c478bd9Sstevel@tonic-gate 				panic("anon_map_getpages: cowfault for szc %d",
21017c478bd9Sstevel@tonic-gate 				    szc);
21027c478bd9Sstevel@tonic-gate 			vaddr = addr;
21037c478bd9Sstevel@tonic-gate 			for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt;
21047c478bd9Sstevel@tonic-gate 			    pg_idx++, an_idx++, vaddr += PAGESIZE) {
21057c478bd9Sstevel@tonic-gate 				if ((ap = anon_get_ptr(amp->ahp, an_idx)) !=
21067c478bd9Sstevel@tonic-gate 				    NULL) {
21077c478bd9Sstevel@tonic-gate 					err = anon_getpage(&ap, &vpprot, pl,
21087c478bd9Sstevel@tonic-gate 					    PAGESIZE, seg, vaddr, rw, cred);
21097c478bd9Sstevel@tonic-gate 					if (err) {
21107c478bd9Sstevel@tonic-gate 						for (i = 0; i < pg_idx; i++) {
21117c478bd9Sstevel@tonic-gate 							if ((pp = ppa[i]) !=
21127c478bd9Sstevel@tonic-gate 							    NULL)
21137c478bd9Sstevel@tonic-gate 								page_unlock(pp);
21147c478bd9Sstevel@tonic-gate 						}
21157c478bd9Sstevel@tonic-gate 						return (err);
21167c478bd9Sstevel@tonic-gate 					}
21177c478bd9Sstevel@tonic-gate 					ppa[pg_idx] = pl[0];
21187c478bd9Sstevel@tonic-gate 				} else {
21197c478bd9Sstevel@tonic-gate 					/*
21207c478bd9Sstevel@tonic-gate 					 * Since this is a cowfault we know
21217c478bd9Sstevel@tonic-gate 					 * that this address space has a
21227c478bd9Sstevel@tonic-gate 					 * parent or children which means
21237c478bd9Sstevel@tonic-gate 					 * anon_dup_fill_holes() has initialized
21247c478bd9Sstevel@tonic-gate 					 * all anon slots within a large page
21257c478bd9Sstevel@tonic-gate 					 * region that had at least one anon
21267c478bd9Sstevel@tonic-gate 					 * slot at the time of fork().
21277c478bd9Sstevel@tonic-gate 					 */
21287c478bd9Sstevel@tonic-gate 					panic("anon_map_getpages: "
21297c478bd9Sstevel@tonic-gate 					    "cowfault but anon slot is empty");
21307c478bd9Sstevel@tonic-gate 				}
21317c478bd9Sstevel@tonic-gate 			}
21327c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[9]);
21337c478bd9Sstevel@tonic-gate 			*protp = PROT_ALL;
21347c478bd9Sstevel@tonic-gate 			return (anon_map_privatepages(amp, start_idx, szc, seg,
21352cb27123Saguzovsk 			    addr, prot, ppa, vpage, anypgsz, pgflags, cred));
21367c478bd9Sstevel@tonic-gate 		}
21377c478bd9Sstevel@tonic-gate 	}
21387c478bd9Sstevel@tonic-gate 
21397c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(anonvmstats.getpages[10]);
21407c478bd9Sstevel@tonic-gate 
21417c478bd9Sstevel@tonic-gate 	an_idx = start_idx;
21427c478bd9Sstevel@tonic-gate 	pg_idx = 0;
21437c478bd9Sstevel@tonic-gate 	vaddr = addr;
21447c478bd9Sstevel@tonic-gate 	while (pg_idx < pgcnt) {
21457c478bd9Sstevel@tonic-gate 		slotcreate = 0;
21467c478bd9Sstevel@tonic-gate 		if ((ap = anon_get_ptr(amp->ahp, an_idx)) == NULL) {
21477c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[11]);
21487c478bd9Sstevel@tonic-gate 			/*
21497c478bd9Sstevel@tonic-gate 			 * For us to have decided not to preallocate
21507c478bd9Sstevel@tonic-gate 			 * would have meant that a large page
21517c478bd9Sstevel@tonic-gate 			 * was found. Which also means that all of the
21527c478bd9Sstevel@tonic-gate 			 * anon slots for that page would have been
21537c478bd9Sstevel@tonic-gate 			 * already created for us.
21547c478bd9Sstevel@tonic-gate 			 */
21557c478bd9Sstevel@tonic-gate 			if (prealloc == 0)
21567c478bd9Sstevel@tonic-gate 				panic("anon_map_getpages: prealloc = 0");
21577c478bd9Sstevel@tonic-gate 
21587c478bd9Sstevel@tonic-gate 			slotcreate = 1;
21597c478bd9Sstevel@tonic-gate 			ap = anon_alloc(NULL, 0);
21607c478bd9Sstevel@tonic-gate 		}
21617c478bd9Sstevel@tonic-gate 		swap_xlate(ap, &vp, &off);
21627c478bd9Sstevel@tonic-gate 
21637c478bd9Sstevel@tonic-gate 		/*
21647c478bd9Sstevel@tonic-gate 		 * Now setup our preallocated page to pass down
21657c478bd9Sstevel@tonic-gate 		 * to swap_getpage().
21667c478bd9Sstevel@tonic-gate 		 */
21677c478bd9Sstevel@tonic-gate 		if (prealloc) {
21687c478bd9Sstevel@tonic-gate 			ASSERT(ppa[pg_idx]->p_szc == szc);
21697c478bd9Sstevel@tonic-gate 			conpp = ppa[pg_idx];
21707c478bd9Sstevel@tonic-gate 		}
21717c478bd9Sstevel@tonic-gate 		ASSERT(prealloc || conpp == NULL);
21727c478bd9Sstevel@tonic-gate 
21737c478bd9Sstevel@tonic-gate 		/*
21747c478bd9Sstevel@tonic-gate 		 * If we just created this anon slot then call
21757c478bd9Sstevel@tonic-gate 		 * with S_CREATE to prevent doing IO on the page.
21767c478bd9Sstevel@tonic-gate 		 * Similar to the anon_zero case.
21777c478bd9Sstevel@tonic-gate 		 */
21787c478bd9Sstevel@tonic-gate 		err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE,
217907b65a64Saguzovsk 		    NULL, pl, PAGESIZE, conpp, ppa_szc, &nreloc, seg, vaddr,
21807c478bd9Sstevel@tonic-gate 		    slotcreate == 1 ? S_CREATE : rw, cred);
21817c478bd9Sstevel@tonic-gate 
21827c478bd9Sstevel@tonic-gate 		if (err) {
218307b65a64Saguzovsk 			ASSERT(err != -2 || upsize);
21847c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[12]);
21857c478bd9Sstevel@tonic-gate 			ASSERT(slotcreate == 0);
21867c478bd9Sstevel@tonic-gate 			goto io_err;
21877c478bd9Sstevel@tonic-gate 		}
21887c478bd9Sstevel@tonic-gate 
21897c478bd9Sstevel@tonic-gate 		pp = pl[0];
21907c478bd9Sstevel@tonic-gate 
219107b65a64Saguzovsk 		if (pp->p_szc < szc || (pp->p_szc > szc && upsize)) {
21927c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[13]);
21937c478bd9Sstevel@tonic-gate 			ASSERT(slotcreate == 0);
21947c478bd9Sstevel@tonic-gate 			ASSERT(prealloc == 0);
21957c478bd9Sstevel@tonic-gate 			ASSERT(pg_idx == 0);
21967c478bd9Sstevel@tonic-gate 			if (pp->p_szc > szc) {
219707b65a64Saguzovsk 				ASSERT(upsize);
219807b65a64Saguzovsk 				*ppa_szc = MIN(pp->p_szc, seg->s_szc);
21997c478bd9Sstevel@tonic-gate 				page_unlock(pp);
22007c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(anonvmstats.getpages[14]);
22017c478bd9Sstevel@tonic-gate 				return (-2);
22027c478bd9Sstevel@tonic-gate 			}
22037c478bd9Sstevel@tonic-gate 			page_unlock(pp);
22047c478bd9Sstevel@tonic-gate 			prealloc = 1;
22057c478bd9Sstevel@tonic-gate 			goto top;
22067c478bd9Sstevel@tonic-gate 		}
22077c478bd9Sstevel@tonic-gate 
22087c478bd9Sstevel@tonic-gate 		/*
22097c478bd9Sstevel@tonic-gate 		 * If we decided to preallocate but VOP_GETPAGE
22107c478bd9Sstevel@tonic-gate 		 * found a page in the system that satisfies our
22117c478bd9Sstevel@tonic-gate 		 * request then free up our preallocated large page
22127c478bd9Sstevel@tonic-gate 		 * and continue looping across the existing large
22137c478bd9Sstevel@tonic-gate 		 * page via VOP_GETPAGE.
22147c478bd9Sstevel@tonic-gate 		 */
22157c478bd9Sstevel@tonic-gate 		if (prealloc && pp != ppa[pg_idx]) {
22167c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[15]);
22177c478bd9Sstevel@tonic-gate 			ASSERT(slotcreate == 0);
22187c478bd9Sstevel@tonic-gate 			ASSERT(pg_idx == 0);
22197c478bd9Sstevel@tonic-gate 			conpp = NULL;
22207c478bd9Sstevel@tonic-gate 			prealloc = 0;
22217c478bd9Sstevel@tonic-gate 			page_free_pages(ppa[0]);
22227c478bd9Sstevel@tonic-gate 		}
22237c478bd9Sstevel@tonic-gate 
22247c478bd9Sstevel@tonic-gate 		if (prealloc && nreloc > 1) {
22257c478bd9Sstevel@tonic-gate 			/*
22267c478bd9Sstevel@tonic-gate 			 * we have relocated out of a smaller large page.
22277c478bd9Sstevel@tonic-gate 			 * skip npgs - 1 iterations and continue which will
22287c478bd9Sstevel@tonic-gate 			 * increment by one the loop indices.
22297c478bd9Sstevel@tonic-gate 			 */
22307c478bd9Sstevel@tonic-gate 			spgcnt_t npgs = nreloc;
22317c478bd9Sstevel@tonic-gate 
22327c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[16]);
22337c478bd9Sstevel@tonic-gate 
22347c478bd9Sstevel@tonic-gate 			ASSERT(pp == ppa[pg_idx]);
22357c478bd9Sstevel@tonic-gate 			ASSERT(slotcreate == 0);
22367c478bd9Sstevel@tonic-gate 			ASSERT(pg_idx + npgs <= pgcnt);
22377c478bd9Sstevel@tonic-gate 			if ((*protp & PROT_WRITE) &&
22387c478bd9Sstevel@tonic-gate 			    anon_share(amp->ahp, an_idx, npgs)) {
223978b03d3aSkchow 				*protp &= ~PROT_WRITE;
22407c478bd9Sstevel@tonic-gate 			}
22417c478bd9Sstevel@tonic-gate 			pg_idx += npgs;
22427c478bd9Sstevel@tonic-gate 			an_idx += npgs;
22437c478bd9Sstevel@tonic-gate 			vaddr += PAGESIZE * npgs;
22447c478bd9Sstevel@tonic-gate 			continue;
22457c478bd9Sstevel@tonic-gate 		}
22467c478bd9Sstevel@tonic-gate 
22477c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(anonvmstats.getpages[17]);
22487c478bd9Sstevel@tonic-gate 
22497c478bd9Sstevel@tonic-gate 		/*
22507c478bd9Sstevel@tonic-gate 		 * Anon_zero case.
22517c478bd9Sstevel@tonic-gate 		 */
22527c478bd9Sstevel@tonic-gate 		if (slotcreate) {
22537c478bd9Sstevel@tonic-gate 			ASSERT(prealloc);
22547c478bd9Sstevel@tonic-gate 			pagezero(pp, 0, PAGESIZE);
22557c478bd9Sstevel@tonic-gate 			CPU_STATS_ADD_K(vm, zfod, 1);
22567c478bd9Sstevel@tonic-gate 			hat_setrefmod(pp);
22577c478bd9Sstevel@tonic-gate 		}
22587c478bd9Sstevel@tonic-gate 
22597c478bd9Sstevel@tonic-gate 		ASSERT(prealloc == 0 || ppa[pg_idx] == pp);
22607c478bd9Sstevel@tonic-gate 		ASSERT(prealloc != 0 || PAGE_SHARED(pp));
22617c478bd9Sstevel@tonic-gate 		ASSERT(prealloc == 0 || PAGE_EXCL(pp));
22627c478bd9Sstevel@tonic-gate 
22637c478bd9Sstevel@tonic-gate 		if (pg_idx > 0 &&
22647c478bd9Sstevel@tonic-gate 		    ((page_pptonum(pp) != page_pptonum(ppa[pg_idx - 1]) + 1) ||
226507b65a64Saguzovsk 		    (pp->p_szc != ppa[pg_idx - 1]->p_szc))) {
22667c478bd9Sstevel@tonic-gate 			panic("anon_map_getpages: unexpected page");
226707b65a64Saguzovsk 		} else if (pg_idx == 0 && (page_pptonum(pp) & (pgcnt - 1))) {
226807b65a64Saguzovsk 			panic("anon_map_getpages: unaligned page");
226907b65a64Saguzovsk 		}
22707c478bd9Sstevel@tonic-gate 
22717c478bd9Sstevel@tonic-gate 		if (prealloc == 0) {
22727c478bd9Sstevel@tonic-gate 			ppa[pg_idx] = pp;
22737c478bd9Sstevel@tonic-gate 		}
22747c478bd9Sstevel@tonic-gate 
22757c478bd9Sstevel@tonic-gate 		if (ap->an_refcnt > 1) {
22767c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[18]);
22777c478bd9Sstevel@tonic-gate 			*protp &= ~PROT_WRITE;
22787c478bd9Sstevel@tonic-gate 		}
22797c478bd9Sstevel@tonic-gate 
22807c478bd9Sstevel@tonic-gate 		/*
22817c478bd9Sstevel@tonic-gate 		 * If this is a new anon slot then initialize
22827c478bd9Sstevel@tonic-gate 		 * the anon array entry.
22837c478bd9Sstevel@tonic-gate 		 */
22847c478bd9Sstevel@tonic-gate 		if (slotcreate) {
22857c478bd9Sstevel@tonic-gate 			(void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP);
22867c478bd9Sstevel@tonic-gate 		}
22877c478bd9Sstevel@tonic-gate 		pg_idx++;
22887c478bd9Sstevel@tonic-gate 		an_idx++;
22897c478bd9Sstevel@tonic-gate 		vaddr += PAGESIZE;
22907c478bd9Sstevel@tonic-gate 	}
22917c478bd9Sstevel@tonic-gate 
22927c478bd9Sstevel@tonic-gate 	/*
22937c478bd9Sstevel@tonic-gate 	 * Since preallocated pages come off the freelist
22947c478bd9Sstevel@tonic-gate 	 * they are locked SE_EXCL. Simply downgrade and return.
22957c478bd9Sstevel@tonic-gate 	 */
22967c478bd9Sstevel@tonic-gate 	if (prealloc) {
22977c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(anonvmstats.getpages[19]);
22987c478bd9Sstevel@tonic-gate 		conpp = NULL;
22997c478bd9Sstevel@tonic-gate 		for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
23007c478bd9Sstevel@tonic-gate 			page_downgrade(ppa[pg_idx]);
23017c478bd9Sstevel@tonic-gate 		}
23027c478bd9Sstevel@tonic-gate 	}
23037c478bd9Sstevel@tonic-gate 	ASSERT(conpp == NULL);
23047c478bd9Sstevel@tonic-gate 
23057c478bd9Sstevel@tonic-gate 	if (brkcow == 0 || (*protp & PROT_WRITE)) {
23067c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(anonvmstats.getpages[20]);
23077c478bd9Sstevel@tonic-gate 		return (0);
23087c478bd9Sstevel@tonic-gate 	}
23097c478bd9Sstevel@tonic-gate 
23107c478bd9Sstevel@tonic-gate 	if (szc < seg->s_szc)
23117c478bd9Sstevel@tonic-gate 		panic("anon_map_getpages: cowfault for szc %d", szc);
23127c478bd9Sstevel@tonic-gate 
23137c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(anonvmstats.getpages[21]);
23147c478bd9Sstevel@tonic-gate 
23157c478bd9Sstevel@tonic-gate 	*protp = PROT_ALL;
23167c478bd9Sstevel@tonic-gate 	return (anon_map_privatepages(amp, start_idx, szc, seg, addr, prot,
23172cb27123Saguzovsk 	    ppa, vpage, anypgsz, pgflags, cred));
23187c478bd9Sstevel@tonic-gate io_err:
23197c478bd9Sstevel@tonic-gate 	/*
23207c478bd9Sstevel@tonic-gate 	 * We got an IO error somewhere in our large page.
23217c478bd9Sstevel@tonic-gate 	 * If we were using a preallocated page then just demote
23227c478bd9Sstevel@tonic-gate 	 * all the constituent pages that we've succeeded with sofar
23237c478bd9Sstevel@tonic-gate 	 * to PAGESIZE pages and leave them in the system
23247c478bd9Sstevel@tonic-gate 	 * unlocked.
23257c478bd9Sstevel@tonic-gate 	 */
23267c478bd9Sstevel@tonic-gate 
232707b65a64Saguzovsk 	ASSERT(err != -2 || ((pg_idx == 0) && upsize));
23287c478bd9Sstevel@tonic-gate 
23297c478bd9Sstevel@tonic-gate 	VM_STAT_COND_ADD(err > 0, anonvmstats.getpages[22]);
23307c478bd9Sstevel@tonic-gate 	VM_STAT_COND_ADD(err == -1, anonvmstats.getpages[23]);
23317c478bd9Sstevel@tonic-gate 	VM_STAT_COND_ADD(err == -2, anonvmstats.getpages[24]);
23327c478bd9Sstevel@tonic-gate 
23337c478bd9Sstevel@tonic-gate 	if (prealloc) {
23347c478bd9Sstevel@tonic-gate 		conpp = NULL;
23357c478bd9Sstevel@tonic-gate 		if (pg_idx > 0) {
23367c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[25]);
23377c478bd9Sstevel@tonic-gate 			for (i = 0; i < pgcnt; i++) {
23387c478bd9Sstevel@tonic-gate 				pp = ppa[i];
23397c478bd9Sstevel@tonic-gate 				ASSERT(PAGE_EXCL(pp));
23407c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_szc == szc);
23417c478bd9Sstevel@tonic-gate 				pp->p_szc = 0;
23427c478bd9Sstevel@tonic-gate 			}
23437c478bd9Sstevel@tonic-gate 			for (i = 0; i < pg_idx; i++) {
23447c478bd9Sstevel@tonic-gate 				ASSERT(!hat_page_is_mapped(ppa[i]));
23457c478bd9Sstevel@tonic-gate 				page_unlock(ppa[i]);
23467c478bd9Sstevel@tonic-gate 			}
23477c478bd9Sstevel@tonic-gate 			/*
23487c478bd9Sstevel@tonic-gate 			 * Now free up the remaining unused constituent
23497c478bd9Sstevel@tonic-gate 			 * pages.
23507c478bd9Sstevel@tonic-gate 			 */
23517c478bd9Sstevel@tonic-gate 			while (pg_idx < pgcnt) {
23527c478bd9Sstevel@tonic-gate 				ASSERT(!hat_page_is_mapped(ppa[pg_idx]));
23537c478bd9Sstevel@tonic-gate 				page_free(ppa[pg_idx], 0);
23547c478bd9Sstevel@tonic-gate 				pg_idx++;
23557c478bd9Sstevel@tonic-gate 			}
23567c478bd9Sstevel@tonic-gate 		} else {
23577c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(anonvmstats.getpages[26]);
23587c478bd9Sstevel@tonic-gate 			page_free_pages(ppa[0]);
23597c478bd9Sstevel@tonic-gate 		}
23607c478bd9Sstevel@tonic-gate 	} else {
23617c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(anonvmstats.getpages[27]);
23627c478bd9Sstevel@tonic-gate 		ASSERT(err > 0);
23637c478bd9Sstevel@tonic-gate 		for (i = 0; i < pg_idx; i++)
23647c478bd9Sstevel@tonic-gate 			page_unlock(ppa[i]);
23657c478bd9Sstevel@tonic-gate 	}
23667c478bd9Sstevel@tonic-gate 	ASSERT(conpp == NULL);
23677c478bd9Sstevel@tonic-gate 	if (err != -1)
23687c478bd9Sstevel@tonic-gate 		return (err);
23697c478bd9Sstevel@tonic-gate 	/*
23707c478bd9Sstevel@tonic-gate 	 * we are here because we failed to relocate.
23717c478bd9Sstevel@tonic-gate 	 */
23727c478bd9Sstevel@tonic-gate 	ASSERT(prealloc);
237378b03d3aSkchow 	if (brkcow == 0 || szc < seg->s_szc ||
237478b03d3aSkchow 	    !anon_szcshare(amp->ahp, start_idx)) {
23757c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(anonvmstats.getpages[28]);
23767c478bd9Sstevel@tonic-gate 		return (-1);
23777c478bd9Sstevel@tonic-gate 	}
23787c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(anonvmstats.getpages[29]);
23797c478bd9Sstevel@tonic-gate 	goto docow;
23807c478bd9Sstevel@tonic-gate }
23817c478bd9Sstevel@tonic-gate 
23827c478bd9Sstevel@tonic-gate 
23837c478bd9Sstevel@tonic-gate /*
23847c478bd9Sstevel@tonic-gate  * Turn a reference to an object or shared anon page
23857c478bd9Sstevel@tonic-gate  * into a private page with a copy of the data from the
23867c478bd9Sstevel@tonic-gate  * original page which is always locked by the caller.
23877c478bd9Sstevel@tonic-gate  * This routine unloads the translation and unlocks the
23887c478bd9Sstevel@tonic-gate  * original page, if it isn't being stolen, before returning
23897c478bd9Sstevel@tonic-gate  * to the caller.
23907c478bd9Sstevel@tonic-gate  *
23917c478bd9Sstevel@tonic-gate  * NOTE:  The original anon slot is not freed by this routine
23927c478bd9Sstevel@tonic-gate  *	  It must be freed by the caller while holding the
23937c478bd9Sstevel@tonic-gate  *	  "anon_map" lock to prevent races which can occur if
23947c478bd9Sstevel@tonic-gate  *	  a process has multiple lwps in its address space.
23957c478bd9Sstevel@tonic-gate  */
page_t *
anon_private(
	struct anon **app,
	struct seg *seg,
	caddr_t addr,
	uint_t	prot,
	page_t *opp,
	int oppflags,
	struct cred *cred)
{
	struct anon *old = *app;	/* saved so we can back out on error */
	struct anon *new;
	page_t *pp = NULL;
	struct vnode *vp;
	anoff_t off;
	page_t *anon_pl[1 + 1];
	int err;

	/*
	 * A stolen page is renamed in place, which requires the
	 * exclusive lock; a plain copy only reads opp, so any
	 * page lock suffices.
	 */
	if (oppflags & STEAL_PAGE)
		ASSERT(PAGE_EXCL(opp));
	else
		ASSERT(PAGE_LOCKED(opp));

	CPU_STATS_ADD_K(vm, cow_fault, 1);

	/* Allocate the private anon slot and get its swap <vp, off> name. */
	*app = new = anon_alloc(NULL, 0);
	swap_xlate(new, &vp, &off);

	if (oppflags & STEAL_PAGE) {
		/*
		 * Instead of copying, take over the original page by
		 * giving it the new anon slot's vnode/offset identity.
		 */
		page_rename(opp, vp, (u_offset_t)off);
		pp = opp;
		TRACE_5(TR_FAC_VM, TR_ANON_PRIVATE,
		    "anon_private:seg %p addr %x pp %p vp %p off %lx",
		    seg, addr, pp, vp, off);
		hat_setmod(pp);

		/* bug 4026339 */
		page_downgrade(pp);
		return (pp);
	}

	/*
	 * Call the VOP_GETPAGE routine to create the page, thereby
	 * enabling the vnode driver to allocate any filesystem
	 * space (e.g., disk block allocation for UFS).  This also
	 * prevents more than one page from being added to the
	 * vnode at the same time.
	 */
	err = VOP_GETPAGE(vp, (u_offset_t)off, PAGESIZE, NULL,
	    anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL);
	if (err)
		goto out;

	pp = anon_pl[0];

	/*
	 * If the original page was locked, we need to move the lock
	 * to the new page by transferring 'cowcnt/lckcnt' of the original
	 * page to 'cowcnt/lckcnt' of the new page.
	 *
	 * See Statement at the beginning of segvn_lockop() and
	 * comments in page_pp_useclaim() regarding the way
	 * cowcnts/lckcnts are handled.
	 *
	 * Also availrmem must be decremented up front for read only mapping
	 * before calling page_pp_useclaim. page_pp_useclaim will bump it back
	 * if availrmem did not need to be decremented after all.
	 */
	if (oppflags & LOCK_PAGE) {
		if ((prot & PROT_WRITE) == 0) {
			mutex_enter(&freemem_lock);
			if (availrmem > pages_pp_maximum) {
				availrmem--;
				pages_useclaim++;
			} else {
				/* no claimable memory left; fail the fault */
				mutex_exit(&freemem_lock);
				goto out;
			}
			mutex_exit(&freemem_lock);
		}
		page_pp_useclaim(opp, pp, prot & PROT_WRITE);
	}

	/*
	 * Now copy the contents from the original page,
	 * which is locked and loaded in the MMU by
	 * the caller to prevent yet another page fault.
	 */
	/* XXX - should set mod bit in here */
	if (ppcopy(opp, pp) == 0) {
		/*
		 * Before ppcopy could handle UE or other faults, we
		 * would have panicked here, and still have no option
		 * but to do so now.
		 */
		panic("anon_private, ppcopy failed, opp = 0x%p, pp = 0x%p",
		    (void *)opp, (void *)pp);
	}

	hat_setrefmod(pp);		/* mark as modified */

	/*
	 * Unload the old translation.
	 */
	hat_unload(seg->s_as->a_hat, addr, PAGESIZE, HAT_UNLOAD);

	/*
	 * Free unmapped, unmodified original page.
	 * or release the lock on the original page,
	 * otherwise the process will sleep forever in
	 * anon_decref() waiting for the "exclusive" lock
	 * on the page.
	 */
	(void) page_release(opp, 1);

	/*
	 * we are done with page creation so downgrade the new
	 * page's selock to shared, this helps when multiple
	 * as_fault(...SOFTLOCK...) are done to the same
	 * page(aio)
	 */
	page_downgrade(pp);

	/*
	 * NOTE:  The original anon slot must be freed by the
	 * caller while holding the "anon_map" lock, if we
	 * copied away from an anonymous page.
	 */
	return (pp);

out:
	/*
	 * Error path: restore the caller's anon pointer, drop the
	 * new slot (and its page, if one was created), and release
	 * the lock on the original page.
	 */
	*app = old;
	if (pp)
		page_unlock(pp);
	anon_decref(new);
	page_unlock(opp);
	return ((page_t *)NULL);
}
25347c478bd9Sstevel@tonic-gate 
/*
 * Break COW sharing for the large page described by [start_idx, szc)
 * of "amp": replace each shared anon page in ppa[] with a freshly
 * allocated private copy.  On success ppa[] holds the new pages
 * (share-locked) and 0 is returned.  Returns -1 when the pages turn
 * out not to be shared after all and could not be left intact, or
 * ENOMEM when availrmem cannot cover locked (PPLOCK) vpages.
 */
int
anon_map_privatepages(
	struct anon_map *amp,
	ulong_t	start_idx,
	uint_t	szc,
	struct seg *seg,
	caddr_t addr,
	uint_t	prot,
	page_t	*ppa[],
	struct vpage vpage[],
	int anypgsz,
	int pgflags,
	struct cred *cred)
{
	pgcnt_t		pgcnt;
	struct vnode	*vp;
	anoff_t		off;
	page_t		*pl[2], *conpp = NULL;
	int		err;
	int		prealloc = 1;	/* using a preallocated large page? */
	struct anon	*ap, *oldap;
	caddr_t		vaddr;
	page_t		*pplist, *pp;
	ulong_t		pg_idx, an_idx;
	spgcnt_t	nreloc = 0;
	int		pagelock = 0;	/* availrmem was claimed up front */
	kmutex_t	*ahmpages = NULL;
#ifdef DEBUG
	int		refcnt;
#endif

	ASSERT(szc != 0);
	ASSERT(szc == seg->s_szc);

	VM_STAT_ADD(anonvmstats.privatepages[0]);

	pgcnt = page_get_pagecnt(szc);
	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
	ASSERT(IS_P2ALIGNED(start_idx, pgcnt));

	ASSERT(amp != NULL);
	ap = anon_get_ptr(amp->ahp, start_idx);
	ASSERT(ap == NULL || ap->an_refcnt >= 1);

	VM_STAT_COND_ADD(ap == NULL, anonvmstats.privatepages[1]);

	/*
	 * Now try and allocate the large page. If we fail then just
	 * let VOP_GETPAGE give us PAGESIZE pages. Normally we let
	 * the caller make this decision but to avoid added complexity
	 * it's simpler to handle that case here.
	 */
	if (anypgsz == -1) {
		/* caller asked for PAGESIZE pages only */
		VM_STAT_ADD(anonvmstats.privatepages[2]);
		prealloc = 0;
	} else if (page_alloc_pages(anon_vp, seg, addr, &pplist, NULL, szc,
	    anypgsz, pgflags) != 0) {
		VM_STAT_ADD(anonvmstats.privatepages[3]);
		prealloc = 0;
	}

	/*
	 * make the decrement of all refcnts of all
	 * anon slots of a large page appear atomic by
	 * getting an anonpages_hash_lock for the
	 * first anon slot of a large page.
	 */
	if (ap != NULL) {
		ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
		mutex_enter(ahmpages);
		if (ap->an_refcnt == 1) {
			/*
			 * Not actually shared; no COW copy is needed.
			 * Give back any preallocated large page.
			 */
			VM_STAT_ADD(anonvmstats.privatepages[4]);
			ASSERT(!anon_share(amp->ahp, start_idx, pgcnt));
			mutex_exit(ahmpages);

			if (prealloc) {
				page_free_replacement_page(pplist);
				page_create_putback(pgcnt);
			}
			ASSERT(ppa[0]->p_szc <= szc);
			if (ppa[0]->p_szc == szc) {
				/* already the right size; use as is */
				VM_STAT_ADD(anonvmstats.privatepages[5]);
				return (0);
			}
			for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
				ASSERT(ppa[pg_idx] != NULL);
				page_unlock(ppa[pg_idx]);
			}
			return (-1);
		}
	}

	/*
	 * If we are passed in the vpage array and this is
	 * not PROT_WRITE then we need to decrement availrmem
	 * up front before we try anything. If we need to and
	 * can't decrement availrmem then its better to fail now
	 * than in the middle of processing the new large page.
	 * page_pp_usclaim() on behalf of each constituent page
	 * below will adjust availrmem back for the cases not needed.
	 */
	if (vpage != NULL && (prot & PROT_WRITE) == 0) {
		for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
			if (VPP_ISPPLOCK(&vpage[pg_idx])) {
				pagelock = 1;
				break;
			}
		}
		if (pagelock) {
			VM_STAT_ADD(anonvmstats.privatepages[6]);
			mutex_enter(&freemem_lock);
			if (availrmem >= pages_pp_maximum + pgcnt) {
				availrmem -= pgcnt;
				pages_useclaim += pgcnt;
			} else {
				/*
				 * Can't claim enough memory: undo all
				 * work done so far and fail.
				 */
				VM_STAT_ADD(anonvmstats.privatepages[7]);
				mutex_exit(&freemem_lock);
				if (ahmpages != NULL) {
					mutex_exit(ahmpages);
				}
				if (prealloc) {
					page_free_replacement_page(pplist);
					page_create_putback(pgcnt);
				}
				for (pg_idx = 0; pg_idx < pgcnt; pg_idx++)
					if (ppa[pg_idx] != NULL)
						page_unlock(ppa[pg_idx]);
				return (ENOMEM);
			}
			mutex_exit(&freemem_lock);
		}
	}

	CPU_STATS_ADD_K(vm, cow_fault, pgcnt);

	VM_STAT_ADD(anonvmstats.privatepages[8]);

	/*
	 * Copy each constituent page into a new private anon page,
	 * installing the new anon slot as we go.
	 */
	an_idx = start_idx;
	pg_idx = 0;
	vaddr = addr;
	for (; pg_idx < pgcnt; pg_idx++, an_idx++, vaddr += PAGESIZE) {
		ASSERT(ppa[pg_idx] != NULL);
		oldap = anon_get_ptr(amp->ahp, an_idx);
		ASSERT(ahmpages != NULL || oldap == NULL);
		ASSERT(ahmpages == NULL || oldap != NULL);
		ASSERT(ahmpages == NULL || oldap->an_refcnt > 1);
		ASSERT(ahmpages == NULL || pg_idx != 0 ||
		    (refcnt = oldap->an_refcnt));
		ASSERT(ahmpages == NULL || pg_idx == 0 ||
		    refcnt == oldap->an_refcnt);

		ap = anon_alloc(NULL, 0);

		swap_xlate(ap, &vp, &off);

		/*
		 * Now setup our preallocated page to pass down to
		 * swap_getpage().
		 */
		if (prealloc) {
			pp = pplist;
			page_sub(&pplist, pp);
			conpp = pp;
		}

		err = swap_getconpage(vp, (u_offset_t)off, PAGESIZE, NULL, pl,
		    PAGESIZE, conpp, NULL, &nreloc, seg, vaddr,
		    S_CREATE, cred);

		/*
		 * Impossible to fail since this is S_CREATE.
		 */
		if (err)
			panic("anon_map_privatepages: VOP_GETPAGE failed");

		ASSERT(prealloc ? pp == pl[0] : pl[0]->p_szc == 0);
		ASSERT(prealloc == 0 || nreloc == 1);

		pp = pl[0];

		/*
		 * If the original page was locked, we need to move
		 * the lock to the new page by transferring
		 * 'cowcnt/lckcnt' of the original page to 'cowcnt/lckcnt'
		 * of the new page. pg_idx can be used to index
		 * into the vpage array since the caller will guarantee
		 * that vpage struct passed in corresponds to addr
		 * and forward.
		 */
		if (vpage != NULL && VPP_ISPPLOCK(&vpage[pg_idx])) {
			page_pp_useclaim(ppa[pg_idx], pp, prot & PROT_WRITE);
		} else if (pagelock) {
			/* this page didn't need the up-front claim */
			mutex_enter(&freemem_lock);
			availrmem++;
			pages_useclaim--;
			mutex_exit(&freemem_lock);
		}

		/*
		 * Now copy the contents from the original page.
		 */
		if (ppcopy(ppa[pg_idx], pp) == 0) {
			/*
			 * Before ppcopy could handle UE or other faults, we
			 * would have panicked here, and still have no option
			 * but to do so now.
			 */
			panic("anon_map_privatepages, ppcopy failed");
		}

		hat_setrefmod(pp);		/* mark as modified */

		/*
		 * Release the lock on the original page,
		 * decrement the old slot, and down grade the lock
		 * on the new copy.
		 */
		page_unlock(ppa[pg_idx]);

		if (!prealloc)
			page_downgrade(pp);

		ppa[pg_idx] = pp;

		/*
		 * Now reflect the copy in the new anon array.
		 */
		ASSERT(ahmpages == NULL || oldap->an_refcnt > 1);
		if (oldap != NULL)
			anon_decref(oldap);
		(void) anon_set_ptr(amp->ahp, an_idx, ap, ANON_SLEEP);
	}

	/*
	 * Unload the old large page translation.
	 */
	hat_unload(seg->s_as->a_hat, addr, pgcnt << PAGESHIFT, HAT_UNLOAD);

	if (ahmpages != NULL) {
		mutex_exit(ahmpages);
	}
	ASSERT(prealloc == 0 || pplist == NULL);
	if (prealloc) {
		/*
		 * Preallocated pages came off the freelist SE_EXCL
		 * locked; downgrade them before returning.
		 */
		VM_STAT_ADD(anonvmstats.privatepages[9]);
		for (pg_idx = 0; pg_idx < pgcnt; pg_idx++) {
			page_downgrade(ppa[pg_idx]);
		}
	}

	return (0);
}
27867c478bd9Sstevel@tonic-gate 
27877c478bd9Sstevel@tonic-gate /*
27887c478bd9Sstevel@tonic-gate  * Allocate a private zero-filled anon page.
27897c478bd9Sstevel@tonic-gate  */
27907c478bd9Sstevel@tonic-gate page_t *
anon_zero(struct seg * seg,caddr_t addr,struct anon ** app,struct cred * cred)27917c478bd9Sstevel@tonic-gate anon_zero(struct seg *seg, caddr_t addr, struct anon **app, struct cred *cred)
27927c478bd9Sstevel@tonic-gate {
27937c478bd9Sstevel@tonic-gate 	struct anon *ap;
27947c478bd9Sstevel@tonic-gate 	page_t *pp;
27957c478bd9Sstevel@tonic-gate 	struct vnode *vp;
27967c478bd9Sstevel@tonic-gate 	anoff_t off;
27977c478bd9Sstevel@tonic-gate 	page_t *anon_pl[1 + 1];
27987c478bd9Sstevel@tonic-gate 	int err;
27997c478bd9Sstevel@tonic-gate 
28007c478bd9Sstevel@tonic-gate 	*app = ap = anon_alloc(NULL, 0);
28017c478bd9Sstevel@tonic-gate 	swap_xlate(ap, &vp, &off);
28027c478bd9Sstevel@tonic-gate 
28037c478bd9Sstevel@tonic-gate 	/*
28047c478bd9Sstevel@tonic-gate 	 * Call the VOP_GETPAGE routine to create the page, thereby
28057c478bd9Sstevel@tonic-gate 	 * enabling the vnode driver to allocate any filesystem
28067c478bd9Sstevel@tonic-gate 	 * dependent structures (e.g., disk block allocation for UFS).
28077c478bd9Sstevel@tonic-gate 	 * This also prevents more than on page from being added to
28087c478bd9Sstevel@tonic-gate 	 * the vnode at the same time since it is locked.
28097c478bd9Sstevel@tonic-gate 	 */
28107c478bd9Sstevel@tonic-gate 	err = VOP_GETPAGE(vp, off, PAGESIZE, NULL,
2811da6c28aaSamw 	    anon_pl, PAGESIZE, seg, addr, S_CREATE, cred, NULL);
28127c478bd9Sstevel@tonic-gate 	if (err) {
28137c478bd9Sstevel@tonic-gate 		*app = NULL;
28147c478bd9Sstevel@tonic-gate 		anon_decref(ap);
28157c478bd9Sstevel@tonic-gate 		return (NULL);
28167c478bd9Sstevel@tonic-gate 	}
28177c478bd9Sstevel@tonic-gate 	pp = anon_pl[0];
28187c478bd9Sstevel@tonic-gate 
28197c478bd9Sstevel@tonic-gate 	pagezero(pp, 0, PAGESIZE);	/* XXX - should set mod bit */
28207c478bd9Sstevel@tonic-gate 	page_downgrade(pp);
28217c478bd9Sstevel@tonic-gate 	CPU_STATS_ADD_K(vm, zfod, 1);
28227c478bd9Sstevel@tonic-gate 	hat_setrefmod(pp);	/* mark as modified so pageout writes back */
28237c478bd9Sstevel@tonic-gate 	return (pp);
28247c478bd9Sstevel@tonic-gate }
28257c478bd9Sstevel@tonic-gate 
28267c478bd9Sstevel@tonic-gate 
/*
 * Allocate array of private zero-filled anon pages for empty slots
 * and kept pages for non empty slots within given range.
 *
 * NOTE: This routine will try and use large pages
 *	if available and supported by underlying platform.
 */
28347c478bd9Sstevel@tonic-gate int
anon_map_createpages(struct anon_map * amp,ulong_t start_index,size_t len,page_t * ppa[],struct seg * seg,caddr_t addr,enum seg_rw rw,struct cred * cred)28357c478bd9Sstevel@tonic-gate anon_map_createpages(
28367c478bd9Sstevel@tonic-gate 	struct anon_map *amp,
28377c478bd9Sstevel@tonic-gate 	ulong_t start_index,
28387c478bd9Sstevel@tonic-gate 	size_t len,
28397c478bd9Sstevel@tonic-gate 	page_t *ppa[],
28407c478bd9Sstevel@tonic-gate 	struct seg *seg,
28417c478bd9Sstevel@tonic-gate 	caddr_t addr,
28427c478bd9Sstevel@tonic-gate 	enum seg_rw rw,
28437c478bd9Sstevel@tonic-gate 	struct cred *cred)
28447c478bd9Sstevel@tonic-gate {
28457c478bd9Sstevel@tonic-gate 
28467c478bd9Sstevel@tonic-gate 	struct anon	*ap;
28477c478bd9Sstevel@tonic-gate 	struct vnode	*ap_vp;
28487c478bd9Sstevel@tonic-gate 	page_t		*pp, *pplist, *anon_pl[1 + 1], *conpp = NULL;
28497c478bd9Sstevel@tonic-gate 	int		err = 0;
28507c478bd9Sstevel@tonic-gate 	ulong_t		p_index, index;
28517c478bd9Sstevel@tonic-gate 	pgcnt_t		npgs, pg_cnt;
28527c478bd9Sstevel@tonic-gate 	spgcnt_t	nreloc = 0;
28537c478bd9Sstevel@tonic-gate 	uint_t		l_szc, szc, prot;
28547c478bd9Sstevel@tonic-gate 	anoff_t		ap_off;
28557c478bd9Sstevel@tonic-gate 	size_t		pgsz;
28567c478bd9Sstevel@tonic-gate 	lgrp_t		*lgrp;
28573230aa08Ssusans 	kmutex_t	*ahm;
28587c478bd9Sstevel@tonic-gate 
28597c478bd9Sstevel@tonic-gate 	/*
28607c478bd9Sstevel@tonic-gate 	 * XXX For now only handle S_CREATE.
28617c478bd9Sstevel@tonic-gate 	 */
28627c478bd9Sstevel@tonic-gate 	ASSERT(rw == S_CREATE);
28637c478bd9Sstevel@tonic-gate 
28647c478bd9Sstevel@tonic-gate 	index	= start_index;
28657c478bd9Sstevel@tonic-gate 	p_index	= 0;
28667c478bd9Sstevel@tonic-gate 	npgs = btopr(len);
28677c478bd9Sstevel@tonic-gate 
28687c478bd9Sstevel@tonic-gate 	/*
28697c478bd9Sstevel@tonic-gate 	 * If this platform supports multiple page sizes
28707c478bd9Sstevel@tonic-gate 	 * then try and allocate directly from the free
28717c478bd9Sstevel@tonic-gate 	 * list for pages larger than PAGESIZE.
28727c478bd9Sstevel@tonic-gate 	 *
28737c478bd9Sstevel@tonic-gate 	 * NOTE:When we have page_create_ru we can stop
28747c478bd9Sstevel@tonic-gate 	 *	directly allocating from the freelist.
28757c478bd9Sstevel@tonic-gate 	 */
28767c478bd9Sstevel@tonic-gate 	l_szc  = seg->s_szc;
28777c478bd9Sstevel@tonic-gate 	ANON_LOCK_ENTER(&amp->a_rwlock, RW_WRITER);
28787c478bd9Sstevel@tonic-gate 	while (npgs) {
28797c478bd9Sstevel@tonic-gate 
28807c478bd9Sstevel@tonic-gate 		/*
28817c478bd9Sstevel@tonic-gate 		 * if anon slot already exists
28827c478bd9Sstevel@tonic-gate 		 *   (means page has been created)
28837c478bd9Sstevel@tonic-gate 		 * so 1) look up the page
28847c478bd9Sstevel@tonic-gate 		 *    2) if the page is still in memory, get it.
28857c478bd9Sstevel@tonic-gate 		 *    3) if not, create a page and
28867c478bd9Sstevel@tonic-gate 		 *	  page in from physical swap device.
28877c478bd9Sstevel@tonic-gate 		 * These are done in anon_getpage().
28887c478bd9Sstevel@tonic-gate 		 */
28897c478bd9Sstevel@tonic-gate 		ap = anon_get_ptr(amp->ahp, index);
28907c478bd9Sstevel@tonic-gate 		if (ap) {
28917c478bd9Sstevel@tonic-gate 			err = anon_getpage(&ap, &prot, anon_pl, PAGESIZE,
28927c478bd9Sstevel@tonic-gate 			    seg, addr, S_READ, cred);
28937c478bd9Sstevel@tonic-gate 			if (err) {
28947c478bd9Sstevel@tonic-gate 				ANON_LOCK_EXIT(&amp->a_rwlock);
28957c478bd9Sstevel@tonic-gate 				panic("anon_map_createpages: anon_getpage");
28967c478bd9Sstevel@tonic-gate 			}
28977c478bd9Sstevel@tonic-gate 			pp = anon_pl[0];
28987c478bd9Sstevel@tonic-gate 			ppa[p_index++] = pp;
28997c478bd9Sstevel@tonic-gate 
29003230aa08Ssusans 			/*
29013230aa08Ssusans 			 * an_pvp can become non-NULL after SysV's page was
29023230aa08Ssusans 			 * paged out before ISM was attached to this SysV
29033230aa08Ssusans 			 * shared memory segment. So free swap slot if needed.
29043230aa08Ssusans 			 */
29053230aa08Ssusans 			if (ap->an_pvp != NULL) {
29063230aa08Ssusans 				page_io_lock(pp);
290723d9e5acSMichael Corcoran 				ahm = AH_MUTEX(ap->an_vp, ap->an_off);
29083230aa08Ssusans 				mutex_enter(ahm);
29093230aa08Ssusans 				if (ap->an_pvp != NULL) {
29103230aa08Ssusans 					swap_phys_free(ap->an_pvp,
29113230aa08Ssusans 					    ap->an_poff, PAGESIZE);
29123230aa08Ssusans 					ap->an_pvp = NULL;
29133230aa08Ssusans 					ap->an_poff = 0;
29143230aa08Ssusans 					mutex_exit(ahm);
29153230aa08Ssusans 					hat_setmod(pp);
29163230aa08Ssusans 				} else {
29173230aa08Ssusans 					mutex_exit(ahm);
29183230aa08Ssusans 				}
29193230aa08Ssusans 				page_io_unlock(pp);
29203230aa08Ssusans 			}
29213230aa08Ssusans 
29227c478bd9Sstevel@tonic-gate 			addr += PAGESIZE;
29237c478bd9Sstevel@tonic-gate 			index++;
29247c478bd9Sstevel@tonic-gate 			npgs--;
29257c478bd9Sstevel@tonic-gate 			continue;
29267c478bd9Sstevel@tonic-gate 		}
29277c478bd9Sstevel@tonic-gate 		/*
29287c478bd9Sstevel@tonic-gate 		 * Now try and allocate the largest page possible
29297c478bd9Sstevel@tonic-gate 		 * for the current address and range.
29307c478bd9Sstevel@tonic-gate 		 * Keep dropping down in page size until:
29317c478bd9Sstevel@tonic-gate 		 *
29327c478bd9Sstevel@tonic-gate 		 *	1) Properly aligned
29337c478bd9Sstevel@tonic-gate 		 *	2) Does not overlap existing anon pages
29347c478bd9Sstevel@tonic-gate 		 *	3) Fits in remaining range.
29357c478bd9Sstevel@tonic-gate 		 *	4) able to allocate one.
29367c478bd9Sstevel@tonic-gate 		 *
29377c478bd9Sstevel@tonic-gate 		 * NOTE: XXX When page_create_ru is completed this code
29387c478bd9Sstevel@tonic-gate 		 *	 will change.
29397c478bd9Sstevel@tonic-gate 		 */
29407c478bd9Sstevel@tonic-gate 		szc    = l_szc;
29417c478bd9Sstevel@tonic-gate 		pplist = NULL;
29427c478bd9Sstevel@tonic-gate 		pg_cnt = 0;
29437c478bd9Sstevel@tonic-gate 		while (szc) {
29447c478bd9Sstevel@tonic-gate 			pgsz	= page_get_pagesize(szc);
29457c478bd9Sstevel@tonic-gate 			pg_cnt	= pgsz >> PAGESHIFT;
29467c478bd9Sstevel@tonic-gate 			if (IS_P2ALIGNED(addr, pgsz) && pg_cnt <= npgs &&
294778b03d3aSkchow 			    anon_pages(amp->ahp, index, pg_cnt) == 0) {
29487c478bd9Sstevel@tonic-gate 				/*
29497c478bd9Sstevel@tonic-gate 				 * XXX
29507c478bd9Sstevel@tonic-gate 				 * Since we are faking page_create()
29517c478bd9Sstevel@tonic-gate 				 * we also need to do the freemem and
29527c478bd9Sstevel@tonic-gate 				 * pcf accounting.
29537c478bd9Sstevel@tonic-gate 				 */
29547c478bd9Sstevel@tonic-gate 				(void) page_create_wait(pg_cnt, PG_WAIT);
29557c478bd9Sstevel@tonic-gate 
29567c478bd9Sstevel@tonic-gate 				/*
29577c478bd9Sstevel@tonic-gate 				 * Get lgroup to allocate next page of shared
29587c478bd9Sstevel@tonic-gate 				 * memory from and use it to specify where to
29597c478bd9Sstevel@tonic-gate 				 * allocate the physical memory
29607c478bd9Sstevel@tonic-gate 				 */
29617c478bd9Sstevel@tonic-gate 				lgrp = lgrp_mem_choose(seg, addr, pgsz);
29627c478bd9Sstevel@tonic-gate 
29637c478bd9Sstevel@tonic-gate 				pplist = page_get_freelist(
2964e44bd21cSsusans 				    anon_vp, (u_offset_t)0, seg,
29657c478bd9Sstevel@tonic-gate 				    addr, pgsz, 0, lgrp);
29667c478bd9Sstevel@tonic-gate 
29677c478bd9Sstevel@tonic-gate 				if (pplist == NULL) {
29687c478bd9Sstevel@tonic-gate 					page_create_putback(pg_cnt);
29697c478bd9Sstevel@tonic-gate 				}
29707c478bd9Sstevel@tonic-gate 
29717c478bd9Sstevel@tonic-gate 				/*
29727c478bd9Sstevel@tonic-gate 				 * If a request for a page of size
29737c478bd9Sstevel@tonic-gate 				 * larger than PAGESIZE failed
29747c478bd9Sstevel@tonic-gate 				 * then don't try that size anymore.
29757c478bd9Sstevel@tonic-gate 				 */
29767c478bd9Sstevel@tonic-gate 				if (pplist == NULL) {
29777c478bd9Sstevel@tonic-gate 					l_szc = szc - 1;
29787c478bd9Sstevel@tonic-gate 				} else {
29797c478bd9Sstevel@tonic-gate 					break;
29807c478bd9Sstevel@tonic-gate 				}
29817c478bd9Sstevel@tonic-gate 			}
29827c478bd9Sstevel@tonic-gate 			szc--;
29837c478bd9Sstevel@tonic-gate 		}
29847c478bd9Sstevel@tonic-gate 
29857c478bd9Sstevel@tonic-gate 		/*
29867c478bd9Sstevel@tonic-gate 		 * If just using PAGESIZE pages then don't
29877c478bd9Sstevel@tonic-gate 		 * directly allocate from the free list.
29887c478bd9Sstevel@tonic-gate 		 */
29897c478bd9Sstevel@tonic-gate 		if (pplist == NULL) {
29907c478bd9Sstevel@tonic-gate 			ASSERT(szc == 0);
29917c478bd9Sstevel@tonic-gate 			pp = anon_zero(seg, addr, &ap, cred);
29927c478bd9Sstevel@tonic-gate 			if (pp == NULL) {
29937c478bd9Sstevel@tonic-gate 				ANON_LOCK_EXIT(&amp->a_rwlock);
29947c478bd9Sstevel@tonic-gate 				panic("anon_map_createpages: anon_zero");
29957c478bd9Sstevel@tonic-gate 			}
29967c478bd9Sstevel@tonic-gate 			ppa[p_index++] = pp;
29977c478bd9Sstevel@tonic-gate 
29987c478bd9Sstevel@tonic-gate 			ASSERT(anon_get_ptr(amp->ahp, index) == NULL);
29997c478bd9Sstevel@tonic-gate 			(void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP);
30007c478bd9Sstevel@tonic-gate 
30017c478bd9Sstevel@tonic-gate 			addr += PAGESIZE;
30027c478bd9Sstevel@tonic-gate 			index++;
30037c478bd9Sstevel@tonic-gate 			npgs--;
30047c478bd9Sstevel@tonic-gate 			continue;
30057c478bd9Sstevel@tonic-gate 		}
30067c478bd9Sstevel@tonic-gate 
30077c478bd9Sstevel@tonic-gate 		/*
30087c478bd9Sstevel@tonic-gate 		 * pplist is a list of pg_cnt PAGESIZE pages.
30097c478bd9Sstevel@tonic-gate 		 * These pages are locked SE_EXCL since they
30107c478bd9Sstevel@tonic-gate 		 * came directly off the free list.
30117c478bd9Sstevel@tonic-gate 		 */
30127c478bd9Sstevel@tonic-gate 		ASSERT(IS_P2ALIGNED(pg_cnt, pg_cnt));
30137c478bd9Sstevel@tonic-gate 		ASSERT(IS_P2ALIGNED(index, pg_cnt));
30147c478bd9Sstevel@tonic-gate 		ASSERT(conpp == NULL);
30157c478bd9Sstevel@tonic-gate 		while (pg_cnt--) {
30167c478bd9Sstevel@tonic-gate 
30177c478bd9Sstevel@tonic-gate 			ap = anon_alloc(NULL, 0);
30187c478bd9Sstevel@tonic-gate 			swap_xlate(ap, &ap_vp, &ap_off);
30197c478bd9Sstevel@tonic-gate 
30207c478bd9Sstevel@tonic-gate 			ASSERT(pplist != NULL);
30217c478bd9Sstevel@tonic-gate 			pp = pplist;
30227c478bd9Sstevel@tonic-gate 			page_sub(&pplist, pp);
30237c478bd9Sstevel@tonic-gate 			PP_CLRFREE(pp);
30247c478bd9Sstevel@tonic-gate 			PP_CLRAGED(pp);
30257c478bd9Sstevel@tonic-gate 			conpp = pp;
30267c478bd9Sstevel@tonic-gate 
30277c478bd9Sstevel@tonic-gate 			err = swap_getconpage(ap_vp, ap_off, PAGESIZE,
302807b65a64Saguzovsk 			    (uint_t *)NULL, anon_pl, PAGESIZE, conpp, NULL,
302907b65a64Saguzovsk 			    &nreloc, seg, addr, S_CREATE, cred);
30307c478bd9Sstevel@tonic-gate 
30317c478bd9Sstevel@tonic-gate 			if (err) {
30327c478bd9Sstevel@tonic-gate 				ANON_LOCK_EXIT(&amp->a_rwlock);
30337c478bd9Sstevel@tonic-gate 				panic("anon_map_createpages: S_CREATE");
30347c478bd9Sstevel@tonic-gate 			}
30357c478bd9Sstevel@tonic-gate 
30367c478bd9Sstevel@tonic-gate 			ASSERT(anon_pl[0] == pp);
30377c478bd9Sstevel@tonic-gate 			ASSERT(nreloc == 1);
30387c478bd9Sstevel@tonic-gate 			pagezero(pp, 0, PAGESIZE);
30397c478bd9Sstevel@tonic-gate 			CPU_STATS_ADD_K(vm, zfod, 1);
30407c478bd9Sstevel@tonic-gate 			hat_setrefmod(pp);
30417c478bd9Sstevel@tonic-gate 
30427c478bd9Sstevel@tonic-gate 			ASSERT(anon_get_ptr(amp->ahp, index) == NULL);
30437c478bd9Sstevel@tonic-gate 			(void) anon_set_ptr(amp->ahp, index, ap, ANON_SLEEP);
30447c478bd9Sstevel@tonic-gate 
30457c478bd9Sstevel@tonic-gate 			ppa[p_index++] = pp;
30467c478bd9Sstevel@tonic-gate 
30477c478bd9Sstevel@tonic-gate 			addr += PAGESIZE;
30487c478bd9Sstevel@tonic-gate 			index++;
30497c478bd9Sstevel@tonic-gate 			npgs--;
30507c478bd9Sstevel@tonic-gate 		}
30517c478bd9Sstevel@tonic-gate 		conpp = NULL;
30527c478bd9Sstevel@tonic-gate 		pg_cnt	= pgsz >> PAGESHIFT;
30537c478bd9Sstevel@tonic-gate 		p_index = p_index - pg_cnt;
30547c478bd9Sstevel@tonic-gate 		while (pg_cnt--) {
30557c478bd9Sstevel@tonic-gate 			page_downgrade(ppa[p_index++]);
30567c478bd9Sstevel@tonic-gate 		}
30577c478bd9Sstevel@tonic-gate 	}
30587c478bd9Sstevel@tonic-gate 	ANON_LOCK_EXIT(&amp->a_rwlock);
30597c478bd9Sstevel@tonic-gate 	return (0);
30607c478bd9Sstevel@tonic-gate }
30617c478bd9Sstevel@tonic-gate 
/*
 * Try to demote the large page covering the anon slots [sidx, sidx + pgcnt)
 * of ahp into base (PAGESIZE) pages.  Returns 1 when the demotion was done
 * and 0 when it could not be attempted because the first anon slot is still
 * shared (an_refcnt > 1); on 0 nothing has been modified.
 *
 * ahp	   - anon array header to operate on
 * sidx	   - starting slot index, must be aligned to the szc page count
 * szc	   - page size code of the large page being demoted
 * ppa	   - optional scratch array of at least page_get_pagecnt(szc)
 *	     page_t pointers; if NULL one is allocated and freed here
 * private - non-zero for MAP_PRIVATE callers; enables the refcnt check
 *	     under the anon page hash mutex
 */
static int
anon_try_demote_pages(
	struct anon_hdr *ahp,
	ulong_t sidx,
	uint_t szc,
	page_t **ppa,
	int private)
{
	struct anon	*ap;
	pgcnt_t		pgcnt = page_get_pagecnt(szc);
	page_t		*pp;
	pgcnt_t		i;
	kmutex_t	*ahmpages = NULL;
	int		root = 0;
	pgcnt_t		npgs;
	pgcnt_t		curnpgs = 0;
	size_t		ppasize = 0;

	ASSERT(szc != 0);
	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
	ASSERT(IS_P2ALIGNED(sidx, pgcnt));
	ASSERT(sidx < ahp->size);

	if (ppa == NULL) {
		/* caller supplied no page array; use a temporary one */
		ppasize = pgcnt * sizeof (page_t *);
		ppa = kmem_alloc(ppasize, KM_SLEEP);
	}

	ap = anon_get_ptr(ahp, sidx);
	if (ap != NULL && private) {
		VM_STAT_ADD(anonvmstats.demotepages[1]);
		/* hold the anon page hash mutex to stabilize an_refcnt */
		ahmpages = APH_MUTEX(ap->an_vp, ap->an_off);
		mutex_enter(ahmpages);
	}

	if (ap != NULL && ap->an_refcnt > 1) {
		/* slot is still shared; demotion can't proceed */
		if (ahmpages != NULL) {
			VM_STAT_ADD(anonvmstats.demotepages[2]);
			mutex_exit(ahmpages);
		}
		if (ppasize != 0) {
			kmem_free(ppa, ppasize);
		}
		return (0);
	}
	if (ahmpages != NULL) {
		mutex_exit(ahmpages);
	}
	if (ahp->size - sidx < pgcnt) {
		/*
		 * Shared (non-private) maps may end before a full large
		 * page; clamp the walk to the end of the anon array.
		 */
		ASSERT(private == 0);
		pgcnt = ahp->size - sidx;
	}

	/*
	 * Exclusively lock every constituent page and unload all hat
	 * mappings so nothing can reach the pages while p_szc changes.
	 */
	for (i = 0; i < pgcnt; i++, sidx++) {
		ap = anon_get_ptr(ahp, sidx);
		if (ap != NULL) {
			if (ap->an_refcnt != 1) {
				panic("anon_try_demote_pages: an_refcnt != 1");
			}
			pp = ppa[i] = page_lookup(ap->an_vp, ap->an_off,
			    SE_EXCL);
			if (pp != NULL) {
				(void) hat_pageunload(pp,
				    HAT_FORCE_PGUNLOAD);
			}
		} else {
			ppa[i] = NULL;
		}
	}

	/*
	 * Reset p_szc on every large-page constituent.  root/curnpgs/npgs
	 * verify that each run of constituents forms a complete,
	 * contiguous, properly aligned large page; panic on any
	 * inconsistency.
	 */
	for (i = 0; i < pgcnt; i++) {
		if ((pp = ppa[i]) != NULL && pp->p_szc != 0) {
			ASSERT(pp->p_szc <= szc);
			if (!root) {
				/* first constituent must be a page root */
				VM_STAT_ADD(anonvmstats.demotepages[3]);
				if (curnpgs != 0)
					panic("anon_try_demote_pages: "
					    "bad large page");

				root = 1;
				curnpgs = npgs =
				    page_get_pagecnt(pp->p_szc);

				ASSERT(npgs <= pgcnt);
				ASSERT(IS_P2ALIGNED(npgs, npgs));
				ASSERT(!(page_pptonum(pp) & (npgs - 1)));
			} else {
				/* must be physically contiguous with prev */
				ASSERT(i > 0);
				ASSERT(page_pptonum(pp) - 1 ==
				    page_pptonum(ppa[i - 1]));
				if ((page_pptonum(pp) & (npgs - 1)) ==
				    npgs - 1)
					root = 0;
			}
			ASSERT(PAGE_EXCL(pp));
			pp->p_szc = 0;
			ASSERT(curnpgs > 0);
			curnpgs--;
		}
	}
	if (root != 0 || curnpgs != 0)
		panic("anon_try_demote_pages: bad large page");

	/* drop the exclusive locks taken above; pages are now PAGESIZE */
	for (i = 0; i < pgcnt; i++) {
		if ((pp = ppa[i]) != NULL) {
			ASSERT(!hat_page_is_mapped(pp));
			ASSERT(pp->p_szc == 0);
			page_unlock(pp);
		}
	}
	if (ppasize != 0) {
		kmem_free(ppa, ppasize);
	}
	return (1);
}
317507b65a64Saguzovsk 
/*
 * anon_map_demotepages() can only be called by MAP_PRIVATE segments.
 *
 * Demote the large page covering anon slots [start_idx, start_idx + pgcnt)
 * of amp to PAGESIZE pages.  First tries the cheap in-place demotion via
 * anon_try_demote_pages(); if that fails (a slot is still shared) the pages
 * are paged in and privatized with anon_map_privatepages(), retrying once
 * if relocation raced with us.  Returns 0 on success or an error from
 * anon_getpage()/anon_map_privatepages().  Caller must hold amp's writer
 * lock.
 */
int
anon_map_demotepages(
	struct anon_map *amp,
	ulong_t	start_idx,
	struct seg *seg,
	caddr_t addr,
	uint_t prot,
	struct vpage vpage[],
	struct cred *cred)
{
	struct anon	*ap;
	uint_t		szc = seg->s_szc;
	pgcnt_t		pgcnt = page_get_pagecnt(szc);
	size_t		ppasize = pgcnt * sizeof (page_t *);
	page_t		**ppa = kmem_alloc(ppasize, KM_SLEEP);
	page_t		*pp;
	page_t		*pl[2];
	pgcnt_t		i, pg_idx;
	ulong_t		an_idx;
	caddr_t		vaddr;
	int		err;
	int		retry = 0;
	uint_t		vpprot;

	ASSERT(RW_WRITE_HELD(&amp->a_rwlock));
	ASSERT(IS_P2ALIGNED(pgcnt, pgcnt));
	ASSERT(IS_P2ALIGNED(start_idx, pgcnt));
	ASSERT(ppa != NULL);
	ASSERT(szc != 0);
	ASSERT(szc == amp->a_szc);

	VM_STAT_ADD(anonvmstats.demotepages[0]);

top:
	/* fast path: demote in place if no slot is shared */
	if (anon_try_demote_pages(amp->ahp, start_idx, szc, ppa, 1)) {
		kmem_free(ppa, ppasize);
		return (0);
	}

	VM_STAT_ADD(anonvmstats.demotepages[4]);

	ASSERT(retry == 0); /* we can be here only once */

	/*
	 * Page in every constituent page (shared-locked) so that
	 * anon_map_privatepages() can replace them with private copies.
	 */
	vaddr = addr;
	for (pg_idx = 0, an_idx = start_idx; pg_idx < pgcnt;
	    pg_idx++, an_idx++, vaddr += PAGESIZE) {
		ap = anon_get_ptr(amp->ahp, an_idx);
		if (ap == NULL)
			panic("anon_map_demotepages: no anon slot");
		err = anon_getpage(&ap, &vpprot, pl, PAGESIZE, seg, vaddr,
		    S_READ, cred);
		if (err) {
			/* unlock the pages gathered so far and bail */
			for (i = 0; i < pg_idx; i++) {
				if ((pp = ppa[i]) != NULL)
					page_unlock(pp);
			}
			kmem_free(ppa, ppasize);
			return (err);
		}
		ppa[pg_idx] = pl[0];
	}

	err = anon_map_privatepages(amp, start_idx, szc, seg, addr, prot, ppa,
	    vpage, -1, 0, cred);
	if (err > 0) {
		VM_STAT_ADD(anonvmstats.demotepages[5]);
		kmem_free(ppa, ppasize);
		return (err);
	}
	ASSERT(err == 0 || err == -1);
	if (err == -1) {
		/* privatization lost a race; start over */
		VM_STAT_ADD(anonvmstats.demotepages[6]);
		retry = 1;
		goto top;
	}
	/* verify all pages ended up at szc 0; retry if any did not */
	for (i = 0; i < pgcnt; i++) {
		ASSERT(ppa[i] != NULL);
		if (ppa[i]->p_szc != 0)
			retry = 1;
		page_unlock(ppa[i]);
	}
	if (retry) {
		VM_STAT_ADD(anonvmstats.demotepages[7]);
		goto top;
	}

	VM_STAT_ADD(anonvmstats.demotepages[8]);

	kmem_free(ppa, ppasize);

	return (0);
}
32717c478bd9Sstevel@tonic-gate 
/*
 * Free pages of shared anon map. It's assumed that anon maps don't share anon
 * structures with private anon maps. Therefore all anon structures should
 * have at most one reference at this point. This means underlying pages can
 * be exclusively locked and demoted or freed.  If not freeing the entire
 * large pages demote the ends of the region we free to be able to free
 * subpages. Page roots correspond to aligned index positions in anon map.
 *
 * amp  - shared anon map (writer lock held, refcnt <= 1)
 * sidx - first anon slot index to free
 * len  - byte length of the region to free
 */
void
anon_shmap_free_pages(struct anon_map *amp, ulong_t sidx, size_t len)
{
	ulong_t eidx = sidx + btopr(len);
	pgcnt_t pages = page_get_pagecnt(amp->a_szc);
	struct anon_hdr *ahp = amp->ahp;
	ulong_t tidx;
	size_t size;
	ulong_t sidx_aligned;
	ulong_t eidx_aligned;

	ASSERT(ANON_WRITE_HELD(&amp->a_rwlock));
	ASSERT(amp->refcnt <= 1);
	ASSERT(amp->a_szc > 0);
	ASSERT(eidx <= ahp->size);
	ASSERT(!anon_share(ahp, sidx, btopr(len)));

	if (len == 0) {	/* XXX */
		return;
	}

	/*
	 * Leading partial large page: if sidx isn't large page aligned, or
	 * the whole range ends inside the first large page (and before the
	 * end of the anon array), demote that large page and free only the
	 * PAGESIZE slots that fall inside [sidx, eidx).
	 */
	sidx_aligned = P2ALIGN(sidx, pages);
	if (sidx_aligned != sidx ||
	    (eidx < sidx_aligned + pages && eidx < ahp->size)) {
		if (!anon_try_demote_pages(ahp, sidx_aligned,
		    amp->a_szc, NULL, 0)) {
			panic("anon_shmap_free_pages: demote failed");
		}
		size = (eidx <= sidx_aligned + pages) ? (eidx - sidx) :
		    P2NPHASE(sidx, pages);
		size <<= PAGESHIFT;
		anon_free(ahp, sidx, size);
		sidx = sidx_aligned + pages;
		if (eidx <= sidx) {
			return;
		}
	}
	/*
	 * Middle section: sidx is now large page aligned, so whole large
	 * pages up to the last aligned boundary can be freed directly.
	 */
	eidx_aligned = P2ALIGN(eidx, pages);
	if (sidx < eidx_aligned) {
		anon_free_pages(ahp, sidx,
		    (eidx_aligned - sidx) << PAGESHIFT,
		    amp->a_szc);
		sidx = eidx_aligned;
	}
	ASSERT(sidx == eidx_aligned);
	if (eidx == eidx_aligned) {
		return;
	}
	/*
	 * Trailing partial large page: if any anon slot remains allocated
	 * between eidx and the next large page boundary, the trailing large
	 * page must be demoted before its leading subrange can be freed;
	 * otherwise the whole trailing large page can be freed at once.
	 */
	tidx = eidx;
	if (eidx != ahp->size && anon_get_next_ptr(ahp, &tidx) != NULL &&
	    tidx - sidx < pages) {
		if (!anon_try_demote_pages(ahp, sidx, amp->a_szc, NULL, 0)) {
			panic("anon_shmap_free_pages: demote failed");
		}
		size = (eidx - sidx) << PAGESHIFT;
		anon_free(ahp, sidx, size);
	} else {
		anon_free_pages(ahp, sidx, pages << PAGESHIFT, amp->a_szc);
	}
}
334007b65a64Saguzovsk 
/*
 * This routine should be called with amp's writer lock when there're no other
 * users of amp.  All pcache entries of this amp must have been already
 * inactivated. We must not drop a_rwlock here to prevent new users from
 * attaching to this amp.
 *
 * On return a_softlockcnt is 0 and the pcache list anchored at a_phead is
 * empty, so the amp can be safely torn down by the caller.
 */
void
anonmap_purge(struct anon_map *amp)
{
	ASSERT(ANON_WRITE_HELD(&amp->a_rwlock));
	ASSERT(amp->refcnt <= 1);

	/* flush any remaining softlocked pcache entries for this amp */
	if (amp->a_softlockcnt != 0) {
		seg_ppurge(NULL, amp, 0);
	}

	/*
	 * Since all pcache entries were already inactive before this routine
	 * was called seg_ppurge() couldn't return while there're still
	 * entries that can be found via the list anchored at a_phead. So we
	 * can assert this list is empty now. a_softlockcnt may be still non 0
	 * if asynchronous thread that manages pcache already removed pcache
	 * entries but hasn't unlocked the pages yet. If a_softlockcnt is non
	 * 0 we just wait on a_purgecv for shamp_reclaim() to finish. Even if
	 * a_softlockcnt is 0 we grab a_purgemtx to avoid freeing anon map
	 * before shamp_reclaim() is done with it. a_purgemtx also taken by
	 * shamp_reclaim() while a_softlockcnt was still not 0 acts as a
	 * barrier that prevents anonmap_purge() to complete while
	 * shamp_reclaim() may still be referencing this amp.
	 */
	ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
	ASSERT(amp->a_phead.p_lprev == &amp->a_phead);

	mutex_enter(&amp->a_purgemtx);
	while (amp->a_softlockcnt != 0) {
		ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
		ASSERT(amp->a_phead.p_lprev == &amp->a_phead);
		amp->a_purgewait = 1;
		cv_wait(&amp->a_purgecv, &amp->a_purgemtx);
	}
	mutex_exit(&amp->a_purgemtx);

	ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
	ASSERT(amp->a_phead.p_lprev == &amp->a_phead);
	ASSERT(amp->a_softlockcnt == 0);
}
3387a98e9dbfSaguzovsk 
33887c478bd9Sstevel@tonic-gate /*
33897c478bd9Sstevel@tonic-gate  * Allocate and initialize an anon_map structure for seg
33907c478bd9Sstevel@tonic-gate  * associating the given swap reservation with the new anon_map.
33917c478bd9Sstevel@tonic-gate  */
33927c478bd9Sstevel@tonic-gate struct anon_map *
anonmap_alloc(size_t size,size_t swresv,int flags)33932cb27123Saguzovsk anonmap_alloc(size_t size, size_t swresv, int flags)
33947c478bd9Sstevel@tonic-gate {
33957c478bd9Sstevel@tonic-gate 	struct anon_map *amp;
33962cb27123Saguzovsk 	int kmflags = (flags & ANON_NOSLEEP) ? KM_NOSLEEP : KM_SLEEP;
33977c478bd9Sstevel@tonic-gate 
33982cb27123Saguzovsk 	amp = kmem_cache_alloc(anonmap_cache, kmflags);
33992cb27123Saguzovsk 	if (amp == NULL) {
34002cb27123Saguzovsk 		ASSERT(kmflags == KM_NOSLEEP);
34012cb27123Saguzovsk 		return (NULL);
34022cb27123Saguzovsk 	}
34037c478bd9Sstevel@tonic-gate 
34042cb27123Saguzovsk 	amp->ahp = anon_create(btopr(size), flags);
34052cb27123Saguzovsk 	if (amp->ahp == NULL) {
34062cb27123Saguzovsk 		ASSERT(flags == ANON_NOSLEEP);
34072cb27123Saguzovsk 		kmem_cache_free(anonmap_cache, amp);
34082cb27123Saguzovsk 		return (NULL);
34092cb27123Saguzovsk 	}
34107c478bd9Sstevel@tonic-gate 	amp->refcnt = 1;
34117c478bd9Sstevel@tonic-gate 	amp->size = size;
34127c478bd9Sstevel@tonic-gate 	amp->swresv = swresv;
34137c478bd9Sstevel@tonic-gate 	amp->locality = 0;
34147c478bd9Sstevel@tonic-gate 	amp->a_szc = 0;
3415c6939658Ssl 	amp->a_sp = NULL;
3416a98e9dbfSaguzovsk 	amp->a_softlockcnt = 0;
3417a98e9dbfSaguzovsk 	amp->a_purgewait = 0;
3418a98e9dbfSaguzovsk 	amp->a_phead.p_lnext = &amp->a_phead;
3419a98e9dbfSaguzovsk 	amp->a_phead.p_lprev = &amp->a_phead;
3420a98e9dbfSaguzovsk 
34217c478bd9Sstevel@tonic-gate 	return (amp);
34227c478bd9Sstevel@tonic-gate }
34237c478bd9Sstevel@tonic-gate 
/*
 * Tear down an anon_map previously created by anonmap_alloc().  The map
 * must be fully quiesced: no remaining references, no softlocked pages,
 * and an empty pcache list (see anonmap_purge()).
 */
void
anonmap_free(struct anon_map *amp)
{
	ASSERT(amp->ahp != NULL);
	ASSERT(amp->refcnt == 0);
	ASSERT(amp->a_softlockcnt == 0);
	ASSERT(amp->a_phead.p_lnext == &amp->a_phead);
	ASSERT(amp->a_phead.p_lprev == &amp->a_phead);

	/* drop any NUMA memory placement policy before releasing the map */
	lgrp_shm_policy_fini(amp, NULL);
	anon_release(amp->ahp, btopr(amp->size));
	kmem_cache_free(anonmap_cache, amp);
}
34377c478bd9Sstevel@tonic-gate 
34387c478bd9Sstevel@tonic-gate /*
34397c478bd9Sstevel@tonic-gate  * Returns true if the app array has some empty slots.
3440da6c28aaSamw  * The offp and lenp parameters are in/out parameters.  On entry
34417c478bd9Sstevel@tonic-gate  * these values represent the starting offset and length of the
34427c478bd9Sstevel@tonic-gate  * mapping.  When true is returned, these values may be modified
34437c478bd9Sstevel@tonic-gate  * to be the largest range which includes empty slots.
34447c478bd9Sstevel@tonic-gate  */
34457c478bd9Sstevel@tonic-gate int
non_anon(struct anon_hdr * ahp,ulong_t anon_idx,u_offset_t * offp,size_t * lenp)34467c478bd9Sstevel@tonic-gate non_anon(struct anon_hdr *ahp, ulong_t anon_idx, u_offset_t *offp,
34477e897d1fSToomas Soome     size_t *lenp)
34487c478bd9Sstevel@tonic-gate {
34497c478bd9Sstevel@tonic-gate 	ulong_t i, el;
34507c478bd9Sstevel@tonic-gate 	ssize_t low, high;
34517c478bd9Sstevel@tonic-gate 	struct anon *ap;
34527c478bd9Sstevel@tonic-gate 
34537c478bd9Sstevel@tonic-gate 	low = -1;
34547c478bd9Sstevel@tonic-gate 	for (i = 0, el = *lenp; i < el; i += PAGESIZE, anon_idx++) {
34557c478bd9Sstevel@tonic-gate 		ap = anon_get_ptr(ahp, anon_idx);
34567c478bd9Sstevel@tonic-gate 		if (ap == NULL) {
34577c478bd9Sstevel@tonic-gate 			if (low == -1)
34587c478bd9Sstevel@tonic-gate 				low = i;
34597c478bd9Sstevel@tonic-gate 			high = i;
34607c478bd9Sstevel@tonic-gate 		}
34617c478bd9Sstevel@tonic-gate 	}
34627c478bd9Sstevel@tonic-gate 	if (low != -1) {
34637c478bd9Sstevel@tonic-gate 		/*
34647c478bd9Sstevel@tonic-gate 		 * Found at least one non-anon page.
34657c478bd9Sstevel@tonic-gate 		 * Set up the off and len return values.
34667c478bd9Sstevel@tonic-gate 		 */
34677c478bd9Sstevel@tonic-gate 		if (low != 0)
34687c478bd9Sstevel@tonic-gate 			*offp += low;
34697c478bd9Sstevel@tonic-gate 		*lenp = high - low + PAGESIZE;
34707c478bd9Sstevel@tonic-gate 		return (1);
34717c478bd9Sstevel@tonic-gate 	}
34727c478bd9Sstevel@tonic-gate 	return (0);
34737c478bd9Sstevel@tonic-gate }
34747c478bd9Sstevel@tonic-gate 
34757c478bd9Sstevel@tonic-gate /*
34767c478bd9Sstevel@tonic-gate  * Return a count of the number of existing anon pages in the anon array
34777c478bd9Sstevel@tonic-gate  * app in the range (off, off+len). The array and slots must be guaranteed
34787c478bd9Sstevel@tonic-gate  * stable by the caller.
34797c478bd9Sstevel@tonic-gate  */
34807c478bd9Sstevel@tonic-gate pgcnt_t
anon_pages(struct anon_hdr * ahp,ulong_t anon_index,pgcnt_t nslots)34817c478bd9Sstevel@tonic-gate anon_pages(struct anon_hdr *ahp, ulong_t anon_index, pgcnt_t nslots)
34827c478bd9Sstevel@tonic-gate {
34837c478bd9Sstevel@tonic-gate 	pgcnt_t cnt = 0;
34847c478bd9Sstevel@tonic-gate 
34857c478bd9Sstevel@tonic-gate 	while (nslots-- > 0) {
34867c478bd9Sstevel@tonic-gate 		if ((anon_get_ptr(ahp, anon_index)) != NULL)
34877c478bd9Sstevel@tonic-gate 			cnt++;
34887c478bd9Sstevel@tonic-gate 		anon_index++;
34897c478bd9Sstevel@tonic-gate 	}
34907c478bd9Sstevel@tonic-gate 	return (cnt);
34917c478bd9Sstevel@tonic-gate }
34927c478bd9Sstevel@tonic-gate 
/*
 * Move reserved phys swap into memory swap (unreserve phys swap
 * and reserve mem swap by the same amount).
 * Used by segspt when it needs to lock reserved swap npages in memory
 *
 * Returns 0 on success, or ENOMEM if page_reclaim_mem() could not
 * reclaim enough memory to back the additional locked swap.
 */
int
anon_swap_adjust(pgcnt_t npages)
{
	pgcnt_t unlocked_mem_swap;

	mutex_enter(&anoninfo_lock);

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

	/* memory swap that is reserved but not yet locked */
	unlocked_mem_swap = k_anoninfo.ani_mem_resv
	    - k_anoninfo.ani_locked_swap;
	if (npages > unlocked_mem_swap) {
		spgcnt_t adjusted_swap = npages - unlocked_mem_swap;

		/*
		 * if there is not enough unlocked mem swap we take missing
		 * amount from phys swap and give it to mem swap
		 */
		if (!page_reclaim_mem(adjusted_swap, segspt_minfree, 1)) {
			mutex_exit(&anoninfo_lock);
			return (ENOMEM);
		}

		k_anoninfo.ani_mem_resv += adjusted_swap;
		ASSERT(k_anoninfo.ani_phys_resv >= adjusted_swap);
		k_anoninfo.ani_phys_resv -= adjusted_swap;

		/* reflect the freed phys swap in the global free count */
		ANI_ADD(adjusted_swap);
	}
	k_anoninfo.ani_locked_swap += npages;

	ASSERT(k_anoninfo.ani_mem_resv >= k_anoninfo.ani_locked_swap);
	ASSERT(k_anoninfo.ani_max >= k_anoninfo.ani_phys_resv);

	mutex_exit(&anoninfo_lock);

	return (0);
}
35377c478bd9Sstevel@tonic-gate 
35387c478bd9Sstevel@tonic-gate /*
3539e0cb4e8dSOndrej Kubecka  * 'unlocked' reserved mem swap so when it is unreserved it
3540e0cb4e8dSOndrej Kubecka  * can be moved back phys (disk) swap
35417c478bd9Sstevel@tonic-gate  */
35427c478bd9Sstevel@tonic-gate void
anon_swap_restore(pgcnt_t npages)35437c478bd9Sstevel@tonic-gate anon_swap_restore(pgcnt_t npages)
35447c478bd9Sstevel@tonic-gate {
35457c478bd9Sstevel@tonic-gate 	mutex_enter(&anoninfo_lock);
35467c478bd9Sstevel@tonic-gate 
35477c478bd9Sstevel@tonic-gate 	ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv);
35487c478bd9Sstevel@tonic-gate 
35497c478bd9Sstevel@tonic-gate 	ASSERT(k_anoninfo.ani_locked_swap >= npages);
35507c478bd9Sstevel@tonic-gate 	k_anoninfo.ani_locked_swap -= npages;
35517c478bd9Sstevel@tonic-gate 
35527c478bd9Sstevel@tonic-gate 	ASSERT(k_anoninfo.ani_locked_swap <= k_anoninfo.ani_mem_resv);
35537c478bd9Sstevel@tonic-gate 
35547c478bd9Sstevel@tonic-gate 	mutex_exit(&anoninfo_lock);
35557c478bd9Sstevel@tonic-gate }
35567c478bd9Sstevel@tonic-gate 
/*
 * Return the pointer from the list for a
 * specified anon index.
 *
 * For a two-level array this allocates the second-level chunk on
 * demand (KM_SLEEP), so the returned pointer always references a
 * valid slot.  The slot is returned as a ulong_t * so callers such
 * as anon_array_enter() can apply the ANON_*BUSY() macros to it.
 */
ulong_t *
anon_get_slot(struct anon_hdr *ahp, ulong_t an_idx)
{
	struct anon	**app;
	void		**ppp;

	ASSERT(an_idx < ahp->size);

	/*
	 * Single level case.
	 */
	if ((ahp->size <= ANON_CHUNK_SIZE) || (ahp->flags & ANON_ALLOC_FORCE)) {
		return ((ulong_t *)&ahp->array_chunk[an_idx]);
	} else {

		/*
		 * 2 level case.
		 */
		ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
		if (*ppp == NULL) {
			/*
			 * Double-checked allocation: re-test the chunk
			 * pointer under serial_lock in case another thread
			 * installed it between our unlocked check and here.
			 */
			mutex_enter(&ahp->serial_lock);
			ppp = &ahp->array_chunk[an_idx >> ANON_CHUNK_SHIFT];
			if (*ppp == NULL)
				*ppp = kmem_zalloc(PAGESIZE, KM_SLEEP);
			mutex_exit(&ahp->serial_lock);
		}
		app = *ppp;
		/* Index within the second-level chunk. */
		return ((ulong_t *)&app[an_idx & ANON_CHUNK_OFF]);
	}
}
35917c478bd9Sstevel@tonic-gate 
/*
 * Block until the anon slot (or large-page range of slots) containing
 * an_idx can be marked busy, then mark it busy and record the slot,
 * mutex and condvar in *sobj for a later anon_array_exit().
 * Caller must hold amp->a_rwlock at least as reader.
 */
void
anon_array_enter(struct anon_map *amp, ulong_t an_idx, anon_sync_obj_t *sobj)
{
	ulong_t		*ap_slot;
	kmutex_t	*mtx;
	kcondvar_t	*cv;
	int		hash;

	/*
	 * Use szc to determine anon slot(s) to appear atomic.
	 * If szc = 0, then lock the anon slot and mark it busy.
	 * If szc > 0, then lock the range of slots by getting the
	 * anon_array_lock for the first anon slot, and mark only the
	 * first anon slot busy to represent whole range being busy.
	 */

	ASSERT(RW_READ_HELD(&amp->a_rwlock));
	/* Round down to the first slot of the large-page range. */
	an_idx = P2ALIGN(an_idx, page_get_pagecnt(amp->a_szc));
	hash = ANON_ARRAY_HASH(amp, an_idx);
	sobj->sync_mutex = mtx = &anon_array_lock[hash].pad_mutex;
	sobj->sync_cv = cv = &anon_array_cv[hash];
	mutex_enter(mtx);
	ap_slot = anon_get_slot(amp->ahp, an_idx);
	/* Wait for any current holder; anon_array_exit() broadcasts. */
	while (ANON_ISBUSY(ap_slot))
		cv_wait(cv, mtx);
	ANON_SETBUSY(ap_slot);
	sobj->sync_data = ap_slot;
	mutex_exit(mtx);
}
36217c478bd9Sstevel@tonic-gate 
/*
 * Non-blocking variant of anon_array_enter(): returns EWOULDBLOCK
 * instead of sleeping if either the hash mutex or the slot is busy,
 * and 0 if the slot (range) was successfully marked busy.
 * Caller must hold amp->a_rwlock at least as reader.
 */
int
anon_array_try_enter(struct anon_map *amp, ulong_t an_idx,
    anon_sync_obj_t *sobj)
{
	ulong_t		*ap_slot;
	kmutex_t	*mtx;
	int		hash;

	/*
	 * Try to lock a range of anon slots.
	 * Use szc to determine anon slot(s) to appear atomic.
	 * If szc = 0, then lock the anon slot and mark it busy.
	 * If szc > 0, then lock the range of slots by getting the
	 * anon_array_lock for the first anon slot, and mark only the
	 * first anon slot busy to represent whole range being busy.
	 * Fail if the mutex or the anon_array are busy.
	 */

	ASSERT(RW_READ_HELD(&amp->a_rwlock));
	/* Round down to the first slot of the large-page range. */
	an_idx = P2ALIGN(an_idx, page_get_pagecnt(amp->a_szc));
	hash = ANON_ARRAY_HASH(amp, an_idx);
	sobj->sync_mutex = mtx = &anon_array_lock[hash].pad_mutex;
	sobj->sync_cv = &anon_array_cv[hash];
	/* Never sleep: fail if the hash mutex is contended. */
	if (!mutex_tryenter(mtx)) {
		return (EWOULDBLOCK);
	}
	ap_slot = anon_get_slot(amp->ahp, an_idx);
	/* Likewise fail, rather than wait, if the slot is already busy. */
	if (ANON_ISBUSY(ap_slot)) {
		mutex_exit(mtx);
		return (EWOULDBLOCK);
	}
	ANON_SETBUSY(ap_slot);
	sobj->sync_data = ap_slot;
	mutex_exit(mtx);
	return (0);
}
365887015465Scwb 
36597c478bd9Sstevel@tonic-gate void
anon_array_exit(anon_sync_obj_t * sobj)36607c478bd9Sstevel@tonic-gate anon_array_exit(anon_sync_obj_t *sobj)
36617c478bd9Sstevel@tonic-gate {
36627c478bd9Sstevel@tonic-gate 	mutex_enter(sobj->sync_mutex);
36637c478bd9Sstevel@tonic-gate 	ASSERT(ANON_ISBUSY(sobj->sync_data));
36647c478bd9Sstevel@tonic-gate 	ANON_CLRBUSY(sobj->sync_data);
36657c478bd9Sstevel@tonic-gate 	if (CV_HAS_WAITERS(sobj->sync_cv))
36667c478bd9Sstevel@tonic-gate 		cv_broadcast(sobj->sync_cv);
36677c478bd9Sstevel@tonic-gate 	mutex_exit(sobj->sync_mutex);
36687c478bd9Sstevel@tonic-gate }
3669