xref: /illumos-gate/usr/src/uts/common/os/mem_config.c (revision 338664df)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5ee88d2b9Skchow  * Common Development and Distribution License (the "License").
6ee88d2b9Skchow  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
2256f33205SJonathan Adams  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
24*338664dfSAndy Fiddaman  * Copyright 2017 Joyent, Inc.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate #include <sys/types.h>
287c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
297c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
307c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
317c478bd9Sstevel@tonic-gate #include <sys/systm.h>
327c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>	/* for page_freelist_coalesce() */
337c478bd9Sstevel@tonic-gate #include <sys/errno.h>
347c478bd9Sstevel@tonic-gate #include <sys/memnode.h>
357c478bd9Sstevel@tonic-gate #include <sys/memlist.h>
367c478bd9Sstevel@tonic-gate #include <sys/memlist_impl.h>
377c478bd9Sstevel@tonic-gate #include <sys/tuneable.h>
387c478bd9Sstevel@tonic-gate #include <sys/proc.h>
397c478bd9Sstevel@tonic-gate #include <sys/disp.h>
407c478bd9Sstevel@tonic-gate #include <sys/debug.h>
417c478bd9Sstevel@tonic-gate #include <sys/vm.h>
427c478bd9Sstevel@tonic-gate #include <sys/callb.h>
437c478bd9Sstevel@tonic-gate #include <sys/memlist_plat.h>	/* for installed_top_size() */
447c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h>	/* for CV_HAS_WAITERS() */
457c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h>	/* for dump_resize() */
467c478bd9Sstevel@tonic-gate #include <sys/atomic.h>		/* for use in stats collection */
477c478bd9Sstevel@tonic-gate #include <sys/rwlock.h>
487c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
497c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
507c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
517c478bd9Sstevel@tonic-gate #include <vm/page.h>
52e21bae1bSkchow #include <vm/vm_dep.h>
537c478bd9Sstevel@tonic-gate #define	SUNDDI_IMPL		/* so sunddi.h will not redefine splx() et al */
547c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
557c478bd9Sstevel@tonic-gate #include <sys/mem_config.h>
567c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h>
577c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
587c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
597c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
607c478bd9Sstevel@tonic-gate 
617c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail;
627c478bd9Sstevel@tonic-gate 
637c478bd9Sstevel@tonic-gate extern uint_t page_ctrs_adjust(int);
64af4c679fSSean McEnroe void page_ctrs_cleanup(void);
657c478bd9Sstevel@tonic-gate static void kphysm_setup_post_add(pgcnt_t);
667c478bd9Sstevel@tonic-gate static int kphysm_setup_pre_del(pgcnt_t);
677c478bd9Sstevel@tonic-gate static void kphysm_setup_post_del(pgcnt_t, int);
687c478bd9Sstevel@tonic-gate 
697c478bd9Sstevel@tonic-gate static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs);
707c478bd9Sstevel@tonic-gate 
717c478bd9Sstevel@tonic-gate static int delspan_reserve(pfn_t, pgcnt_t);
727c478bd9Sstevel@tonic-gate static void delspan_unreserve(pfn_t, pgcnt_t);
737c478bd9Sstevel@tonic-gate 
749853d9e8SJason Beloro kmutex_t memseg_lists_lock;
759853d9e8SJason Beloro struct memseg *memseg_va_avail;
769853d9e8SJason Beloro struct memseg *memseg_alloc(void);
777c478bd9Sstevel@tonic-gate static struct memseg *memseg_delete_junk;
787c478bd9Sstevel@tonic-gate static struct memseg *memseg_edit_junk;
797c478bd9Sstevel@tonic-gate void memseg_remap_init(void);
809853d9e8SJason Beloro static void memseg_remap_to_dummy(struct memseg *);
817c478bd9Sstevel@tonic-gate static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t);
827c478bd9Sstevel@tonic-gate static struct memseg *memseg_reuse(pgcnt_t);
837c478bd9Sstevel@tonic-gate 
847c478bd9Sstevel@tonic-gate static struct kmem_cache *memseg_cache;
857c478bd9Sstevel@tonic-gate 
867c478bd9Sstevel@tonic-gate /*
879853d9e8SJason Beloro  * Interfaces to manage externally allocated
889853d9e8SJason Beloro  * page_t memory (metadata) for a memseg.
899853d9e8SJason Beloro  */
909853d9e8SJason Beloro #pragma weak	memseg_alloc_meta
919853d9e8SJason Beloro #pragma weak	memseg_free_meta
929853d9e8SJason Beloro #pragma weak	memseg_get_metapfn
939853d9e8SJason Beloro #pragma weak	memseg_remap_meta
949853d9e8SJason Beloro 
959853d9e8SJason Beloro extern int ppvm_enable;
969853d9e8SJason Beloro extern page_t *ppvm_base;
979853d9e8SJason Beloro extern int memseg_alloc_meta(pfn_t, pgcnt_t, void **, pgcnt_t *);
989853d9e8SJason Beloro extern void memseg_free_meta(void *, pgcnt_t);
999853d9e8SJason Beloro extern pfn_t memseg_get_metapfn(void *, pgcnt_t);
1009853d9e8SJason Beloro extern void memseg_remap_meta(struct memseg *);
1019853d9e8SJason Beloro static int memseg_is_dynamic(struct memseg *);
1029853d9e8SJason Beloro static int memseg_includes_meta(struct memseg *);
103af4c679fSSean McEnroe pfn_t memseg_get_start(struct memseg *);
1049853d9e8SJason Beloro static void memseg_cpu_vm_flush(void);
1059853d9e8SJason Beloro 
1069853d9e8SJason Beloro int meta_alloc_enable;
1079853d9e8SJason Beloro 
108a3114836SGerry Liu #ifdef	DEBUG
109a3114836SGerry Liu static int memseg_debug;
110a3114836SGerry Liu #define	MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
111a3114836SGerry Liu #else
112a3114836SGerry Liu #define	MEMSEG_DEBUG(...)
113a3114836SGerry Liu #endif
114a3114836SGerry Liu 
1159853d9e8SJason Beloro /*
1169853d9e8SJason Beloro  * Add a chunk of memory to the system.
1177c478bd9Sstevel@tonic-gate  * base: starting PAGESIZE page of new memory.
1187c478bd9Sstevel@tonic-gate  * npgs: length in PAGESIZE pages.
1197c478bd9Sstevel@tonic-gate  *
1207c478bd9Sstevel@tonic-gate  * Adding mem this way doesn't increase the size of the hash tables;
1217c478bd9Sstevel@tonic-gate  * growing them would be too hard.  This should be OK, but adding memory
1227c478bd9Sstevel@tonic-gate  * dynamically most likely means more hash misses, since the tables will
1237c478bd9Sstevel@tonic-gate  * be smaller than they otherwise would be.
1247c478bd9Sstevel@tonic-gate  */
/*
 * Returns KPHYSM_OK on success.  Failure returns: KPHYSM_ESPAN (span
 * overlaps existing/reserved memory), KPHYSM_ERESOURCE (allocation or
 * page-counter resize failed, or span too small to hold its own
 * metadata), KPHYSM_ENOTVIABLE (kpm alignment constraints violated),
 * KPHYSM_EFAULT (metadata pages not accessible).
 */
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
	page_t *pp;
	page_t		*opp, *oepp, *segpp;
	struct memseg	*seg;
	uint64_t	avmem;
	pfn_t		pfn;
	pfn_t		pt_base = base;	/* original start pfn of whole span */
	pgcnt_t		tpgs = npgs;	/* original total page count */
	pgcnt_t		metapgs = 0;	/* pages consumed by page_t metadata */
	int		exhausted;
	pfn_t		pnum;
	int		mnode;
	caddr_t		vaddr;
	int		reuse;
	int		mlret;
	int		rv;
	int		flags;
	int		meta_alloc = 0;	/* metadata from pre-existing memory? */
	void		*mapva;
	void		*metabase = (void *)base;
	pgcnt_t		nkpmpgs = 0;
	offset_t	kpm_pages_off = 0;

	cmn_err(CE_CONT,
	    "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
	    npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);

	/*
	 * Add this span in the delete list to prevent interactions.
	 */
	if (!delspan_reserve(base, npgs)) {
		return (KPHYSM_ESPAN);
	}
	/*
	 * Check to see if any of the memory span has been added
	 * by trying an add to the installed memory list. This
	 * forms the interlocking process for add.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	if (mlret == MEML_SPANOP_OK)
		installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();

	if (mlret != MEML_SPANOP_OK) {
		/* Undo the reservation taken above before bailing out. */
		if (mlret == MEML_SPANOP_EALLOC) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		} else if (mlret == MEML_SPANOP_ESPAN) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ESPAN);
		} else {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		}
	}

	if (meta_alloc_enable) {
		/*
		 * Allocate the page_t's from existing memory;
		 * if that fails, allocate from the incoming memory.
		 */
		rv = memseg_alloc_meta(base, npgs, &metabase, &metapgs);
		if (rv == KPHYSM_OK) {
			ASSERT(metapgs);
			ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);
			meta_alloc = 1;
			goto mapalloc;
		}
	}

	/*
	 * We store the page_t's for this new memory in the first
	 * few pages of the chunk. Here, we go and get'em ...
	 */

	/*
	 * The expression after the '-' gives the number of pages
	 * that will fit in the new memory based on a requirement
	 * of (PAGESIZE + sizeof (page_t)) bytes per page.
	 */
	metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	/* The usable part of the span starts after the metadata pages. */
	npgs -= metapgs;
	base += metapgs;

	ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);

	exhausted = (metapgs == 0 || npgs == 0);

	if (kpm_enable && !exhausted) {
		pgcnt_t start, end, nkpmpgs_prelim;
		size_t	ptsz;

		/*
		 * A viable kpm large page mapping must not overlap two
		 * dynamic memsegs. Therefore the total size is checked
		 * to be at least kpm_pgsz and also whether start and end
		 * points are at least kpm_pgsz aligned.
		 */
		if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
		    pmodkpmp(base + npgs)) {

			kphysm_addmem_error_undospan(pt_base, tpgs);

			/*
			 * There is no specific error code for violating
			 * kpm granularity constraints.
			 */
			return (KPHYSM_ENOTVIABLE);
		}

		/*
		 * Recompute the metadata size to also cover the kpm_page
		 * structures, then re-derive the usable span.
		 */
		start = kpmptop(ptokpmp(base));
		end = kpmptop(ptokpmp(base + npgs));
		nkpmpgs_prelim = ptokpmp(end - start);
		ptsz = npgs * sizeof (page_t);
		metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
		exhausted = (tpgs <= metapgs);
		if (!exhausted) {
			npgs = tpgs - metapgs;
			base = pt_base + metapgs;

			/* final nkpmpgs */
			start = kpmptop(ptokpmp(base));
			nkpmpgs = ptokpmp(end - start);
			kpm_pages_off = ptsz +
			    (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
		}
	}

	/*
	 * Is memory area supplied too small?
	 */
	if (exhausted) {
		kphysm_addmem_error_undospan(pt_base, tpgs);
		/*
		 * There is no specific error code for 'too small'.
		 */
		return (KPHYSM_ERESOURCE);
	}

mapalloc:
	/*
	 * We may re-use a previously allocated VA space for the page_ts
	 * eventually, but we need to initialize and lock the pages first.
	 */

	/*
	 * Get an address in the kernel address map, map
	 * the page_t pages and see if we can touch them.
	 */

	mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
	if (mapva == NULL) {
		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't allocate VA for page_ts");

		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}
	pp = mapva;

	if (physmax < (pt_base + tpgs))
		physmax = (pt_base + tpgs);

	/*
	 * In the remapping code we map one page at a time so we must do
	 * the same here to match mapping sizes.
	 */
	pfn = pt_base;
	vaddr = (caddr_t)pp;
	for (pnum = 0; pnum < metapgs; pnum++) {
		/*
		 * With externally allocated metadata the backing pfns are
		 * not contiguous; look each one up.
		 */
		if (meta_alloc)
			pfn = memseg_get_metapfn(metabase, (pgcnt_t)pnum);
		hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
		pfn++;
		vaddr += ptob(1);
	}

	/* Probe the mapping; back everything out if it is not accessible. */
	if (ddi_peek32((dev_info_t *)NULL,
	    (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {

		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't access pp array at 0x%p [phys 0x%lx]",
		    (void *)pp, pt_base);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_EFAULT);
	}

	/*
	 * Add this memory slice to its memory node translation.
	 *
	 * Note that right now, each node may have only one slice;
	 * this may change with COD or in larger SSM systems with
	 * nested latency groups, so we must not assume that the
	 * node does not yet exist.
	 *
	 * Note that there may be multiple memory nodes associated with
	 * a single lgrp node on x86 systems.
	 */
	pnum = pt_base + tpgs - 1;
	mem_node_add_range(pt_base, pnum);

	/*
	 * Allocate or resize page counters as necessary to accommodate
	 * the increase in memory pages.
	 */
	mnode = PFN_2_MEM_NODE(pnum);
	PAGE_CTRS_ADJUST(base, npgs, rv);
	if (rv) {

		mem_node_del_range(pt_base, pnum);

		/* cleanup the  page counters */
		page_ctrs_cleanup();

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}

	/*
	 * Update the phys_avail memory list.
	 * The phys_install list was done at the start.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
	    (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
	ASSERT(mlret == MEML_SPANOP_OK);

	memlist_write_unlock();

	/* See if we can find a memseg to re-use. */
	if (meta_alloc) {
		seg = memseg_reuse(0);
		reuse = 1;	/* force unmapping of temp mapva */
		flags = MEMSEG_DYNAMIC | MEMSEG_META_ALLOC;
		/*
		 * There is a 1:1 fixed relationship between a pfn
		 * and a page_t VA.  The pfn is used as an index into
		 * the ppvm_base page_t table in order to calculate
		 * the page_t base address for a given pfn range.
		 */
		segpp = ppvm_base + base;
	} else {
		seg = memseg_reuse(metapgs);
		reuse = (seg != NULL);
		flags = MEMSEG_DYNAMIC | MEMSEG_META_INCL;
		segpp = pp;
	}

	/*
	 * Initialize the memseg structure representing this memory
	 * and add it to the existing list of memsegs. Do some basic
	 * initialization and add the memory to the system.
	 * In order to prevent lock deadlocks, the add_physmem()
	 * code is repeated here, but split into several stages.
	 *
	 * If a memseg is reused, invalidate memseg pointers in
	 * all cpu vm caches.  We need to do this since the check
	 *	pp >= seg->pages && pp < seg->epages
	 * used in various places is not atomic and so the first compare
	 * can happen before reuse and the second compare after reuse.
	 * The invalidation ensures that a memseg is not dereferenced while
	 * its page/pfn pointers are changing.
	 */
	if (seg == NULL) {
		seg = memseg_alloc();
		ASSERT(seg != NULL);
		seg->msegflags = flags;
		MEMSEG_DEBUG("memseg_get: alloc seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		seg->pages = segpp;
	} else {
		ASSERT(seg->msegflags == flags);
		ASSERT(seg->pages_base == seg->pages_end);
		MEMSEG_DEBUG("memseg_get: reuse seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		if (meta_alloc) {
			memseg_cpu_vm_flush();
			seg->pages = segpp;
		}
	}

	seg->epages = seg->pages + npgs;
	seg->pages_base = base;
	seg->pages_end = base + npgs;

	/*
	 * Initialize metadata. The page_ts are set to locked state
	 * ready to be freed.
	 */
	bzero((caddr_t)pp, ptob(metapgs));

	pfn = seg->pages_base;
	/* Save the original pp base in case we reuse a memseg. */
	opp = pp;
	oepp = opp + npgs;
	for (pp = opp; pp < oepp; pp++) {
		pp->p_pagenum = pfn;
		pfn++;
		page_iolock_init(pp);
		/* Acquire each page exclusively before it is freed below. */
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		pp->p_offset = (u_offset_t)-1;
	}

	if (reuse) {
		/* Remap our page_ts to the re-used memseg VA space. */
		pfn = pt_base;
		vaddr = (caddr_t)seg->pages;
		for (pnum = 0; pnum < metapgs; pnum++) {
			if (meta_alloc)
				pfn = memseg_get_metapfn(metabase,
				    (pgcnt_t)pnum);
			hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
			pfn++;
			vaddr += ptob(1);
		}

		/* The temporary mapping is no longer needed. */
		hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
	}

	hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);

	memsegs_lock(1);

	/*
	 * The new memseg is inserted at the beginning of the list.
	 * Not only does this save searching for the tail, but in the
	 * case of a re-used memseg, it solves the problem of what
	 * happens if some process has still got a pointer to the
	 * memseg and follows the next pointer to continue traversing
	 * the memsegs list.
	 */

	hat_kpm_addmem_mseg_insert(seg);

	seg->next = memsegs;
	/* Ensure seg is fully visible before it is published below. */
	membar_producer();

	hat_kpm_addmem_memsegs_update(seg);

	memsegs = seg;

	build_pfn_hash();

	total_pages += npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock();

	memsegs_unlock(1);

	PLCNT_MODIFY_MAX(seg->pages_base, (long)npgs);

	/*
	 * Free the pages outside the lock to avoid locking loops.
	 */
	for (pp = seg->pages; pp < seg->epages; pp++) {
		page_free(pp, 1);
	}

	/*
	 * Now that we've updated the appropriate memory lists we
	 * need to reset a number of globals, since we've increased memory.
	 * Several have already been updated for us as noted above. The
	 * globals we're interested in at this point are:
	 *   physmax - highest page frame number.
	 *   physinstalled - number of pages currently installed (done earlier)
	 *   maxmem - max free pages in the system
	 *   physmem - physical memory pages available
	 *   availrmem - real memory available
	 */

	mutex_enter(&freemem_lock);
	maxmem += npgs;
	physmem += npgs;
	availrmem += npgs;
	availrmem_initial += npgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	page_freelist_coalesce_all(mnode);

	kphysm_setup_post_add(npgs);

	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/*
	 * Inform DDI of update
	 */
	ddi_mem_update((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT);

	/* The add is complete; drop the interlock taken at entry. */
	delspan_unreserve(pt_base, tpgs);

	return (KPHYSM_OK);		/* Successfully added system memory */
}
5767c478bd9Sstevel@tonic-gate 
/*
 * There are various error conditions in kphysm_add_memory_dynamic()
 * which require a rollback of already changed global state.
 *
 * Undo the span reservation and the phys_install update made earlier
 * in the add path: delete [pt_base, pt_base + tpgs) from phys_install,
 * recompute the derived globals, then drop the delspan reservation.
 */
static void
kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
{
	int mlret;

	/* Unreserve memory span. */
	memlist_write_lock();

	mlret = memlist_delete_span(
	    (uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	ASSERT(mlret == MEML_SPANOP_OK);
	phys_install_has_changed();
	/* Recompute physmax/physinstalled now that the span is gone. */
	installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();
	/* Release the reservation taken via delspan_reserve(). */
	delspan_unreserve(pt_base, tpgs);
}
6007c478bd9Sstevel@tonic-gate 
6017c478bd9Sstevel@tonic-gate /*
6029853d9e8SJason Beloro  * Only return an available memseg of exactly the right size
6039853d9e8SJason Beloro  * if size is required.
6047c478bd9Sstevel@tonic-gate  * When the meta data area has it's own virtual address space
6057c478bd9Sstevel@tonic-gate  * we will need to manage this more carefully and do best fit
606da6c28aaSamw  * allocations, possibly splitting an available area.
6077c478bd9Sstevel@tonic-gate  */
6089853d9e8SJason Beloro struct memseg *
memseg_reuse(pgcnt_t metapgs)6097c478bd9Sstevel@tonic-gate memseg_reuse(pgcnt_t metapgs)
6107c478bd9Sstevel@tonic-gate {
6119853d9e8SJason Beloro 	int type;
6127c478bd9Sstevel@tonic-gate 	struct memseg **segpp, *seg;
6137c478bd9Sstevel@tonic-gate 
6147c478bd9Sstevel@tonic-gate 	mutex_enter(&memseg_lists_lock);
6157c478bd9Sstevel@tonic-gate 
6167c478bd9Sstevel@tonic-gate 	segpp = &memseg_va_avail;
6177c478bd9Sstevel@tonic-gate 	for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
6187c478bd9Sstevel@tonic-gate 		caddr_t end;
6197c478bd9Sstevel@tonic-gate 
6209853d9e8SJason Beloro 		/*
6219853d9e8SJason Beloro 		 * Make sure we are reusing the right segment type.
6229853d9e8SJason Beloro 		 */
6239853d9e8SJason Beloro 		type = metapgs ? MEMSEG_META_INCL : MEMSEG_META_ALLOC;
6249853d9e8SJason Beloro 
6259853d9e8SJason Beloro 		if ((seg->msegflags & (MEMSEG_META_INCL | MEMSEG_META_ALLOC))
6269853d9e8SJason Beloro 		    != type)
6279853d9e8SJason Beloro 			continue;
6289853d9e8SJason Beloro 
6297c478bd9Sstevel@tonic-gate 		if (kpm_enable)
6307c478bd9Sstevel@tonic-gate 			end = hat_kpm_mseg_reuse(seg);
6317c478bd9Sstevel@tonic-gate 		else
6327c478bd9Sstevel@tonic-gate 			end = (caddr_t)seg->epages;
6337c478bd9Sstevel@tonic-gate 
6349853d9e8SJason Beloro 		/*
6359853d9e8SJason Beloro 		 * Check for the right size if it is provided.
6369853d9e8SJason Beloro 		 */
6379853d9e8SJason Beloro 		if (!metapgs || btopr(end - (caddr_t)seg->pages) == metapgs) {
6387c478bd9Sstevel@tonic-gate 			*segpp = seg->lnext;
6397c478bd9Sstevel@tonic-gate 			seg->lnext = NULL;
6407c478bd9Sstevel@tonic-gate 			break;
6417c478bd9Sstevel@tonic-gate 		}
6427c478bd9Sstevel@tonic-gate 	}
6437c478bd9Sstevel@tonic-gate 	mutex_exit(&memseg_lists_lock);
6447c478bd9Sstevel@tonic-gate 
6457c478bd9Sstevel@tonic-gate 	return (seg);
6467c478bd9Sstevel@tonic-gate }
6477c478bd9Sstevel@tonic-gate 
/* Generation counter used to mint unique external memhandle_t values. */
static uint_t handle_gen;

/*
 * One contiguous span of physical pages taking part in an add or
 * delete operation.  Spans are singly linked through mds_next onto
 * a transit_list.
 */
struct memdelspan {
	struct memdelspan *mds_next;
	pfn_t		mds_base;	/* first pfn of the span */
	pgcnt_t		mds_npgs;	/* number of pages in the span */
	uint_t		*mds_bitmap;	/* one bit per page in the span */
	uint_t		*mds_bitmap_retired; /* bitmap of retired pages */
};

/* Bits per bitmap word. */
#define	NBPBMW		(sizeof (uint_t) * NBBY)
/* Bytes needed for a bitmap covering every page of span MDSP. */
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))

/*
 * The set of spans belonging to one in-flight operation.  All active
 * transit lists are linked onto transit_list_head so that overlap
 * checks can see every span in the system.
 */
struct transit_list {
	struct transit_list	*trl_next;
	struct memdelspan	*trl_spans;
	int			trl_collect;	/* see transit_list_collect() */
};

/* Global anchor for all transit lists; trh_lock protects the chain. */
struct transit_list_head {
	kmutex_t		trh_lock;
	struct transit_list	*trh_head;
};

static struct transit_list_head transit_list_head;

/* Forward declarations. */
struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);
6797c478bd9Sstevel@tonic-gate 
/* Memory-delete statistics are only compiled into DEBUG kernels. */
#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */

#ifdef MEM_DEL_STATS
/*
 * Patchable flag; presumably enables printing of the stats below at
 * the end of a delete — confirm against mem_del_stat_print_func().
 */
static int mem_del_stat_print = 0;
/*
 * Per-delete-operation event counters, embedded in struct mem_handle
 * as mh_delstat.  Counter names mirror the code paths that bump them
 * via MDSTAT_INCR().
 */
struct mem_del_stat {
	uint_t	nloop;
	uint_t	need_free;
	uint_t	free_loop;
	uint_t	free_low;
	uint_t	free_failed;
	uint_t	ncheck;
	uint_t	nopaget;
	uint_t	lockfail;
	uint_t	nfree;
	uint_t	nreloc;
	uint_t	nrelocfail;
	uint_t	already_done;
	uint_t	first_notfree;
	uint_t	npplocked;
	uint_t	nlockreloc;
	uint_t	nnorepl;
	uint_t	nmodreloc;
	uint_t	ndestroy;
	uint_t	nputpage;
	uint_t	nnoreclaim;
	uint_t	ndelay;
	uint_t	demotefail;
	uint64_t nticks_total;	/* total ticks, via MDSTAT_TOTAL() */
	uint64_t nticks_pgrp;	/* per-page-group ticks, via MDSTAT_PGRP() */
	uint_t	retired;
	uint_t	toxic;
	uint_t	failing;
	uint_t	modtoxic;
	uint_t	npplkdtoxic;
	uint_t	gptlmodfail;
	uint_t	gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomic required.
 */
#define	MDSTAT_INCR(MHP, FLD)	(MHP)->mh_delstat.FLD++
#define	MDSTAT_TOTAL(MHP, ntck)	((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
/* Non-DEBUG kernels: the stat macros compile away to nothing. */
#define	MDSTAT_INCR(MHP, FLD)
#define	MDSTAT_TOTAL(MHP, ntck)
#define	MDSTAT_PGRP(MHP, ntck)
#define	MDSTAT_PRINT(MHP)
#endif /* MEM_DEL_STATS */
7347c478bd9Sstevel@tonic-gate 
/*
 * Lifecycle states of a memory-delete handle:
 * FREE -> INIT (gethandle) -> STARTING -> RUNNING -> DONE -> RELEASE.
 */
typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t	mh_mutex;
	struct mem_handle *mh_next;	/* link on mem_handle_head */
	memhandle_t	mh_exthandle;	/* opaque handle given to callers */
	mhnd_state_t	mh_state;
	struct transit_list mh_transit;	/* spans this operation covers */
	pgcnt_t		mh_phys_pages;
	pgcnt_t		mh_vm_pages;
	pgcnt_t		mh_hold_todo;
	/* Completion callback and its argument, invoked when delete ends. */
	void		(*mh_delete_complete)(void *, int error);
	void		*mh_delete_complete_arg;
	volatile uint_t mh_cancel;	/* nonzero: abort the delete */
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t	mh_cv;
	kthread_id_t	mh_thread_id;	/* delete worker thread */
	page_t		*mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

/* All handles, chained through mh_next; protected by the list mutex. */
static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;
7667c478bd9Sstevel@tonic-gate 
7677c478bd9Sstevel@tonic-gate static struct mem_handle *
kphysm_allocate_mem_handle()7687c478bd9Sstevel@tonic-gate kphysm_allocate_mem_handle()
7697c478bd9Sstevel@tonic-gate {
7707c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
7717c478bd9Sstevel@tonic-gate 
7727c478bd9Sstevel@tonic-gate 	mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
7737c478bd9Sstevel@tonic-gate 	mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
7747c478bd9Sstevel@tonic-gate 	mutex_enter(&mem_handle_list_mutex);
7757c478bd9Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
7767c478bd9Sstevel@tonic-gate 	/* handle_gen is protected by list mutex. */
7779f1a1f17Sdmick 	mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen);
7787c478bd9Sstevel@tonic-gate 	mhp->mh_next = mem_handle_head;
7797c478bd9Sstevel@tonic-gate 	mem_handle_head = mhp;
7807c478bd9Sstevel@tonic-gate 	mutex_exit(&mem_handle_list_mutex);
7817c478bd9Sstevel@tonic-gate 
7827c478bd9Sstevel@tonic-gate 	return (mhp);
7837c478bd9Sstevel@tonic-gate }
7847c478bd9Sstevel@tonic-gate 
/*
 * Unlink a handle (already in the MHND_FREE state) from the global
 * list and free it.  Called with mh_mutex held; the mutex is released
 * and destroyed here.
 */
static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order. This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	/* Walk the list to find the pointer that references mhp. */
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next changing and this is the only thread that
	 * can be referencing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}
8157c478bd9Sstevel@tonic-gate 
/*
 * This function finds the internal mem_handle corresponding to an
 * external handle and returns it with the mh_mutex held.
 * Returns NULL if no live handle matches.
 */
static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)
{
	struct mem_handle *mhp;

	mutex_enter(&mem_handle_list_mutex);
	for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
		if (mhp->mh_exthandle == handle) {
			mutex_enter(&mhp->mh_mutex);
			/*
			 * The state of the handle could have been changed
			 * by kphysm_del_release() while waiting for mh_mutex.
			 */
			if (mhp->mh_state == MHND_FREE) {
				mutex_exit(&mhp->mh_mutex);
				continue;
			}
			break;
		}
	}
	mutex_exit(&mem_handle_list_mutex);
	return (mhp);
}
8437c478bd9Sstevel@tonic-gate 
8447c478bd9Sstevel@tonic-gate int
kphysm_del_gethandle(memhandle_t * xmhp)8457c478bd9Sstevel@tonic-gate kphysm_del_gethandle(memhandle_t *xmhp)
8467c478bd9Sstevel@tonic-gate {
8477c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
8487c478bd9Sstevel@tonic-gate 
8497c478bd9Sstevel@tonic-gate 	mhp = kphysm_allocate_mem_handle();
8507c478bd9Sstevel@tonic-gate 	/*
8517c478bd9Sstevel@tonic-gate 	 * The handle is allocated using KM_SLEEP, so cannot fail.
8527c478bd9Sstevel@tonic-gate 	 * If the implementation is changed, the correct error to return
8537c478bd9Sstevel@tonic-gate 	 * here would be KPHYSM_ENOHANDLES.
8547c478bd9Sstevel@tonic-gate 	 */
8557c478bd9Sstevel@tonic-gate 	ASSERT(mhp->mh_state == MHND_FREE);
8567c478bd9Sstevel@tonic-gate 	mhp->mh_state = MHND_INIT;
8577c478bd9Sstevel@tonic-gate 	*xmhp = mhp->mh_exthandle;
8587c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
8597c478bd9Sstevel@tonic-gate 	return (KPHYSM_OK);
8607c478bd9Sstevel@tonic-gate }
8617c478bd9Sstevel@tonic-gate 
8627c478bd9Sstevel@tonic-gate static int
overlapping(pfn_t b1,pgcnt_t l1,pfn_t b2,pgcnt_t l2)8637c478bd9Sstevel@tonic-gate overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
8647c478bd9Sstevel@tonic-gate {
8657c478bd9Sstevel@tonic-gate 	pfn_t e1, e2;
8667c478bd9Sstevel@tonic-gate 
8677c478bd9Sstevel@tonic-gate 	e1 = b1 + l1;
8687c478bd9Sstevel@tonic-gate 	e2 = b2 + l2;
8697c478bd9Sstevel@tonic-gate 
8707c478bd9Sstevel@tonic-gate 	return (!(b2 >= e1 || b1 >= e2));
8717c478bd9Sstevel@tonic-gate }
8727c478bd9Sstevel@tonic-gate 
static int can_remove_pgs(pgcnt_t);

/*
 * Intersect the span [base, base + npgs) with the installed physical
 * memory list (phys_install) and return a freshly allocated chain of
 * memdelspans, one per overlapping piece.  Returns NULL if no
 * installed memory lies within the span.
 */
static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	/* Work in bytes to match the memlist representation. */
	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		/* Find the first phys_install entry overlapping the span. */
		for (mlp = phys_install; mlp != NULL; mlp = mlp->ml_next) {
			if (address >= (mlp->ml_address + mlp->ml_size))
				continue;
			if ((address + size) > mlp->ml_address)
				break;
		}
		if (mlp == NULL) {
			/* No overlap at all: consume the rest of the span. */
			address += size;
			size = 0;
			thislen = 0;
		} else {
			/* Skip any hole before this entry begins. */
			if (address < mlp->ml_address) {
				size -= (mlp->ml_address - address);
				address = mlp->ml_address;
			}
			ASSERT(address >= mlp->ml_address);
			/* Clip the piece to the end of this entry. */
			if ((address + size) >
			    (mlp->ml_address + mlp->ml_size)) {
				thislen =
				    mlp->ml_size - (address - mlp->ml_address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		/* Record this overlapping piece at the head of the chain. */
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}
9277c478bd9Sstevel@tonic-gate 
9287c478bd9Sstevel@tonic-gate static void
free_delspans(struct memdelspan * mdsp)9297c478bd9Sstevel@tonic-gate free_delspans(struct memdelspan *mdsp)
9307c478bd9Sstevel@tonic-gate {
9317c478bd9Sstevel@tonic-gate 	struct memdelspan *amdsp;
9327c478bd9Sstevel@tonic-gate 
9337c478bd9Sstevel@tonic-gate 	while ((amdsp = mdsp) != NULL) {
9347c478bd9Sstevel@tonic-gate 		mdsp = amdsp->mds_next;
9357c478bd9Sstevel@tonic-gate 		kmem_free(amdsp, sizeof (struct memdelspan));
9367c478bd9Sstevel@tonic-gate 	}
9377c478bd9Sstevel@tonic-gate }
9387c478bd9Sstevel@tonic-gate 
9397c478bd9Sstevel@tonic-gate /*
9407c478bd9Sstevel@tonic-gate  * Concatenate lists. No list ordering is required.
9417c478bd9Sstevel@tonic-gate  */
9427c478bd9Sstevel@tonic-gate 
9437c478bd9Sstevel@tonic-gate static void
delspan_concat(struct memdelspan ** mdspp,struct memdelspan * mdsp)9447c478bd9Sstevel@tonic-gate delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp)
9457c478bd9Sstevel@tonic-gate {
9467c478bd9Sstevel@tonic-gate 	while (*mdspp != NULL)
9477c478bd9Sstevel@tonic-gate 		mdspp = &(*mdspp)->mds_next;
9487c478bd9Sstevel@tonic-gate 
9497c478bd9Sstevel@tonic-gate 	*mdspp = mdsp;
9507c478bd9Sstevel@tonic-gate }
9517c478bd9Sstevel@tonic-gate 
/*
 * Given a new list of delspans, check there is no overlap with
 * all existing span activity (add or delete) and then concatenate
 * the new spans to the given list.
 * Return 1 for OK, 0 if overlapping.
 */
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	/*
	 * Compare every new span against every span of every active
	 * transit list (including my_tlp's own existing spans).
	 */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		/*
		 * First spans for this list: make the list visible on
		 * the global chain before attaching them.
		 */
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}
10017c478bd9Sstevel@tonic-gate 
/*
 * Remove spans from my_tlp.  npgs == 0 means "remove everything";
 * otherwise only spans wholly contained in [base, base + npgs) are
 * removed.  When the list becomes empty the transit list itself is
 * taken off the global chain.
 */
static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			/* Drop all spans and retire the transit list. */
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			/*
			 * Walk with a pointer-to-link so contained spans
			 * can be unlinked in place.
			 */
			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					*prv = mdsp->mds_next;
					/* Isolate before freeing the chain. */
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}
10457c478bd9Sstevel@tonic-gate 
/*
 * Reserve interface for add to stop delete before add finished.
 * This list is only accessed through the delspan_insert/remove
 * functions and so is fully protected by the mutex in struct transit_list.
 */

static struct transit_list reserve_transit;
10537c478bd9Sstevel@tonic-gate 
10547c478bd9Sstevel@tonic-gate static int
delspan_reserve(pfn_t base,pgcnt_t npgs)10557c478bd9Sstevel@tonic-gate delspan_reserve(pfn_t base, pgcnt_t npgs)
10567c478bd9Sstevel@tonic-gate {
10577c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp;
10587c478bd9Sstevel@tonic-gate 	int ret;
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate 	mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
10617c478bd9Sstevel@tonic-gate 	mdsp->mds_base = base;
10627c478bd9Sstevel@tonic-gate 	mdsp->mds_npgs = npgs;
10637c478bd9Sstevel@tonic-gate 	if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) {
10647c478bd9Sstevel@tonic-gate 		free_delspans(mdsp);
10657c478bd9Sstevel@tonic-gate 	}
10667c478bd9Sstevel@tonic-gate 	return (ret);
10677c478bd9Sstevel@tonic-gate }
10687c478bd9Sstevel@tonic-gate 
/* Release a span reservation previously taken by delspan_reserve(). */
static void
delspan_unreserve(pfn_t base, pgcnt_t npgs)
{
	delspan_remove(&reserve_transit, base, npgs);
}
10747c478bd9Sstevel@tonic-gate 
/*
 * Return whether memseg was created by kphysm_add_memory_dynamic().
 * Nonzero (the MEMSEG_DYNAMIC flag bit) if so, 0 otherwise.
 */
static int
memseg_is_dynamic(struct memseg *seg)
{
	return (seg->msegflags & MEMSEG_DYNAMIC);
}
10837c478bd9Sstevel@tonic-gate 
10847c478bd9Sstevel@tonic-gate int
kphysm_del_span(memhandle_t handle,pfn_t base,pgcnt_t npgs)10857c478bd9Sstevel@tonic-gate kphysm_del_span(
10867c478bd9Sstevel@tonic-gate 	memhandle_t handle,
10877c478bd9Sstevel@tonic-gate 	pfn_t base,
10887c478bd9Sstevel@tonic-gate 	pgcnt_t npgs)
10897c478bd9Sstevel@tonic-gate {
10907c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
10917c478bd9Sstevel@tonic-gate 	struct memseg *seg;
10927c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp;
10937c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp_new;
10947c478bd9Sstevel@tonic-gate 	pgcnt_t phys_pages, vm_pages;
10957c478bd9Sstevel@tonic-gate 	pfn_t p_end;
10967c478bd9Sstevel@tonic-gate 	page_t *pp;
10977c478bd9Sstevel@tonic-gate 	int ret;
10987c478bd9Sstevel@tonic-gate 
10997c478bd9Sstevel@tonic-gate 	mhp = kphysm_lookup_mem_handle(handle);
11007c478bd9Sstevel@tonic-gate 	if (mhp == NULL) {
11017c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
11027c478bd9Sstevel@tonic-gate 	}
11037c478bd9Sstevel@tonic-gate 	if (mhp->mh_state != MHND_INIT) {
11047c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
11057c478bd9Sstevel@tonic-gate 		return (KPHYSM_ESEQUENCE);
11067c478bd9Sstevel@tonic-gate 	}
11077c478bd9Sstevel@tonic-gate 
11087c478bd9Sstevel@tonic-gate 	/*
11097c478bd9Sstevel@tonic-gate 	 * Intersect the span with the installed memory list (phys_install).
11107c478bd9Sstevel@tonic-gate 	 */
11117c478bd9Sstevel@tonic-gate 	mdsp_new = span_to_install(base, npgs);
11127c478bd9Sstevel@tonic-gate 	if (mdsp_new == NULL) {
11137c478bd9Sstevel@tonic-gate 		/*
11147c478bd9Sstevel@tonic-gate 		 * No physical memory in this range. Is this an
11157c478bd9Sstevel@tonic-gate 		 * error? If an attempt to start the delete is made
11167c478bd9Sstevel@tonic-gate 		 * for OK returns from del_span such as this, start will
11177c478bd9Sstevel@tonic-gate 		 * return an error.
11187c478bd9Sstevel@tonic-gate 		 * Could return KPHYSM_ENOWORK.
11197c478bd9Sstevel@tonic-gate 		 */
11207c478bd9Sstevel@tonic-gate 		/*
11217c478bd9Sstevel@tonic-gate 		 * It is assumed that there are no error returns
11227c478bd9Sstevel@tonic-gate 		 * from span_to_install() due to kmem_alloc failure.
11237c478bd9Sstevel@tonic-gate 		 */
11247c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
11257c478bd9Sstevel@tonic-gate 		return (KPHYSM_OK);
11267c478bd9Sstevel@tonic-gate 	}
11277c478bd9Sstevel@tonic-gate 	/*
11287c478bd9Sstevel@tonic-gate 	 * Does this span overlap an existing span?
11297c478bd9Sstevel@tonic-gate 	 */
11307c478bd9Sstevel@tonic-gate 	if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
11317c478bd9Sstevel@tonic-gate 		/*
11327c478bd9Sstevel@tonic-gate 		 * Differentiate between already on list for this handle
11337c478bd9Sstevel@tonic-gate 		 * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
11347c478bd9Sstevel@tonic-gate 		 */
11357c478bd9Sstevel@tonic-gate 		ret = KPHYSM_EBUSY;
11367c478bd9Sstevel@tonic-gate 		for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11377c478bd9Sstevel@tonic-gate 		    mdsp = mdsp->mds_next) {
11387c478bd9Sstevel@tonic-gate 			if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
11397c478bd9Sstevel@tonic-gate 			    base, npgs)) {
11407c478bd9Sstevel@tonic-gate 				ret = KPHYSM_EDUP;
11417c478bd9Sstevel@tonic-gate 				break;
11427c478bd9Sstevel@tonic-gate 			}
11437c478bd9Sstevel@tonic-gate 		}
11447c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
11457c478bd9Sstevel@tonic-gate 		free_delspans(mdsp_new);
11467c478bd9Sstevel@tonic-gate 		return (ret);
11477c478bd9Sstevel@tonic-gate 	}
11487c478bd9Sstevel@tonic-gate 	/*
11497c478bd9Sstevel@tonic-gate 	 * At this point the spans in mdsp_new have been inserted into the
11507c478bd9Sstevel@tonic-gate 	 * list of spans for this handle and thereby to the global list of
11517c478bd9Sstevel@tonic-gate 	 * spans being processed. Each of these spans must now be checked
11527c478bd9Sstevel@tonic-gate 	 * for relocatability. As a side-effect segments in the memseg list
11537c478bd9Sstevel@tonic-gate 	 * may be split.
11547c478bd9Sstevel@tonic-gate 	 *
11557c478bd9Sstevel@tonic-gate 	 * Note that mdsp_new can no longer be used as it is now part of
11567c478bd9Sstevel@tonic-gate 	 * a larger list. Select elements of this larger list based
11577c478bd9Sstevel@tonic-gate 	 * on base and npgs.
11587c478bd9Sstevel@tonic-gate 	 */
11597c478bd9Sstevel@tonic-gate restart:
11607c478bd9Sstevel@tonic-gate 	phys_pages = 0;
11617c478bd9Sstevel@tonic-gate 	vm_pages = 0;
11627c478bd9Sstevel@tonic-gate 	ret = KPHYSM_OK;
11637c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11647c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
11657c478bd9Sstevel@tonic-gate 		pgcnt_t pages_checked;
11667c478bd9Sstevel@tonic-gate 
11677c478bd9Sstevel@tonic-gate 		if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
11687c478bd9Sstevel@tonic-gate 			continue;
11697c478bd9Sstevel@tonic-gate 		}
11707c478bd9Sstevel@tonic-gate 		p_end = mdsp->mds_base + mdsp->mds_npgs;
11717c478bd9Sstevel@tonic-gate 		/*
11727c478bd9Sstevel@tonic-gate 		 * The pages_checked count is a hack. All pages should be
11737c478bd9Sstevel@tonic-gate 		 * checked for relocatability. Those not covered by memsegs
11747c478bd9Sstevel@tonic-gate 		 * should be tested with arch_kphysm_del_span_ok().
11757c478bd9Sstevel@tonic-gate 		 */
11767c478bd9Sstevel@tonic-gate 		pages_checked = 0;
11777c478bd9Sstevel@tonic-gate 		for (seg = memsegs; seg; seg = seg->next) {
11787c478bd9Sstevel@tonic-gate 			pfn_t mseg_start;
11797c478bd9Sstevel@tonic-gate 
11807c478bd9Sstevel@tonic-gate 			if (seg->pages_base >= p_end ||
11817c478bd9Sstevel@tonic-gate 			    seg->pages_end <= mdsp->mds_base) {
11827c478bd9Sstevel@tonic-gate 				/* Span and memseg don't overlap. */
11837c478bd9Sstevel@tonic-gate 				continue;
11847c478bd9Sstevel@tonic-gate 			}
11859853d9e8SJason Beloro 			mseg_start = memseg_get_start(seg);
11867c478bd9Sstevel@tonic-gate 			/* Check that segment is suitable for delete. */
11879853d9e8SJason Beloro 			if (memseg_includes_meta(seg)) {
11887c478bd9Sstevel@tonic-gate 				/*
11899853d9e8SJason Beloro 				 * Check that this segment is completely
11909853d9e8SJason Beloro 				 * within the span.
11917c478bd9Sstevel@tonic-gate 				 */
11927c478bd9Sstevel@tonic-gate 				if (mseg_start < mdsp->mds_base ||
11937c478bd9Sstevel@tonic-gate 				    seg->pages_end > p_end) {
11947c478bd9Sstevel@tonic-gate 					ret = KPHYSM_EBUSY;
11957c478bd9Sstevel@tonic-gate 					break;
11967c478bd9Sstevel@tonic-gate 				}
11977c478bd9Sstevel@tonic-gate 				pages_checked += seg->pages_end - mseg_start;
11987c478bd9Sstevel@tonic-gate 			} else {
11997c478bd9Sstevel@tonic-gate 				/*
12007c478bd9Sstevel@tonic-gate 				 * If this segment is larger than the span,
12017c478bd9Sstevel@tonic-gate 				 * try to split it. After the split, it
12027c478bd9Sstevel@tonic-gate 				 * is necessary to restart.
12037c478bd9Sstevel@tonic-gate 				 */
12047c478bd9Sstevel@tonic-gate 				if (seg->pages_base < mdsp->mds_base ||
12057c478bd9Sstevel@tonic-gate 				    seg->pages_end > p_end) {
12067c478bd9Sstevel@tonic-gate 					pfn_t abase;
12077c478bd9Sstevel@tonic-gate 					pgcnt_t anpgs;
12087c478bd9Sstevel@tonic-gate 					int s_ret;
12097c478bd9Sstevel@tonic-gate 
12107c478bd9Sstevel@tonic-gate 					/* Split required.  */
12117c478bd9Sstevel@tonic-gate 					if (mdsp->mds_base < seg->pages_base)
12127c478bd9Sstevel@tonic-gate 						abase = seg->pages_base;
12137c478bd9Sstevel@tonic-gate 					else
12147c478bd9Sstevel@tonic-gate 						abase = mdsp->mds_base;
12157c478bd9Sstevel@tonic-gate 					if (p_end > seg->pages_end)
12167c478bd9Sstevel@tonic-gate 						anpgs = seg->pages_end - abase;
12177c478bd9Sstevel@tonic-gate 					else
12187c478bd9Sstevel@tonic-gate 						anpgs = p_end - abase;
12197c478bd9Sstevel@tonic-gate 					s_ret = kphysm_split_memseg(abase,
12207c478bd9Sstevel@tonic-gate 					    anpgs);
12217c478bd9Sstevel@tonic-gate 					if (s_ret == 0) {
12227c478bd9Sstevel@tonic-gate 						/* Split failed. */
12237c478bd9Sstevel@tonic-gate 						ret = KPHYSM_ERESOURCE;
12247c478bd9Sstevel@tonic-gate 						break;
12257c478bd9Sstevel@tonic-gate 					}
12267c478bd9Sstevel@tonic-gate 					goto restart;
12277c478bd9Sstevel@tonic-gate 				}
12287c478bd9Sstevel@tonic-gate 				pages_checked +=
12297c478bd9Sstevel@tonic-gate 				    seg->pages_end - seg->pages_base;
12307c478bd9Sstevel@tonic-gate 			}
12317c478bd9Sstevel@tonic-gate 			/*
12327c478bd9Sstevel@tonic-gate 			 * The memseg is wholly within the delete span.
12337c478bd9Sstevel@tonic-gate 			 * The individual pages can now be checked.
12347c478bd9Sstevel@tonic-gate 			 */
12357c478bd9Sstevel@tonic-gate 			/* Cage test. */
12367c478bd9Sstevel@tonic-gate 			for (pp = seg->pages; pp < seg->epages; pp++) {
12377c478bd9Sstevel@tonic-gate 				if (PP_ISNORELOC(pp)) {
12387c478bd9Sstevel@tonic-gate 					ret = KPHYSM_ENONRELOC;
12397c478bd9Sstevel@tonic-gate 					break;
12407c478bd9Sstevel@tonic-gate 				}
12417c478bd9Sstevel@tonic-gate 			}
12427c478bd9Sstevel@tonic-gate 			if (ret != KPHYSM_OK) {
12437c478bd9Sstevel@tonic-gate 				break;
12447c478bd9Sstevel@tonic-gate 			}
12457c478bd9Sstevel@tonic-gate 			phys_pages += (seg->pages_end - mseg_start);
12467c478bd9Sstevel@tonic-gate 			vm_pages += MSEG_NPAGES(seg);
12477c478bd9Sstevel@tonic-gate 		}
12487c478bd9Sstevel@tonic-gate 		if (ret != KPHYSM_OK)
12497c478bd9Sstevel@tonic-gate 			break;
12507c478bd9Sstevel@tonic-gate 		if (pages_checked != mdsp->mds_npgs) {
12517c478bd9Sstevel@tonic-gate 			ret = KPHYSM_ENONRELOC;
12527c478bd9Sstevel@tonic-gate 			break;
12537c478bd9Sstevel@tonic-gate 		}
12547c478bd9Sstevel@tonic-gate 	}
12557c478bd9Sstevel@tonic-gate 
12567c478bd9Sstevel@tonic-gate 	if (ret == KPHYSM_OK) {
12577c478bd9Sstevel@tonic-gate 		mhp->mh_phys_pages += phys_pages;
12587c478bd9Sstevel@tonic-gate 		mhp->mh_vm_pages += vm_pages;
12597c478bd9Sstevel@tonic-gate 	} else {
12607c478bd9Sstevel@tonic-gate 		/*
12617c478bd9Sstevel@tonic-gate 		 * Keep holding the mh_mutex to prevent it going away.
12627c478bd9Sstevel@tonic-gate 		 */
12637c478bd9Sstevel@tonic-gate 		delspan_remove(&mhp->mh_transit, base, npgs);
12647c478bd9Sstevel@tonic-gate 	}
12657c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
12667c478bd9Sstevel@tonic-gate 	return (ret);
12677c478bd9Sstevel@tonic-gate }
12687c478bd9Sstevel@tonic-gate 
/*
 * Query the span [base, base + npgs) for delete viability without
 * setting up an actual delete.  On return *mqp describes the span:
 *   phys_pages		pages physically installed in the span
 *   managed		pages represented by a page_t (within a memseg)
 *   nonrelocatable	pages that could not be relocated (cage or arch)
 *   first_nonrelocatable / last_nonrelocatable
 *			pfn bounds of the nonrelocatable pages
 * Always returns KPHYSM_OK; if the installed-span list cannot be
 * built (allocation failure) the counts are simply left at zero.
 */
int
kphysm_del_span_query(
	pfn_t base,
	pgcnt_t npgs,
	memquery_t *mqp)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	int done_first_nonreloc;	/* first_nonrelocatable recorded? */

	mqp->phys_pages = 0;
	mqp->managed = 0;
	mqp->nonrelocatable = 0;
	mqp->first_nonrelocatable = 0;
	mqp->last_nonrelocatable = 0;

	/* Build the list of installed sub-spans within [base, base+npgs). */
	mdsp_new = span_to_install(base, npgs);
	/*
	 * It is OK to proceed here if mdsp_new == NULL.
	 */
	done_first_nonreloc = 0;
	for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) {
		pfn_t sbase;	/* current position within this sub-span */
		pgcnt_t snpgs;	/* pages remaining from sbase */

		mqp->phys_pages += mdsp->mds_npgs;
		sbase = mdsp->mds_base;
		snpgs = mdsp->mds_npgs;
		while (snpgs != 0) {
			struct memseg *lseg, *seg;
			pfn_t p_end;
			page_t *pp;
			pfn_t mseg_start;

			p_end = sbase + snpgs;
			/*
			 * Find the lowest addressed memseg that starts
			 * after sbase and account for it.
			 * This is to catch dynamic memsegs whose start
			 * is hidden.
			 */
			seg = NULL;
			for (lseg = memsegs; lseg != NULL; lseg = lseg->next) {
				if ((lseg->pages_base >= sbase) ||
				    (lseg->pages_base < p_end &&
				    lseg->pages_end > sbase)) {
					if (seg == NULL ||
					    seg->pages_base > lseg->pages_base)
						seg = lseg;
				}
			}
			if (seg != NULL) {
				/* Include any hidden page_t metadata area. */
				mseg_start = memseg_get_start(seg);
				/*
				 * Now have the full extent of the memseg so
				 * do the range check.
				 */
				if (mseg_start >= p_end ||
				    seg->pages_end <= sbase) {
					/* Span does not overlap memseg. */
					seg = NULL;
				}
			}
			/*
			 * Account for gap either before the segment if
			 * there is one or to the end of the span.
			 * (mseg_start is only read when seg != NULL, so it
			 * is always initialized here.)
			 */
			if (seg == NULL || mseg_start > sbase) {
				pfn_t a_end;

				a_end = (seg == NULL) ? p_end : mseg_start;
				/*
				 * Check with arch layer for relocatability.
				 */
				if (arch_kphysm_del_span_ok(sbase,
				    (a_end - sbase))) {
					/*
					 * No non-relocatble pages in this
					 * area, avoid the fine-grained
					 * test.
					 */
					snpgs -= (a_end - sbase);
					sbase = a_end;
				}
				/*
				 * Otherwise test each remaining pfn in the
				 * gap individually (loop is skipped when the
				 * coarse test above advanced sbase to a_end).
				 */
				while (sbase < a_end) {
					if (!arch_kphysm_del_span_ok(sbase,
					    1)) {
						mqp->nonrelocatable++;
						if (!done_first_nonreloc) {
							mqp->
							    first_nonrelocatable
							    = sbase;
							done_first_nonreloc = 1;
						}
						mqp->last_nonrelocatable =
						    sbase;
					}
					sbase++;
					snpgs--;
				}
			}
			if (seg != NULL) {
				ASSERT(mseg_start <= sbase);
				if (seg->pages_base != mseg_start &&
				    seg->pages_base > sbase) {
					pgcnt_t skip_pgs;

					/*
					 * Skip the page_t area of a
					 * dynamic memseg.
					 */
					skip_pgs = seg->pages_base - sbase;
					if (snpgs <= skip_pgs) {
						sbase += snpgs;
						snpgs = 0;
						continue;
					}
					snpgs -= skip_pgs;
					sbase += skip_pgs;
				}
				ASSERT(snpgs != 0);
				ASSERT(seg->pages_base <= sbase);
				/*
				 * The individual pages can now be checked.
				 * Pages in the kernel cage (PP_ISNORELOC)
				 * count as nonrelocatable.
				 */
				for (pp = seg->pages +
				    (sbase - seg->pages_base);
				    snpgs != 0 && pp < seg->epages; pp++) {
					mqp->managed++;
					if (PP_ISNORELOC(pp)) {
						mqp->nonrelocatable++;
						if (!done_first_nonreloc) {
							mqp->
							    first_nonrelocatable
							    = sbase;
							done_first_nonreloc = 1;
						}
						mqp->last_nonrelocatable =
						    sbase;
					}
					sbase++;
					snpgs--;
				}
			}
		}
	}

	/* Release the span list built by span_to_install(). */
	free_delspans(mdsp_new);

	return (KPHYSM_OK);
}
14207c478bd9Sstevel@tonic-gate 
/*
 * Release a memory-delete handle and free it.  This release function
 * can be called at any stage as follows:
 *	_gethandle only called		(state MHND_INIT)
 *	_span(s) only called		(state MHND_INIT)
 *	_start called but failed	(state MHND_DONE)
 *	delete thread exited		(state MHND_DONE)
 * It cannot be called while the delete thread is starting or running.
 * Returns KPHYSM_OK, or an error if the handle is invalid or the
 * delete is in the wrong state.
 */
int
kphysm_del_release(memhandle_t handle)
{
	struct mem_handle *mhp;

	/* On success, lookup returns with mh_mutex held. */
	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	switch (mhp->mh_state) {
	case MHND_STARTING:
	case MHND_RUNNING:
		/* Delete thread still active; caller must cancel/wait. */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOTFINISHED);
	case MHND_FREE:
		/* A free handle should never be found by lookup. */
		ASSERT(mhp->mh_state != MHND_FREE);
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	case MHND_INIT:
		break;
	case MHND_DONE:
		break;
	case MHND_RELEASE:
		/* Another release is already in progress. */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d",
		    (void *)mhp, mhp->mh_state);
#endif /* DEBUG */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	}
	/*
	 * Set state so that we can wait if necessary.
	 * Also this means that we have read/write access to all
	 * fields except mh_exthandle and mh_state.
	 */
	mhp->mh_state = MHND_RELEASE;
	/*
	 * The mem_handle cannot be de-allocated by any other operation
	 * now, so no need to hold mh_mutex.
	 */
	mutex_exit(&mhp->mh_mutex);

	/* Drop all spans and reset the handle for reuse. */
	delspan_remove(&mhp->mh_transit, 0, 0);
	mhp->mh_phys_pages = 0;
	mhp->mh_vm_pages = 0;
	mhp->mh_hold_todo = 0;
	mhp->mh_delete_complete = NULL;
	mhp->mh_delete_complete_arg = NULL;
	mhp->mh_cancel = 0;

	mutex_enter(&mhp->mh_mutex);
	ASSERT(mhp->mh_state == MHND_RELEASE);
	mhp->mh_state = MHND_FREE;

	/* Returns the handle to the free list; drops mh_mutex. */
	kphysm_free_mem_handle(mhp);

	return (KPHYSM_OK);
}
14897c478bd9Sstevel@tonic-gate 
14907c478bd9Sstevel@tonic-gate /*
14917c478bd9Sstevel@tonic-gate  * This cancel function can only be called with the thread running.
14927c478bd9Sstevel@tonic-gate  */
14937c478bd9Sstevel@tonic-gate int
kphysm_del_cancel(memhandle_t handle)14947c478bd9Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle)
14957c478bd9Sstevel@tonic-gate {
14967c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
14977c478bd9Sstevel@tonic-gate 
14987c478bd9Sstevel@tonic-gate 	mhp = kphysm_lookup_mem_handle(handle);
14997c478bd9Sstevel@tonic-gate 	if (mhp == NULL) {
15007c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
15017c478bd9Sstevel@tonic-gate 	}
15027c478bd9Sstevel@tonic-gate 	if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) {
15037c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
15047c478bd9Sstevel@tonic-gate 		return (KPHYSM_ENOTRUNNING);
15057c478bd9Sstevel@tonic-gate 	}
15067c478bd9Sstevel@tonic-gate 	/*
15077c478bd9Sstevel@tonic-gate 	 * Set the cancel flag and wake the delete thread up.
15087c478bd9Sstevel@tonic-gate 	 * The thread may be waiting on I/O, so the effect of the cancel
15097c478bd9Sstevel@tonic-gate 	 * may be delayed.
15107c478bd9Sstevel@tonic-gate 	 */
15117c478bd9Sstevel@tonic-gate 	if (mhp->mh_cancel == 0) {
15127c478bd9Sstevel@tonic-gate 		mhp->mh_cancel = KPHYSM_ECANCELLED;
15137c478bd9Sstevel@tonic-gate 		cv_signal(&mhp->mh_cv);
15147c478bd9Sstevel@tonic-gate 	}
15157c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
15167c478bd9Sstevel@tonic-gate 	return (KPHYSM_OK);
15177c478bd9Sstevel@tonic-gate }
15187c478bd9Sstevel@tonic-gate 
15197c478bd9Sstevel@tonic-gate int
kphysm_del_status(memhandle_t handle,memdelstat_t * mdstp)15207c478bd9Sstevel@tonic-gate kphysm_del_status(
15217c478bd9Sstevel@tonic-gate 	memhandle_t handle,
15227c478bd9Sstevel@tonic-gate 	memdelstat_t *mdstp)
15237c478bd9Sstevel@tonic-gate {
15247c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
15257c478bd9Sstevel@tonic-gate 
15267c478bd9Sstevel@tonic-gate 	mhp = kphysm_lookup_mem_handle(handle);
15277c478bd9Sstevel@tonic-gate 	if (mhp == NULL) {
15287c478bd9Sstevel@tonic-gate 		return (KPHYSM_EHANDLE);
15297c478bd9Sstevel@tonic-gate 	}
15307c478bd9Sstevel@tonic-gate 	/*
15317c478bd9Sstevel@tonic-gate 	 * Calling kphysm_del_status() is allowed before the delete
15327c478bd9Sstevel@tonic-gate 	 * is started to allow for status display.
15337c478bd9Sstevel@tonic-gate 	 */
15347c478bd9Sstevel@tonic-gate 	if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING &&
15357c478bd9Sstevel@tonic-gate 	    mhp->mh_state != MHND_RUNNING) {
15367c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
15377c478bd9Sstevel@tonic-gate 		return (KPHYSM_ENOTRUNNING);
15387c478bd9Sstevel@tonic-gate 	}
15397c478bd9Sstevel@tonic-gate 	mdstp->phys_pages = mhp->mh_phys_pages;
15407c478bd9Sstevel@tonic-gate 	mdstp->managed = mhp->mh_vm_pages;
15417c478bd9Sstevel@tonic-gate 	mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo;
15427c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
15437c478bd9Sstevel@tonic-gate 	return (KPHYSM_OK);
15447c478bd9Sstevel@tonic-gate }
15457c478bd9Sstevel@tonic-gate 
/*
 * Tunable pad added to tune.t_minarmem when deciding whether enough
 * pageable memory remains to permit a delete (see can_remove_pgs()).
 */
static int mem_delete_additional_pages = 100;
15477c478bd9Sstevel@tonic-gate 
15487c478bd9Sstevel@tonic-gate static int
can_remove_pgs(pgcnt_t npgs)15497c478bd9Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs)
15507c478bd9Sstevel@tonic-gate {
15517c478bd9Sstevel@tonic-gate 	/*
15527c478bd9Sstevel@tonic-gate 	 * If all pageable pages were paged out, freemem would
15537c478bd9Sstevel@tonic-gate 	 * equal availrmem.  There is a minimum requirement for
15547c478bd9Sstevel@tonic-gate 	 * availrmem.
15557c478bd9Sstevel@tonic-gate 	 */
15567c478bd9Sstevel@tonic-gate 	if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages))
15577c478bd9Sstevel@tonic-gate 	    < npgs)
15587c478bd9Sstevel@tonic-gate 		return (0);
15597c478bd9Sstevel@tonic-gate 	/* TODO: check swap space, etc. */
15607c478bd9Sstevel@tonic-gate 	return (1);
15617c478bd9Sstevel@tonic-gate }
15627c478bd9Sstevel@tonic-gate 
15637c478bd9Sstevel@tonic-gate static int
get_availrmem(pgcnt_t npgs)15647c478bd9Sstevel@tonic-gate get_availrmem(pgcnt_t npgs)
15657c478bd9Sstevel@tonic-gate {
15667c478bd9Sstevel@tonic-gate 	int ret;
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate 	mutex_enter(&freemem_lock);
15697c478bd9Sstevel@tonic-gate 	ret = can_remove_pgs(npgs);
15707c478bd9Sstevel@tonic-gate 	if (ret != 0)
15717c478bd9Sstevel@tonic-gate 		availrmem -= npgs;
15727c478bd9Sstevel@tonic-gate 	mutex_exit(&freemem_lock);
15737c478bd9Sstevel@tonic-gate 	return (ret);
15747c478bd9Sstevel@tonic-gate }
15757c478bd9Sstevel@tonic-gate 
/*
 * Return npgs previously reserved pages to availrmem — the inverse
 * of a successful get_availrmem(), used when a delete is undone.
 */
static void
put_availrmem(pgcnt_t npgs)
{
	mutex_enter(&freemem_lock);
	availrmem += npgs;
	mutex_exit(&freemem_lock);
}
15837c478bd9Sstevel@tonic-gate 
/* Batch size: pages requested from freemem per delthr_get_freemem() call. */
#define	FREEMEM_INCR	100
static pgcnt_t freemem_incr = FREEMEM_INCR;	/* tunable copy */
/* Delete thread waits ~1/4 second (in ticks) between free-page attempts. */
#define	DEL_FREE_WAIT_FRAC	4
#define	DEL_FREE_WAIT_TICKS	((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC)

/* Shorter ~1/20 second back-off interval (used outside this chunk). */
#define	DEL_BUSY_WAIT_FRAC	20
#define	DEL_BUSY_WAIT_TICKS	((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC)

/* Forward declarations for the delete thread's helpers. */
static void kphysm_del_cleanup(struct mem_handle *);

static void page_delete_collect(page_t *, struct mem_handle *);
15957c478bd9Sstevel@tonic-gate 
/*
 * Reserve up to freemem_incr pages (capped at mh_hold_todo) from
 * freemem on behalf of the delete thread, applying pageout pressure
 * and waiting as necessary.  Called and returns with mh_mutex held;
 * the lock is dropped and reacquired around the allocation attempts.
 * Returns the number of pages obtained, or 0 if the delete was
 * cancelled before the pages could be gathered.
 */
static pgcnt_t
delthr_get_freemem(struct mem_handle *mhp)
{
	pgcnt_t free_get;	/* pages to attempt to take this call */
	int ret;

	ASSERT(MUTEX_HELD(&mhp->mh_mutex));

	MDSTAT_INCR(mhp, need_free);
	/*
	 * Get up to freemem_incr pages.
	 */
	free_get = freemem_incr;
	if (free_get > mhp->mh_hold_todo)
		free_get = mhp->mh_hold_todo;
	/*
	 * Take free_get pages away from freemem,
	 * waiting if necessary.
	 */

	while (!mhp->mh_cancel) {
		mutex_exit(&mhp->mh_mutex);
		MDSTAT_INCR(mhp, free_loop);
		/*
		 * Duplicate test from page_create_throttle()
		 * but don't override with !PG_WAIT.
		 */
		if (freemem < (free_get + throttlefree)) {
			/* Too close to the throttle point; don't even try. */
			MDSTAT_INCR(mhp, free_low);
			ret = 0;
		} else {
			ret = page_create_wait(free_get, 0);
			if (ret == 0) {
				/* EMPTY */
				MDSTAT_INCR(mhp, free_failed);
			}
		}
		if (ret != 0) {
			/* Success: return with mh_mutex reacquired. */
			mutex_enter(&mhp->mh_mutex);
			return (free_get);
		}

		/*
		 * Put pressure on pageout.
		 */
		page_needfree(free_get);
		WAKE_PAGEOUT_SCANNER(delthr);

		/*
		 * Wait for pageout to make progress (or a cancel
		 * signal), then retract the needfree request before
		 * retrying.  mh_mutex is dropped across the
		 * page_needfree() call, matching the pattern above.
		 */
		mutex_enter(&mhp->mh_mutex);
		(void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
		    DEL_FREE_WAIT_TICKS, TR_CLOCK_TICK);
		mutex_exit(&mhp->mh_mutex);
		page_needfree(-(spgcnt_t)free_get);

		mutex_enter(&mhp->mh_mutex);
	}
	/* Cancelled: no pages obtained. */
	return (0);
}
16547c478bd9Sstevel@tonic-gate 
#define	DR_AIO_CLEANUP_DELAY	25000	/* 0.025secs, in usec */
#define	DR_AIO_CLEANUP_MAXLOOPS_NODELAY	100
/*
 * This function is run as a helper thread for delete_memory_thread.
 * It is needed in order to force kaio cleanup, so that pages used in kaio
 * will be unlocked and subsequently relocated by delete_memory_thread.
 * The address of the delete_memory_threads's mem_handle is passed in to
 * this thread function, and is used to set the mh_aio_cleanup_done member
 * prior to calling thread_exit().
 * The loop runs until mh_dr_aio_cleanup_cancel becomes nonzero; all
 * exits go through thread_exit() — this function never returns.
 */
static void
dr_aio_cleanup_thread(caddr_t amhp)
{
	proc_t *procp;
	int (*aio_cleanup_dr_delete_memory)(proc_t *);
	int cleaned;	/* nonzero if any proc had kaio cleaned this pass */
	int n = 0;	/* consecutive no-delay passes */
	struct mem_handle *mhp;
	volatile uint_t *pcancel;	/* cancel flag, written elsewhere */

	mhp = (struct mem_handle *)amhp;
	ASSERT(mhp != NULL);
	pcancel = &mhp->mh_dr_aio_cleanup_cancel;
	/* kaio is a loadable module; without it there is nothing to do. */
	if (modload("sys", "kaio") == -1) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio");
		thread_exit();
	}
	/* Resolve the cleanup entry point from the kaio module. */
	aio_cleanup_dr_delete_memory = (int (*)(proc_t *))
	    modgetsymvalue("aio_cleanup_dr_delete_memory", 0);
	if (aio_cleanup_dr_delete_memory == NULL) {
		mhp->mh_aio_cleanup_done = 1;
		cmn_err(CE_WARN,
	    "aio_cleanup_dr_delete_memory not found in kaio");
		thread_exit();
	}
	do {
		cleaned = 0;
		/* pidlock protects the practive process list walk. */
		mutex_enter(&pidlock);
		for (procp = practive; (*pcancel == 0) && (procp != NULL);
		    procp = procp->p_next) {
			mutex_enter(&procp->p_lock);
			if (procp->p_aio != NULL) {
				/* cleanup proc's outstanding kaio */
				cleaned +=
				    (*aio_cleanup_dr_delete_memory)(procp);
			}
			mutex_exit(&procp->p_lock);
		}
		mutex_exit(&pidlock);
		/*
		 * Pause between passes when nothing was cleaned, or
		 * after DR_AIO_CLEANUP_MAXLOOPS_NODELAY back-to-back
		 * productive passes, to avoid monopolizing pidlock.
		 */
		if ((*pcancel == 0) &&
		    (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) {
			/* delay a bit before retrying all procs again */
			delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
			n = 0;
		}
	} while (*pcancel == 0);
	mhp->mh_aio_cleanup_done = 1;
	thread_exit();
}
17157c478bd9Sstevel@tonic-gate 
17167c478bd9Sstevel@tonic-gate static void
delete_memory_thread(caddr_t amhp)17177c478bd9Sstevel@tonic-gate delete_memory_thread(caddr_t amhp)
17187c478bd9Sstevel@tonic-gate {
17197c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp;
17207c478bd9Sstevel@tonic-gate 	struct memdelspan *mdsp;
17217c478bd9Sstevel@tonic-gate 	callb_cpr_t cprinfo;
17227c478bd9Sstevel@tonic-gate 	page_t *pp_targ;
17237c478bd9Sstevel@tonic-gate 	spgcnt_t freemem_left;
17247c478bd9Sstevel@tonic-gate 	void (*del_complete_funcp)(void *, int error);
17257c478bd9Sstevel@tonic-gate 	void *del_complete_arg;
17267c478bd9Sstevel@tonic-gate 	int comp_code;
17277c478bd9Sstevel@tonic-gate 	int ret;
17287c478bd9Sstevel@tonic-gate 	int first_scan;
17297c478bd9Sstevel@tonic-gate 	uint_t szc;
17307c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17317c478bd9Sstevel@tonic-gate 	uint64_t start_total, ntick_total;
17327c478bd9Sstevel@tonic-gate 	uint64_t start_pgrp, ntick_pgrp;
17337c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17347c478bd9Sstevel@tonic-gate 
17357c478bd9Sstevel@tonic-gate 	mhp = (struct mem_handle *)amhp;
17367c478bd9Sstevel@tonic-gate 
17377c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17387c478bd9Sstevel@tonic-gate 	start_total = ddi_get_lbolt();
17397c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17407c478bd9Sstevel@tonic-gate 
17417c478bd9Sstevel@tonic-gate 	CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex,
17427c478bd9Sstevel@tonic-gate 	    callb_generic_cpr, "memdel");
17437c478bd9Sstevel@tonic-gate 
17447c478bd9Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
17457c478bd9Sstevel@tonic-gate 	ASSERT(mhp->mh_state == MHND_STARTING);
17467c478bd9Sstevel@tonic-gate 
17477c478bd9Sstevel@tonic-gate 	mhp->mh_state = MHND_RUNNING;
17487c478bd9Sstevel@tonic-gate 	mhp->mh_thread_id = curthread;
17497c478bd9Sstevel@tonic-gate 
17507c478bd9Sstevel@tonic-gate 	mhp->mh_hold_todo = mhp->mh_vm_pages;
17517c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
17527c478bd9Sstevel@tonic-gate 
17537c478bd9Sstevel@tonic-gate 	/* Allocate the remap pages now, if necessary. */
17547c478bd9Sstevel@tonic-gate 	memseg_remap_init();
17557c478bd9Sstevel@tonic-gate 
17567c478bd9Sstevel@tonic-gate 	/*
17577c478bd9Sstevel@tonic-gate 	 * Subtract from availrmem now if possible as availrmem
17587c478bd9Sstevel@tonic-gate 	 * may not be available by the end of the delete.
17597c478bd9Sstevel@tonic-gate 	 */
17607c478bd9Sstevel@tonic-gate 	if (!get_availrmem(mhp->mh_vm_pages)) {
17617c478bd9Sstevel@tonic-gate 		comp_code = KPHYSM_ENOTVIABLE;
17627c478bd9Sstevel@tonic-gate 		mutex_enter(&mhp->mh_mutex);
17637c478bd9Sstevel@tonic-gate 		goto early_exit;
17647c478bd9Sstevel@tonic-gate 	}
17657c478bd9Sstevel@tonic-gate 
17667c478bd9Sstevel@tonic-gate 	ret = kphysm_setup_pre_del(mhp->mh_vm_pages);
17677c478bd9Sstevel@tonic-gate 
17687c478bd9Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
17697c478bd9Sstevel@tonic-gate 
17707c478bd9Sstevel@tonic-gate 	if (ret != 0) {
17717c478bd9Sstevel@tonic-gate 		mhp->mh_cancel = KPHYSM_EREFUSED;
17727c478bd9Sstevel@tonic-gate 		goto refused;
17737c478bd9Sstevel@tonic-gate 	}
17747c478bd9Sstevel@tonic-gate 
17757c478bd9Sstevel@tonic-gate 	transit_list_collect(mhp, 1);
17767c478bd9Sstevel@tonic-gate 
17777c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
17787c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
17797c478bd9Sstevel@tonic-gate 		ASSERT(mdsp->mds_bitmap == NULL);
17807c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP);
17817c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp),
178273347c69Smb 		    KM_SLEEP);
17837c478bd9Sstevel@tonic-gate 	}
17847c478bd9Sstevel@tonic-gate 
17857c478bd9Sstevel@tonic-gate 	first_scan = 1;
17867c478bd9Sstevel@tonic-gate 	freemem_left = 0;
17877c478bd9Sstevel@tonic-gate 	/*
17887c478bd9Sstevel@tonic-gate 	 * Start dr_aio_cleanup_thread, which periodically iterates
17897c478bd9Sstevel@tonic-gate 	 * through the process list and invokes aio cleanup.  This
17907c478bd9Sstevel@tonic-gate 	 * is needed in order to avoid a deadly embrace between the
17917c478bd9Sstevel@tonic-gate 	 * delete_memory_thread (waiting on writer lock for page, with the
17927c478bd9Sstevel@tonic-gate 	 * exclusive-wanted bit set), kaio read request threads (waiting for a
17937c478bd9Sstevel@tonic-gate 	 * reader lock on the same page that is wanted by the
17947c478bd9Sstevel@tonic-gate 	 * delete_memory_thread), and threads waiting for kaio completion
17957c478bd9Sstevel@tonic-gate 	 * (blocked on spt_amp->lock).
17967c478bd9Sstevel@tonic-gate 	 */
17977c478bd9Sstevel@tonic-gate 	mhp->mh_dr_aio_cleanup_cancel = 0;
17987c478bd9Sstevel@tonic-gate 	mhp->mh_aio_cleanup_done = 0;
17997c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, dr_aio_cleanup_thread,
18007c478bd9Sstevel@tonic-gate 	    (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1);
18017c478bd9Sstevel@tonic-gate 	while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) {
18027c478bd9Sstevel@tonic-gate 		pgcnt_t collected;
18037c478bd9Sstevel@tonic-gate 
18047c478bd9Sstevel@tonic-gate 		MDSTAT_INCR(mhp, nloop);
18057c478bd9Sstevel@tonic-gate 		collected = 0;
18067c478bd9Sstevel@tonic-gate 		for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) &&
18077c478bd9Sstevel@tonic-gate 		    (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) {
18087c478bd9Sstevel@tonic-gate 			pfn_t pfn, p_end;
18097c478bd9Sstevel@tonic-gate 
18107c478bd9Sstevel@tonic-gate 			p_end = mdsp->mds_base + mdsp->mds_npgs;
18117c478bd9Sstevel@tonic-gate 			for (pfn = mdsp->mds_base; (pfn < p_end) &&
18127c478bd9Sstevel@tonic-gate 			    (mhp->mh_cancel == 0); pfn++) {
18137c478bd9Sstevel@tonic-gate 				page_t *pp, *tpp, *tpp_targ;
18147c478bd9Sstevel@tonic-gate 				pgcnt_t bit;
18157c478bd9Sstevel@tonic-gate 				struct vnode *vp;
18167c478bd9Sstevel@tonic-gate 				u_offset_t offset;
18177c478bd9Sstevel@tonic-gate 				int mod, result;
18187c478bd9Sstevel@tonic-gate 				spgcnt_t pgcnt;
18197c478bd9Sstevel@tonic-gate 
18207c478bd9Sstevel@tonic-gate 				bit = pfn - mdsp->mds_base;
18217c478bd9Sstevel@tonic-gate 				if ((mdsp->mds_bitmap[bit / NBPBMW] &
18227c478bd9Sstevel@tonic-gate 				    (1 << (bit % NBPBMW))) != 0) {
18237c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, already_done);
18247c478bd9Sstevel@tonic-gate 					continue;
18257c478bd9Sstevel@tonic-gate 				}
18267c478bd9Sstevel@tonic-gate 				if (freemem_left == 0) {
18277c478bd9Sstevel@tonic-gate 					freemem_left += delthr_get_freemem(mhp);
18287c478bd9Sstevel@tonic-gate 					if (freemem_left == 0)
18297c478bd9Sstevel@tonic-gate 						break;
18307c478bd9Sstevel@tonic-gate 				}
18317c478bd9Sstevel@tonic-gate 
18327c478bd9Sstevel@tonic-gate 				/*
18337c478bd9Sstevel@tonic-gate 				 * Release mh_mutex - some of this
18347c478bd9Sstevel@tonic-gate 				 * stuff takes some time (eg PUTPAGE).
18357c478bd9Sstevel@tonic-gate 				 */
18367c478bd9Sstevel@tonic-gate 
18377c478bd9Sstevel@tonic-gate 				mutex_exit(&mhp->mh_mutex);
18387c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, ncheck);
18397c478bd9Sstevel@tonic-gate 
18407c478bd9Sstevel@tonic-gate 				pp = page_numtopp_nolock(pfn);
18417c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
18427c478bd9Sstevel@tonic-gate 					/*
18437c478bd9Sstevel@tonic-gate 					 * Not covered by a page_t - will
18447c478bd9Sstevel@tonic-gate 					 * be dealt with elsewhere.
18457c478bd9Sstevel@tonic-gate 					 */
18467c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nopaget);
18477c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
18487c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
18497c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
18507c478bd9Sstevel@tonic-gate 					continue;
18517c478bd9Sstevel@tonic-gate 				}
18527c478bd9Sstevel@tonic-gate 
18537c478bd9Sstevel@tonic-gate 				if (!page_try_reclaim_lock(pp, SE_EXCL,
1854db874c57Selowe 				    SE_EXCL_WANTED | SE_RETIRED)) {
1855db874c57Selowe 					/*
1856db874c57Selowe 					 * Page in use elsewhere.  Skip it.
1857db874c57Selowe 					 */
1858db874c57Selowe 					MDSTAT_INCR(mhp, lockfail);
1859db874c57Selowe 					mutex_enter(&mhp->mh_mutex);
1860db874c57Selowe 					continue;
18617c478bd9Sstevel@tonic-gate 				}
18627c478bd9Sstevel@tonic-gate 				/*
18637c478bd9Sstevel@tonic-gate 				 * See if the cage expanded into the delete.
18647c478bd9Sstevel@tonic-gate 				 * This can happen as we have to allow the
18657c478bd9Sstevel@tonic-gate 				 * cage to expand.
18667c478bd9Sstevel@tonic-gate 				 */
18677c478bd9Sstevel@tonic-gate 				if (PP_ISNORELOC(pp)) {
1868db874c57Selowe 					page_unlock(pp);
18697c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
18707c478bd9Sstevel@tonic-gate 					mhp->mh_cancel = KPHYSM_ENONRELOC;
18717c478bd9Sstevel@tonic-gate 					break;
18727c478bd9Sstevel@tonic-gate 				}
1873db874c57Selowe 				if (PP_RETIRED(pp)) {
18747c478bd9Sstevel@tonic-gate 					/*
18757c478bd9Sstevel@tonic-gate 					 * Page has been retired and is
18767c478bd9Sstevel@tonic-gate 					 * not part of the cage so we
18777c478bd9Sstevel@tonic-gate 					 * can now do the accounting for
18787c478bd9Sstevel@tonic-gate 					 * it.
18797c478bd9Sstevel@tonic-gate 					 */
18807c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, retired);
18817c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
18827c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW]
18837c478bd9Sstevel@tonic-gate 					    |= (1 << (bit % NBPBMW));
18847c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap_retired[bit /
18857c478bd9Sstevel@tonic-gate 					    NBPBMW] |=
18867c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
18877c478bd9Sstevel@tonic-gate 					mhp->mh_hold_todo--;
18887c478bd9Sstevel@tonic-gate 					continue;
18897c478bd9Sstevel@tonic-gate 				}
18907c478bd9Sstevel@tonic-gate 				ASSERT(freemem_left != 0);
18917c478bd9Sstevel@tonic-gate 				if (PP_ISFREE(pp)) {
18927c478bd9Sstevel@tonic-gate 					/*
18937c478bd9Sstevel@tonic-gate 					 * Like page_reclaim() only 'freemem'
18947c478bd9Sstevel@tonic-gate 					 * processing is already done.
18957c478bd9Sstevel@tonic-gate 					 */
18967c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nfree);
18977c478bd9Sstevel@tonic-gate 				free_page_collect:
18987c478bd9Sstevel@tonic-gate 					if (PP_ISAGED(pp)) {
18997c478bd9Sstevel@tonic-gate 						page_list_sub(pp,
19007c478bd9Sstevel@tonic-gate 						    PG_FREE_LIST);
19017c478bd9Sstevel@tonic-gate 					} else {
19027c478bd9Sstevel@tonic-gate 						page_list_sub(pp,
19037c478bd9Sstevel@tonic-gate 						    PG_CACHE_LIST);
19047c478bd9Sstevel@tonic-gate 					}
19057c478bd9Sstevel@tonic-gate 					PP_CLRFREE(pp);
19067c478bd9Sstevel@tonic-gate 					PP_CLRAGED(pp);
19077c478bd9Sstevel@tonic-gate 					collected++;
19087c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
19097c478bd9Sstevel@tonic-gate 					page_delete_collect(pp, mhp);
19107c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
19117c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
19127c478bd9Sstevel@tonic-gate 					freemem_left--;
19137c478bd9Sstevel@tonic-gate 					continue;
19147c478bd9Sstevel@tonic-gate 				}
19157c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_vnode != NULL);
19167c478bd9Sstevel@tonic-gate 				if (first_scan) {
19177c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, first_notfree);
19187c478bd9Sstevel@tonic-gate 					page_unlock(pp);
19197c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
19207c478bd9Sstevel@tonic-gate 					continue;
19217c478bd9Sstevel@tonic-gate 				}
19227c478bd9Sstevel@tonic-gate 				/*
19237c478bd9Sstevel@tonic-gate 				 * Keep stats on pages encountered that
1924db874c57Selowe 				 * are marked for retirement.
19257c478bd9Sstevel@tonic-gate 				 */
1926db874c57Selowe 				if (PP_TOXIC(pp)) {
19277c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, toxic);
1928db874c57Selowe 				} else if (PP_PR_REQ(pp)) {
19297c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, failing);
19307c478bd9Sstevel@tonic-gate 				}
19317c478bd9Sstevel@tonic-gate 				/*
19327c478bd9Sstevel@tonic-gate 				 * In certain cases below, special exceptions
19337c478bd9Sstevel@tonic-gate 				 * are made for pages that are toxic.  This
19347c478bd9Sstevel@tonic-gate 				 * is because the current meaning of toxic
19357c478bd9Sstevel@tonic-gate 				 * is that an uncorrectable error has been
19367c478bd9Sstevel@tonic-gate 				 * previously associated with the page.
19377c478bd9Sstevel@tonic-gate 				 */
19387c478bd9Sstevel@tonic-gate 				if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
1939db874c57Selowe 					if (!PP_TOXIC(pp)) {
19407c478bd9Sstevel@tonic-gate 						/*
19417c478bd9Sstevel@tonic-gate 						 * Must relocate locked in
19427c478bd9Sstevel@tonic-gate 						 * memory pages.
19437c478bd9Sstevel@tonic-gate 						 */
19447c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19457c478bd9Sstevel@tonic-gate 						start_pgrp = ddi_get_lbolt();
19467c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19477c478bd9Sstevel@tonic-gate 						/*
19487c478bd9Sstevel@tonic-gate 						 * Lock all constituent pages
19497c478bd9Sstevel@tonic-gate 						 * of a large page to ensure
19507c478bd9Sstevel@tonic-gate 						 * that p_szc won't change.
19517c478bd9Sstevel@tonic-gate 						 */
19527c478bd9Sstevel@tonic-gate 						if (!group_page_trylock(pp,
19537c478bd9Sstevel@tonic-gate 						    SE_EXCL)) {
19547c478bd9Sstevel@tonic-gate 							MDSTAT_INCR(mhp,
19557c478bd9Sstevel@tonic-gate 							    gptllckfail);
19567c478bd9Sstevel@tonic-gate 							page_unlock(pp);
19577c478bd9Sstevel@tonic-gate 							mutex_enter(
19587c478bd9Sstevel@tonic-gate 							    &mhp->mh_mutex);
19597c478bd9Sstevel@tonic-gate 							continue;
19607c478bd9Sstevel@tonic-gate 						}
19617c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, npplocked);
19627c478bd9Sstevel@tonic-gate 						pp_targ =
19637c478bd9Sstevel@tonic-gate 						    page_get_replacement_page(
196473347c69Smb 						    pp, NULL, 0);
19657c478bd9Sstevel@tonic-gate 						if (pp_targ != NULL) {
19667c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19677c478bd9Sstevel@tonic-gate 							ntick_pgrp =
19687c478bd9Sstevel@tonic-gate 							    (uint64_t)
19697c478bd9Sstevel@tonic-gate 							    ddi_get_lbolt() -
19707c478bd9Sstevel@tonic-gate 							    start_pgrp;
19717c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19727c478bd9Sstevel@tonic-gate 							MDSTAT_PGRP(mhp,
19737c478bd9Sstevel@tonic-gate 							    ntick_pgrp);
19747c478bd9Sstevel@tonic-gate 							MDSTAT_INCR(mhp,
19757c478bd9Sstevel@tonic-gate 							    nlockreloc);
19767c478bd9Sstevel@tonic-gate 							goto reloc;
19777c478bd9Sstevel@tonic-gate 						}
19787c478bd9Sstevel@tonic-gate 						group_page_unlock(pp);
19797c478bd9Sstevel@tonic-gate 						page_unlock(pp);
19807c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19817c478bd9Sstevel@tonic-gate 						ntick_pgrp =
19827c478bd9Sstevel@tonic-gate 						    (uint64_t)ddi_get_lbolt() -
19837c478bd9Sstevel@tonic-gate 						    start_pgrp;
19847c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19857c478bd9Sstevel@tonic-gate 						MDSTAT_PGRP(mhp, ntick_pgrp);
19867c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, nnorepl);
19877c478bd9Sstevel@tonic-gate 						mutex_enter(&mhp->mh_mutex);
19887c478bd9Sstevel@tonic-gate 						continue;
19897c478bd9Sstevel@tonic-gate 					} else {
19907c478bd9Sstevel@tonic-gate 						/*
19917c478bd9Sstevel@tonic-gate 						 * Cannot do anything about
19927c478bd9Sstevel@tonic-gate 						 * this page because it is
19937c478bd9Sstevel@tonic-gate 						 * toxic.
19947c478bd9Sstevel@tonic-gate 						 */
19957c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, npplkdtoxic);
19967c478bd9Sstevel@tonic-gate 						page_unlock(pp);
19977c478bd9Sstevel@tonic-gate 						mutex_enter(&mhp->mh_mutex);
19987c478bd9Sstevel@tonic-gate 						continue;
19997c478bd9Sstevel@tonic-gate 					}
20007c478bd9Sstevel@tonic-gate 				}
20017c478bd9Sstevel@tonic-gate 				/*
20027c478bd9Sstevel@tonic-gate 				 * Unload the mappings and check if mod bit
20037c478bd9Sstevel@tonic-gate 				 * is set.
20047c478bd9Sstevel@tonic-gate 				 */
2005ad23a2dbSjohansen 				ASSERT(!PP_ISKAS(pp));
20067c478bd9Sstevel@tonic-gate 				(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
20077c478bd9Sstevel@tonic-gate 				mod = hat_ismod(pp);
20087c478bd9Sstevel@tonic-gate 
20097c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20107c478bd9Sstevel@tonic-gate 				start_pgrp = ddi_get_lbolt();
20117c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
2012db874c57Selowe 				if (mod && !PP_TOXIC(pp)) {
20137c478bd9Sstevel@tonic-gate 					/*
20147c478bd9Sstevel@tonic-gate 					 * Lock all constituent pages
20157c478bd9Sstevel@tonic-gate 					 * of a large page to ensure
20167c478bd9Sstevel@tonic-gate 					 * that p_szc won't change.
20177c478bd9Sstevel@tonic-gate 					 */
20187c478bd9Sstevel@tonic-gate 					if (!group_page_trylock(pp, SE_EXCL)) {
20197c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, gptlmodfail);
20207c478bd9Sstevel@tonic-gate 						page_unlock(pp);
20217c478bd9Sstevel@tonic-gate 						mutex_enter(&mhp->mh_mutex);
20227c478bd9Sstevel@tonic-gate 						continue;
20237c478bd9Sstevel@tonic-gate 					}
20247c478bd9Sstevel@tonic-gate 					pp_targ = page_get_replacement_page(pp,
20257c478bd9Sstevel@tonic-gate 					    NULL, 0);
20267c478bd9Sstevel@tonic-gate 					if (pp_targ != NULL) {
20277c478bd9Sstevel@tonic-gate 						MDSTAT_INCR(mhp, nmodreloc);
20287c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20297c478bd9Sstevel@tonic-gate 						ntick_pgrp =
20307c478bd9Sstevel@tonic-gate 						    (uint64_t)ddi_get_lbolt() -
203173347c69Smb 						    start_pgrp;
20327c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20337c478bd9Sstevel@tonic-gate 						MDSTAT_PGRP(mhp, ntick_pgrp);
20347c478bd9Sstevel@tonic-gate 						goto reloc;
20357c478bd9Sstevel@tonic-gate 					}
20367c478bd9Sstevel@tonic-gate 					group_page_unlock(pp);
20377c478bd9Sstevel@tonic-gate 				}
20387c478bd9Sstevel@tonic-gate 
20397c478bd9Sstevel@tonic-gate 				if (!page_try_demote_pages(pp)) {
20407c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, demotefail);
20417c478bd9Sstevel@tonic-gate 					page_unlock(pp);
20427c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20437c478bd9Sstevel@tonic-gate 					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20447c478bd9Sstevel@tonic-gate 					    start_pgrp;
20457c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20467c478bd9Sstevel@tonic-gate 					MDSTAT_PGRP(mhp, ntick_pgrp);
20477c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
20487c478bd9Sstevel@tonic-gate 					continue;
20497c478bd9Sstevel@tonic-gate 				}
20507c478bd9Sstevel@tonic-gate 
20517c478bd9Sstevel@tonic-gate 				/*
20527c478bd9Sstevel@tonic-gate 				 * Regular 'page-out'.
20537c478bd9Sstevel@tonic-gate 				 */
20547c478bd9Sstevel@tonic-gate 				if (!mod) {
20557c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, ndestroy);
20567c478bd9Sstevel@tonic-gate 					page_destroy(pp, 1);
20577c478bd9Sstevel@tonic-gate 					/*
20587c478bd9Sstevel@tonic-gate 					 * page_destroy was called with
20597c478bd9Sstevel@tonic-gate 					 * dontfree. As long as p_lckcnt
20607c478bd9Sstevel@tonic-gate 					 * and p_cowcnt are both zero, the
20617c478bd9Sstevel@tonic-gate 					 * only additional action of
20627c478bd9Sstevel@tonic-gate 					 * page_destroy with !dontfree is to
20637c478bd9Sstevel@tonic-gate 					 * call page_free, so we can collect
20647c478bd9Sstevel@tonic-gate 					 * the page here.
20657c478bd9Sstevel@tonic-gate 					 */
20667c478bd9Sstevel@tonic-gate 					collected++;
20677c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20687c478bd9Sstevel@tonic-gate 					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20697c478bd9Sstevel@tonic-gate 					    start_pgrp;
20707c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20717c478bd9Sstevel@tonic-gate 					MDSTAT_PGRP(mhp, ntick_pgrp);
20727c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
20737c478bd9Sstevel@tonic-gate 					page_delete_collect(pp, mhp);
20747c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
20757c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW));
20767c478bd9Sstevel@tonic-gate 					continue;
20777c478bd9Sstevel@tonic-gate 				}
20787c478bd9Sstevel@tonic-gate 				/*
20797c478bd9Sstevel@tonic-gate 				 * The page is toxic and the mod bit is
20807c478bd9Sstevel@tonic-gate 				 * set, we cannot do anything here to deal
20817c478bd9Sstevel@tonic-gate 				 * with it.
20827c478bd9Sstevel@tonic-gate 				 */
2083db874c57Selowe 				if (PP_TOXIC(pp)) {
20847c478bd9Sstevel@tonic-gate 					page_unlock(pp);
20857c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20867c478bd9Sstevel@tonic-gate 					ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20877c478bd9Sstevel@tonic-gate 					    start_pgrp;
20887c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20897c478bd9Sstevel@tonic-gate 					MDSTAT_PGRP(mhp, ntick_pgrp);
20907c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, modtoxic);
20917c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
20927c478bd9Sstevel@tonic-gate 					continue;
20937c478bd9Sstevel@tonic-gate 				}
20947c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, nputpage);
20957c478bd9Sstevel@tonic-gate 				vp = pp->p_vnode;
20967c478bd9Sstevel@tonic-gate 				offset = pp->p_offset;
20977c478bd9Sstevel@tonic-gate 				VN_HOLD(vp);
20987c478bd9Sstevel@tonic-gate 				page_unlock(pp);
20997c478bd9Sstevel@tonic-gate 				(void) VOP_PUTPAGE(vp, offset, PAGESIZE,
2100da6c28aaSamw 				    B_INVAL|B_FORCE, kcred, NULL);
21017c478bd9Sstevel@tonic-gate 				VN_RELE(vp);
21027c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21037c478bd9Sstevel@tonic-gate 				ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21047c478bd9Sstevel@tonic-gate 				    start_pgrp;
21057c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21067c478bd9Sstevel@tonic-gate 				MDSTAT_PGRP(mhp, ntick_pgrp);
21077c478bd9Sstevel@tonic-gate 				/*
21087c478bd9Sstevel@tonic-gate 				 * Try to get the page back immediately
21097c478bd9Sstevel@tonic-gate 				 * so that it can be collected.
21107c478bd9Sstevel@tonic-gate 				 */
21117c478bd9Sstevel@tonic-gate 				pp = page_numtopp_nolock(pfn);
21127c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
21137c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nnoreclaim);
21147c478bd9Sstevel@tonic-gate 					/*
21157c478bd9Sstevel@tonic-gate 					 * This should not happen as this
21167c478bd9Sstevel@tonic-gate 					 * thread is deleting the page.
21177c478bd9Sstevel@tonic-gate 					 * If this code is generalized, this
21187c478bd9Sstevel@tonic-gate 					 * becomes a reality.
21197c478bd9Sstevel@tonic-gate 					 */
21207c478bd9Sstevel@tonic-gate #ifdef DEBUG
21217c478bd9Sstevel@tonic-gate 					cmn_err(CE_WARN,
21227c478bd9Sstevel@tonic-gate 					    "delete_memory_thread(0x%p) "
21237c478bd9Sstevel@tonic-gate 					    "pfn 0x%lx has no page_t",
21247c478bd9Sstevel@tonic-gate 					    (void *)mhp, pfn);
21257c478bd9Sstevel@tonic-gate #endif /* DEBUG */
21267c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
21277c478bd9Sstevel@tonic-gate 					continue;
21287c478bd9Sstevel@tonic-gate 				}
21297c478bd9Sstevel@tonic-gate 				if (page_try_reclaim_lock(pp, SE_EXCL,
2130db874c57Selowe 				    SE_EXCL_WANTED | SE_RETIRED)) {
21317c478bd9Sstevel@tonic-gate 					if (PP_ISFREE(pp)) {
21327c478bd9Sstevel@tonic-gate 						goto free_page_collect;
21337c478bd9Sstevel@tonic-gate 					}
21347c478bd9Sstevel@tonic-gate 					page_unlock(pp);
21357c478bd9Sstevel@tonic-gate 				}
21367c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, nnoreclaim);
21377c478bd9Sstevel@tonic-gate 				mutex_enter(&mhp->mh_mutex);
21387c478bd9Sstevel@tonic-gate 				continue;
21397c478bd9Sstevel@tonic-gate 
21407c478bd9Sstevel@tonic-gate 			reloc:
21417c478bd9Sstevel@tonic-gate 				/*
21427c478bd9Sstevel@tonic-gate 				 * Got some freemem and a target
21437c478bd9Sstevel@tonic-gate 				 * page, so move the data to avoid
21447c478bd9Sstevel@tonic-gate 				 * I/O and lock problems.
21457c478bd9Sstevel@tonic-gate 				 */
21467c478bd9Sstevel@tonic-gate 				ASSERT(!page_iolock_assert(pp));
21477c478bd9Sstevel@tonic-gate 				MDSTAT_INCR(mhp, nreloc);
21487c478bd9Sstevel@tonic-gate 				/*
21497c478bd9Sstevel@tonic-gate 				 * page_relocate() will return pgcnt: the
21507c478bd9Sstevel@tonic-gate 				 * number of consecutive pages relocated.
21517c478bd9Sstevel@tonic-gate 				 * If it is successful, pp will be a
21527c478bd9Sstevel@tonic-gate 				 * linked list of the page structs that
21537c478bd9Sstevel@tonic-gate 				 * were relocated. If page_relocate() is
21547c478bd9Sstevel@tonic-gate 				 * unsuccessful, pp will be unmodified.
21557c478bd9Sstevel@tonic-gate 				 */
21567c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21577c478bd9Sstevel@tonic-gate 				start_pgrp = ddi_get_lbolt();
21587c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21597c478bd9Sstevel@tonic-gate 				result = page_relocate(&pp, &pp_targ, 0, 0,
21607c478bd9Sstevel@tonic-gate 				    &pgcnt, NULL);
21617c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21627c478bd9Sstevel@tonic-gate 				ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21637c478bd9Sstevel@tonic-gate 				    start_pgrp;
21647c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21657c478bd9Sstevel@tonic-gate 				MDSTAT_PGRP(mhp, ntick_pgrp);
21667c478bd9Sstevel@tonic-gate 				if (result != 0) {
21677c478bd9Sstevel@tonic-gate 					MDSTAT_INCR(mhp, nrelocfail);
21687c478bd9Sstevel@tonic-gate 					/*
21697c478bd9Sstevel@tonic-gate 					 * We did not succeed. We need
21707c478bd9Sstevel@tonic-gate 					 * to give the pp_targ pages back.
21717c478bd9Sstevel@tonic-gate 					 * page_free(pp_targ, 1) without
21727c478bd9Sstevel@tonic-gate 					 * the freemem accounting.
21737c478bd9Sstevel@tonic-gate 					 */
21747c478bd9Sstevel@tonic-gate 					group_page_unlock(pp);
21757c478bd9Sstevel@tonic-gate 					page_free_replacement_page(pp_targ);
21767c478bd9Sstevel@tonic-gate 					page_unlock(pp);
21777c478bd9Sstevel@tonic-gate 					mutex_enter(&mhp->mh_mutex);
21787c478bd9Sstevel@tonic-gate 					continue;
21797c478bd9Sstevel@tonic-gate 				}
21807c478bd9Sstevel@tonic-gate 
21817c478bd9Sstevel@tonic-gate 				/*
21827c478bd9Sstevel@tonic-gate 				 * We will then collect pgcnt pages.
21837c478bd9Sstevel@tonic-gate 				 */
21847c478bd9Sstevel@tonic-gate 				ASSERT(pgcnt > 0);
21857c478bd9Sstevel@tonic-gate 				mutex_enter(&mhp->mh_mutex);
21867c478bd9Sstevel@tonic-gate 				/*
21877c478bd9Sstevel@tonic-gate 				 * We need to make sure freemem_left is
21887c478bd9Sstevel@tonic-gate 				 * large enough.
21897c478bd9Sstevel@tonic-gate 				 */
21907c478bd9Sstevel@tonic-gate 				while ((freemem_left < pgcnt) &&
219173347c69Smb 				    (!mhp->mh_cancel)) {
21927c478bd9Sstevel@tonic-gate 					freemem_left +=
219373347c69Smb 					    delthr_get_freemem(mhp);
21947c478bd9Sstevel@tonic-gate 				}
21957c478bd9Sstevel@tonic-gate 
21967c478bd9Sstevel@tonic-gate 				/*
21977c478bd9Sstevel@tonic-gate 				 * Do not proceed if mh_cancel is set.
21987c478bd9Sstevel@tonic-gate 				 */
21997c478bd9Sstevel@tonic-gate 				if (mhp->mh_cancel) {
22007c478bd9Sstevel@tonic-gate 					while (pp_targ != NULL) {
22017c478bd9Sstevel@tonic-gate 						/*
22027c478bd9Sstevel@tonic-gate 						 * Unlink and unlock each page.
22037c478bd9Sstevel@tonic-gate 						 */
22047c478bd9Sstevel@tonic-gate 						tpp_targ = pp_targ;
22057c478bd9Sstevel@tonic-gate 						page_sub(&pp_targ, tpp_targ);
22067c478bd9Sstevel@tonic-gate 						page_unlock(tpp_targ);
22077c478bd9Sstevel@tonic-gate 					}
22087c478bd9Sstevel@tonic-gate 					/*
22097c478bd9Sstevel@tonic-gate 					 * We need to give the pp pages back.
22107c478bd9Sstevel@tonic-gate 					 * page_free(pp, 1) without the
22117c478bd9Sstevel@tonic-gate 					 * freemem accounting.
22127c478bd9Sstevel@tonic-gate 					 */
22137c478bd9Sstevel@tonic-gate 					page_free_replacement_page(pp);
22147c478bd9Sstevel@tonic-gate 					break;
22157c478bd9Sstevel@tonic-gate 				}
22167c478bd9Sstevel@tonic-gate 
22177c478bd9Sstevel@tonic-gate 				/* Now remove pgcnt from freemem_left */
22187c478bd9Sstevel@tonic-gate 				freemem_left -= pgcnt;
22197c478bd9Sstevel@tonic-gate 				ASSERT(freemem_left >= 0);
22207c478bd9Sstevel@tonic-gate 				szc = pp->p_szc;
22217c478bd9Sstevel@tonic-gate 				while (pp != NULL) {
22227c478bd9Sstevel@tonic-gate 					/*
22237c478bd9Sstevel@tonic-gate 					 * pp and pp_targ were passed back as
22247c478bd9Sstevel@tonic-gate 					 * a linked list of pages.
22257c478bd9Sstevel@tonic-gate 					 * Unlink and unlock each page.
22267c478bd9Sstevel@tonic-gate 					 */
22277c478bd9Sstevel@tonic-gate 					tpp_targ = pp_targ;
22287c478bd9Sstevel@tonic-gate 					page_sub(&pp_targ, tpp_targ);
22297c478bd9Sstevel@tonic-gate 					page_unlock(tpp_targ);
22307c478bd9Sstevel@tonic-gate 					/*
22317c478bd9Sstevel@tonic-gate 					 * The original page is now free
22327c478bd9Sstevel@tonic-gate 					 * so remove it from the linked
22337c478bd9Sstevel@tonic-gate 					 * list and collect it.
22347c478bd9Sstevel@tonic-gate 					 */
22357c478bd9Sstevel@tonic-gate 					tpp = pp;
22367c478bd9Sstevel@tonic-gate 					page_sub(&pp, tpp);
22377c478bd9Sstevel@tonic-gate 					pfn = page_pptonum(tpp);
22387c478bd9Sstevel@tonic-gate 					collected++;
22397c478bd9Sstevel@tonic-gate 					ASSERT(PAGE_EXCL(tpp));
22407c478bd9Sstevel@tonic-gate 					ASSERT(tpp->p_vnode == NULL);
22417c478bd9Sstevel@tonic-gate 					ASSERT(!hat_page_is_mapped(tpp));
22427c478bd9Sstevel@tonic-gate 					ASSERT(tpp->p_szc == szc);
22437c478bd9Sstevel@tonic-gate 					tpp->p_szc = 0;
22447c478bd9Sstevel@tonic-gate 					page_delete_collect(tpp, mhp);
22457c478bd9Sstevel@tonic-gate 					bit = pfn - mdsp->mds_base;
22467c478bd9Sstevel@tonic-gate 					mdsp->mds_bitmap[bit / NBPBMW] |=
224773347c69Smb 					    (1 << (bit % NBPBMW));
22487c478bd9Sstevel@tonic-gate 				}
22497c478bd9Sstevel@tonic-gate 				ASSERT(pp_targ == NULL);
22507c478bd9Sstevel@tonic-gate 			}
22517c478bd9Sstevel@tonic-gate 		}
22527c478bd9Sstevel@tonic-gate 		first_scan = 0;
22537c478bd9Sstevel@tonic-gate 		if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) &&
225473347c69Smb 		    (collected == 0)) {
22557c478bd9Sstevel@tonic-gate 			/*
22567c478bd9Sstevel@tonic-gate 			 * This code is needed as we cannot wait
22577c478bd9Sstevel@tonic-gate 			 * for a page to be locked OR the delete to
22587c478bd9Sstevel@tonic-gate 			 * be cancelled.  Also, we must delay so
22597c478bd9Sstevel@tonic-gate 			 * that other threads get a chance to run
22607c478bd9Sstevel@tonic-gate 			 * on our cpu, otherwise page locks may be
22617c478bd9Sstevel@tonic-gate 			 * held indefinitely by those threads.
22627c478bd9Sstevel@tonic-gate 			 */
22637c478bd9Sstevel@tonic-gate 			MDSTAT_INCR(mhp, ndelay);
22647c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
2265d3d50737SRafael Vanoni 			(void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
2266d3d50737SRafael Vanoni 			    DEL_BUSY_WAIT_TICKS, TR_CLOCK_TICK);
22677c478bd9Sstevel@tonic-gate 			CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
22687c478bd9Sstevel@tonic-gate 		}
22697c478bd9Sstevel@tonic-gate 	}
22707c478bd9Sstevel@tonic-gate 	/* stop the dr aio cleanup thread */
22717c478bd9Sstevel@tonic-gate 	mhp->mh_dr_aio_cleanup_cancel = 1;
22727c478bd9Sstevel@tonic-gate 	transit_list_collect(mhp, 0);
22737c478bd9Sstevel@tonic-gate 	if (freemem_left != 0) {
22747c478bd9Sstevel@tonic-gate 		/* Return any surplus. */
22757c478bd9Sstevel@tonic-gate 		page_create_putback(freemem_left);
22767c478bd9Sstevel@tonic-gate 		freemem_left = 0;
22777c478bd9Sstevel@tonic-gate 	}
22787c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
22797c478bd9Sstevel@tonic-gate 	ntick_total = (uint64_t)ddi_get_lbolt() - start_total;
22807c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
22817c478bd9Sstevel@tonic-gate 	MDSTAT_TOTAL(mhp, ntick_total);
22827c478bd9Sstevel@tonic-gate 	MDSTAT_PRINT(mhp);
22837c478bd9Sstevel@tonic-gate 
22847c478bd9Sstevel@tonic-gate 	/*
22857c478bd9Sstevel@tonic-gate 	 * If the memory delete was cancelled, exclusive-wanted bits must
2286db874c57Selowe 	 * be cleared. If there are retired pages being deleted, they need
2287db874c57Selowe 	 * to be unretired.
22887c478bd9Sstevel@tonic-gate 	 */
22897c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
22907c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
22917c478bd9Sstevel@tonic-gate 		pfn_t pfn, p_end;
22927c478bd9Sstevel@tonic-gate 
22937c478bd9Sstevel@tonic-gate 		p_end = mdsp->mds_base + mdsp->mds_npgs;
22947c478bd9Sstevel@tonic-gate 		for (pfn = mdsp->mds_base; pfn < p_end; pfn++) {
22957c478bd9Sstevel@tonic-gate 			page_t *pp;
22967c478bd9Sstevel@tonic-gate 			pgcnt_t bit;
22977c478bd9Sstevel@tonic-gate 
22987c478bd9Sstevel@tonic-gate 			bit = pfn - mdsp->mds_base;
22997c478bd9Sstevel@tonic-gate 			if (mhp->mh_cancel) {
23007c478bd9Sstevel@tonic-gate 				pp = page_numtopp_nolock(pfn);
23017c478bd9Sstevel@tonic-gate 				if (pp != NULL) {
23027c478bd9Sstevel@tonic-gate 					if ((mdsp->mds_bitmap[bit / NBPBMW] &
23037c478bd9Sstevel@tonic-gate 					    (1 << (bit % NBPBMW))) == 0) {
23047c478bd9Sstevel@tonic-gate 						page_lock_clr_exclwanted(pp);
23057c478bd9Sstevel@tonic-gate 					}
23067c478bd9Sstevel@tonic-gate 				}
23077c478bd9Sstevel@tonic-gate 			} else {
23087c478bd9Sstevel@tonic-gate 				pp = NULL;
23097c478bd9Sstevel@tonic-gate 			}
23107c478bd9Sstevel@tonic-gate 			if ((mdsp->mds_bitmap_retired[bit / NBPBMW] &
23117c478bd9Sstevel@tonic-gate 			    (1 << (bit % NBPBMW))) != 0) {
23127c478bd9Sstevel@tonic-gate 				/* do we already have pp? */
23137c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
23147c478bd9Sstevel@tonic-gate 					pp = page_numtopp_nolock(pfn);
23157c478bd9Sstevel@tonic-gate 				}
23167c478bd9Sstevel@tonic-gate 				ASSERT(pp != NULL);
2317db874c57Selowe 				ASSERT(PP_RETIRED(pp));
23187c478bd9Sstevel@tonic-gate 				if (mhp->mh_cancel != 0) {
2319db874c57Selowe 					page_unlock(pp);
23207c478bd9Sstevel@tonic-gate 					/*
23217c478bd9Sstevel@tonic-gate 					 * To satisfy ASSERT below in
23227c478bd9Sstevel@tonic-gate 					 * cancel code.
23237c478bd9Sstevel@tonic-gate 					 */
23247c478bd9Sstevel@tonic-gate 					mhp->mh_hold_todo++;
23257c478bd9Sstevel@tonic-gate 				} else {
23268b464eb8Smec 					(void) page_unretire_pp(pp,
23278b464eb8Smec 					    PR_UNR_CLEAN);
23287c478bd9Sstevel@tonic-gate 				}
23297c478bd9Sstevel@tonic-gate 			}
23307c478bd9Sstevel@tonic-gate 		}
23317c478bd9Sstevel@tonic-gate 	}
23327c478bd9Sstevel@tonic-gate 	/*
23337c478bd9Sstevel@tonic-gate 	 * Free retired page bitmap and collected page bitmap
23347c478bd9Sstevel@tonic-gate 	 */
23357c478bd9Sstevel@tonic-gate 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
23367c478bd9Sstevel@tonic-gate 	    mdsp = mdsp->mds_next) {
23377c478bd9Sstevel@tonic-gate 		ASSERT(mdsp->mds_bitmap_retired != NULL);
23387c478bd9Sstevel@tonic-gate 		kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp));
23397c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap_retired = NULL;	/* Paranoia. */
23407c478bd9Sstevel@tonic-gate 		ASSERT(mdsp->mds_bitmap != NULL);
23417c478bd9Sstevel@tonic-gate 		kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp));
23427c478bd9Sstevel@tonic-gate 		mdsp->mds_bitmap = NULL;	/* Paranoia. */
23437c478bd9Sstevel@tonic-gate 	}
23447c478bd9Sstevel@tonic-gate 
23457c478bd9Sstevel@tonic-gate 	/* wait for our dr aio cancel thread to exit */
23467c478bd9Sstevel@tonic-gate 	while (!(mhp->mh_aio_cleanup_done)) {
23477c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_BEGIN(&cprinfo);
23487c478bd9Sstevel@tonic-gate 		delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
23497c478bd9Sstevel@tonic-gate 		CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
23507c478bd9Sstevel@tonic-gate 	}
23517c478bd9Sstevel@tonic-gate refused:
23527c478bd9Sstevel@tonic-gate 	if (mhp->mh_cancel != 0) {
23537c478bd9Sstevel@tonic-gate 		page_t *pp;
23547c478bd9Sstevel@tonic-gate 
23557c478bd9Sstevel@tonic-gate 		comp_code = mhp->mh_cancel;
23567c478bd9Sstevel@tonic-gate 		/*
23577c478bd9Sstevel@tonic-gate 		 * Go through list of deleted pages (mh_deleted) freeing
23587c478bd9Sstevel@tonic-gate 		 * them.
23597c478bd9Sstevel@tonic-gate 		 */
23607c478bd9Sstevel@tonic-gate 		while ((pp = mhp->mh_deleted) != NULL) {
23617c478bd9Sstevel@tonic-gate 			mhp->mh_deleted = pp->p_next;
23627c478bd9Sstevel@tonic-gate 			mhp->mh_hold_todo++;
23637c478bd9Sstevel@tonic-gate 			mutex_exit(&mhp->mh_mutex);
23647c478bd9Sstevel@tonic-gate 			/* Restore p_next. */
23657c478bd9Sstevel@tonic-gate 			pp->p_next = pp->p_prev;
23667c478bd9Sstevel@tonic-gate 			if (PP_ISFREE(pp)) {
23677c478bd9Sstevel@tonic-gate 				cmn_err(CE_PANIC,
23687c478bd9Sstevel@tonic-gate 				    "page %p is free",
23697c478bd9Sstevel@tonic-gate 				    (void *)pp);
23707c478bd9Sstevel@tonic-gate 			}
23717c478bd9Sstevel@tonic-gate 			page_free(pp, 1);
23727c478bd9Sstevel@tonic-gate 			mutex_enter(&mhp->mh_mutex);
23737c478bd9Sstevel@tonic-gate 		}
23747c478bd9Sstevel@tonic-gate 		ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages);
23757c478bd9Sstevel@tonic-gate 
23767c478bd9Sstevel@tonic-gate 		mutex_exit(&mhp->mh_mutex);
23777c478bd9Sstevel@tonic-gate 		put_availrmem(mhp->mh_vm_pages);
23787c478bd9Sstevel@tonic-gate 		mutex_enter(&mhp->mh_mutex);
23797c478bd9Sstevel@tonic-gate 
23807c478bd9Sstevel@tonic-gate 		goto t_exit;
23817c478bd9Sstevel@tonic-gate 	}
23827c478bd9Sstevel@tonic-gate 
23837c478bd9Sstevel@tonic-gate 	/*
23847c478bd9Sstevel@tonic-gate 	 * All the pages are no longer in use and are exclusively locked.
23857c478bd9Sstevel@tonic-gate 	 */
23867c478bd9Sstevel@tonic-gate 
23877c478bd9Sstevel@tonic-gate 	mhp->mh_deleted = NULL;
23887c478bd9Sstevel@tonic-gate 
23897c478bd9Sstevel@tonic-gate 	kphysm_del_cleanup(mhp);
23907c478bd9Sstevel@tonic-gate 
239173347c69Smb 	/*
23929853d9e8SJason Beloro 	 * mem_node_del_range needs to be after kphysm_del_cleanup so
239373347c69Smb 	 * that the mem_node_config[] will remain intact for the cleanup.
239473347c69Smb 	 */
239573347c69Smb 	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
239673347c69Smb 	    mdsp = mdsp->mds_next) {
23979853d9e8SJason Beloro 		mem_node_del_range(mdsp->mds_base,
23989853d9e8SJason Beloro 		    mdsp->mds_base + mdsp->mds_npgs - 1);
239973347c69Smb 	}
2400af4c679fSSean McEnroe 	/* cleanup the page counters */
2401af4c679fSSean McEnroe 	page_ctrs_cleanup();
240273347c69Smb 
24037c478bd9Sstevel@tonic-gate 	comp_code = KPHYSM_OK;
24047c478bd9Sstevel@tonic-gate 
24057c478bd9Sstevel@tonic-gate t_exit:
24067c478bd9Sstevel@tonic-gate 	mutex_exit(&mhp->mh_mutex);
24077c478bd9Sstevel@tonic-gate 	kphysm_setup_post_del(mhp->mh_vm_pages,
24087c478bd9Sstevel@tonic-gate 	    (comp_code == KPHYSM_OK) ? 0 : 1);
24097c478bd9Sstevel@tonic-gate 	mutex_enter(&mhp->mh_mutex);
24107c478bd9Sstevel@tonic-gate 
24117c478bd9Sstevel@tonic-gate early_exit:
24127c478bd9Sstevel@tonic-gate 	/* mhp->mh_mutex exited by CALLB_CPR_EXIT() */
24137c478bd9Sstevel@tonic-gate 	mhp->mh_state = MHND_DONE;
24147c478bd9Sstevel@tonic-gate 	del_complete_funcp = mhp->mh_delete_complete;
24157c478bd9Sstevel@tonic-gate 	del_complete_arg = mhp->mh_delete_complete_arg;
24167c478bd9Sstevel@tonic-gate 	CALLB_CPR_EXIT(&cprinfo);
24177c478bd9Sstevel@tonic-gate 	(*del_complete_funcp)(del_complete_arg, comp_code);
24187c478bd9Sstevel@tonic-gate 	thread_exit();
24197c478bd9Sstevel@tonic-gate 	/*NOTREACHED*/
24207c478bd9Sstevel@tonic-gate }
24217c478bd9Sstevel@tonic-gate 
/*
 * Start the delete of the memory from the system.
 *
 * Validates the handle's state, records the completion callback, and
 * spawns delete_memory_thread to perform the actual delete.  Returns a
 * KPHYSM_* code; on KPHYSM_OK the delete is under way and (*complete)()
 * will eventually be invoked with the final status.
 *
 * kphysm_lookup_mem_handle() returns with mhp->mh_mutex held, so every
 * exit path below must drop that mutex.
 */
int
kphysm_del_start(
	memhandle_t handle,
	void (*complete)(void *, int),
	void *complete_arg)
{
	struct mem_handle *mhp;

	mhp = kphysm_lookup_mem_handle(handle);
	if (mhp == NULL) {
		return (KPHYSM_EHANDLE);
	}
	switch (mhp->mh_state) {
	case MHND_FREE:
		/* A free handle should never be returned by the lookup. */
		ASSERT(mhp->mh_state != MHND_FREE);
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	case MHND_INIT:
		/* The only state from which a delete may be started. */
		break;
	case MHND_STARTING:
	case MHND_RUNNING:
		/* A delete is already in progress on this handle. */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	case MHND_DONE:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	case MHND_RELEASE:
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ESEQUENCE);
	default:
#ifdef DEBUG
		cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d",
		    (void *)mhp, mhp->mh_state);
#endif /* DEBUG */
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_EHANDLE);
	}

	/* No spans were added to this handle, so there is nothing to do. */
	if (mhp->mh_transit.trl_spans == NULL) {
		mutex_exit(&mhp->mh_mutex);
		return (KPHYSM_ENOWORK);
	}

	ASSERT(complete != NULL);
	mhp->mh_delete_complete = complete;
	mhp->mh_delete_complete_arg = complete_arg;
	mhp->mh_state = MHND_STARTING;
	/*
	 * Release the mutex in case thread_create sleeps.
	 */
	mutex_exit(&mhp->mh_mutex);

	/*
	 * The "obvious" process for this thread is pageout (proc_pageout)
	 * but this gives the thread too much power over freemem
	 * which results in freemem starvation.
	 */
	(void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0,
	    TS_RUN, maxclsyspri - 1);

	return (KPHYSM_OK);
}
24877c478bd9Sstevel@tonic-gate 
24887c478bd9Sstevel@tonic-gate static kmutex_t pp_dummy_lock;		/* Protects init. of pp_dummy. */
24897c478bd9Sstevel@tonic-gate static caddr_t pp_dummy;
24907c478bd9Sstevel@tonic-gate static pgcnt_t pp_dummy_npages;
24917c478bd9Sstevel@tonic-gate static pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
24927c478bd9Sstevel@tonic-gate 
24937c478bd9Sstevel@tonic-gate static void
memseg_remap_init_pages(page_t * pages,page_t * epages)24947c478bd9Sstevel@tonic-gate memseg_remap_init_pages(page_t *pages, page_t *epages)
24957c478bd9Sstevel@tonic-gate {
24967c478bd9Sstevel@tonic-gate 	page_t *pp;
24977c478bd9Sstevel@tonic-gate 
24987c478bd9Sstevel@tonic-gate 	for (pp = pages; pp < epages; pp++) {
24997c478bd9Sstevel@tonic-gate 		pp->p_pagenum = PFN_INVALID;	/* XXXX */
25007c478bd9Sstevel@tonic-gate 		pp->p_offset = (u_offset_t)-1;
25017c478bd9Sstevel@tonic-gate 		page_iolock_init(pp);
25027c478bd9Sstevel@tonic-gate 		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
25037c478bd9Sstevel@tonic-gate 			continue;
25047c478bd9Sstevel@tonic-gate 		page_lock_delete(pp);
25057c478bd9Sstevel@tonic-gate 	}
25067c478bd9Sstevel@tonic-gate }
25077c478bd9Sstevel@tonic-gate 
/*
 * One-time initialization of the pp_dummy area: a small group of
 * physical pages whose page_t contents are placed in the "deleted"
 * state.  Deleted memsegs have their page_t metadata remapped onto
 * these pages (see remap_to_dummy()), so stale references observe
 * deleted page_t's rather than unmapped memory.  Safe to call more
 * than once; pp_dummy != NULL marks completion.
 */
void
memseg_remap_init()
{
	mutex_enter(&pp_dummy_lock);
	if (pp_dummy == NULL) {
		uint_t dpages;
		int i;

		/*
		 * dpages starts off as the size of the structure and
		 * ends up as the minimum number of pages that will
		 * hold a whole number of page_t structures.
		 */
		dpages = sizeof (page_t);
		ASSERT(dpages != 0);
		ASSERT(dpages <= MMU_PAGESIZE);

		/*
		 * Strip factors of two; the remaining odd factor is the
		 * smallest page count whose total size is an exact
		 * multiple of sizeof (page_t).
		 */
		while ((dpages & 1) == 0)
			dpages >>= 1;

		pp_dummy_npages = dpages;
		/*
		 * Allocate pp_dummy pages directly from static_arena,
		 * since these are whole page allocations and are
		 * referenced by physical address.  This also has the
		 * nice fringe benefit of hiding the memory from
		 * ::findleaks since it doesn't deal well with allocated
		 * kernel heap memory that doesn't have any mappings.
		 */
		pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages),
		    PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
		bzero(pp_dummy, ptob(pp_dummy_npages));
		ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0);
		/*
		 * Record the backing pfns now, while the kernel mappings
		 * still exist; they are needed by remap_to_dummy() after
		 * the mappings are removed below.
		 */
		pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) *
		    pp_dummy_npages, KM_SLEEP);
		for (i = 0; i < pp_dummy_npages; i++) {
			pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat,
			    &pp_dummy[MMU_PAGESIZE * i]);
			ASSERT(pp_dummy_pfn[i] != PFN_INVALID);
		}
		/*
		 * Initialize the page_t's to a known 'deleted' state
		 * that matches the state of deleted pages.
		 */
		memseg_remap_init_pages((page_t *)pp_dummy,
		    (page_t *)(pp_dummy + ptob(pp_dummy_npages)));
		/* Remove kmem mappings for the pages for safety. */
		hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages),
		    HAT_UNLOAD_UNLOCK);
		/* Leave pp_dummy pointer set as flag that init is done. */
	}
	mutex_exit(&pp_dummy_lock);
}
25617c478bd9Sstevel@tonic-gate 
25629853d9e8SJason Beloro /*
25639853d9e8SJason Beloro  * Remap a page-aglined range of page_t's to dummy pages.
25649853d9e8SJason Beloro  */
25659853d9e8SJason Beloro void
remap_to_dummy(caddr_t va,pgcnt_t metapgs)25669853d9e8SJason Beloro remap_to_dummy(caddr_t va, pgcnt_t metapgs)
25677c478bd9Sstevel@tonic-gate {
25689853d9e8SJason Beloro 	int phase;
25699853d9e8SJason Beloro 
2570a3114836SGerry Liu 	ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE));
25719853d9e8SJason Beloro 
25729853d9e8SJason Beloro 	/*
25739853d9e8SJason Beloro 	 * We may start remapping at a non-zero page offset
25749853d9e8SJason Beloro 	 * within the dummy pages since the low/high ends
25759853d9e8SJason Beloro 	 * of the outgoing pp's could be shared by other
25769853d9e8SJason Beloro 	 * memsegs (see memseg_remap_meta).
25779853d9e8SJason Beloro 	 */
2578a3114836SGerry Liu 	phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages;
2579a3114836SGerry Liu 	/*CONSTCOND*/
25809853d9e8SJason Beloro 	ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);
25817c478bd9Sstevel@tonic-gate 
25827c478bd9Sstevel@tonic-gate 	while (metapgs != 0) {
25837c478bd9Sstevel@tonic-gate 		pgcnt_t n;
25849853d9e8SJason Beloro 		int i, j;
25857c478bd9Sstevel@tonic-gate 
25867c478bd9Sstevel@tonic-gate 		n = pp_dummy_npages;
25877c478bd9Sstevel@tonic-gate 		if (n > metapgs)
25887c478bd9Sstevel@tonic-gate 			n = metapgs;
25897c478bd9Sstevel@tonic-gate 		for (i = 0; i < n; i++) {
25909853d9e8SJason Beloro 			j = (i + phase) % pp_dummy_npages;
25919853d9e8SJason Beloro 			hat_devload(kas.a_hat, va, ptob(1), pp_dummy_pfn[j],
25927c478bd9Sstevel@tonic-gate 			    PROT_READ,
25937c478bd9Sstevel@tonic-gate 			    HAT_LOAD | HAT_LOAD_NOCONSIST |
25947c478bd9Sstevel@tonic-gate 			    HAT_LOAD_REMAP);
25959853d9e8SJason Beloro 			va += ptob(1);
25967c478bd9Sstevel@tonic-gate 		}
25977c478bd9Sstevel@tonic-gate 		metapgs -= n;
25987c478bd9Sstevel@tonic-gate 	}
25997c478bd9Sstevel@tonic-gate }
26007c478bd9Sstevel@tonic-gate 
26019853d9e8SJason Beloro static void
memseg_remap_to_dummy(struct memseg * seg)26029853d9e8SJason Beloro memseg_remap_to_dummy(struct memseg *seg)
26039853d9e8SJason Beloro {
26049853d9e8SJason Beloro 	caddr_t pp;
26059853d9e8SJason Beloro 	pgcnt_t metapgs;
26069853d9e8SJason Beloro 
26079853d9e8SJason Beloro 	ASSERT(memseg_is_dynamic(seg));
26089853d9e8SJason Beloro 	ASSERT(pp_dummy != NULL);
26099853d9e8SJason Beloro 
26109853d9e8SJason Beloro 
26119853d9e8SJason Beloro 	if (!memseg_includes_meta(seg)) {
26129853d9e8SJason Beloro 		memseg_remap_meta(seg);
26139853d9e8SJason Beloro 		return;
26149853d9e8SJason Beloro 	}
26159853d9e8SJason Beloro 
26169853d9e8SJason Beloro 	pp = (caddr_t)seg->pages;
26179853d9e8SJason Beloro 	metapgs = seg->pages_base - memseg_get_start(seg);
26189853d9e8SJason Beloro 	ASSERT(metapgs != 0);
26199853d9e8SJason Beloro 
26209853d9e8SJason Beloro 	seg->pages_end = seg->pages_base;
26219853d9e8SJason Beloro 
26229853d9e8SJason Beloro 	remap_to_dummy(pp, metapgs);
26239853d9e8SJason Beloro }
26249853d9e8SJason Beloro 
26257c478bd9Sstevel@tonic-gate /*
26267c478bd9Sstevel@tonic-gate  * Transition all the deleted pages to the deleted state so that
26277c478bd9Sstevel@tonic-gate  * page_lock will not wait. The page_lock_delete call will
26287c478bd9Sstevel@tonic-gate  * also wake up any waiters.
26297c478bd9Sstevel@tonic-gate  */
26307c478bd9Sstevel@tonic-gate static void
memseg_lock_delete_all(struct memseg * seg)26317c478bd9Sstevel@tonic-gate memseg_lock_delete_all(struct memseg *seg)
26327c478bd9Sstevel@tonic-gate {
26337c478bd9Sstevel@tonic-gate 	page_t *pp;
26347c478bd9Sstevel@tonic-gate 
26357c478bd9Sstevel@tonic-gate 	for (pp = seg->pages; pp < seg->epages; pp++) {
26367c478bd9Sstevel@tonic-gate 		pp->p_pagenum = PFN_INVALID;	/* XXXX */
26377c478bd9Sstevel@tonic-gate 		page_lock_delete(pp);
26387c478bd9Sstevel@tonic-gate 	}
26397c478bd9Sstevel@tonic-gate }
26407c478bd9Sstevel@tonic-gate 
/*
 * Final phase of a successful memory delete: unlink the deleted memsegs
 * from the global memsegs list, update global page accounting and the
 * paging parameters, recycle (or leak) the page_t metadata, and remove
 * the deleted spans from the phys_avail and phys_install memlists.
 *
 * Entered and exited with mhp->mh_mutex held; the mutex is dropped
 * internally while the global structures are manipulated.
 */
static void
kphysm_del_cleanup(struct mem_handle *mhp)
{
	struct memdelspan	*mdsp;
	struct memseg		*seg;
	struct memseg		**segpp;
	struct memseg		*seglist;
	pfn_t			p_end;
	uint64_t		avmem;
	pgcnt_t			avpgs;
	pgcnt_t			npgs;

	/* Pages this delete removes from the system's memory totals. */
	avpgs = mhp->mh_vm_pages;

	memsegs_lock(1);

	/*
	 * remove from main segment list.
	 */
	npgs = 0;
	seglist = NULL;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				segpp = &((*segpp)->next);
				continue;
			}
			/* An overlapping memseg lies wholly within the span. */
			ASSERT(seg->pages_base >= mdsp->mds_base);
			ASSERT(seg->pages_end <= p_end);

			/* Negative delta: pages are leaving this range. */
			PLCNT_MODIFY_MAX(seg->pages_base,
			    seg->pages_base - seg->pages_end);

			/* Hide the memseg from future scans. */
			hat_kpm_delmem_mseg_update(seg, segpp);
			*segpp = seg->next;
			membar_producer();	/* TODO: Needed? */
			npgs += MSEG_NPAGES(seg);

			/*
			 * Leave the deleted segment's next pointer intact
			 * in case a memsegs scanning loop is walking this
			 * segment concurrently.
			 */
			seg->lnext = seglist;
			seglist = seg;
		}
	}

	build_pfn_hash();

	ASSERT(npgs < total_pages);
	total_pages -= npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock();

	memsegs_unlock(1);

	mutex_exit(&mhp->mh_mutex);

	/* Process each unlinked memseg without holding mh_mutex. */
	while ((seg = seglist) != NULL) {
		pfn_t mseg_start;
		pfn_t mseg_base, mseg_end;
		pgcnt_t mseg_npgs;
		int mlret;

		seglist = seg->lnext;

		/*
		 * Put the page_t's into the deleted state to stop
		 * cv_wait()s on the pages. When we remap, the dummy
		 * page_t's will be in the same state.
		 */
		memseg_lock_delete_all(seg);
		/*
		 * Collect up information based on pages_base and pages_end
		 * early so that we can flag early that the memseg has been
		 * deleted by setting pages_end == pages_base.
		 */
		mseg_base = seg->pages_base;
		mseg_end = seg->pages_end;
		mseg_npgs = MSEG_NPAGES(seg);
		mseg_start = memseg_get_start(seg);

		if (memseg_is_dynamic(seg)) {
			/* Remap the meta data to our special dummy area. */
			memseg_remap_to_dummy(seg);

			/* Make the VA range available for future memsegs. */
			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_va_avail;
			memseg_va_avail = seg;
			mutex_exit(&memseg_lists_lock);
		} else {
			/*
			 * For memory whose page_ts were allocated
			 * at boot, we need to find a new use for
			 * the page_t memory.
			 * For the moment, just leak it.
			 * (It is held in the memseg_delete_junk list.)
			 */
			seg->pages_end = seg->pages_base;

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_delete_junk;
			memseg_delete_junk = seg;
			mutex_exit(&memseg_lists_lock);
		}

		/* Must not use seg now as it could be re-used. */

		memlist_write_lock();

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_base) << PAGESHIFT,
		    (uint64_t)(mseg_npgs) << PAGESHIFT,
		    &phys_avail);
		ASSERT(mlret == MEML_SPANOP_OK);

		mlret = memlist_delete_span(
		    (uint64_t)(mseg_start) << PAGESHIFT,
		    (uint64_t)(mseg_end - mseg_start) <<
		    PAGESHIFT,
		    &phys_install);
		ASSERT(mlret == MEML_SPANOP_OK);
		phys_install_has_changed();

		memlist_write_unlock();
	}

	/* Recompute physmax/physinstalled from the shrunken memlist. */
	memlist_read_lock();
	installed_top_size(phys_install, &physmax, &physinstalled);
	memlist_read_unlock();

	mutex_enter(&freemem_lock);
	maxmem -= avpgs;
	physmem -= avpgs;
	/* availrmem is adjusted during the delete. */
	availrmem_initial -= avpgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_delete: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/* Successfully deleted system memory */
	mutex_enter(&mhp->mh_mutex);
}
28107c478bd9Sstevel@tonic-gate 
28117c478bd9Sstevel@tonic-gate static uint_t mdel_nullvp_waiter;
28127c478bd9Sstevel@tonic-gate 
28137c478bd9Sstevel@tonic-gate static void
page_delete_collect(page_t * pp,struct mem_handle * mhp)28147c478bd9Sstevel@tonic-gate page_delete_collect(
28157c478bd9Sstevel@tonic-gate 	page_t *pp,
28167c478bd9Sstevel@tonic-gate 	struct mem_handle *mhp)
28177c478bd9Sstevel@tonic-gate {
28187c478bd9Sstevel@tonic-gate 	if (pp->p_vnode) {
28197c478bd9Sstevel@tonic-gate 		page_hashout(pp, (kmutex_t *)NULL);
28207c478bd9Sstevel@tonic-gate 		/* do not do PP_SETAGED(pp); */
28217c478bd9Sstevel@tonic-gate 	} else {
28227c478bd9Sstevel@tonic-gate 		kmutex_t *sep;
28237c478bd9Sstevel@tonic-gate 
28247c478bd9Sstevel@tonic-gate 		sep = page_se_mutex(pp);
28257c478bd9Sstevel@tonic-gate 		mutex_enter(sep);
28267c478bd9Sstevel@tonic-gate 		if (CV_HAS_WAITERS(&pp->p_cv)) {
28277c478bd9Sstevel@tonic-gate 			mdel_nullvp_waiter++;
28287c478bd9Sstevel@tonic-gate 			cv_broadcast(&pp->p_cv);
28297c478bd9Sstevel@tonic-gate 		}
28307c478bd9Sstevel@tonic-gate 		mutex_exit(sep);
28317c478bd9Sstevel@tonic-gate 	}
28327c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_next == pp->p_prev);
28337c478bd9Sstevel@tonic-gate 	ASSERT(pp->p_next == NULL || pp->p_next == pp);
28347c478bd9Sstevel@tonic-gate 	pp->p_next = mhp->mh_deleted;
28357c478bd9Sstevel@tonic-gate 	mhp->mh_deleted = pp;
28367c478bd9Sstevel@tonic-gate 	ASSERT(mhp->mh_hold_todo != 0);
28377c478bd9Sstevel@tonic-gate 	mhp->mh_hold_todo--;
28387c478bd9Sstevel@tonic-gate }
28397c478bd9Sstevel@tonic-gate 
28407c478bd9Sstevel@tonic-gate static void
transit_list_collect(struct mem_handle * mhp,int v)28417c478bd9Sstevel@tonic-gate transit_list_collect(struct mem_handle *mhp, int v)
28427c478bd9Sstevel@tonic-gate {
28437c478bd9Sstevel@tonic-gate 	struct transit_list_head *trh;
28447c478bd9Sstevel@tonic-gate 
28457c478bd9Sstevel@tonic-gate 	trh = &transit_list_head;
28467c478bd9Sstevel@tonic-gate 	mutex_enter(&trh->trh_lock);
28477c478bd9Sstevel@tonic-gate 	mhp->mh_transit.trl_collect = v;
28487c478bd9Sstevel@tonic-gate 	mutex_exit(&trh->trh_lock);
28497c478bd9Sstevel@tonic-gate }
28507c478bd9Sstevel@tonic-gate 
28517c478bd9Sstevel@tonic-gate static void
transit_list_insert(struct transit_list * tlp)28527c478bd9Sstevel@tonic-gate transit_list_insert(struct transit_list *tlp)
28537c478bd9Sstevel@tonic-gate {
28547c478bd9Sstevel@tonic-gate 	struct transit_list_head *trh;
28557c478bd9Sstevel@tonic-gate 
28567c478bd9Sstevel@tonic-gate 	trh = &transit_list_head;
28577c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&trh->trh_lock));
28587c478bd9Sstevel@tonic-gate 	tlp->trl_next = trh->trh_head;
28597c478bd9Sstevel@tonic-gate 	trh->trh_head = tlp;
28607c478bd9Sstevel@tonic-gate }
28617c478bd9Sstevel@tonic-gate 
28627c478bd9Sstevel@tonic-gate static void
transit_list_remove(struct transit_list * tlp)28637c478bd9Sstevel@tonic-gate transit_list_remove(struct transit_list *tlp)
28647c478bd9Sstevel@tonic-gate {
28657c478bd9Sstevel@tonic-gate 	struct transit_list_head *trh;
28667c478bd9Sstevel@tonic-gate 	struct transit_list **tlpp;
28677c478bd9Sstevel@tonic-gate 
28687c478bd9Sstevel@tonic-gate 	trh = &transit_list_head;
28697c478bd9Sstevel@tonic-gate 	tlpp = &trh->trh_head;
28707c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&trh->trh_lock));
28717c478bd9Sstevel@tonic-gate 	while (*tlpp != NULL && *tlpp != tlp)
28727c478bd9Sstevel@tonic-gate 		tlpp = &(*tlpp)->trl_next;
28737c478bd9Sstevel@tonic-gate 	ASSERT(*tlpp != NULL);
28747c478bd9Sstevel@tonic-gate 	if (*tlpp == tlp)
28757c478bd9Sstevel@tonic-gate 		*tlpp = tlp->trl_next;
28767c478bd9Sstevel@tonic-gate 	tlp->trl_next = NULL;
28777c478bd9Sstevel@tonic-gate }
28787c478bd9Sstevel@tonic-gate 
28797c478bd9Sstevel@tonic-gate static struct transit_list *
pfnum_to_transit_list(struct transit_list_head * trh,pfn_t pfnum)28807c478bd9Sstevel@tonic-gate pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
28817c478bd9Sstevel@tonic-gate {
28827c478bd9Sstevel@tonic-gate 	struct transit_list *tlp;
28837c478bd9Sstevel@tonic-gate 
28847c478bd9Sstevel@tonic-gate 	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
28857c478bd9Sstevel@tonic-gate 		struct memdelspan *mdsp;
28867c478bd9Sstevel@tonic-gate 
28877c478bd9Sstevel@tonic-gate 		for (mdsp = tlp->trl_spans; mdsp != NULL;
28887c478bd9Sstevel@tonic-gate 		    mdsp = mdsp->mds_next) {
28897c478bd9Sstevel@tonic-gate 			if (pfnum >= mdsp->mds_base &&
28907c478bd9Sstevel@tonic-gate 			    pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
28917c478bd9Sstevel@tonic-gate 				return (tlp);
28927c478bd9Sstevel@tonic-gate 			}
28937c478bd9Sstevel@tonic-gate 		}
28947c478bd9Sstevel@tonic-gate 	}
28957c478bd9Sstevel@tonic-gate 	return (NULL);
28967c478bd9Sstevel@tonic-gate }
28977c478bd9Sstevel@tonic-gate 
/*
 * Return nonzero if page frame 'pfnum' falls within a memory span that
 * is in transit and whose transit list has trl_collect set.
 */
int
pfn_is_being_deleted(pfn_t pfnum)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;
	/*
	 * Unlocked fast path: if nothing is in transit, skip taking the
	 * lock.  A racing insertion is benign since the result here is
	 * only a point-in-time snapshot for the caller anyway.
	 */
	if (trh->trh_head == NULL)
		return (0);

	mutex_enter(&trh->trh_lock);
	tlp = pfnum_to_transit_list(trh, pfnum);
	/* Only report pages on lists that are actively collecting. */
	ret = (tlp != NULL && tlp->trl_collect);
	mutex_exit(&trh->trh_lock);

	return (ret);
}
29167c478bd9Sstevel@tonic-gate 
29177c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
29187c478bd9Sstevel@tonic-gate extern int hz;
/*
 * Dump the per-delete-operation statistics kept in mh_delstat to the
 * console.  Only active when the mem_del_stat_print tunable is set.
 * Compiled in only under MEM_DEL_STATS.
 */
static void
mem_del_stat_print_func(struct mem_handle *mhp)
{
	uint64_t tmp;

	if (mem_del_stat_print) {
		/*
		 * Identify the operation by the base pfn and page count
		 * of the first span, and note if it was cancelled.
		 */
		printf("memory delete loop %x/%x, statistics%s\n",
		    (uint_t)mhp->mh_transit.trl_spans->mds_base,
		    (uint_t)mhp->mh_transit.trl_spans->mds_npgs,
		    (mhp->mh_cancel ? " (cancelled)" : ""));
		printf("\t%8u nloop\n", mhp->mh_delstat.nloop);
		printf("\t%8u need_free\n", mhp->mh_delstat.need_free);
		printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop);
		printf("\t%8u free_low\n", mhp->mh_delstat.free_low);
		printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed);
		printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck);
		printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget);
		printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail);
		printf("\t%8u nfree\n", mhp->mh_delstat.nfree);
		printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc);
		printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail);
		printf("\t%8u already_done\n", mhp->mh_delstat.already_done);
		printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree);
		printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked);
		printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc);
		printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl);
		printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc);
		printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy);
		printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage);
		printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim);
		printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay);
		printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail);
		printf("\t%8u retired\n", mhp->mh_delstat.retired);
		printf("\t%8u toxic\n", mhp->mh_delstat.toxic);
		printf("\t%8u failing\n", mhp->mh_delstat.failing);
		printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic);
		printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic);
		printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail);
		printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail);
		/* Convert tick counts to seconds for the min/sec display. */
		tmp = mhp->mh_delstat.nticks_total / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60);

		tmp = mhp->mh_delstat.nticks_pgrp / hz;  /* seconds */
		printf(
		    "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60);
	}
}
29697c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
29707c478bd9Sstevel@tonic-gate 
/*
 * A registered physical-memory setup callback: a caller-supplied
 * vector of functions (post_add/pre_del/post_del) plus the opaque
 * argument passed back on every invocation.
 */
struct mem_callback {
	kphysm_setup_vector_t	*vec;
	void			*arg;
};

/* Fixed capacity of the callback table below. */
#define	NMEMCALLBACKS		100

static struct mem_callback mem_callbacks[NMEMCALLBACKS];
/* Index one past the highest slot ever in use (vacated slots reused). */
static uint_t nmemcallbacks;
/* Protects mem_callbacks[] and nmemcallbacks. */
static krwlock_t mem_callback_rwlock;
29817c478bd9Sstevel@tonic-gate 
29827c478bd9Sstevel@tonic-gate int
kphysm_setup_func_register(kphysm_setup_vector_t * vec,void * arg)29837c478bd9Sstevel@tonic-gate kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
29847c478bd9Sstevel@tonic-gate {
29857c478bd9Sstevel@tonic-gate 	uint_t i, found;
29867c478bd9Sstevel@tonic-gate 
29877c478bd9Sstevel@tonic-gate 	/*
29887c478bd9Sstevel@tonic-gate 	 * This test will become more complicated when the version must
29897c478bd9Sstevel@tonic-gate 	 * change.
29907c478bd9Sstevel@tonic-gate 	 */
29917c478bd9Sstevel@tonic-gate 	if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
29927c478bd9Sstevel@tonic-gate 		return (EINVAL);
29937c478bd9Sstevel@tonic-gate 
29947c478bd9Sstevel@tonic-gate 	if (vec->post_add == NULL || vec->pre_del == NULL ||
29957c478bd9Sstevel@tonic-gate 	    vec->post_del == NULL)
29967c478bd9Sstevel@tonic-gate 		return (EINVAL);
29977c478bd9Sstevel@tonic-gate 
29987c478bd9Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_WRITER);
29997c478bd9Sstevel@tonic-gate 	for (i = 0, found = 0; i < nmemcallbacks; i++) {
30007c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec == NULL && found == 0)
30017c478bd9Sstevel@tonic-gate 			found = i + 1;
30027c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec == vec &&
30037c478bd9Sstevel@tonic-gate 		    mem_callbacks[i].arg == arg) {
30047c478bd9Sstevel@tonic-gate #ifdef DEBUG
30057c478bd9Sstevel@tonic-gate 			/* Catch this in DEBUG kernels. */
30067c478bd9Sstevel@tonic-gate 			cmn_err(CE_WARN, "kphysm_setup_func_register"
30077c478bd9Sstevel@tonic-gate 			    "(0x%p, 0x%p) duplicate registration from 0x%p",
30087c478bd9Sstevel@tonic-gate 			    (void *)vec, arg, (void *)caller());
30097c478bd9Sstevel@tonic-gate #endif /* DEBUG */
30107c478bd9Sstevel@tonic-gate 			rw_exit(&mem_callback_rwlock);
30117c478bd9Sstevel@tonic-gate 			return (EEXIST);
30127c478bd9Sstevel@tonic-gate 		}
30137c478bd9Sstevel@tonic-gate 	}
30147c478bd9Sstevel@tonic-gate 	if (found != 0) {
30157c478bd9Sstevel@tonic-gate 		i = found - 1;
30167c478bd9Sstevel@tonic-gate 	} else {
30177c478bd9Sstevel@tonic-gate 		ASSERT(nmemcallbacks < NMEMCALLBACKS);
30187c478bd9Sstevel@tonic-gate 		if (nmemcallbacks == NMEMCALLBACKS) {
30197c478bd9Sstevel@tonic-gate 			rw_exit(&mem_callback_rwlock);
30207c478bd9Sstevel@tonic-gate 			return (ENOMEM);
30217c478bd9Sstevel@tonic-gate 		}
30227c478bd9Sstevel@tonic-gate 		i = nmemcallbacks++;
30237c478bd9Sstevel@tonic-gate 	}
30247c478bd9Sstevel@tonic-gate 	mem_callbacks[i].vec = vec;
30257c478bd9Sstevel@tonic-gate 	mem_callbacks[i].arg = arg;
30267c478bd9Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30277c478bd9Sstevel@tonic-gate 	return (0);
30287c478bd9Sstevel@tonic-gate }
30297c478bd9Sstevel@tonic-gate 
30307c478bd9Sstevel@tonic-gate void
kphysm_setup_func_unregister(kphysm_setup_vector_t * vec,void * arg)30317c478bd9Sstevel@tonic-gate kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
30327c478bd9Sstevel@tonic-gate {
30337c478bd9Sstevel@tonic-gate 	uint_t i;
30347c478bd9Sstevel@tonic-gate 
30357c478bd9Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_WRITER);
30367c478bd9Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30377c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec == vec &&
30387c478bd9Sstevel@tonic-gate 		    mem_callbacks[i].arg == arg) {
30397c478bd9Sstevel@tonic-gate 			mem_callbacks[i].vec = NULL;
30407c478bd9Sstevel@tonic-gate 			mem_callbacks[i].arg = NULL;
30417c478bd9Sstevel@tonic-gate 			if (i == (nmemcallbacks - 1))
30427c478bd9Sstevel@tonic-gate 				nmemcallbacks--;
30437c478bd9Sstevel@tonic-gate 			break;
30447c478bd9Sstevel@tonic-gate 		}
30457c478bd9Sstevel@tonic-gate 	}
30467c478bd9Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30477c478bd9Sstevel@tonic-gate }
30487c478bd9Sstevel@tonic-gate 
30497c478bd9Sstevel@tonic-gate static void
kphysm_setup_post_add(pgcnt_t delta_pages)30507c478bd9Sstevel@tonic-gate kphysm_setup_post_add(pgcnt_t delta_pages)
30517c478bd9Sstevel@tonic-gate {
30527c478bd9Sstevel@tonic-gate 	uint_t i;
30537c478bd9Sstevel@tonic-gate 
30547c478bd9Sstevel@tonic-gate 	rw_enter(&mem_callback_rwlock, RW_READER);
30557c478bd9Sstevel@tonic-gate 	for (i = 0; i < nmemcallbacks; i++) {
30567c478bd9Sstevel@tonic-gate 		if (mem_callbacks[i].vec != NULL) {
30577c478bd9Sstevel@tonic-gate 			(*mem_callbacks[i].vec->post_add)
30587c478bd9Sstevel@tonic-gate 			    (mem_callbacks[i].arg, delta_pages);
30597c478bd9Sstevel@tonic-gate 		}
30607c478bd9Sstevel@tonic-gate 	}
30617c478bd9Sstevel@tonic-gate 	rw_exit(&mem_callback_rwlock);
30627c478bd9Sstevel@tonic-gate }
30637c478bd9Sstevel@tonic-gate 
30647c478bd9Sstevel@tonic-gate /*
30657c478bd9Sstevel@tonic-gate  * Note the locking between pre_del and post_del: The reader lock is held
30667c478bd9Sstevel@tonic-gate  * between the two calls to stop the set of functions from changing.
30677c478bd9Sstevel@tonic-gate  */
30687c478bd9Sstevel@tonic-gate 
static int
kphysm_setup_pre_del(pgcnt_t delta_pages)
{
	uint_t i;
	int ret;
	int aret;

	ret = 0;
	/*
	 * NOTE: the reader lock taken here is deliberately NOT released
	 * before returning.  It is held across the whole delete attempt
	 * and dropped by kphysm_setup_post_del(), so the set of
	 * registered callbacks cannot change between the two phases
	 * (see the block comment above).
	 */
	rw_enter(&mem_callback_rwlock, RW_READER);
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			/*
			 * Results from all pre_del callbacks are OR-ed
			 * together into the return value.
			 */
			aret = (*mem_callbacks[i].vec->pre_del)
			    (mem_callbacks[i].arg, delta_pages);
			ret |= aret;
		}
	}

	return (ret);
}
30887c478bd9Sstevel@tonic-gate 
static void
kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled)
{
	uint_t i;

	/*
	 * The mem_callback_rwlock reader lock is still held here; it was
	 * acquired by the matching kphysm_setup_pre_del() call and is
	 * released below once every post_del callback has run.
	 */
	for (i = 0; i < nmemcallbacks; i++) {
		if (mem_callbacks[i].vec != NULL) {
			(*mem_callbacks[i].vec->post_del)
			    (mem_callbacks[i].arg, delta_pages, cancelled);
		}
	}
	rw_exit(&mem_callback_rwlock);
}
31027c478bd9Sstevel@tonic-gate 
/*
 * Split the memseg containing pages [base, base + npgs) into up to
 * three memsegs: an optional low remainder, the target middle span,
 * and an optional high remainder.  The old memseg is left intact
 * (parked on memseg_edit_junk) since references to it may exist.
 * Returns 1 on success, 0 if no suitable memseg covers the span or
 * no split is actually needed.
 */
static int
kphysm_split_memseg(
	pfn_t base,
	pgcnt_t npgs)
{
	struct memseg *seg;
	struct memseg **segpp;
	pgcnt_t size_low, size_high;
	struct memseg *seg_low, *seg_mid, *seg_high;

	/*
	 * Lock the memsegs list against other updates now
	 */
	memsegs_lock(1);

	/*
	 * Find boot time memseg that wholly covers this area.
	 */

	/* First find the memseg with page 'base' in it. */
	for (segpp = &memsegs; (seg = *segpp) != NULL;
	    segpp = &((*segpp)->next)) {
		if (base >= seg->pages_base && base < seg->pages_end)
			break;
	}
	if (seg == NULL) {
		/* No memseg contains 'base'. */
		memsegs_unlock(1);
		return (0);
	}
	if (memseg_includes_meta(seg)) {
		/* Can't split a memseg with embedded page_t metadata. */
		memsegs_unlock(1);
		return (0);
	}
	if ((base + npgs) > seg->pages_end) {
		/* The span must not cross the end of this memseg. */
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Work out the size of the two segments that will
	 * surround the new segment, one for low address
	 * and one for high.
	 */
	ASSERT(base >= seg->pages_base);
	size_low = base - seg->pages_base;
	ASSERT(seg->pages_end >= (base + npgs));
	size_high = seg->pages_end - (base + npgs);

	/*
	 * Sanity check.
	 */
	if ((size_low + size_high) == 0) {
		/* The span is the whole memseg; nothing to split. */
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Allocate the new structures. The old memseg will not be freed
	 * as there may be a reference to it.
	 */
	seg_low = NULL;
	seg_high = NULL;

	if (size_low != 0)
		seg_low = memseg_alloc();

	seg_mid = memseg_alloc();

	if (size_high != 0)
		seg_high = memseg_alloc();

	/*
	 * All allocation done now.
	 */
	if (size_low != 0) {
		/* Low remainder: shares the front of seg's page_t array. */
		seg_low->pages = seg->pages;
		seg_low->epages = seg_low->pages + size_low;
		seg_low->pages_base = seg->pages_base;
		seg_low->pages_end = seg_low->pages_base + size_low;
		seg_low->next = seg_mid;
		seg_low->msegflags = seg->msegflags;
	}
	if (size_high != 0) {
		/* High remainder: shares the tail of seg's page_t array. */
		seg_high->pages = seg->epages - size_high;
		seg_high->epages = seg_high->pages + size_high;
		seg_high->pages_base = seg->pages_end - size_high;
		seg_high->pages_end = seg_high->pages_base + size_high;
		seg_high->next = seg->next;
		seg_high->msegflags = seg->msegflags;
	}

	/* Middle segment: exactly the [base, base + npgs) target span. */
	seg_mid->pages = seg->pages + size_low;
	seg_mid->pages_base = seg->pages_base + size_low;
	seg_mid->epages = seg->epages - size_high;
	seg_mid->pages_end = seg->pages_end - size_high;
	seg_mid->next = (seg_high != NULL) ? seg_high : seg->next;
	seg_mid->msegflags = seg->msegflags;

	/*
	 * Update hat_kpm specific info of all involved memsegs and
	 * allow hat_kpm specific global chain updates.
	 */
	hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high);

	/*
	 * At this point we have two equivalent memseg sub-chains,
	 * seg and seg_low/seg_mid/seg_high, which both chain on to
	 * the same place in the global chain. By re-writing the pointer
	 * in the previous element we switch atomically from using the old
	 * (seg) to the new.
	 */
	*segpp = (seg_low != NULL) ? seg_low : seg_mid;

	membar_enter();

	build_pfn_hash();
	memsegs_unlock(1);

	/*
	 * We leave the old segment, 'seg', intact as there may be
	 * references to it. Also, as the value of total_pages has not
	 * changed and the memsegs list is effectively the same when
	 * accessed via the old or the new pointer, we do not have to
	 * cause pageout_scanner() to re-evaluate its hand pointers.
	 *
	 * We currently do not re-use or reclaim the page_t memory.
	 * If we do, then this may have to change.
	 */

	/* Park the superseded memseg on the edit-junk list. */
	mutex_enter(&memseg_lists_lock);
	seg->lnext = memseg_edit_junk;
	memseg_edit_junk = seg;
	mutex_exit(&memseg_lists_lock);

	return (1);
}
32397c478bd9Sstevel@tonic-gate 
32407c478bd9Sstevel@tonic-gate /*
32417c478bd9Sstevel@tonic-gate  * The sfmmu hat layer (e.g.) accesses some parts of the memseg
32427c478bd9Sstevel@tonic-gate  * structure using physical addresses. Therefore a kmem_cache is
32437c478bd9Sstevel@tonic-gate  * used with KMC_NOHASH to avoid page crossings within a memseg
32447c478bd9Sstevel@tonic-gate  * structure. KMC_NOHASH requires that no external (outside of
32457c478bd9Sstevel@tonic-gate  * slab) information is allowed. This, in turn, implies that the
32467c478bd9Sstevel@tonic-gate  * cache's slabsize must be exactly a single page, since per-slab
32477c478bd9Sstevel@tonic-gate  * information (e.g. the freelist for the slab) is kept at the
32487c478bd9Sstevel@tonic-gate  * end of the slab, where it is easy to locate. Should be changed
32497c478bd9Sstevel@tonic-gate  * when a more obvious kmem_cache interface/flag will become
32507c478bd9Sstevel@tonic-gate  * available.
32517c478bd9Sstevel@tonic-gate  */
32527c478bd9Sstevel@tonic-gate void
mem_config_init()32537c478bd9Sstevel@tonic-gate mem_config_init()
32547c478bd9Sstevel@tonic-gate {
32557c478bd9Sstevel@tonic-gate 	memseg_cache = kmem_cache_create("memseg_cache", sizeof (struct memseg),
325673347c69Smb 	    0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
32577c478bd9Sstevel@tonic-gate }
32589853d9e8SJason Beloro 
32599853d9e8SJason Beloro struct memseg *
memseg_alloc()32609853d9e8SJason Beloro memseg_alloc()
32619853d9e8SJason Beloro {
32629853d9e8SJason Beloro 	struct memseg *seg;
32639853d9e8SJason Beloro 
32649853d9e8SJason Beloro 	seg = kmem_cache_alloc(memseg_cache, KM_SLEEP);
32659853d9e8SJason Beloro 	bzero(seg, sizeof (struct memseg));
32669853d9e8SJason Beloro 
32679853d9e8SJason Beloro 	return (seg);
32689853d9e8SJason Beloro }
32699853d9e8SJason Beloro 
32709853d9e8SJason Beloro /*
32719853d9e8SJason Beloro  * Return whether the page_t memory for this memseg
32729853d9e8SJason Beloro  * is included in the memseg itself.
32739853d9e8SJason Beloro  */
static int
memseg_includes_meta(struct memseg *seg)
{
	/*
	 * Returns the raw flag bit (nonzero, not necessarily 1) when the
	 * metadata is embedded; callers use the result as a boolean.
	 */
	return (seg->msegflags & MEMSEG_META_INCL);
}
32799853d9e8SJason Beloro 
32809853d9e8SJason Beloro pfn_t
memseg_get_start(struct memseg * seg)32819853d9e8SJason Beloro memseg_get_start(struct memseg *seg)
32829853d9e8SJason Beloro {
32839853d9e8SJason Beloro 	pfn_t		pt_start;
32849853d9e8SJason Beloro 
32859853d9e8SJason Beloro 	if (memseg_includes_meta(seg)) {
32869853d9e8SJason Beloro 		pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages);
32879853d9e8SJason Beloro 
32889853d9e8SJason Beloro 		/* Meta data is required to be at the beginning */
32899853d9e8SJason Beloro 		ASSERT(pt_start < seg->pages_base);
32909853d9e8SJason Beloro 	} else
32919853d9e8SJason Beloro 		pt_start = seg->pages_base;
32929853d9e8SJason Beloro 
32939853d9e8SJason Beloro 	return (pt_start);
32949853d9e8SJason Beloro }
32959853d9e8SJason Beloro 
32969853d9e8SJason Beloro /*
32979853d9e8SJason Beloro  * Invalidate memseg pointers in cpu private vm data caches.
32989853d9e8SJason Beloro  */
32999853d9e8SJason Beloro static void
memseg_cpu_vm_flush()33009853d9e8SJason Beloro memseg_cpu_vm_flush()
33019853d9e8SJason Beloro {
33029853d9e8SJason Beloro 	cpu_t *cp;
33039853d9e8SJason Beloro 	vm_cpu_data_t *vc;
33049853d9e8SJason Beloro 
33059853d9e8SJason Beloro 	mutex_enter(&cpu_lock);
33060ed5c46eSJosef 'Jeff' Sipek 	pause_cpus(NULL, NULL);
33079853d9e8SJason Beloro 
33089853d9e8SJason Beloro 	cp = cpu_list;
33099853d9e8SJason Beloro 	do {
33109853d9e8SJason Beloro 		vc = cp->cpu_vm_data;
33119853d9e8SJason Beloro 		vc->vc_pnum_memseg = NULL;
33129853d9e8SJason Beloro 		vc->vc_pnext_memseg = NULL;
33139853d9e8SJason Beloro 
33149853d9e8SJason Beloro 	} while ((cp = cp->cpu_next) != cpu_list);
33159853d9e8SJason Beloro 
33169853d9e8SJason Beloro 	start_cpus();
33179853d9e8SJason Beloro 	mutex_exit(&cpu_lock);
33189853d9e8SJason Beloro }
3319