17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5ee88d2b9Skchow * Common Development and Distribution License (the "License").
6ee88d2b9Skchow * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate /*
2256f33205SJonathan Adams * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
237c478bd9Sstevel@tonic-gate * Use is subject to license terms.
24*338664dfSAndy Fiddaman * Copyright 2017 Joyent, Inc.
257c478bd9Sstevel@tonic-gate */
267c478bd9Sstevel@tonic-gate
277c478bd9Sstevel@tonic-gate #include <sys/types.h>
287c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
297c478bd9Sstevel@tonic-gate #include <sys/vmem.h>
307c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
317c478bd9Sstevel@tonic-gate #include <sys/systm.h>
327c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> /* for page_freelist_coalesce() */
337c478bd9Sstevel@tonic-gate #include <sys/errno.h>
347c478bd9Sstevel@tonic-gate #include <sys/memnode.h>
357c478bd9Sstevel@tonic-gate #include <sys/memlist.h>
367c478bd9Sstevel@tonic-gate #include <sys/memlist_impl.h>
377c478bd9Sstevel@tonic-gate #include <sys/tuneable.h>
387c478bd9Sstevel@tonic-gate #include <sys/proc.h>
397c478bd9Sstevel@tonic-gate #include <sys/disp.h>
407c478bd9Sstevel@tonic-gate #include <sys/debug.h>
417c478bd9Sstevel@tonic-gate #include <sys/vm.h>
427c478bd9Sstevel@tonic-gate #include <sys/callb.h>
437c478bd9Sstevel@tonic-gate #include <sys/memlist_plat.h> /* for installed_top_size() */
447c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h> /* for CV_HAS_WAITERS() */
457c478bd9Sstevel@tonic-gate #include <sys/dumphdr.h> /* for dump_resize() */
467c478bd9Sstevel@tonic-gate #include <sys/atomic.h> /* for use in stats collection */
477c478bd9Sstevel@tonic-gate #include <sys/rwlock.h>
487c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
497c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
507c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h>
517c478bd9Sstevel@tonic-gate #include <vm/page.h>
52e21bae1bSkchow #include <vm/vm_dep.h>
537c478bd9Sstevel@tonic-gate #define SUNDDI_IMPL /* so sunddi.h will not redefine splx() et al */
547c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
557c478bd9Sstevel@tonic-gate #include <sys/mem_config.h>
567c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h>
577c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
587c478bd9Sstevel@tonic-gate #include <sys/ddi.h>
597c478bd9Sstevel@tonic-gate #include <sys/modctl.h>
607c478bd9Sstevel@tonic-gate
617c478bd9Sstevel@tonic-gate extern struct memlist *phys_avail;
627c478bd9Sstevel@tonic-gate
637c478bd9Sstevel@tonic-gate extern uint_t page_ctrs_adjust(int);
64af4c679fSSean McEnroe void page_ctrs_cleanup(void);
657c478bd9Sstevel@tonic-gate static void kphysm_setup_post_add(pgcnt_t);
667c478bd9Sstevel@tonic-gate static int kphysm_setup_pre_del(pgcnt_t);
677c478bd9Sstevel@tonic-gate static void kphysm_setup_post_del(pgcnt_t, int);
687c478bd9Sstevel@tonic-gate
697c478bd9Sstevel@tonic-gate static int kphysm_split_memseg(pfn_t base, pgcnt_t npgs);
707c478bd9Sstevel@tonic-gate
717c478bd9Sstevel@tonic-gate static int delspan_reserve(pfn_t, pgcnt_t);
727c478bd9Sstevel@tonic-gate static void delspan_unreserve(pfn_t, pgcnt_t);
737c478bd9Sstevel@tonic-gate
749853d9e8SJason Beloro kmutex_t memseg_lists_lock;
759853d9e8SJason Beloro struct memseg *memseg_va_avail;
769853d9e8SJason Beloro struct memseg *memseg_alloc(void);
777c478bd9Sstevel@tonic-gate static struct memseg *memseg_delete_junk;
787c478bd9Sstevel@tonic-gate static struct memseg *memseg_edit_junk;
797c478bd9Sstevel@tonic-gate void memseg_remap_init(void);
809853d9e8SJason Beloro static void memseg_remap_to_dummy(struct memseg *);
817c478bd9Sstevel@tonic-gate static void kphysm_addmem_error_undospan(pfn_t, pgcnt_t);
827c478bd9Sstevel@tonic-gate static struct memseg *memseg_reuse(pgcnt_t);
837c478bd9Sstevel@tonic-gate
847c478bd9Sstevel@tonic-gate static struct kmem_cache *memseg_cache;
857c478bd9Sstevel@tonic-gate
867c478bd9Sstevel@tonic-gate /*
879853d9e8SJason Beloro * Interfaces to manage externally allocated
889853d9e8SJason Beloro * page_t memory (metadata) for a memseg.
899853d9e8SJason Beloro */
909853d9e8SJason Beloro #pragma weak memseg_alloc_meta
919853d9e8SJason Beloro #pragma weak memseg_free_meta
929853d9e8SJason Beloro #pragma weak memseg_get_metapfn
939853d9e8SJason Beloro #pragma weak memseg_remap_meta
949853d9e8SJason Beloro
959853d9e8SJason Beloro extern int ppvm_enable;
969853d9e8SJason Beloro extern page_t *ppvm_base;
979853d9e8SJason Beloro extern int memseg_alloc_meta(pfn_t, pgcnt_t, void **, pgcnt_t *);
989853d9e8SJason Beloro extern void memseg_free_meta(void *, pgcnt_t);
999853d9e8SJason Beloro extern pfn_t memseg_get_metapfn(void *, pgcnt_t);
1009853d9e8SJason Beloro extern void memseg_remap_meta(struct memseg *);
1019853d9e8SJason Beloro static int memseg_is_dynamic(struct memseg *);
1029853d9e8SJason Beloro static int memseg_includes_meta(struct memseg *);
103af4c679fSSean McEnroe pfn_t memseg_get_start(struct memseg *);
1049853d9e8SJason Beloro static void memseg_cpu_vm_flush(void);
1059853d9e8SJason Beloro
1069853d9e8SJason Beloro int meta_alloc_enable;
1079853d9e8SJason Beloro
108a3114836SGerry Liu #ifdef DEBUG
109a3114836SGerry Liu static int memseg_debug;
110a3114836SGerry Liu #define MEMSEG_DEBUG(args...) if (memseg_debug) printf(args)
111a3114836SGerry Liu #else
112a3114836SGerry Liu #define MEMSEG_DEBUG(...)
113a3114836SGerry Liu #endif
114a3114836SGerry Liu
/*
 * Add a chunk of memory to the system.
 * base: starting PAGESIZE page of new memory.
 * npgs: length in PAGESIZE pages.
 *
 * Adding mem this way doesn't increase the size of the hash tables;
 * growing them would be too hard. This should be OK, but adding memory
 * dynamically most likely means more hash misses, since the tables will
 * be smaller than they otherwise would be.
 *
 * Returns KPHYSM_OK on success.  Error returns visible below:
 *   KPHYSM_ESPAN      - span overlaps a pending delete or already
 *                       installed memory.
 *   KPHYSM_ERESOURCE  - allocation/bookkeeping failure, or the span is
 *                       too small to hold its own page_t metadata.
 *   KPHYSM_ENOTVIABLE - span violates kpm large-page alignment rules.
 *   KPHYSM_EFAULT     - the new memory could not be accessed.
 */
int
kphysm_add_memory_dynamic(pfn_t base, pgcnt_t npgs)
{
	page_t *pp;
	page_t *opp, *oepp, *segpp;
	struct memseg *seg;
	uint64_t avmem;
	pfn_t pfn;
	pfn_t pt_base = base;		/* first pfn of the whole span */
	pgcnt_t tpgs = npgs;		/* total span size, incl. metadata */
	pgcnt_t metapgs = 0;		/* pages consumed by page_t metadata */
	int exhausted;			/* span too small to self-describe */
	pfn_t pnum;
	int mnode;
	caddr_t vaddr;
	int reuse;			/* re-using a retired memseg's VA? */
	int mlret;
	int rv;
	int flags;
	int meta_alloc = 0;		/* metadata from existing memory? */
	void *mapva;
	void *metabase = (void *)base;
	pgcnt_t nkpmpgs = 0;
	offset_t kpm_pages_off = 0;

	cmn_err(CE_CONT,
	    "?kphysm_add_memory_dynamic: adding %ldK at 0x%" PRIx64 "\n",
	    npgs << (PAGESHIFT - 10), (uint64_t)base << PAGESHIFT);

	/*
	 * Add this span in the delete list to prevent interactions.
	 */
	if (!delspan_reserve(base, npgs)) {
		return (KPHYSM_ESPAN);
	}
	/*
	 * Check to see if any of the memory span has been added
	 * by trying an add to the installed memory list. This
	 * forms the interlocking process for add.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	if (mlret == MEML_SPANOP_OK)
		installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();

	if (mlret != MEML_SPANOP_OK) {
		/* Undo the reservation before returning the mapped error. */
		if (mlret == MEML_SPANOP_EALLOC) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		} else if (mlret == MEML_SPANOP_ESPAN) {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ESPAN);
		} else {
			delspan_unreserve(pt_base, tpgs);
			return (KPHYSM_ERESOURCE);
		}
	}

	if (meta_alloc_enable) {
		/*
		 * Allocate the page_t's from existing memory;
		 * if that fails, allocate from the incoming memory.
		 */
		rv = memseg_alloc_meta(base, npgs, &metabase, &metapgs);
		if (rv == KPHYSM_OK) {
			ASSERT(metapgs);
			ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);
			meta_alloc = 1;
			/* Skip the carve-out-of-new-memory path below. */
			goto mapalloc;
		}
	}

	/*
	 * We store the page_t's for this new memory in the first
	 * few pages of the chunk. Here, we go and get'em ...
	 */

	/*
	 * The expression after the '-' gives the number of pages
	 * that will fit in the new memory based on a requirement
	 * of (PAGESIZE + sizeof (page_t)) bytes per page.
	 */
	metapgs = npgs - (((uint64_t)(npgs) << PAGESHIFT) /
	    (PAGESIZE + sizeof (page_t)));

	/* From here on, base/npgs describe only the usable (data) pages. */
	npgs -= metapgs;
	base += metapgs;

	ASSERT(btopr(npgs * sizeof (page_t)) <= metapgs);

	exhausted = (metapgs == 0 || npgs == 0);

	if (kpm_enable && !exhausted) {
		pgcnt_t start, end, nkpmpgs_prelim;
		size_t ptsz;

		/*
		 * A viable kpm large page mapping must not overlap two
		 * dynamic memsegs. Therefore the total size is checked
		 * to be at least kpm_pgsz and also whether start and end
		 * points are at least kpm_pgsz aligned.
		 */
		if (ptokpmp(tpgs) < 1 || pmodkpmp(pt_base) ||
		    pmodkpmp(base + npgs)) {

			kphysm_addmem_error_undospan(pt_base, tpgs);

			/*
			 * There is no specific error code for violating
			 * kpm granularity constraints.
			 */
			return (KPHYSM_ENOTVIABLE);
		}

		/*
		 * Re-derive metapgs: with kpm enabled the metadata must
		 * also hold a kpm_page_t per kpm large page, so the plain
		 * page_t-only estimate above is too small.
		 */
		start = kpmptop(ptokpmp(base));
		end = kpmptop(ptokpmp(base + npgs));
		nkpmpgs_prelim = ptokpmp(end - start);
		ptsz = npgs * sizeof (page_t);
		metapgs = btopr(ptsz + nkpmpgs_prelim * KPMPAGE_T_SZ);
		exhausted = (tpgs <= metapgs);
		if (!exhausted) {
			npgs = tpgs - metapgs;
			base = pt_base + metapgs;

			/* final nkpmpgs */
			start = kpmptop(ptokpmp(base));
			nkpmpgs = ptokpmp(end - start);
			kpm_pages_off = ptsz +
			    (nkpmpgs_prelim - nkpmpgs) * KPMPAGE_T_SZ;
		}
	}

	/*
	 * Is memory area supplied too small?
	 */
	if (exhausted) {
		kphysm_addmem_error_undospan(pt_base, tpgs);
		/*
		 * There is no specific error code for 'too small'.
		 */
		return (KPHYSM_ERESOURCE);
	}

mapalloc:
	/*
	 * We may re-use a previously allocated VA space for the page_ts
	 * eventually, but we need to initialize and lock the pages first.
	 */

	/*
	 * Get an address in the kernel address map, map
	 * the page_t pages and see if we can touch them.
	 */

	mapva = vmem_alloc(heap_arena, ptob(metapgs), VM_NOSLEEP);
	if (mapva == NULL) {
		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't allocate VA for page_ts");

		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}
	pp = mapva;

	/* Raise the high-water pfn now so the new pages are addressable. */
	if (physmax < (pt_base + tpgs))
		physmax = (pt_base + tpgs);

	/*
	 * In the remapping code we map one page at a time so we must do
	 * the same here to match mapping sizes.
	 */
	pfn = pt_base;
	vaddr = (caddr_t)pp;
	for (pnum = 0; pnum < metapgs; pnum++) {
		if (meta_alloc)
			pfn = memseg_get_metapfn(metabase, (pgcnt_t)pnum);
		hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
		    PROT_READ | PROT_WRITE,
		    HAT_LOAD | HAT_LOAD_LOCK | HAT_LOAD_NOCONSIST);
		pfn++;
		vaddr += ptob(1);
	}

	/*
	 * Probe the first word of the metadata area through the DDI so a
	 * fault on inaccessible memory is caught here rather than causing
	 * a panic later.
	 */
	if (ddi_peek32((dev_info_t *)NULL,
	    (int32_t *)pp, (int32_t *)0) == DDI_FAILURE) {

		cmn_err(CE_WARN, "kphysm_add_memory_dynamic:"
		    " Can't access pp array at 0x%p [phys 0x%lx]",
		    (void *)pp, pt_base);

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_EFAULT);
	}

	/*
	 * Add this memory slice to its memory node translation.
	 *
	 * Note that right now, each node may have only one slice;
	 * this may change with COD or in larger SSM systems with
	 * nested latency groups, so we must not assume that the
	 * node does not yet exist.
	 *
	 * Note that there may be multiple memory nodes associated with
	 * a single lgrp node on x86 systems.
	 */
	pnum = pt_base + tpgs - 1;
	mem_node_add_range(pt_base, pnum);

	/*
	 * Allocate or resize page counters as necessary to accommodate
	 * the increase in memory pages.
	 */
	mnode = PFN_2_MEM_NODE(pnum);
	PAGE_CTRS_ADJUST(base, npgs, rv);
	if (rv) {
		/* Roll back the mem_node range added just above. */
		mem_node_del_range(pt_base, pnum);

		/* cleanup the page counters */
		page_ctrs_cleanup();

		hat_unload(kas.a_hat, (caddr_t)pp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
		if (meta_alloc)
			memseg_free_meta(metabase, metapgs);
		kphysm_addmem_error_undospan(pt_base, tpgs);

		return (KPHYSM_ERESOURCE);
	}

	/*
	 * Update the phys_avail memory list.
	 * The phys_install list was done at the start.
	 */

	memlist_write_lock();

	mlret = memlist_add_span((uint64_t)(base) << PAGESHIFT,
	    (uint64_t)(npgs) << PAGESHIFT, &phys_avail);
	ASSERT(mlret == MEML_SPANOP_OK);

	memlist_write_unlock();

	/* See if we can find a memseg to re-use. */
	if (meta_alloc) {
		seg = memseg_reuse(0);
		reuse = 1;	/* force unmapping of temp mapva */
		flags = MEMSEG_DYNAMIC | MEMSEG_META_ALLOC;
		/*
		 * There is a 1:1 fixed relationship between a pfn
		 * and a page_t VA. The pfn is used as an index into
		 * the ppvm_base page_t table in order to calculate
		 * the page_t base address for a given pfn range.
		 */
		segpp = ppvm_base + base;
	} else {
		seg = memseg_reuse(metapgs);
		reuse = (seg != NULL);
		flags = MEMSEG_DYNAMIC | MEMSEG_META_INCL;
		segpp = pp;
	}

	/*
	 * Initialize the memseg structure representing this memory
	 * and add it to the existing list of memsegs. Do some basic
	 * initialization and add the memory to the system.
	 * In order to prevent lock deadlocks, the add_physmem()
	 * code is repeated here, but split into several stages.
	 *
	 * If a memseg is reused, invalidate memseg pointers in
	 * all cpu vm caches.  We need to do this since the check
	 *	pp >= seg->pages && pp < seg->epages
	 * used in various places is not atomic and so the first compare
	 * can happen before reuse and the second compare after reuse.
	 * The invalidation ensures that a memseg is not dereferenced while
	 * its page/pfn pointers are changing.
	 */
	if (seg == NULL) {
		seg = memseg_alloc();
		ASSERT(seg != NULL);
		seg->msegflags = flags;
		MEMSEG_DEBUG("memseg_get: alloc seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		seg->pages = segpp;
	} else {
		ASSERT(seg->msegflags == flags);
		ASSERT(seg->pages_base == seg->pages_end);
		MEMSEG_DEBUG("memseg_get: reuse seg=0x%p, pages=0x%p",
		    (void *)seg, (void *)(seg->pages));
		if (meta_alloc) {
			memseg_cpu_vm_flush();
			seg->pages = segpp;
		}
	}

	seg->epages = seg->pages + npgs;
	seg->pages_base = base;
	seg->pages_end = base + npgs;

	/*
	 * Initialize metadata. The page_ts are set to locked state
	 * ready to be freed.
	 */
	bzero((caddr_t)pp, ptob(metapgs));

	pfn = seg->pages_base;
	/* Save the original pp base in case we reuse a memseg. */
	opp = pp;
	oepp = opp + npgs;
	for (pp = opp; pp < oepp; pp++) {
		pp->p_pagenum = pfn;
		pfn++;
		page_iolock_init(pp);
		/* Acquire SE_EXCL so page_free() below is legal. */
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		pp->p_offset = (u_offset_t)-1;
	}

	if (reuse) {
		/* Remap our page_ts to the re-used memseg VA space. */
		pfn = pt_base;
		vaddr = (caddr_t)seg->pages;
		for (pnum = 0; pnum < metapgs; pnum++) {
			if (meta_alloc)
				pfn = memseg_get_metapfn(metabase,
				    (pgcnt_t)pnum);
			hat_devload(kas.a_hat, vaddr, ptob(1), pfn,
			    PROT_READ | PROT_WRITE,
			    HAT_LOAD_REMAP | HAT_LOAD | HAT_LOAD_NOCONSIST);
			pfn++;
			vaddr += ptob(1);
		}

		/* The temporary mapping is no longer needed. */
		hat_unload(kas.a_hat, (caddr_t)opp, ptob(metapgs),
		    HAT_UNLOAD_UNMAP|HAT_UNLOAD_UNLOCK);

		vmem_free(heap_arena, mapva, ptob(metapgs));
	}

	hat_kpm_addmem_mseg_update(seg, nkpmpgs, kpm_pages_off);

	memsegs_lock(1);

	/*
	 * The new memseg is inserted at the beginning of the list.
	 * Not only does this save searching for the tail, but in the
	 * case of a re-used memseg, it solves the problem of what
	 * happens if some process has still got a pointer to the
	 * memseg and follows the next pointer to continue traversing
	 * the memsegs list.
	 */

	hat_kpm_addmem_mseg_insert(seg);

	seg->next = memsegs;
	/*
	 * Ensure seg's fields (set above) are globally visible before the
	 * list head is updated, for lock-free readers walking memsegs.
	 */
	membar_producer();

	hat_kpm_addmem_memsegs_update(seg);

	memsegs = seg;

	build_pfn_hash();

	total_pages += npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock();

	memsegs_unlock(1);

	PLCNT_MODIFY_MAX(seg->pages_base, (long)npgs);

	/*
	 * Free the pages outside the lock to avoid locking loops.
	 */
	for (pp = seg->pages; pp < seg->epages; pp++) {
		page_free(pp, 1);
	}

	/*
	 * Now that we've updated the appropriate memory lists we
	 * need to reset a number of globals, since we've increased memory.
	 * Several have already been updated for us as noted above. The
	 * globals we're interested in at this point are:
	 *   physmax - highest page frame number.
	 *   physinstalled - number of pages currently installed (done earlier)
	 *   maxmem - max free pages in the system
	 *   physmem - physical memory pages available
	 *   availrmem - real memory available
	 */

	mutex_enter(&freemem_lock);
	maxmem += npgs;
	physmem += npgs;
	availrmem += npgs;
	availrmem_initial += npgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	page_freelist_coalesce_all(mnode);

	/* Run registered post-add callbacks for the new pages. */
	kphysm_setup_post_add(npgs);

	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_add_memory_dynamic: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/*
	 * Inform DDI of update
	 */
	ddi_mem_update((uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT);

	delspan_unreserve(pt_base, tpgs);

	return (KPHYSM_OK);		/* Successfully added system memory */
}
5767c478bd9Sstevel@tonic-gate
/*
 * There are various error conditions in kphysm_add_memory_dynamic()
 * which require a rollback of already changed global state.
 *
 * Deletes [pt_base, pt_base + tpgs) from the phys_install list
 * (reversing the memlist_add_span() done at the start of the add),
 * recomputes physmax/physinstalled, and releases the delspan
 * reservation taken on the span.
 */
static void
kphysm_addmem_error_undospan(pfn_t pt_base, pgcnt_t tpgs)
{
	int mlret;

	/* Unreserve memory span. */
	memlist_write_lock();

	mlret = memlist_delete_span(
	    (uint64_t)(pt_base) << PAGESHIFT,
	    (uint64_t)(tpgs) << PAGESHIFT, &phys_install);

	/* The caller added this exact span earlier, so delete cannot fail. */
	ASSERT(mlret == MEML_SPANOP_OK);
	phys_install_has_changed();
	installed_top_size(phys_install, &physmax, &physinstalled);

	memlist_write_unlock();
	delspan_unreserve(pt_base, tpgs);
}
6007c478bd9Sstevel@tonic-gate
6017c478bd9Sstevel@tonic-gate /*
6029853d9e8SJason Beloro * Only return an available memseg of exactly the right size
6039853d9e8SJason Beloro * if size is required.
6047c478bd9Sstevel@tonic-gate * When the meta data area has it's own virtual address space
6057c478bd9Sstevel@tonic-gate * we will need to manage this more carefully and do best fit
606da6c28aaSamw * allocations, possibly splitting an available area.
6077c478bd9Sstevel@tonic-gate */
6089853d9e8SJason Beloro struct memseg *
memseg_reuse(pgcnt_t metapgs)6097c478bd9Sstevel@tonic-gate memseg_reuse(pgcnt_t metapgs)
6107c478bd9Sstevel@tonic-gate {
6119853d9e8SJason Beloro int type;
6127c478bd9Sstevel@tonic-gate struct memseg **segpp, *seg;
6137c478bd9Sstevel@tonic-gate
6147c478bd9Sstevel@tonic-gate mutex_enter(&memseg_lists_lock);
6157c478bd9Sstevel@tonic-gate
6167c478bd9Sstevel@tonic-gate segpp = &memseg_va_avail;
6177c478bd9Sstevel@tonic-gate for (; (seg = *segpp) != NULL; segpp = &seg->lnext) {
6187c478bd9Sstevel@tonic-gate caddr_t end;
6197c478bd9Sstevel@tonic-gate
6209853d9e8SJason Beloro /*
6219853d9e8SJason Beloro * Make sure we are reusing the right segment type.
6229853d9e8SJason Beloro */
6239853d9e8SJason Beloro type = metapgs ? MEMSEG_META_INCL : MEMSEG_META_ALLOC;
6249853d9e8SJason Beloro
6259853d9e8SJason Beloro if ((seg->msegflags & (MEMSEG_META_INCL | MEMSEG_META_ALLOC))
6269853d9e8SJason Beloro != type)
6279853d9e8SJason Beloro continue;
6289853d9e8SJason Beloro
6297c478bd9Sstevel@tonic-gate if (kpm_enable)
6307c478bd9Sstevel@tonic-gate end = hat_kpm_mseg_reuse(seg);
6317c478bd9Sstevel@tonic-gate else
6327c478bd9Sstevel@tonic-gate end = (caddr_t)seg->epages;
6337c478bd9Sstevel@tonic-gate
6349853d9e8SJason Beloro /*
6359853d9e8SJason Beloro * Check for the right size if it is provided.
6369853d9e8SJason Beloro */
6379853d9e8SJason Beloro if (!metapgs || btopr(end - (caddr_t)seg->pages) == metapgs) {
6387c478bd9Sstevel@tonic-gate *segpp = seg->lnext;
6397c478bd9Sstevel@tonic-gate seg->lnext = NULL;
6407c478bd9Sstevel@tonic-gate break;
6417c478bd9Sstevel@tonic-gate }
6427c478bd9Sstevel@tonic-gate }
6437c478bd9Sstevel@tonic-gate mutex_exit(&memseg_lists_lock);
6447c478bd9Sstevel@tonic-gate
6457c478bd9Sstevel@tonic-gate return (seg);
6467c478bd9Sstevel@tonic-gate }
6477c478bd9Sstevel@tonic-gate
/* Generator for unique external handle values; see mem_handle below. */
static uint_t handle_gen;

/*
 * One contiguous span of physical pages being processed by an add or
 * delete operation.  Spans are kept on singly linked lists hanging off
 * a struct transit_list.
 */
struct memdelspan {
	struct memdelspan *mds_next;	/* next span on the list */
	pfn_t mds_base;			/* first page of the span */
	pgcnt_t mds_npgs;		/* number of pages in the span */
	uint_t *mds_bitmap;		/* per-page bitmap */
	uint_t *mds_bitmap_retired;	/* per-page bitmap of retired pages */
};

/* Number of bitmap bits per bitmap word. */
#define	NBPBMW		(sizeof (uint_t) * NBBY)
/* Bytes required for a bitmap covering all pages of span MDSP. */
#define	MDS_BITMAPBYTES(MDSP) \
	((((MDSP)->mds_npgs + NBPBMW - 1) / NBPBMW) * sizeof (uint_t))

/*
 * The set of spans belonging to a single in-transit operation.
 */
struct transit_list {
	struct transit_list	*trl_next;	/* next operation in transit */
	struct memdelspan	*trl_spans;	/* spans of this operation */
	int			trl_collect;
};

/*
 * Head of the global list of all in-transit operations,
 * protected by trh_lock.
 */
struct transit_list_head {
	kmutex_t		trh_lock;
	struct transit_list	*trh_head;
};

static struct transit_list_head transit_list_head;

/* Forward declarations; definitions appear later in this file. */
struct mem_handle;
static void transit_list_collect(struct mem_handle *, int);
static void transit_list_insert(struct transit_list *);
static void transit_list_remove(struct transit_list *);
6797c478bd9Sstevel@tonic-gate
#ifdef DEBUG
#define	MEM_DEL_STATS
#endif /* DEBUG */

#ifdef MEM_DEL_STATS
/* NOTE(review): appears to gate printing of the stats; confirm in users. */
static int mem_del_stat_print = 0;
/*
 * Counters describing the progress and outcomes of one memory delete
 * operation (pages freed, relocated, retired, various failure cases,
 * and timing in ticks).
 */
struct mem_del_stat {
	uint_t	nloop;
	uint_t	need_free;
	uint_t	free_loop;
	uint_t	free_low;
	uint_t	free_failed;
	uint_t	ncheck;
	uint_t	nopaget;
	uint_t	lockfail;
	uint_t	nfree;
	uint_t	nreloc;
	uint_t	nrelocfail;
	uint_t	already_done;
	uint_t	first_notfree;
	uint_t	npplocked;
	uint_t	nlockreloc;
	uint_t	nnorepl;
	uint_t	nmodreloc;
	uint_t	ndestroy;
	uint_t	nputpage;
	uint_t	nnoreclaim;
	uint_t	ndelay;
	uint_t	demotefail;
	uint64_t nticks_total;	/* total ticks; updated via MDSTAT_TOTAL */
	uint64_t nticks_pgrp;	/* per-page-group ticks; via MDSTAT_PGRP */
	uint_t	retired;
	uint_t	toxic;
	uint_t	failing;
	uint_t	modtoxic;
	uint_t	npplkdtoxic;
	uint_t	gptlmodfail;
	uint_t	gptllckfail;
};
/*
 * The stat values are only incremented in the delete thread
 * so no locking or atomic required.
 */
#define	MDSTAT_INCR(MHP, FLD)	(MHP)->mh_delstat.FLD++
#define	MDSTAT_TOTAL(MHP, ntck)	((MHP)->mh_delstat.nticks_total += (ntck))
#define	MDSTAT_PGRP(MHP, ntck)	((MHP)->mh_delstat.nticks_pgrp += (ntck))
static void mem_del_stat_print_func(struct mem_handle *);
#define	MDSTAT_PRINT(MHP)	mem_del_stat_print_func((MHP))
#else /* MEM_DEL_STATS */
/* Statistics disabled: all of the macros expand to nothing. */
#define	MDSTAT_INCR(MHP, FLD)
#define	MDSTAT_TOTAL(MHP, ntck)
#define	MDSTAT_PGRP(MHP, ntck)
#define	MDSTAT_PRINT(MHP)
#endif /* MEM_DEL_STATS */
7347c478bd9Sstevel@tonic-gate
/*
 * States of a delete handle.  New handles start in MHND_FREE and move
 * to MHND_INIT in kphysm_del_gethandle(); the remaining transitions are
 * managed by the kphysm_del_* functions.
 */
typedef enum mhnd_state {MHND_FREE = 0, MHND_INIT, MHND_STARTING,
	MHND_RUNNING, MHND_DONE, MHND_RELEASE} mhnd_state_t;

/*
 * mh_mutex must be taken to examine or change mh_exthandle and mh_state.
 * The mutex may not be required for other fields, dependent on mh_state.
 */
struct mem_handle {
	kmutex_t mh_mutex;
	struct mem_handle *mh_next;	/* link on mem_handle_head list */
	memhandle_t mh_exthandle;	/* handle value exposed to callers */
	mhnd_state_t mh_state;
	struct transit_list mh_transit;	/* spans covered by this operation */
	pgcnt_t mh_phys_pages;
	pgcnt_t mh_vm_pages;
	pgcnt_t mh_hold_todo;
	void (*mh_delete_complete)(void *, int error);	/* completion callback */
	void *mh_delete_complete_arg;	/* argument passed to the callback */
	volatile uint_t mh_cancel;
	volatile uint_t mh_dr_aio_cleanup_cancel;
	volatile uint_t mh_aio_cleanup_done;
	kcondvar_t mh_cv;
	kthread_id_t mh_thread_id;
	page_t *mh_deleted;	/* link through p_next */
#ifdef MEM_DEL_STATS
	struct mem_del_stat mh_delstat;
#endif /* MEM_DEL_STATS */
};

/* All handles, singly linked; protected by mem_handle_list_mutex. */
static struct mem_handle *mem_handle_head;
static kmutex_t mem_handle_list_mutex;
7667c478bd9Sstevel@tonic-gate
7677c478bd9Sstevel@tonic-gate static struct mem_handle *
kphysm_allocate_mem_handle()7687c478bd9Sstevel@tonic-gate kphysm_allocate_mem_handle()
7697c478bd9Sstevel@tonic-gate {
7707c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
7717c478bd9Sstevel@tonic-gate
7727c478bd9Sstevel@tonic-gate mhp = kmem_zalloc(sizeof (struct mem_handle), KM_SLEEP);
7737c478bd9Sstevel@tonic-gate mutex_init(&mhp->mh_mutex, NULL, MUTEX_DEFAULT, NULL);
7747c478bd9Sstevel@tonic-gate mutex_enter(&mem_handle_list_mutex);
7757c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
7767c478bd9Sstevel@tonic-gate /* handle_gen is protected by list mutex. */
7779f1a1f17Sdmick mhp->mh_exthandle = (memhandle_t)(uintptr_t)(++handle_gen);
7787c478bd9Sstevel@tonic-gate mhp->mh_next = mem_handle_head;
7797c478bd9Sstevel@tonic-gate mem_handle_head = mhp;
7807c478bd9Sstevel@tonic-gate mutex_exit(&mem_handle_list_mutex);
7817c478bd9Sstevel@tonic-gate
7827c478bd9Sstevel@tonic-gate return (mhp);
7837c478bd9Sstevel@tonic-gate }
7847c478bd9Sstevel@tonic-gate
/*
 * Destroy a handle that has returned to the FREE state: unlink it from
 * the global handle list and free its memory.  Entered with mh_mutex
 * held; the mutex is dropped and destroyed here.
 */
static void
kphysm_free_mem_handle(struct mem_handle *mhp)
{
	struct mem_handle **mhpp;

	ASSERT(mutex_owned(&mhp->mh_mutex));
	ASSERT(mhp->mh_state == MHND_FREE);
	/*
	 * Exit the mutex to preserve locking order. This is OK
	 * here as once in the FREE state, the handle cannot
	 * be found by a lookup.
	 */
	mutex_exit(&mhp->mh_mutex);

	mutex_enter(&mem_handle_list_mutex);
	/* Find the link that points at mhp. */
	mhpp = &mem_handle_head;
	while (*mhpp != NULL && *mhpp != mhp)
		mhpp = &(*mhpp)->mh_next;
	ASSERT(*mhpp == mhp);
	/*
	 * No need to lock the handle (mh_mutex) as only
	 * mh_next changing and this is the only thread that
	 * can be referencing mhp.
	 */
	*mhpp = mhp->mh_next;
	mutex_exit(&mem_handle_list_mutex);

	mutex_destroy(&mhp->mh_mutex);
	kmem_free(mhp, sizeof (struct mem_handle));
}
8157c478bd9Sstevel@tonic-gate
8167c478bd9Sstevel@tonic-gate /*
8177c478bd9Sstevel@tonic-gate * This function finds the internal mem_handle corresponding to an
8187c478bd9Sstevel@tonic-gate * external handle and returns it with the mh_mutex held.
8197c478bd9Sstevel@tonic-gate */
8207c478bd9Sstevel@tonic-gate static struct mem_handle *
kphysm_lookup_mem_handle(memhandle_t handle)8217c478bd9Sstevel@tonic-gate kphysm_lookup_mem_handle(memhandle_t handle)
8227c478bd9Sstevel@tonic-gate {
8237c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
8247c478bd9Sstevel@tonic-gate
8257c478bd9Sstevel@tonic-gate mutex_enter(&mem_handle_list_mutex);
8267c478bd9Sstevel@tonic-gate for (mhp = mem_handle_head; mhp != NULL; mhp = mhp->mh_next) {
8277c478bd9Sstevel@tonic-gate if (mhp->mh_exthandle == handle) {
8287c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
8297c478bd9Sstevel@tonic-gate /*
8307c478bd9Sstevel@tonic-gate * The state of the handle could have been changed
8317c478bd9Sstevel@tonic-gate * by kphysm_del_release() while waiting for mh_mutex.
8327c478bd9Sstevel@tonic-gate */
8337c478bd9Sstevel@tonic-gate if (mhp->mh_state == MHND_FREE) {
8347c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
8357c478bd9Sstevel@tonic-gate continue;
8367c478bd9Sstevel@tonic-gate }
8377c478bd9Sstevel@tonic-gate break;
8387c478bd9Sstevel@tonic-gate }
8397c478bd9Sstevel@tonic-gate }
8407c478bd9Sstevel@tonic-gate mutex_exit(&mem_handle_list_mutex);
8417c478bd9Sstevel@tonic-gate return (mhp);
8427c478bd9Sstevel@tonic-gate }
8437c478bd9Sstevel@tonic-gate
8447c478bd9Sstevel@tonic-gate int
kphysm_del_gethandle(memhandle_t * xmhp)8457c478bd9Sstevel@tonic-gate kphysm_del_gethandle(memhandle_t *xmhp)
8467c478bd9Sstevel@tonic-gate {
8477c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
8487c478bd9Sstevel@tonic-gate
8497c478bd9Sstevel@tonic-gate mhp = kphysm_allocate_mem_handle();
8507c478bd9Sstevel@tonic-gate /*
8517c478bd9Sstevel@tonic-gate * The handle is allocated using KM_SLEEP, so cannot fail.
8527c478bd9Sstevel@tonic-gate * If the implementation is changed, the correct error to return
8537c478bd9Sstevel@tonic-gate * here would be KPHYSM_ENOHANDLES.
8547c478bd9Sstevel@tonic-gate */
8557c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_FREE);
8567c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_INIT;
8577c478bd9Sstevel@tonic-gate *xmhp = mhp->mh_exthandle;
8587c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
8597c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
8607c478bd9Sstevel@tonic-gate }
8617c478bd9Sstevel@tonic-gate
8627c478bd9Sstevel@tonic-gate static int
overlapping(pfn_t b1,pgcnt_t l1,pfn_t b2,pgcnt_t l2)8637c478bd9Sstevel@tonic-gate overlapping(pfn_t b1, pgcnt_t l1, pfn_t b2, pgcnt_t l2)
8647c478bd9Sstevel@tonic-gate {
8657c478bd9Sstevel@tonic-gate pfn_t e1, e2;
8667c478bd9Sstevel@tonic-gate
8677c478bd9Sstevel@tonic-gate e1 = b1 + l1;
8687c478bd9Sstevel@tonic-gate e2 = b2 + l2;
8697c478bd9Sstevel@tonic-gate
8707c478bd9Sstevel@tonic-gate return (!(b2 >= e1 || b1 >= e2));
8717c478bd9Sstevel@tonic-gate }
8727c478bd9Sstevel@tonic-gate
/* Forward declaration; the definition appears later in this file. */
static int can_remove_pgs(pgcnt_t);
8747c478bd9Sstevel@tonic-gate
/*
 * Build a list of memdelspans covering the intersection of
 * [base, base + npgs) with the installed physical memory list
 * (phys_install).  Returns NULL if no installed memory lies in the
 * span.  Elements are allocated with KM_SLEEP; release the list with
 * free_delspans().
 */
static struct memdelspan *
span_to_install(pfn_t base, pgcnt_t npgs)
{
	struct memdelspan *mdsp;
	struct memdelspan *mdsp_new;
	uint64_t address, size, thislen;
	struct memlist *mlp;

	mdsp_new = NULL;

	address = (uint64_t)base << PAGESHIFT;
	size = (uint64_t)npgs << PAGESHIFT;
	while (size != 0) {
		memlist_read_lock();
		/*
		 * Find the first installed-memory entry that overlaps
		 * the remaining [address, address + size) span.
		 */
		for (mlp = phys_install; mlp != NULL; mlp = mlp->ml_next) {
			if (address >= (mlp->ml_address + mlp->ml_size))
				continue;
			if ((address + size) > mlp->ml_address)
				break;
		}
		if (mlp == NULL) {
			/* No installed memory in the rest of the span. */
			address += size;
			size = 0;
			thislen = 0;
		} else {
			/* Clip the front of the span to this entry. */
			if (address < mlp->ml_address) {
				size -= (mlp->ml_address - address);
				address = mlp->ml_address;
			}
			ASSERT(address >= mlp->ml_address);
			if ((address + size) >
			    (mlp->ml_address + mlp->ml_size)) {
				/* Span extends beyond this entry. */
				thislen =
				    mlp->ml_size - (address - mlp->ml_address);
			} else {
				thislen = size;
			}
		}
		memlist_read_unlock();
		/* TODO: phys_install could change now */
		if (thislen == 0)
			continue;
		/* Record this piece and advance past it. */
		mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
		mdsp->mds_base = btop(address);
		mdsp->mds_npgs = btop(thislen);
		mdsp->mds_next = mdsp_new;
		mdsp_new = mdsp;
		address += thislen;
		size -= thislen;
	}
	return (mdsp_new);
}
9277c478bd9Sstevel@tonic-gate
9287c478bd9Sstevel@tonic-gate static void
free_delspans(struct memdelspan * mdsp)9297c478bd9Sstevel@tonic-gate free_delspans(struct memdelspan *mdsp)
9307c478bd9Sstevel@tonic-gate {
9317c478bd9Sstevel@tonic-gate struct memdelspan *amdsp;
9327c478bd9Sstevel@tonic-gate
9337c478bd9Sstevel@tonic-gate while ((amdsp = mdsp) != NULL) {
9347c478bd9Sstevel@tonic-gate mdsp = amdsp->mds_next;
9357c478bd9Sstevel@tonic-gate kmem_free(amdsp, sizeof (struct memdelspan));
9367c478bd9Sstevel@tonic-gate }
9377c478bd9Sstevel@tonic-gate }
9387c478bd9Sstevel@tonic-gate
9397c478bd9Sstevel@tonic-gate /*
9407c478bd9Sstevel@tonic-gate * Concatenate lists. No list ordering is required.
9417c478bd9Sstevel@tonic-gate */
9427c478bd9Sstevel@tonic-gate
9437c478bd9Sstevel@tonic-gate static void
delspan_concat(struct memdelspan ** mdspp,struct memdelspan * mdsp)9447c478bd9Sstevel@tonic-gate delspan_concat(struct memdelspan **mdspp, struct memdelspan *mdsp)
9457c478bd9Sstevel@tonic-gate {
9467c478bd9Sstevel@tonic-gate while (*mdspp != NULL)
9477c478bd9Sstevel@tonic-gate mdspp = &(*mdspp)->mds_next;
9487c478bd9Sstevel@tonic-gate
9497c478bd9Sstevel@tonic-gate *mdspp = mdsp;
9507c478bd9Sstevel@tonic-gate }
9517c478bd9Sstevel@tonic-gate
/*
 * Given a new list of delspans, check there is no overlap with
 * all existing span activity (add or delete) and then concatenate
 * the new spans to the given list.
 * Return 1 for OK, 0 if overlapping.
 */
static int
delspan_insert(
	struct transit_list *my_tlp,
	struct memdelspan *mdsp_new)
{
	struct transit_list_head *trh;
	struct transit_list *tlp;
	int ret;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);
	ASSERT(mdsp_new != NULL);

	ret = 1;
	mutex_enter(&trh->trh_lock);
	/* ASSERT(my_tlp->trl_spans == NULL || tlp_in_list(trh, my_tlp)); */
	/*
	 * Compare each new span against every span of every in-transit
	 * operation; a single overlap rejects the whole insert.
	 */
	for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
		struct memdelspan *mdsp;

		for (mdsp = tlp->trl_spans; mdsp != NULL;
		    mdsp = mdsp->mds_next) {
			struct memdelspan *nmdsp;

			for (nmdsp = mdsp_new; nmdsp != NULL;
			    nmdsp = nmdsp->mds_next) {
				if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
				    nmdsp->mds_base, nmdsp->mds_npgs)) {
					ret = 0;
					goto done;
				}
			}
		}
	}
done:
	if (ret != 0) {
		/*
		 * No conflict: make my_tlp visible on the global list
		 * (only when gaining its first spans) and append the
		 * new spans.
		 */
		if (my_tlp->trl_spans == NULL)
			transit_list_insert(my_tlp);
		delspan_concat(&my_tlp->trl_spans, mdsp_new);
	}
	mutex_exit(&trh->trh_lock);
	return (ret);
}
10017c478bd9Sstevel@tonic-gate
/*
 * Remove spans from my_tlp's list.  A zero npgs means "remove all
 * spans"; otherwise only spans lying completely within
 * [base, base + npgs) are removed and freed.  Once the list holds no
 * more spans, the transit list itself is taken off the global list.
 */
static void
delspan_remove(
	struct transit_list *my_tlp,
	pfn_t base,
	pgcnt_t npgs)
{
	struct transit_list_head *trh;
	struct memdelspan *mdsp;

	trh = &transit_list_head;

	ASSERT(my_tlp != NULL);

	mutex_enter(&trh->trh_lock);
	if ((mdsp = my_tlp->trl_spans) != NULL) {
		if (npgs == 0) {
			/* Remove and free every span. */
			my_tlp->trl_spans = NULL;
			free_delspans(mdsp);
			transit_list_remove(my_tlp);
		} else {
			struct memdelspan **prv;

			/* Walk with a pointer to the previous link. */
			prv = &my_tlp->trl_spans;
			while (mdsp != NULL) {
				pfn_t p_end;

				p_end = mdsp->mds_base + mdsp->mds_npgs;
				if (mdsp->mds_base >= base &&
				    p_end <= (base + npgs)) {
					/* Fully contained: unlink and free. */
					*prv = mdsp->mds_next;
					mdsp->mds_next = NULL;
					free_delspans(mdsp);
				} else {
					prv = &mdsp->mds_next;
				}
				mdsp = *prv;
			}
			if (my_tlp->trl_spans == NULL)
				transit_list_remove(my_tlp);
		}
	}
	mutex_exit(&trh->trh_lock);
}
10457c478bd9Sstevel@tonic-gate
10467c478bd9Sstevel@tonic-gate /*
10477c478bd9Sstevel@tonic-gate * Reserve interface for add to stop delete before add finished.
10487c478bd9Sstevel@tonic-gate * This list is only accessed through the delspan_insert/remove
10497c478bd9Sstevel@tonic-gate * functions and so is fully protected by the mutex in struct transit_list.
10507c478bd9Sstevel@tonic-gate */
10517c478bd9Sstevel@tonic-gate
10527c478bd9Sstevel@tonic-gate static struct transit_list reserve_transit;
10537c478bd9Sstevel@tonic-gate
10547c478bd9Sstevel@tonic-gate static int
delspan_reserve(pfn_t base,pgcnt_t npgs)10557c478bd9Sstevel@tonic-gate delspan_reserve(pfn_t base, pgcnt_t npgs)
10567c478bd9Sstevel@tonic-gate {
10577c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
10587c478bd9Sstevel@tonic-gate int ret;
10597c478bd9Sstevel@tonic-gate
10607c478bd9Sstevel@tonic-gate mdsp = kmem_zalloc(sizeof (struct memdelspan), KM_SLEEP);
10617c478bd9Sstevel@tonic-gate mdsp->mds_base = base;
10627c478bd9Sstevel@tonic-gate mdsp->mds_npgs = npgs;
10637c478bd9Sstevel@tonic-gate if ((ret = delspan_insert(&reserve_transit, mdsp)) == 0) {
10647c478bd9Sstevel@tonic-gate free_delspans(mdsp);
10657c478bd9Sstevel@tonic-gate }
10667c478bd9Sstevel@tonic-gate return (ret);
10677c478bd9Sstevel@tonic-gate }
10687c478bd9Sstevel@tonic-gate
/*
 * Release a reservation previously taken with delspan_reserve().
 */
static void
delspan_unreserve(pfn_t base, pgcnt_t npgs)
{
	delspan_remove(&reserve_transit, base, npgs);
}
10747c478bd9Sstevel@tonic-gate
/*
 * Return whether memseg was created by kphysm_add_memory_dynamic().
 */
static int
memseg_is_dynamic(struct memseg *seg)
{
	/* Nonzero (the MEMSEG_DYNAMIC bit) for dynamically added segments. */
	return (seg->msegflags & MEMSEG_DYNAMIC);
}
10837c478bd9Sstevel@tonic-gate
10847c478bd9Sstevel@tonic-gate int
kphysm_del_span(memhandle_t handle,pfn_t base,pgcnt_t npgs)10857c478bd9Sstevel@tonic-gate kphysm_del_span(
10867c478bd9Sstevel@tonic-gate memhandle_t handle,
10877c478bd9Sstevel@tonic-gate pfn_t base,
10887c478bd9Sstevel@tonic-gate pgcnt_t npgs)
10897c478bd9Sstevel@tonic-gate {
10907c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
10917c478bd9Sstevel@tonic-gate struct memseg *seg;
10927c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
10937c478bd9Sstevel@tonic-gate struct memdelspan *mdsp_new;
10947c478bd9Sstevel@tonic-gate pgcnt_t phys_pages, vm_pages;
10957c478bd9Sstevel@tonic-gate pfn_t p_end;
10967c478bd9Sstevel@tonic-gate page_t *pp;
10977c478bd9Sstevel@tonic-gate int ret;
10987c478bd9Sstevel@tonic-gate
10997c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
11007c478bd9Sstevel@tonic-gate if (mhp == NULL) {
11017c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
11027c478bd9Sstevel@tonic-gate }
11037c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT) {
11047c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
11057c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
11067c478bd9Sstevel@tonic-gate }
11077c478bd9Sstevel@tonic-gate
11087c478bd9Sstevel@tonic-gate /*
11097c478bd9Sstevel@tonic-gate * Intersect the span with the installed memory list (phys_install).
11107c478bd9Sstevel@tonic-gate */
11117c478bd9Sstevel@tonic-gate mdsp_new = span_to_install(base, npgs);
11127c478bd9Sstevel@tonic-gate if (mdsp_new == NULL) {
11137c478bd9Sstevel@tonic-gate /*
11147c478bd9Sstevel@tonic-gate * No physical memory in this range. Is this an
11157c478bd9Sstevel@tonic-gate * error? If an attempt to start the delete is made
11167c478bd9Sstevel@tonic-gate * for OK returns from del_span such as this, start will
11177c478bd9Sstevel@tonic-gate * return an error.
11187c478bd9Sstevel@tonic-gate * Could return KPHYSM_ENOWORK.
11197c478bd9Sstevel@tonic-gate */
11207c478bd9Sstevel@tonic-gate /*
11217c478bd9Sstevel@tonic-gate * It is assumed that there are no error returns
11227c478bd9Sstevel@tonic-gate * from span_to_install() due to kmem_alloc failure.
11237c478bd9Sstevel@tonic-gate */
11247c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
11257c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
11267c478bd9Sstevel@tonic-gate }
11277c478bd9Sstevel@tonic-gate /*
11287c478bd9Sstevel@tonic-gate * Does this span overlap an existing span?
11297c478bd9Sstevel@tonic-gate */
11307c478bd9Sstevel@tonic-gate if (delspan_insert(&mhp->mh_transit, mdsp_new) == 0) {
11317c478bd9Sstevel@tonic-gate /*
11327c478bd9Sstevel@tonic-gate * Differentiate between already on list for this handle
11337c478bd9Sstevel@tonic-gate * (KPHYSM_EDUP) and busy elsewhere (KPHYSM_EBUSY).
11347c478bd9Sstevel@tonic-gate */
11357c478bd9Sstevel@tonic-gate ret = KPHYSM_EBUSY;
11367c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11377c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
11387c478bd9Sstevel@tonic-gate if (overlapping(mdsp->mds_base, mdsp->mds_npgs,
11397c478bd9Sstevel@tonic-gate base, npgs)) {
11407c478bd9Sstevel@tonic-gate ret = KPHYSM_EDUP;
11417c478bd9Sstevel@tonic-gate break;
11427c478bd9Sstevel@tonic-gate }
11437c478bd9Sstevel@tonic-gate }
11447c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
11457c478bd9Sstevel@tonic-gate free_delspans(mdsp_new);
11467c478bd9Sstevel@tonic-gate return (ret);
11477c478bd9Sstevel@tonic-gate }
11487c478bd9Sstevel@tonic-gate /*
11497c478bd9Sstevel@tonic-gate * At this point the spans in mdsp_new have been inserted into the
11507c478bd9Sstevel@tonic-gate * list of spans for this handle and thereby to the global list of
11517c478bd9Sstevel@tonic-gate * spans being processed. Each of these spans must now be checked
11527c478bd9Sstevel@tonic-gate * for relocatability. As a side-effect segments in the memseg list
11537c478bd9Sstevel@tonic-gate * may be split.
11547c478bd9Sstevel@tonic-gate *
11557c478bd9Sstevel@tonic-gate * Note that mdsp_new can no longer be used as it is now part of
11567c478bd9Sstevel@tonic-gate * a larger list. Select elements of this larger list based
11577c478bd9Sstevel@tonic-gate * on base and npgs.
11587c478bd9Sstevel@tonic-gate */
11597c478bd9Sstevel@tonic-gate restart:
11607c478bd9Sstevel@tonic-gate phys_pages = 0;
11617c478bd9Sstevel@tonic-gate vm_pages = 0;
11627c478bd9Sstevel@tonic-gate ret = KPHYSM_OK;
11637c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
11647c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
11657c478bd9Sstevel@tonic-gate pgcnt_t pages_checked;
11667c478bd9Sstevel@tonic-gate
11677c478bd9Sstevel@tonic-gate if (!overlapping(mdsp->mds_base, mdsp->mds_npgs, base, npgs)) {
11687c478bd9Sstevel@tonic-gate continue;
11697c478bd9Sstevel@tonic-gate }
11707c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs;
11717c478bd9Sstevel@tonic-gate /*
11727c478bd9Sstevel@tonic-gate * The pages_checked count is a hack. All pages should be
11737c478bd9Sstevel@tonic-gate * checked for relocatability. Those not covered by memsegs
11747c478bd9Sstevel@tonic-gate * should be tested with arch_kphysm_del_span_ok().
11757c478bd9Sstevel@tonic-gate */
11767c478bd9Sstevel@tonic-gate pages_checked = 0;
11777c478bd9Sstevel@tonic-gate for (seg = memsegs; seg; seg = seg->next) {
11787c478bd9Sstevel@tonic-gate pfn_t mseg_start;
11797c478bd9Sstevel@tonic-gate
11807c478bd9Sstevel@tonic-gate if (seg->pages_base >= p_end ||
11817c478bd9Sstevel@tonic-gate seg->pages_end <= mdsp->mds_base) {
11827c478bd9Sstevel@tonic-gate /* Span and memseg don't overlap. */
11837c478bd9Sstevel@tonic-gate continue;
11847c478bd9Sstevel@tonic-gate }
11859853d9e8SJason Beloro mseg_start = memseg_get_start(seg);
11867c478bd9Sstevel@tonic-gate /* Check that segment is suitable for delete. */
11879853d9e8SJason Beloro if (memseg_includes_meta(seg)) {
11887c478bd9Sstevel@tonic-gate /*
11899853d9e8SJason Beloro * Check that this segment is completely
11909853d9e8SJason Beloro * within the span.
11917c478bd9Sstevel@tonic-gate */
11927c478bd9Sstevel@tonic-gate if (mseg_start < mdsp->mds_base ||
11937c478bd9Sstevel@tonic-gate seg->pages_end > p_end) {
11947c478bd9Sstevel@tonic-gate ret = KPHYSM_EBUSY;
11957c478bd9Sstevel@tonic-gate break;
11967c478bd9Sstevel@tonic-gate }
11977c478bd9Sstevel@tonic-gate pages_checked += seg->pages_end - mseg_start;
11987c478bd9Sstevel@tonic-gate } else {
11997c478bd9Sstevel@tonic-gate /*
12007c478bd9Sstevel@tonic-gate * If this segment is larger than the span,
12017c478bd9Sstevel@tonic-gate * try to split it. After the split, it
12027c478bd9Sstevel@tonic-gate * is necessary to restart.
12037c478bd9Sstevel@tonic-gate */
12047c478bd9Sstevel@tonic-gate if (seg->pages_base < mdsp->mds_base ||
12057c478bd9Sstevel@tonic-gate seg->pages_end > p_end) {
12067c478bd9Sstevel@tonic-gate pfn_t abase;
12077c478bd9Sstevel@tonic-gate pgcnt_t anpgs;
12087c478bd9Sstevel@tonic-gate int s_ret;
12097c478bd9Sstevel@tonic-gate
12107c478bd9Sstevel@tonic-gate /* Split required. */
12117c478bd9Sstevel@tonic-gate if (mdsp->mds_base < seg->pages_base)
12127c478bd9Sstevel@tonic-gate abase = seg->pages_base;
12137c478bd9Sstevel@tonic-gate else
12147c478bd9Sstevel@tonic-gate abase = mdsp->mds_base;
12157c478bd9Sstevel@tonic-gate if (p_end > seg->pages_end)
12167c478bd9Sstevel@tonic-gate anpgs = seg->pages_end - abase;
12177c478bd9Sstevel@tonic-gate else
12187c478bd9Sstevel@tonic-gate anpgs = p_end - abase;
12197c478bd9Sstevel@tonic-gate s_ret = kphysm_split_memseg(abase,
12207c478bd9Sstevel@tonic-gate anpgs);
12217c478bd9Sstevel@tonic-gate if (s_ret == 0) {
12227c478bd9Sstevel@tonic-gate /* Split failed. */
12237c478bd9Sstevel@tonic-gate ret = KPHYSM_ERESOURCE;
12247c478bd9Sstevel@tonic-gate break;
12257c478bd9Sstevel@tonic-gate }
12267c478bd9Sstevel@tonic-gate goto restart;
12277c478bd9Sstevel@tonic-gate }
12287c478bd9Sstevel@tonic-gate pages_checked +=
12297c478bd9Sstevel@tonic-gate seg->pages_end - seg->pages_base;
12307c478bd9Sstevel@tonic-gate }
12317c478bd9Sstevel@tonic-gate /*
12327c478bd9Sstevel@tonic-gate * The memseg is wholly within the delete span.
12337c478bd9Sstevel@tonic-gate * The individual pages can now be checked.
12347c478bd9Sstevel@tonic-gate */
12357c478bd9Sstevel@tonic-gate /* Cage test. */
12367c478bd9Sstevel@tonic-gate for (pp = seg->pages; pp < seg->epages; pp++) {
12377c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) {
12387c478bd9Sstevel@tonic-gate ret = KPHYSM_ENONRELOC;
12397c478bd9Sstevel@tonic-gate break;
12407c478bd9Sstevel@tonic-gate }
12417c478bd9Sstevel@tonic-gate }
12427c478bd9Sstevel@tonic-gate if (ret != KPHYSM_OK) {
12437c478bd9Sstevel@tonic-gate break;
12447c478bd9Sstevel@tonic-gate }
12457c478bd9Sstevel@tonic-gate phys_pages += (seg->pages_end - mseg_start);
12467c478bd9Sstevel@tonic-gate vm_pages += MSEG_NPAGES(seg);
12477c478bd9Sstevel@tonic-gate }
12487c478bd9Sstevel@tonic-gate if (ret != KPHYSM_OK)
12497c478bd9Sstevel@tonic-gate break;
12507c478bd9Sstevel@tonic-gate if (pages_checked != mdsp->mds_npgs) {
12517c478bd9Sstevel@tonic-gate ret = KPHYSM_ENONRELOC;
12527c478bd9Sstevel@tonic-gate break;
12537c478bd9Sstevel@tonic-gate }
12547c478bd9Sstevel@tonic-gate }
12557c478bd9Sstevel@tonic-gate
12567c478bd9Sstevel@tonic-gate if (ret == KPHYSM_OK) {
12577c478bd9Sstevel@tonic-gate mhp->mh_phys_pages += phys_pages;
12587c478bd9Sstevel@tonic-gate mhp->mh_vm_pages += vm_pages;
12597c478bd9Sstevel@tonic-gate } else {
12607c478bd9Sstevel@tonic-gate /*
12617c478bd9Sstevel@tonic-gate * Keep holding the mh_mutex to prevent it going away.
12627c478bd9Sstevel@tonic-gate */
12637c478bd9Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, base, npgs);
12647c478bd9Sstevel@tonic-gate }
12657c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
12667c478bd9Sstevel@tonic-gate return (ret);
12677c478bd9Sstevel@tonic-gate }
12687c478bd9Sstevel@tonic-gate
/*
 * kphysm_del_span_query:
 * Non-destructive query of the physical span [base, base + npgs).
 * Fills in *mqp with:
 *   phys_pages           - pages in the span that physically exist,
 *   managed              - pages covered by a page_t (VM-managed),
 *   nonrelocatable       - pages that cannot be relocated,
 *   first/last_nonrelocatable - bounding pfns of the non-relocatable set.
 * Pages not covered by any memseg are classified via
 * arch_kphysm_del_span_ok(); pages inside a memseg are classified
 * per-page with PP_ISNORELOC().  Always returns KPHYSM_OK.
 */
12697c478bd9Sstevel@tonic-gate int
kphysm_del_span_query(pfn_t base,pgcnt_t npgs,memquery_t * mqp)12707c478bd9Sstevel@tonic-gate kphysm_del_span_query(
12717c478bd9Sstevel@tonic-gate pfn_t base,
12727c478bd9Sstevel@tonic-gate pgcnt_t npgs,
12737c478bd9Sstevel@tonic-gate memquery_t *mqp)
12747c478bd9Sstevel@tonic-gate {
12757c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
12767c478bd9Sstevel@tonic-gate struct memdelspan *mdsp_new;
12777c478bd9Sstevel@tonic-gate int done_first_nonreloc;
12787c478bd9Sstevel@tonic-gate
/* Start from a clean result; counters below only accumulate. */
12797c478bd9Sstevel@tonic-gate mqp->phys_pages = 0;
12807c478bd9Sstevel@tonic-gate mqp->managed = 0;
12817c478bd9Sstevel@tonic-gate mqp->nonrelocatable = 0;
12827c478bd9Sstevel@tonic-gate mqp->first_nonrelocatable = 0;
12837c478bd9Sstevel@tonic-gate mqp->last_nonrelocatable = 0;
12847c478bd9Sstevel@tonic-gate
/*
 * span_to_install() builds a private list describing which parts of
 * [base, base+npgs) actually exist; freed at the bottom of the function.
 */
12857c478bd9Sstevel@tonic-gate mdsp_new = span_to_install(base, npgs);
12867c478bd9Sstevel@tonic-gate /*
12877c478bd9Sstevel@tonic-gate * It is OK to proceed here if mdsp_new == NULL.
12887c478bd9Sstevel@tonic-gate */
12897c478bd9Sstevel@tonic-gate done_first_nonreloc = 0;
12907c478bd9Sstevel@tonic-gate for (mdsp = mdsp_new; mdsp != NULL; mdsp = mdsp->mds_next) {
12917c478bd9Sstevel@tonic-gate pfn_t sbase;
12927c478bd9Sstevel@tonic-gate pgcnt_t snpgs;
12937c478bd9Sstevel@tonic-gate
12947c478bd9Sstevel@tonic-gate mqp->phys_pages += mdsp->mds_npgs;
12957c478bd9Sstevel@tonic-gate sbase = mdsp->mds_base;
12967c478bd9Sstevel@tonic-gate snpgs = mdsp->mds_npgs;
/*
 * Walk the span in pieces: each iteration classifies either a gap
 * (no memseg) or the overlap with the lowest-addressed memseg, then
 * advances sbase/snpgs past what was accounted for.
 * NOTE(review): the global memsegs list is walked here with no visible
 * lock; presumably the caller's context makes that safe - confirm.
 */
12977c478bd9Sstevel@tonic-gate while (snpgs != 0) {
12987c478bd9Sstevel@tonic-gate struct memseg *lseg, *seg;
12997c478bd9Sstevel@tonic-gate pfn_t p_end;
13007c478bd9Sstevel@tonic-gate page_t *pp;
13017c478bd9Sstevel@tonic-gate pfn_t mseg_start;
13027c478bd9Sstevel@tonic-gate
13037c478bd9Sstevel@tonic-gate p_end = sbase + snpgs;
13047c478bd9Sstevel@tonic-gate /*
13057c478bd9Sstevel@tonic-gate * Find the lowest addressed memseg that starts
13067c478bd9Sstevel@tonic-gate * after sbase and account for it.
13077c478bd9Sstevel@tonic-gate * This is to catch dynamic memsegs whose start
13087c478bd9Sstevel@tonic-gate * is hidden.
13097c478bd9Sstevel@tonic-gate */
13107c478bd9Sstevel@tonic-gate seg = NULL;
13117c478bd9Sstevel@tonic-gate for (lseg = memsegs; lseg != NULL; lseg = lseg->next) {
13127c478bd9Sstevel@tonic-gate if ((lseg->pages_base >= sbase) ||
13137c478bd9Sstevel@tonic-gate (lseg->pages_base < p_end &&
13147c478bd9Sstevel@tonic-gate lseg->pages_end > sbase)) {
13157c478bd9Sstevel@tonic-gate if (seg == NULL ||
13167c478bd9Sstevel@tonic-gate seg->pages_base > lseg->pages_base)
13177c478bd9Sstevel@tonic-gate seg = lseg;
13187c478bd9Sstevel@tonic-gate }
13197c478bd9Sstevel@tonic-gate }
13207c478bd9Sstevel@tonic-gate if (seg != NULL) {
13217c478bd9Sstevel9853d9e8SJason Beloro mseg_start = memseg_get_start(seg);
13227c478bd9Sstevel@tonic-gate /*
13237c478bd9Sstevel@tonic-gate * Now have the full extent of the memseg so
13247c478bd9Sstevel@tonic-gate * do the range check.
13257c478bd9Sstevel@tonic-gate */
13267c478bd9Sstevel@tonic-gate if (mseg_start >= p_end ||
13277c478bd9Sstevel@tonic-gate seg->pages_end <= sbase) {
13287c478bd9Sstevel@tonic-gate /* Span does not overlap memseg. */
13297c478bd9Sstevel@tonic-gate seg = NULL;
13307c478bd9Sstevel@tonic-gate }
13317c478bd9Sstevel@tonic-gate }
13327c478bd9Sstevel@tonic-gate /*
13337c478bd9Sstevel@tonic-gate * Account for gap either before the segment if
13347c478bd9Sstevel@tonic-gate * there is one or to the end of the span.
13357c478bd9Sstevel@tonic-gate */
13367c478bd9Sstevel@tonic-gate if (seg == NULL || mseg_start > sbase) {
13377c478bd9Sstevel@tonic-gate pfn_t a_end;
13387c478bd9Sstevel@tonic-gate
13397c478bd9Sstevel@tonic-gate a_end = (seg == NULL) ? p_end : mseg_start;
13407c478bd9Sstevel@tonic-gate /*
13417c478bd9Sstevel@tonic-gate * Check with arch layer for relocatability.
13427c478bd9Sstevel@tonic-gate */
13437c478bd9Sstevel@tonic-gate if (arch_kphysm_del_span_ok(sbase,
13447c478bd9Sstevel@tonic-gate (a_end - sbase))) {
13457c478bd9Sstevel@tonic-gate /*
13467c478bd9Sstevel@tonic-gate * No non-relocatble pages in this
13477c478bd9Sstevel@tonic-gate * area, avoid the fine-grained
13487c478bd9Sstevel@tonic-gate * test.
13497c478bd9Sstevel@tonic-gate */
13507c478bd9Sstevel@tonic-gate snpgs -= (a_end - sbase);
13517c478bd9Sstevel@tonic-gate sbase = a_end;
13527c478bd9Sstevel@tonic-gate }
/*
 * If the whole-gap test above succeeded, sbase == a_end and this
 * per-page loop is skipped; otherwise each pfn in the gap is
 * tested individually.
 */
13537c478bd9Sstevel@tonic-gate while (sbase < a_end) {
13547c478bd9Sstevel@tonic-gate if (!arch_kphysm_del_span_ok(sbase,
13557c478bd9Sstevel@tonic-gate 1)) {
13567c478bd9Sstevel@tonic-gate mqp->nonrelocatable++;
13577c478bd9Sstevel@tonic-gate if (!done_first_nonreloc) {
13587c478bd9Sstevel@tonic-gate mqp->
13597c478bd9Sstevel@tonic-gate first_nonrelocatable
13607c478bd9Sstevel@tonic-gate = sbase;
13617c478bd9Sstevel@tonic-gate done_first_nonreloc = 1;
13627c478bd9Sstevel@tonic-gate }
13637c478bd9Sstevel@tonic-gate mqp->last_nonrelocatable =
13647c478bd9Sstevel@tonic-gate sbase;
13657c478bd9Sstevel@tonic-gate }
13667c478bd9Sstevel@tonic-gate sbase++;
13677c478bd9Sstevel@tonic-gate snpgs--;
13687c478bd9Sstevel@tonic-gate }
13697c478bd9Sstevel@tonic-gate }
13707c478bd9Sstevel@tonic-gate if (seg != NULL) {
13717c478bd9Sstevel@tonic-gate ASSERT(mseg_start <= sbase);
13727c478bd9Sstevel@tonic-gate if (seg->pages_base != mseg_start &&
13737c478bd9Sstevel@tonic-gate seg->pages_base > sbase) {
13747c478bd9Sstevel@tonic-gate pgcnt_t skip_pgs;
13757c478bd9Sstevel@tonic-gate
13767c478bd9Sstevel@tonic-gate /*
13777c478bd9Sstevel@tonic-gate * Skip the page_t area of a
13787c478bd9Sstevel@tonic-gate * dynamic memseg.
13797c478bd9Sstevel@tonic-gate */
13807c478bd9Sstevel@tonic-gate skip_pgs = seg->pages_base - sbase;
13817c478bd9Sstevel@tonic-gate if (snpgs <= skip_pgs) {
13827c478bd9Sstevel@tonic-gate sbase += snpgs;
13837c478bd9Sstevel@tonic-gate snpgs = 0;
13847c478bd9Sstevel@tonic-gate continue;
13857c478bd9Sstevel@tonic-gate }
13867c478bd9Sstevel@tonic-gate snpgs -= skip_pgs;
13877c478bd9Sstevel@tonic-gate sbase += skip_pgs;
13887c478bd9Sstevel@tonic-gate }
13897c478bd9Sstevel@tonic-gate ASSERT(snpgs != 0);
13907c478bd9Sstevel@tonic-gate ASSERT(seg->pages_base <= sbase);
13917c478bd9Sstevel@tonic-gate /*
13927c478bd9Sstevel@tonic-gate * The individual pages can now be checked.
13937c478bd9Sstevel@tonic-gate */
13947c478bd9Sstevel@tonic-gate for (pp = seg->pages +
13957c478bd9Sstevel@tonic-gate (sbase - seg->pages_base);
13967c478bd9Sstevel@tonic-gate snpgs != 0 && pp < seg->epages; pp++) {
13977c478bd9Sstevel@tonic-gate mqp->managed++;
13987c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) {
13997c478bd9Sstevel@tonic-gate mqp->nonrelocatable++;
14007c478bd9Sstevel@tonic-gate if (!done_first_nonreloc) {
14017c478bd9Sstevel@tonic-gate mqp->
14027c478bd9Sstevel@tonic-gate first_nonrelocatable
14037c478bd9Sstevel@tonic-gate = sbase;
14047c478bd9Sstevel@tonic-gate done_first_nonreloc = 1;
14057c478bd9Sstevel@tonic-gate }
14067c478bd9Sstevel@tonic-gate mqp->last_nonrelocatable =
14077c478bd9Sstevel@tonic-gate sbase;
14087c478bd9Sstevel@tonic-gate }
14097c478bd9Sstevel@tonic-gate sbase++;
14107c478bd9Sstevel@tonic-gate snpgs--;
14117c478bd9Sstevel@tonic-gate }
14127c478bd9Sstevel@tonic-gate }
14137c478bd9Sstevel@tonic-gate }
14147c478bd9Sstevel@tonic-gate }
14157c478bd9Sstevel@tonic-gate
/* Release the temporary span list built by span_to_install(). */
14167c478bd9Sstevel@tonic-gate free_delspans(mdsp_new);
14177c478bd9Sstevel@tonic-gate
14187c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
14197c478bd9Sstevel@tonic-gate }
14207c478bd9Sstevel@tonic-gate
14217c478bd9Sstevel@tonic-gate /*
14227c478bd9Sstevel@tonic-gate * This release function can be called at any stage as follows:
14237c478bd9Sstevel@tonic-gate * _gethandle only called
14247c478bd9Sstevel@tonic-gate * _span(s) only called
14257c478bd9Sstevel@tonic-gate * _start called but failed
14267c478bd9Sstevel@tonic-gate * delete thread exited
14277c478bd9Sstevel@tonic-gate */
/*
 * kphysm_del_release:
 * Release a delete handle obtained from kphysm_del_gethandle().
 * Only legal in states MHND_INIT and MHND_DONE; returns
 * KPHYSM_ENOTFINISHED while a delete is starting/running,
 * KPHYSM_ESEQUENCE if a release is already in progress, and
 * KPHYSM_EHANDLE for a bad/free/corrupt handle.
 * NOTE(review): every path below does mutex_exit(&mhp->mh_mutex)
 * without a matching mutex_enter, so kphysm_lookup_mem_handle()
 * evidently returns with mh_mutex held - confirm against its definition.
 */
14287c478bd9Sstevel@tonic-gate int
kphysm_del_release(memhandle_t handle)14297c478bd9Sstevel@tonic-gate kphysm_del_release(memhandle_t handle)
14307c478bd9Sstevel@tonic-gate {
14317c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
14327c478bd9Sstevel@tonic-gate
14337c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
14347c478bd9Sstevel@tonic-gate if (mhp == NULL) {
14357c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
14367c478bd9Sstevel@tonic-gate }
/* Validate that the handle's state machine permits a release. */
14377c478bd9Sstevel@tonic-gate switch (mhp->mh_state) {
14387c478bd9Sstevel@tonic-gate case MHND_STARTING:
14397c478bd9Sstevel@tonic-gate case MHND_RUNNING:
14407c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14417c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTFINISHED);
14427c478bd9Sstevel@tonic-gate case MHND_FREE:
/* A FREE handle should never be returned by the lookup. */
14437c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE);
14447c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14457c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
14467c478bd9Sstevel@tonic-gate case MHND_INIT:
14477c478bd9Sstevel@tonic-gate break;
14487c478bd9Sstevel@tonic-gate case MHND_DONE:
14497c478bd9Sstevel@tonic-gate break;
14507c478bd9Sstevel@tonic-gate case MHND_RELEASE:
14517c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14527c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
14537c478bd9Sstevel@tonic-gate default:
14547c478bd9Sstevel@tonic-gate #ifdef DEBUG
14557c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_release(0x%p) state corrupt %d",
14567c478bd9Sstevel@tonic-gate (void *)mhp, mhp->mh_state);
14577c478bd9Sstevel@tonic-gate #endif /* DEBUG */
14587c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14597c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
14607c478bd9Sstevel@tonic-gate }
14617c478bd9Sstevel@tonic-gate /*
14627c478bd9Sstevel@tonic-gate * Set state so that we can wait if necessary.
14637c478bd9Sstevel@tonic-gate * Also this means that we have read/write access to all
14647c478bd9Sstevel@tonic-gate * fields except mh_exthandle and mh_state.
14657c478bd9Sstevel@tonic-gate */
14667c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_RELEASE;
14677c478bd9Sstevel@tonic-gate /*
14687c478bd9Sstevel@tonic-gate * The mem_handle cannot be de-allocated by any other operation
14697c478bd9Sstevel@tonic-gate * now, so no need to hold mh_mutex.
14707c478bd9Sstevel@tonic-gate */
14717c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
14727c478bd9Sstevel@tonic-gate
/* Drop all spans and reset the handle's bookkeeping to zero. */
14737c478bd9Sstevel@tonic-gate delspan_remove(&mhp->mh_transit, 0, 0);
14747c478bd9Sstevel@tonic-gate mhp->mh_phys_pages = 0;
14757c478bd9Sstevel@tonic-gate mhp->mh_vm_pages = 0;
14767c478bd9Sstevel@tonic-gate mhp->mh_hold_todo = 0;
14777c478bd9Sstevel@tonic-gate mhp->mh_delete_complete = NULL;
14787c478bd9Sstevel@tonic-gate mhp->mh_delete_complete_arg = NULL;
14797c478bd9Sstevel@tonic-gate mhp->mh_cancel = 0;
14807c478bd9Sstevel@tonic-gate
14817c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
14827c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_RELEASE);
14837c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_FREE;
14847c478bd9Sstevel@tonic-gate
/* NOTE(review): kphysm_free_mem_handle() presumably drops mh_mutex. */
14857c478bd9Sstevel@tonic-gate kphysm_free_mem_handle(mhp);
14867c478bd9Sstevel@tonic-gate
14877c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
14887c478bd9Sstevel@tonic-gate }
14897c478bd9Sstevel@tonic-gate
14907c478bd9Sstevel@tonic-gate /*
14917c478bd9Sstevel@tonic-gate * This cancel function can only be called with the thread running.
14927c478bd9Sstevel@tonic-gate */
/*
 * kphysm_del_cancel:
 * Request cancellation of an in-progress memory delete.  Valid only in
 * MHND_STARTING or MHND_RUNNING (else KPHYSM_ENOTRUNNING).  Sets
 * mh_cancel to KPHYSM_ECANCELLED exactly once and signals the delete
 * thread; the cancel takes effect asynchronously.
 */
14937c478bd9Sstevel@tonic-gate int
kphysm_del_cancel(memhandle_t handle)14947c478bd9Sstevel@tonic-gate kphysm_del_cancel(memhandle_t handle)
14957c478bd9Sstevel@tonic-gate {
14967c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
14977c478bd9Sstevel@tonic-gate
/* Lookup returns with mh_mutex held (every exit below drops it). */
14987c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
14997c478bd9Sstevel@tonic-gate if (mhp == NULL) {
15007c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
15017c478bd9Sstevel@tonic-gate }
15027c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_STARTING && mhp->mh_state != MHND_RUNNING) {
15037c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15047c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING);
15057c478bd9Sstevel@tonic-gate }
15067c478bd9Sstevel@tonic-gate /*
15077c478bd9Sstevel@tonic-gate * Set the cancel flag and wake the delete thread up.
15087c478bd9Sstevel@tonic-gate * The thread may be waiting on I/O, so the effect of the cancel
15097c478bd9Sstevel@tonic-gate * may be delayed.
15107c478bd9Sstevel@tonic-gate */
15117c478bd9Sstevel@tonic-gate if (mhp->mh_cancel == 0) {
15127c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ECANCELLED;
15137c478bd9Sstevel@tonic-gate cv_signal(&mhp->mh_cv);
15147c478bd9Sstevel@tonic-gate }
15157c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15167c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
15177c478bd9Sstevel@tonic-gate }
15187c478bd9Sstevel@tonic-gate
/*
 * kphysm_del_status:
 * Snapshot progress of a delete into *mdstp: total physical pages,
 * VM-managed pages, and pages already collected (mh_vm_pages minus
 * the outstanding mh_hold_todo count).  Permitted in MHND_INIT too,
 * so status can be displayed before the delete is started; otherwise
 * requires STARTING/RUNNING (else KPHYSM_ENOTRUNNING).
 */
15197c478bd9Sstevel@tonic-gate int
kphysm_del_status(memhandle_t handle,memdelstat_t * mdstp)15207c478bd9Sstevel@tonic-gate kphysm_del_status(
15217c478bd9Sstevel@tonic-gate memhandle_t handle,
15227c478bd9Sstevel@tonic-gate memdelstat_t *mdstp)
15237c478bd9Sstevel@tonic-gate {
15247c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
15257c478bd9Sstevel@tonic-gate
15267c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
15277c478bd9Sstevel@tonic-gate if (mhp == NULL) {
15287c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
15297c478bd9Sstevel@tonic-gate }
15307c478bd9Sstevel@tonic-gate /*
15317c478bd9Sstevel@tonic-gate * Calling kphysm_del_status() is allowed before the delete
15327c478bd9Sstevel@tonic-gate * is started to allow for status display.
15337c478bd9Sstevel@tonic-gate */
15347c478bd9Sstevel@tonic-gate if (mhp->mh_state != MHND_INIT && mhp->mh_state != MHND_STARTING &&
15357c478bd9Sstevel@tonic-gate mhp->mh_state != MHND_RUNNING) {
15367c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15377c478bd9Sstevel@tonic-gate return (KPHYSM_ENOTRUNNING);
15387c478bd9Sstevel@tonic-gate }
/* Copy out the counters while mh_mutex is still held. */
15397c478bd9Sstevel@tonic-gate mdstp->phys_pages = mhp->mh_phys_pages;
15407c478bd9Sstevel@tonic-gate mdstp->managed = mhp->mh_vm_pages;
15417c478bd9Sstevel@tonic-gate mdstp->collected = mhp->mh_vm_pages - mhp->mh_hold_todo;
15427c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
15437c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
15447c478bd9Sstevel@tonic-gate }
15457c478bd9Sstevel@tonic-gate
/*
 * Extra margin of pages (beyond tune.t_minarmem) that must remain in
 * availrmem for a delete to proceed; tunable, see can_remove_pgs().
 */
15467c478bd9Sstevel@tonic-gate static int mem_delete_additional_pages = 100;
15477c478bd9Sstevel@tonic-gate
/*
 * can_remove_pgs:
 * Return non-zero if npgs pages can be removed while still leaving
 * availrmem above tune.t_minarmem plus the mem_delete_additional_pages
 * safety margin.  Caller must hold freemem_lock (see get_availrmem()).
 */
15487c478bd9Sstevel@tonic-gate static int
can_remove_pgs(pgcnt_t npgs)15497c478bd9Sstevel@tonic-gate can_remove_pgs(pgcnt_t npgs)
15507c478bd9Sstevel@tonic-gate {
15517c478bd9Sstevel@tonic-gate /*
15527c478bd9Sstevel@tonic-gate * If all pageable pages were paged out, freemem would
15537c478bd9Sstevel@tonic-gate * equal availrmem. There is a minimum requirement for
15547c478bd9Sstevel@tonic-gate * availrmem.
15557c478bd9Sstevel@tonic-gate */
15567c478bd9Sstevel@tonic-gate if ((availrmem - (tune.t_minarmem + mem_delete_additional_pages))
15577c478bd9Sstevel@tonic-gate < npgs)
15587c478bd9Sstevel@tonic-gate return (0);
15597c478bd9Sstevel@tonic-gate /* TODO: check swap space, etc. */
15607c478bd9Sstevel@tonic-gate return (1);
15617c478bd9Sstevel@tonic-gate }
15627c478bd9Sstevel@tonic-gate
/*
 * get_availrmem:
 * Atomically (under freemem_lock) check that npgs pages can be removed
 * and, if so, debit them from availrmem.  Returns the can_remove_pgs()
 * verdict; on success the caller owns the reservation and must return
 * it via put_availrmem() if the delete does not complete.
 */
15637c478bd9Sstevel@tonic-gate static int
get_availrmem(pgcnt_t npgs)15647c478bd9Sstevel@tonic-gate get_availrmem(pgcnt_t npgs)
15657c478bd9Sstevel@tonic-gate {
15667c478bd9Sstevel@tonic-gate int ret;
15677c478bd9Sstevel@tonic-gate
15687c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock);
15697c478bd9Sstevel@tonic-gate ret = can_remove_pgs(npgs);
15707c478bd9Sstevel@tonic-gate if (ret != 0)
15717c478bd9Sstevel@tonic-gate availrmem -= npgs;
15727c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock);
15737c478bd9Sstevel@tonic-gate return (ret);
15747c478bd9Sstevel@tonic-gate }
15757c478bd9Sstevel@tonic-gate
/*
 * put_availrmem:
 * Return npgs previously reserved by get_availrmem() to availrmem,
 * under freemem_lock.
 */
15767c478bd9Sstevel@tonic-gate static void
put_availrmem(pgcnt_t npgs)15777c478bd9Sstevel@tonic-gate put_availrmem(pgcnt_t npgs)
15787c478bd9Sstevel@tonic-gate {
15797c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock);
15807c478bd9Sstevel@tonic-gate availrmem += npgs;
15817c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock);
15827c478bd9Sstevel@tonic-gate }
15837c478bd9Sstevel@tonic-gate
/*
 * freemem_incr: batch size (pages) requested per iteration of
 * delthr_get_freemem().  The *_WAIT_TICKS macros round hz up so the
 * waits are a fraction of a second: 1/DEL_FREE_WAIT_FRAC s when waiting
 * for free memory, 1/DEL_BUSY_WAIT_FRAC s when retrying busy pages.
 */
15847c478bd9Sstevel@tonic-gate #define FREEMEM_INCR 100
15857c478bd9Sstevel@tonic-gate static pgcnt_t freemem_incr = FREEMEM_INCR;
15867c478bd9Sstevel@tonic-gate #define DEL_FREE_WAIT_FRAC 4
15877c478bd9Sstevel@tonic-gate #define DEL_FREE_WAIT_TICKS ((hz+DEL_FREE_WAIT_FRAC-1)/DEL_FREE_WAIT_FRAC)
15887c478bd9Sstevel@tonic-gate
15897c478bd9Sstevel@tonic-gate #define DEL_BUSY_WAIT_FRAC 20
15907c478bd9Sstevel@tonic-gate #define DEL_BUSY_WAIT_TICKS ((hz+DEL_BUSY_WAIT_FRAC-1)/DEL_BUSY_WAIT_FRAC)
15917c478bd9Sstevel@tonic-gate
/* Forward declarations for the delete thread's helpers (defined below). */
15927c478bd9Sstevel@tonic-gate static void kphysm_del_cleanup(struct mem_handle *);
15937c478bd9Sstevel@tonic-gate
15947c478bd9Sstevel@tonic-gate static void page_delete_collect(page_t *, struct mem_handle *);
15957c478bd9Sstevel@tonic-gate
/*
 * delthr_get_freemem:
 * Reserve up to freemem_incr (capped at mh_hold_todo) free pages for the
 * delete thread, waiting and applying pageout pressure as needed.
 * Called and returns with mh_mutex held.  Returns the number of pages
 * obtained, or 0 if the delete was cancelled (mh_cancel set) before any
 * could be reserved.
 */
15967c478bd9Sstevel@tonic-gate static pgcnt_t
delthr_get_freemem(struct mem_handle * mhp)15977c478bd9Sstevel@tonic-gate delthr_get_freemem(struct mem_handle *mhp)
15987c478bd9Sstevel@tonic-gate {
15997c478bd9Sstevel@tonic-gate pgcnt_t free_get;
16007c478bd9Sstevel@tonic-gate int ret;
16017c478bd9Sstevel@tonic-gate
16027c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&mhp->mh_mutex));
16037c478bd9Sstevel@tonic-gate
16047c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, need_free);
16057c478bd9Sstevel@tonic-gate /*
16067c478bd9Sstevel@tonic-gate * Get up to freemem_incr pages.
16077c478bd9Sstevel@tonic-gate */
16087c478bd9Sstevel@tonic-gate free_get = freemem_incr;
16097c478bd9Sstevel@tonic-gate if (free_get > mhp->mh_hold_todo)
16107c478bd9Sstevel@tonic-gate free_get = mhp->mh_hold_todo;
16117c478bd9Sstevel@tonic-gate /*
16127c478bd9Sstevel@tonic-gate * Take free_get pages away from freemem,
16137c478bd9Sstevel@tonic-gate * waiting if necessary.
16147c478bd9Sstevel@tonic-gate */
16157c478bd9Sstevel@tonic-gate
16167c478bd9Sstevel@tonic-gate while (!mhp->mh_cancel) {
/* Drop mh_mutex across the blocking page-allocation attempt. */
16177c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
16187c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, free_loop);
16197c478bd9Sstevel@tonic-gate /*
16207c478bd9Sstevel@tonic-gate * Duplicate test from page_create_throttle()
16217c478bd9Sstevel@tonic-gate * but don't override with !PG_WAIT.
16227c478bd9Sstevel@tonic-gate */
16237c478bd9Sstevel@tonic-gate if (freemem < (free_get + throttlefree)) {
16247c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, free_low);
16257c478bd9Sstevel@tonic-gate ret = 0;
16267c478bd9Sstevel@tonic-gate } else {
16277c478bd9Sstevel@tonic-gate ret = page_create_wait(free_get, 0);
16287c478bd9Sstevel@tonic-gate if (ret == 0) {
16297c478bd9Sstevel@tonic-gate /* EMPTY */
16307c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, free_failed);
16317c478bd9Sstevel@tonic-gate }
16327c478bd9Sstevel@tonic-gate }
/* Success: re-take mh_mutex and report the pages reserved. */
16337c478bd9Sstevel@tonic-gate if (ret != 0) {
16347c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
16357c478bd9Sstevel@tonic-gate return (free_get);
16367c478bd9Sstevel@tonic-gate }
16377c478bd9Sstevel@tonic-gate
16387c478bd9Sstevel@tonic-gate /*
16397c478bd9Sstevel@tonic-gate * Put pressure on pageout.
16407c478bd9Sstevel@tonic-gate */
16417c478bd9Sstevel@tonic-gate page_needfree(free_get);
16427c478bd9Sstevel338664dfSAndy Fiddaman WAKE_PAGEOUT_SCANNER(delthr);
16437c478bd9Sstevel@tonic-gate
/*
 * Wait a bounded interval (DEL_FREE_WAIT_TICKS) for memory to be
 * freed, or for a cv_signal from kphysm_del_cancel(), then withdraw
 * the page_needfree() request before retrying.
 */
16447c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
16457c478bd9Ssteveld3d50737SRafael Vanoni (void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
16467c478bd9Ssteveld3d50737SRafael Vanoni DEL_FREE_WAIT_TICKS, TR_CLOCK_TICK);
16477c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
16487c478bd9Sstevel@tonic-gate page_needfree(-(spgcnt_t)free_get);
16497c478bd9Sstevel@tonic-gate
16507c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
16517c478bd9Sstevel@tonic-gate }
/* Cancelled: no pages reserved. */
16527c478bd9Sstevel@tonic-gate return (0);
16537c478bd9Sstevel@tonic-gate }
16547c478bd9Sstevel@tonic-gate
16557c478bd9Sstevel@tonic-gate #define DR_AIO_CLEANUP_DELAY 25000 /* 0.025secs, in usec */
16567c478bd9Sstevel@tonic-gate #define DR_AIO_CLEANUP_MAXLOOPS_NODELAY 100
16577c478bd9Sstevel@tonic-gate /*
16587c478bd9Sstevel@tonic-gate * This function is run as a helper thread for delete_memory_thread.
16597c478bd9Sstevel@tonic-gate * It is needed in order to force kaio cleanup, so that pages used in kaio
16607c478bd9Sstevel@tonic-gate * will be unlocked and subsequently relocated by delete_memory_thread.
16617c478bd9Sstevel@tonic-gate * The address of the delete_memory_threads's mem_handle is passed in to
16627c478bd9Sstevel@tonic-gate * this thread function, and is used to set the mh_aio_cleanup_done member
16637c478bd9Sstevel@tonic-gate * prior to calling thread_exit().
16647c478bd9Sstevel@tonic-gate */
16657c478bd9Sstevel@tonic-gate static void
dr_aio_cleanup_thread(caddr_t amhp)16667c478bd9Sstevel@tonic-gate dr_aio_cleanup_thread(caddr_t amhp)
16677c478bd9Sstevel@tonic-gate {
16687c478bd9Sstevel@tonic-gate proc_t *procp;
16697c478bd9Sstevel@tonic-gate int (*aio_cleanup_dr_delete_memory)(proc_t *);
16707c478bd9Sstevel@tonic-gate int cleaned;
16717c478bd9Sstevel@tonic-gate int n = 0;
16727c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
16737c478bd9Sstevel@tonic-gate volatile uint_t *pcancel;
16747c478bd9Sstevel@tonic-gate
16757c478bd9Sstevel@tonic-gate mhp = (struct mem_handle *)amhp;
16767c478bd9Sstevel@tonic-gate ASSERT(mhp != NULL);
16777c478bd9Sstevel@tonic-gate pcancel = &mhp->mh_dr_aio_cleanup_cancel;
/*
 * The kaio module may not be loaded; its cleanup entry point is
 * resolved dynamically.  On any failure, flag completion so the
 * delete thread does not wait on us forever, then exit.
 */
16787c478bd9Sstevel@tonic-gate if (modload("sys", "kaio") == -1) {
16797c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1;
16807c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "dr_aio_cleanup_thread: cannot load kaio");
16817c478bd9Sstevel@tonic-gate thread_exit();
16827c478bd9Sstevel@tonic-gate }
16837c478bd9Sstevel@tonic-gate aio_cleanup_dr_delete_memory = (int (*)(proc_t *))
16847c478bd9Sstevel@tonic-gate modgetsymvalue("aio_cleanup_dr_delete_memory", 0);
16857c478bd9Sstevel@tonic-gate if (aio_cleanup_dr_delete_memory == NULL) {
16867c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1;
16877c478bd9Sstevel@tonic-gate cmn_err(CE_WARN,
16887c478bd9Sstevel@tonic-gate "aio_cleanup_dr_delete_memory not found in kaio");
16897c478bd9Sstevel@tonic-gate thread_exit();
16907c478bd9Sstevel@tonic-gate }
/*
 * Sweep the active process list (under pidlock, per-proc p_lock)
 * invoking kaio cleanup on each process with outstanding aio, until
 * the delete thread sets mh_dr_aio_cleanup_cancel.  Between sweeps,
 * delay when nothing was cleaned or after MAXLOOPS_NODELAY
 * back-to-back productive passes, to bound CPU usage.
 */
16917c478bd9Sstevel@tonic-gate do {
16927c478bd9Sstevel@tonic-gate cleaned = 0;
16937c478bd9Sstevel@tonic-gate mutex_enter(&pidlock);
16947c478bd9Sstevel@tonic-gate for (procp = practive; (*pcancel == 0) && (procp != NULL);
16957c478bd9Sstevel@tonic-gate procp = procp->p_next) {
16967c478bd9Sstevel@tonic-gate mutex_enter(&procp->p_lock);
16977c478bd9Sstevel@tonic-gate if (procp->p_aio != NULL) {
16987c478bd9Sstevel@tonic-gate /* cleanup proc's outstanding kaio */
16997c478bd9Sstevel@tonic-gate cleaned +=
17007c478bd9Sstevel@tonic-gate (*aio_cleanup_dr_delete_memory)(procp);
17017c478bd9Sstevel@tonic-gate }
17027c478bd9Sstevel@tonic-gate mutex_exit(&procp->p_lock);
17037c478bd9Sstevel@tonic-gate }
17047c478bd9Sstevel@tonic-gate mutex_exit(&pidlock);
17057c478bd9Sstevel@tonic-gate if ((*pcancel == 0) &&
17067c478bd9Sstevel@tonic-gate (!cleaned || (++n == DR_AIO_CLEANUP_MAXLOOPS_NODELAY))) {
17077c478bd9Sstevel@tonic-gate /* delay a bit before retrying all procs again */
17087c478bd9Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
17097c478bd9Sstevel@tonic-gate n = 0;
17107c478bd9Sstevel@tonic-gate }
17117c478bd9Sstevel@tonic-gate } while (*pcancel == 0);
17127c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 1;
17137c478bd9Sstevel@tonic-gate thread_exit();
17147c478bd9Sstevel@tonic-gate }
17157c478bd9Sstevel@tonic-gate
17167c478bd9Sstevel@tonic-gate static void
delete_memory_thread(caddr_t amhp)17177c478bd9Sstevel@tonic-gate delete_memory_thread(caddr_t amhp)
17187c478bd9Sstevel@tonic-gate {
17197c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
17207c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
17217c478bd9Sstevel@tonic-gate callb_cpr_t cprinfo;
17227c478bd9Sstevel@tonic-gate page_t *pp_targ;
17237c478bd9Sstevel@tonic-gate spgcnt_t freemem_left;
17247c478bd9Sstevel@tonic-gate void (*del_complete_funcp)(void *, int error);
17257c478bd9Sstevel@tonic-gate void *del_complete_arg;
17267c478bd9Sstevel@tonic-gate int comp_code;
17277c478bd9Sstevel@tonic-gate int ret;
17287c478bd9Sstevel@tonic-gate int first_scan;
17297c478bd9Sstevel@tonic-gate uint_t szc;
17307c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17317c478bd9Sstevel@tonic-gate uint64_t start_total, ntick_total;
17327c478bd9Sstevel@tonic-gate uint64_t start_pgrp, ntick_pgrp;
17337c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17347c478bd9Sstevel@tonic-gate
17357c478bd9Sstevel@tonic-gate mhp = (struct mem_handle *)amhp;
17367c478bd9Sstevel@tonic-gate
17377c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
17387c478bd9Sstevel@tonic-gate start_total = ddi_get_lbolt();
17397c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
17407c478bd9Sstevel@tonic-gate
17417c478bd9Sstevel@tonic-gate CALLB_CPR_INIT(&cprinfo, &mhp->mh_mutex,
17427c478bd9Sstevel@tonic-gate callb_generic_cpr, "memdel");
17437c478bd9Sstevel@tonic-gate
17447c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
17457c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state == MHND_STARTING);
17467c478bd9Sstevel@tonic-gate
17477c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_RUNNING;
17487c478bd9Sstevel@tonic-gate mhp->mh_thread_id = curthread;
17497c478bd9Sstevel@tonic-gate
17507c478bd9Sstevel@tonic-gate mhp->mh_hold_todo = mhp->mh_vm_pages;
17517c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
17527c478bd9Sstevel@tonic-gate
17537c478bd9Sstevel@tonic-gate /* Allocate the remap pages now, if necessary. */
17547c478bd9Sstevel@tonic-gate memseg_remap_init();
17557c478bd9Sstevel@tonic-gate
17567c478bd9Sstevel@tonic-gate /*
17577c478bd9Sstevel@tonic-gate * Subtract from availrmem now if possible as availrmem
17587c478bd9Sstevel@tonic-gate * may not be available by the end of the delete.
17597c478bd9Sstevel@tonic-gate */
17607c478bd9Sstevel@tonic-gate if (!get_availrmem(mhp->mh_vm_pages)) {
17617c478bd9Sstevel@tonic-gate comp_code = KPHYSM_ENOTVIABLE;
17627c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
17637c478bd9Sstevel@tonic-gate goto early_exit;
17647c478bd9Sstevel@tonic-gate }
17657c478bd9Sstevel@tonic-gate
17667c478bd9Sstevel@tonic-gate ret = kphysm_setup_pre_del(mhp->mh_vm_pages);
17677c478bd9Sstevel@tonic-gate
17687c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
17697c478bd9Sstevel@tonic-gate
17707c478bd9Sstevel@tonic-gate if (ret != 0) {
17717c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_EREFUSED;
17727c478bd9Sstevel@tonic-gate goto refused;
17737c478bd9Sstevel@tonic-gate }
17747c478bd9Sstevel@tonic-gate
17757c478bd9Sstevel@tonic-gate transit_list_collect(mhp, 1);
17767c478bd9Sstevel@tonic-gate
17777c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
17787c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
17797c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap == NULL);
17807c478bd9Sstevel@tonic-gate mdsp->mds_bitmap = kmem_zalloc(MDS_BITMAPBYTES(mdsp), KM_SLEEP);
17817c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired = kmem_zalloc(MDS_BITMAPBYTES(mdsp),
178273347c69Smb KM_SLEEP);
17837c478bd9Sstevel@tonic-gate }
17847c478bd9Sstevel@tonic-gate
17857c478bd9Sstevel@tonic-gate first_scan = 1;
17867c478bd9Sstevel@tonic-gate freemem_left = 0;
17877c478bd9Sstevel@tonic-gate /*
17887c478bd9Sstevel@tonic-gate * Start dr_aio_cleanup_thread, which periodically iterates
17897c478bd9Sstevel@tonic-gate * through the process list and invokes aio cleanup. This
17907c478bd9Sstevel@tonic-gate * is needed in order to avoid a deadly embrace between the
17917c478bd9Sstevel@tonic-gate * delete_memory_thread (waiting on writer lock for page, with the
17927c478bd9Sstevel@tonic-gate * exclusive-wanted bit set), kaio read request threads (waiting for a
17937c478bd9Sstevel@tonic-gate * reader lock on the same page that is wanted by the
17947c478bd9Sstevel@tonic-gate * delete_memory_thread), and threads waiting for kaio completion
17957c478bd9Sstevel@tonic-gate * (blocked on spt_amp->lock).
17967c478bd9Sstevel@tonic-gate */
17977c478bd9Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 0;
17987c478bd9Sstevel@tonic-gate mhp->mh_aio_cleanup_done = 0;
17997c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, dr_aio_cleanup_thread,
18007c478bd9Sstevel@tonic-gate (caddr_t)mhp, 0, &p0, TS_RUN, maxclsyspri - 1);
18017c478bd9Sstevel@tonic-gate while ((mhp->mh_hold_todo != 0) && (mhp->mh_cancel == 0)) {
18027c478bd9Sstevel@tonic-gate pgcnt_t collected;
18037c478bd9Sstevel@tonic-gate
18047c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nloop);
18057c478bd9Sstevel@tonic-gate collected = 0;
18067c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; (mdsp != NULL) &&
18077c478bd9Sstevel@tonic-gate (mhp->mh_cancel == 0); mdsp = mdsp->mds_next) {
18087c478bd9Sstevel@tonic-gate pfn_t pfn, p_end;
18097c478bd9Sstevel@tonic-gate
18107c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs;
18117c478bd9Sstevel@tonic-gate for (pfn = mdsp->mds_base; (pfn < p_end) &&
18127c478bd9Sstevel@tonic-gate (mhp->mh_cancel == 0); pfn++) {
18137c478bd9Sstevel@tonic-gate page_t *pp, *tpp, *tpp_targ;
18147c478bd9Sstevel@tonic-gate pgcnt_t bit;
18157c478bd9Sstevel@tonic-gate struct vnode *vp;
18167c478bd9Sstevel@tonic-gate u_offset_t offset;
18177c478bd9Sstevel@tonic-gate int mod, result;
18187c478bd9Sstevel@tonic-gate spgcnt_t pgcnt;
18197c478bd9Sstevel@tonic-gate
18207c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base;
18217c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] &
18227c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) {
18237c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, already_done);
18247c478bd9Sstevel@tonic-gate continue;
18257c478bd9Sstevel@tonic-gate }
18267c478bd9Sstevel@tonic-gate if (freemem_left == 0) {
18277c478bd9Sstevel@tonic-gate freemem_left += delthr_get_freemem(mhp);
18287c478bd9Sstevel@tonic-gate if (freemem_left == 0)
18297c478bd9Sstevel@tonic-gate break;
18307c478bd9Sstevel@tonic-gate }
18317c478bd9Sstevel@tonic-gate
18327c478bd9Sstevel@tonic-gate /*
18337c478bd9Sstevel@tonic-gate * Release mh_mutex - some of this
18347c478bd9Sstevel@tonic-gate * stuff takes some time (eg PUTPAGE).
18357c478bd9Sstevel@tonic-gate */
18367c478bd9Sstevel@tonic-gate
18377c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
18387c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ncheck);
18397c478bd9Sstevel@tonic-gate
18407c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
18417c478bd9Sstevel@tonic-gate if (pp == NULL) {
18427c478bd9Sstevel@tonic-gate /*
18437c478bd9Sstevel@tonic-gate * Not covered by a page_t - will
18447c478bd9Sstevel@tonic-gate * be dealt with elsewhere.
18457c478bd9Sstevel@tonic-gate */
18467c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nopaget);
18477c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
18487c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
18497c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
18507c478bd9Sstevel@tonic-gate continue;
18517c478bd9Sstevel@tonic-gate }
18527c478bd9Sstevel@tonic-gate
18537c478bd9Sstevel@tonic-gate if (!page_try_reclaim_lock(pp, SE_EXCL,
1854db874c57Selowe SE_EXCL_WANTED | SE_RETIRED)) {
1855db874c57Selowe /*
1856db874c57Selowe * Page in use elsewhere. Skip it.
1857db874c57Selowe */
1858db874c57Selowe MDSTAT_INCR(mhp, lockfail);
1859db874c57Selowe mutex_enter(&mhp->mh_mutex);
1860db874c57Selowe continue;
18617c478bd9Sstevel@tonic-gate }
18627c478bd9Sstevel@tonic-gate /*
18637c478bd9Sstevel@tonic-gate * See if the cage expanded into the delete.
18647c478bd9Sstevel@tonic-gate * This can happen as we have to allow the
18657c478bd9Sstevel@tonic-gate * cage to expand.
18667c478bd9Sstevel@tonic-gate */
18677c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) {
1868db874c57Selowe page_unlock(pp);
18697c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
18707c478bd9Sstevel@tonic-gate mhp->mh_cancel = KPHYSM_ENONRELOC;
18717c478bd9Sstevel@tonic-gate break;
18727c478bd9Sstevel@tonic-gate }
1873db874c57Selowe if (PP_RETIRED(pp)) {
18747c478bd9Sstevel@tonic-gate /*
18757c478bd9Sstevel@tonic-gate * Page has been retired and is
18767c478bd9Sstevel@tonic-gate * not part of the cage so we
18777c478bd9Sstevel@tonic-gate * can now do the accounting for
18787c478bd9Sstevel@tonic-gate * it.
18797c478bd9Sstevel@tonic-gate */
18807c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, retired);
18817c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
18827c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW]
18837c478bd9Sstevel@tonic-gate |= (1 << (bit % NBPBMW));
18847c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired[bit /
18857c478bd9Sstevel@tonic-gate NBPBMW] |=
18867c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
18877c478bd9Sstevel@tonic-gate mhp->mh_hold_todo--;
18887c478bd9Sstevel@tonic-gate continue;
18897c478bd9Sstevel@tonic-gate }
18907c478bd9Sstevel@tonic-gate ASSERT(freemem_left != 0);
18917c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) {
18927c478bd9Sstevel@tonic-gate /*
18937c478bd9Sstevel@tonic-gate * Like page_reclaim() only 'freemem'
18947c478bd9Sstevel@tonic-gate * processing is already done.
18957c478bd9Sstevel@tonic-gate */
18967c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nfree);
18977c478bd9Sstevel@tonic-gate free_page_collect:
18987c478bd9Sstevel@tonic-gate if (PP_ISAGED(pp)) {
18997c478bd9Sstevel@tonic-gate page_list_sub(pp,
19007c478bd9Sstevel@tonic-gate PG_FREE_LIST);
19017c478bd9Sstevel@tonic-gate } else {
19027c478bd9Sstevel@tonic-gate page_list_sub(pp,
19037c478bd9Sstevel@tonic-gate PG_CACHE_LIST);
19047c478bd9Sstevel@tonic-gate }
19057c478bd9Sstevel@tonic-gate PP_CLRFREE(pp);
19067c478bd9Sstevel@tonic-gate PP_CLRAGED(pp);
19077c478bd9Sstevel@tonic-gate collected++;
19087c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19097c478bd9Sstevel@tonic-gate page_delete_collect(pp, mhp);
19107c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
19117c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
19127c478bd9Sstevel@tonic-gate freemem_left--;
19137c478bd9Sstevel@tonic-gate continue;
19147c478bd9Sstevel@tonic-gate }
19157c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode != NULL);
19167c478bd9Sstevel@tonic-gate if (first_scan) {
19177c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, first_notfree);
19187c478bd9Sstevel@tonic-gate page_unlock(pp);
19197c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19207c478bd9Sstevel@tonic-gate continue;
19217c478bd9Sstevel@tonic-gate }
19227c478bd9Sstevel@tonic-gate /*
19237c478bd9Sstevel@tonic-gate * Keep stats on pages encountered that
1924db874c57Selowe * are marked for retirement.
19257c478bd9Sstevel@tonic-gate */
1926db874c57Selowe if (PP_TOXIC(pp)) {
19277c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, toxic);
1928db874c57Selowe } else if (PP_PR_REQ(pp)) {
19297c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, failing);
19307c478bd9Sstevel@tonic-gate }
19317c478bd9Sstevel@tonic-gate /*
19327c478bd9Sstevel@tonic-gate * In certain cases below, special exceptions
19337c478bd9Sstevel@tonic-gate * are made for pages that are toxic. This
19347c478bd9Sstevel@tonic-gate * is because the current meaning of toxic
19357c478bd9Sstevel@tonic-gate * is that an uncorrectable error has been
19367c478bd9Sstevel@tonic-gate * previously associated with the page.
19377c478bd9Sstevel@tonic-gate */
19387c478bd9Sstevel@tonic-gate if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
1939db874c57Selowe if (!PP_TOXIC(pp)) {
19407c478bd9Sstevel@tonic-gate /*
19417c478bd9Sstevel@tonic-gate * Must relocate locked in
19427c478bd9Sstevel@tonic-gate * memory pages.
19437c478bd9Sstevel@tonic-gate */
19447c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19457c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt();
19467c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19477c478bd9Sstevel@tonic-gate /*
19487c478bd9Sstevel@tonic-gate * Lock all constituent pages
19497c478bd9Sstevel@tonic-gate * of a large page to ensure
19507c478bd9Sstevel@tonic-gate * that p_szc won't change.
19517c478bd9Sstevel@tonic-gate */
19527c478bd9Sstevel@tonic-gate if (!group_page_trylock(pp,
19537c478bd9Sstevel@tonic-gate SE_EXCL)) {
19547c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp,
19557c478bd9Sstevel@tonic-gate gptllckfail);
19567c478bd9Sstevel@tonic-gate page_unlock(pp);
19577c478bd9Sstevel@tonic-gate mutex_enter(
19587c478bd9Sstevel@tonic-gate &mhp->mh_mutex);
19597c478bd9Sstevel@tonic-gate continue;
19607c478bd9Sstevel@tonic-gate }
19617c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, npplocked);
19627c478bd9Sstevel@tonic-gate pp_targ =
19637c478bd9Sstevel@tonic-gate page_get_replacement_page(
196473347c69Smb pp, NULL, 0);
19657c478bd9Sstevel@tonic-gate if (pp_targ != NULL) {
19667c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19677c478bd9Sstevel@tonic-gate ntick_pgrp =
19687c478bd9Sstevel@tonic-gate (uint64_t)
19697c478bd9Sstevel@tonic-gate ddi_get_lbolt() -
19707c478bd9Sstevel@tonic-gate start_pgrp;
19717c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19727c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp,
19737c478bd9Sstevel@tonic-gate ntick_pgrp);
19747c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp,
19757c478bd9Sstevel@tonic-gate nlockreloc);
19767c478bd9Sstevel@tonic-gate goto reloc;
19777c478bd9Sstevel@tonic-gate }
19787c478bd9Sstevel@tonic-gate group_page_unlock(pp);
19797c478bd9Sstevel@tonic-gate page_unlock(pp);
19807c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
19817c478bd9Sstevel@tonic-gate ntick_pgrp =
19827c478bd9Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() -
19837c478bd9Sstevel@tonic-gate start_pgrp;
19847c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
19857c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
19867c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnorepl);
19877c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19887c478bd9Sstevel@tonic-gate continue;
19897c478bd9Sstevel@tonic-gate } else {
19907c478bd9Sstevel@tonic-gate /*
19917c478bd9Sstevel@tonic-gate * Cannot do anything about
19927c478bd9Sstevel@tonic-gate * this page because it is
19937c478bd9Sstevel@tonic-gate * toxic.
19947c478bd9Sstevel@tonic-gate */
19957c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, npplkdtoxic);
19967c478bd9Sstevel@tonic-gate page_unlock(pp);
19977c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
19987c478bd9Sstevel@tonic-gate continue;
19997c478bd9Sstevel@tonic-gate }
20007c478bd9Sstevel@tonic-gate }
20017c478bd9Sstevel@tonic-gate /*
20027c478bd9Sstevel@tonic-gate * Unload the mappings and check if mod bit
20037c478bd9Sstevel@tonic-gate * is set.
20047c478bd9Sstevel@tonic-gate */
2005ad23a2dbSjohansen ASSERT(!PP_ISKAS(pp));
20067c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
20077c478bd9Sstevel@tonic-gate mod = hat_ismod(pp);
20087c478bd9Sstevel@tonic-gate
20097c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20107c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt();
20117c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
2012db874c57Selowe if (mod && !PP_TOXIC(pp)) {
20137c478bd9Sstevel@tonic-gate /*
20147c478bd9Sstevel@tonic-gate * Lock all constituent pages
20157c478bd9Sstevel@tonic-gate * of a large page to ensure
20167c478bd9Sstevel@tonic-gate * that p_szc won't change.
20177c478bd9Sstevel@tonic-gate */
20187c478bd9Sstevel@tonic-gate if (!group_page_trylock(pp, SE_EXCL)) {
20197c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, gptlmodfail);
20207c478bd9Sstevel@tonic-gate page_unlock(pp);
20217c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20227c478bd9Sstevel@tonic-gate continue;
20237c478bd9Sstevel@tonic-gate }
20247c478bd9Sstevel@tonic-gate pp_targ = page_get_replacement_page(pp,
20257c478bd9Sstevel@tonic-gate NULL, 0);
20267c478bd9Sstevel@tonic-gate if (pp_targ != NULL) {
20277c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nmodreloc);
20287c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20297c478bd9Sstevel@tonic-gate ntick_pgrp =
20307c478bd9Sstevel@tonic-gate (uint64_t)ddi_get_lbolt() -
203173347c69Smb start_pgrp;
20327c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20337c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20347c478bd9Sstevel@tonic-gate goto reloc;
20357c478bd9Sstevel@tonic-gate }
20367c478bd9Sstevel@tonic-gate group_page_unlock(pp);
20377c478bd9Sstevel@tonic-gate }
20387c478bd9Sstevel@tonic-gate
20397c478bd9Sstevel@tonic-gate if (!page_try_demote_pages(pp)) {
20407c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, demotefail);
20417c478bd9Sstevel@tonic-gate page_unlock(pp);
20427c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20437c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20447c478bd9Sstevel@tonic-gate start_pgrp;
20457c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20467c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20477c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20487c478bd9Sstevel@tonic-gate continue;
20497c478bd9Sstevel@tonic-gate }
20507c478bd9Sstevel@tonic-gate
20517c478bd9Sstevel@tonic-gate /*
20527c478bd9Sstevel@tonic-gate * Regular 'page-out'.
20537c478bd9Sstevel@tonic-gate */
20547c478bd9Sstevel@tonic-gate if (!mod) {
20557c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ndestroy);
20567c478bd9Sstevel@tonic-gate page_destroy(pp, 1);
20577c478bd9Sstevel@tonic-gate /*
20587c478bd9Sstevel@tonic-gate * page_destroy was called with
20597c478bd9Sstevel@tonic-gate * dontfree. As long as p_lckcnt
20607c478bd9Sstevel@tonic-gate * and p_cowcnt are both zero, the
20617c478bd9Sstevel@tonic-gate * only additional action of
20627c478bd9Sstevel@tonic-gate * page_destroy with !dontfree is to
20637c478bd9Sstevel@tonic-gate * call page_free, so we can collect
20647c478bd9Sstevel@tonic-gate * the page here.
20657c478bd9Sstevel@tonic-gate */
20667c478bd9Sstevel@tonic-gate collected++;
20677c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20687c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20697c478bd9Sstevel@tonic-gate start_pgrp;
20707c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20717c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20727c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20737c478bd9Sstevel@tonic-gate page_delete_collect(pp, mhp);
20747c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
20757c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW));
20767c478bd9Sstevel@tonic-gate continue;
20777c478bd9Sstevel@tonic-gate }
20787c478bd9Sstevel@tonic-gate /*
20797c478bd9Sstevel@tonic-gate * The page is toxic and the mod bit is
20807c478bd9Sstevel@tonic-gate * set, we cannot do anything here to deal
20817c478bd9Sstevel@tonic-gate * with it.
20827c478bd9Sstevel@tonic-gate */
2083db874c57Selowe if (PP_TOXIC(pp)) {
20847c478bd9Sstevel@tonic-gate page_unlock(pp);
20857c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
20867c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
20877c478bd9Sstevel@tonic-gate start_pgrp;
20887c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
20897c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
20907c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, modtoxic);
20917c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
20927c478bd9Sstevel@tonic-gate continue;
20937c478bd9Sstevel@tonic-gate }
20947c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nputpage);
20957c478bd9Sstevel@tonic-gate vp = pp->p_vnode;
20967c478bd9Sstevel@tonic-gate offset = pp->p_offset;
20977c478bd9Sstevel@tonic-gate VN_HOLD(vp);
20987c478bd9Sstevel@tonic-gate page_unlock(pp);
20997c478bd9Sstevel@tonic-gate (void) VOP_PUTPAGE(vp, offset, PAGESIZE,
2100da6c28aaSamw B_INVAL|B_FORCE, kcred, NULL);
21017c478bd9Sstevel@tonic-gate VN_RELE(vp);
21027c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21037c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21047c478bd9Sstevel@tonic-gate start_pgrp;
21057c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21067c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
21077c478bd9Sstevel@tonic-gate /*
21087c478bd9Sstevel@tonic-gate * Try to get the page back immediately
21097c478bd9Sstevel@tonic-gate * so that it can be collected.
21107c478bd9Sstevel@tonic-gate */
21117c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
21127c478bd9Sstevel@tonic-gate if (pp == NULL) {
21137c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim);
21147c478bd9Sstevel@tonic-gate /*
21157c478bd9Sstevel@tonic-gate * This should not happen as this
21167c478bd9Sstevel@tonic-gate * thread is deleting the page.
21177c478bd9Sstevel@tonic-gate * If this code is generalized, this
21187c478bd9Sstevel@tonic-gate * becomes a reality.
21197c478bd9Sstevel@tonic-gate */
21207c478bd9Sstevel@tonic-gate #ifdef DEBUG
21217c478bd9Sstevel@tonic-gate cmn_err(CE_WARN,
21227c478bd9Sstevel@tonic-gate "delete_memory_thread(0x%p) "
21237c478bd9Sstevel@tonic-gate "pfn 0x%lx has no page_t",
21247c478bd9Sstevel@tonic-gate (void *)mhp, pfn);
21257c478bd9Sstevel@tonic-gate #endif /* DEBUG */
21267c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21277c478bd9Sstevel@tonic-gate continue;
21287c478bd9Sstevel@tonic-gate }
21297c478bd9Sstevel@tonic-gate if (page_try_reclaim_lock(pp, SE_EXCL,
2130db874c57Selowe SE_EXCL_WANTED | SE_RETIRED)) {
21317c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) {
21327c478bd9Sstevel@tonic-gate goto free_page_collect;
21337c478bd9Sstevel@tonic-gate }
21347c478bd9Sstevel@tonic-gate page_unlock(pp);
21357c478bd9Sstevel@tonic-gate }
21367c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nnoreclaim);
21377c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21387c478bd9Sstevel@tonic-gate continue;
21397c478bd9Sstevel@tonic-gate
21407c478bd9Sstevel@tonic-gate reloc:
21417c478bd9Sstevel@tonic-gate /*
21427c478bd9Sstevel@tonic-gate * Got some freemem and a target
21437c478bd9Sstevel@tonic-gate * page, so move the data to avoid
21447c478bd9Sstevel@tonic-gate * I/O and lock problems.
21457c478bd9Sstevel@tonic-gate */
21467c478bd9Sstevel@tonic-gate ASSERT(!page_iolock_assert(pp));
21477c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nreloc);
21487c478bd9Sstevel@tonic-gate /*
21497c478bd9Sstevel@tonic-gate * page_relocate() will return pgcnt: the
21507c478bd9Sstevel@tonic-gate * number of consecutive pages relocated.
21517c478bd9Sstevel@tonic-gate * If it is successful, pp will be a
21527c478bd9Sstevel@tonic-gate * linked list of the page structs that
21537c478bd9Sstevel@tonic-gate * were relocated. If page_relocate() is
21547c478bd9Sstevel@tonic-gate * unsuccessful, pp will be unmodified.
21557c478bd9Sstevel@tonic-gate */
21567c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21577c478bd9Sstevel@tonic-gate start_pgrp = ddi_get_lbolt();
21587c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21597c478bd9Sstevel@tonic-gate result = page_relocate(&pp, &pp_targ, 0, 0,
21607c478bd9Sstevel@tonic-gate &pgcnt, NULL);
21617c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
21627c478bd9Sstevel@tonic-gate ntick_pgrp = (uint64_t)ddi_get_lbolt() -
21637c478bd9Sstevel@tonic-gate start_pgrp;
21647c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
21657c478bd9Sstevel@tonic-gate MDSTAT_PGRP(mhp, ntick_pgrp);
21667c478bd9Sstevel@tonic-gate if (result != 0) {
21677c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, nrelocfail);
21687c478bd9Sstevel@tonic-gate /*
21697c478bd9Sstevel@tonic-gate * We did not succeed. We need
21707c478bd9Sstevel@tonic-gate * to give the pp_targ pages back.
21717c478bd9Sstevel@tonic-gate * page_free(pp_targ, 1) without
21727c478bd9Sstevel@tonic-gate * the freemem accounting.
21737c478bd9Sstevel@tonic-gate */
21747c478bd9Sstevel@tonic-gate group_page_unlock(pp);
21757c478bd9Sstevel@tonic-gate page_free_replacement_page(pp_targ);
21767c478bd9Sstevel@tonic-gate page_unlock(pp);
21777c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21787c478bd9Sstevel@tonic-gate continue;
21797c478bd9Sstevel@tonic-gate }
21807c478bd9Sstevel@tonic-gate
21817c478bd9Sstevel@tonic-gate /*
21827c478bd9Sstevel@tonic-gate * We will then collect pgcnt pages.
21837c478bd9Sstevel@tonic-gate */
21847c478bd9Sstevel@tonic-gate ASSERT(pgcnt > 0);
21857c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
21867c478bd9Sstevel@tonic-gate /*
21877c478bd9Sstevel@tonic-gate * We need to make sure freemem_left is
21887c478bd9Sstevel@tonic-gate * large enough.
21897c478bd9Sstevel@tonic-gate */
21907c478bd9Sstevel@tonic-gate while ((freemem_left < pgcnt) &&
219173347c69Smb (!mhp->mh_cancel)) {
21927c478bd9Sstevel@tonic-gate freemem_left +=
219373347c69Smb delthr_get_freemem(mhp);
21947c478bd9Sstevel@tonic-gate }
21957c478bd9Sstevel@tonic-gate
21967c478bd9Sstevel@tonic-gate /*
21977c478bd9Sstevel@tonic-gate * Do not proceed if mh_cancel is set.
21987c478bd9Sstevel@tonic-gate */
21997c478bd9Sstevel@tonic-gate if (mhp->mh_cancel) {
22007c478bd9Sstevel@tonic-gate while (pp_targ != NULL) {
22017c478bd9Sstevel@tonic-gate /*
22027c478bd9Sstevel@tonic-gate * Unlink and unlock each page.
22037c478bd9Sstevel@tonic-gate */
22047c478bd9Sstevel@tonic-gate tpp_targ = pp_targ;
22057c478bd9Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ);
22067c478bd9Sstevel@tonic-gate page_unlock(tpp_targ);
22077c478bd9Sstevel@tonic-gate }
22087c478bd9Sstevel@tonic-gate /*
22097c478bd9Sstevel@tonic-gate * We need to give the pp pages back.
22107c478bd9Sstevel@tonic-gate * page_free(pp, 1) without the
22117c478bd9Sstevel@tonic-gate * freemem accounting.
22127c478bd9Sstevel@tonic-gate */
22137c478bd9Sstevel@tonic-gate page_free_replacement_page(pp);
22147c478bd9Sstevel@tonic-gate break;
22157c478bd9Sstevel@tonic-gate }
22167c478bd9Sstevel@tonic-gate
22177c478bd9Sstevel@tonic-gate /* Now remove pgcnt from freemem_left */
22187c478bd9Sstevel@tonic-gate freemem_left -= pgcnt;
22197c478bd9Sstevel@tonic-gate ASSERT(freemem_left >= 0);
22207c478bd9Sstevel@tonic-gate szc = pp->p_szc;
22217c478bd9Sstevel@tonic-gate while (pp != NULL) {
22227c478bd9Sstevel@tonic-gate /*
22237c478bd9Sstevel@tonic-gate * pp and pp_targ were passed back as
22247c478bd9Sstevel@tonic-gate * a linked list of pages.
22257c478bd9Sstevel@tonic-gate * Unlink and unlock each page.
22267c478bd9Sstevel@tonic-gate */
22277c478bd9Sstevel@tonic-gate tpp_targ = pp_targ;
22287c478bd9Sstevel@tonic-gate page_sub(&pp_targ, tpp_targ);
22297c478bd9Sstevel@tonic-gate page_unlock(tpp_targ);
22307c478bd9Sstevel@tonic-gate /*
22317c478bd9Sstevel@tonic-gate * The original page is now free
22327c478bd9Sstevel@tonic-gate * so remove it from the linked
22337c478bd9Sstevel@tonic-gate * list and collect it.
22347c478bd9Sstevel@tonic-gate */
22357c478bd9Sstevel@tonic-gate tpp = pp;
22367c478bd9Sstevel@tonic-gate page_sub(&pp, tpp);
22377c478bd9Sstevel@tonic-gate pfn = page_pptonum(tpp);
22387c478bd9Sstevel@tonic-gate collected++;
22397c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp));
22407c478bd9Sstevel@tonic-gate ASSERT(tpp->p_vnode == NULL);
22417c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp));
22427c478bd9Sstevel@tonic-gate ASSERT(tpp->p_szc == szc);
22437c478bd9Sstevel@tonic-gate tpp->p_szc = 0;
22447c478bd9Sstevel@tonic-gate page_delete_collect(tpp, mhp);
22457c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base;
22467c478bd9Sstevel@tonic-gate mdsp->mds_bitmap[bit / NBPBMW] |=
224773347c69Smb (1 << (bit % NBPBMW));
22487c478bd9Sstevel@tonic-gate }
22497c478bd9Sstevel@tonic-gate ASSERT(pp_targ == NULL);
22507c478bd9Sstevel@tonic-gate }
22517c478bd9Sstevel@tonic-gate }
22527c478bd9Sstevel@tonic-gate first_scan = 0;
22537c478bd9Sstevel@tonic-gate if ((mhp->mh_cancel == 0) && (mhp->mh_hold_todo != 0) &&
225473347c69Smb (collected == 0)) {
22557c478bd9Sstevel@tonic-gate /*
22567c478bd9Sstevel@tonic-gate * This code is needed as we cannot wait
22577c478bd9Sstevel@tonic-gate * for a page to be locked OR the delete to
22587c478bd9Sstevel@tonic-gate * be cancelled. Also, we must delay so
22597c478bd9Sstevel@tonic-gate * that other threads get a chance to run
22607c478bd9Sstevel@tonic-gate * on our cpu, otherwise page locks may be
22617c478bd9Sstevel@tonic-gate * held indefinitely by those threads.
22627c478bd9Sstevel@tonic-gate */
22637c478bd9Sstevel@tonic-gate MDSTAT_INCR(mhp, ndelay);
22647c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
2265d3d50737SRafael Vanoni (void) cv_reltimedwait(&mhp->mh_cv, &mhp->mh_mutex,
2266d3d50737SRafael Vanoni DEL_BUSY_WAIT_TICKS, TR_CLOCK_TICK);
22677c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
22687c478bd9Sstevel@tonic-gate }
22697c478bd9Sstevel@tonic-gate }
22707c478bd9Sstevel@tonic-gate /* stop the dr aio cleanup thread */
22717c478bd9Sstevel@tonic-gate mhp->mh_dr_aio_cleanup_cancel = 1;
22727c478bd9Sstevel@tonic-gate transit_list_collect(mhp, 0);
22737c478bd9Sstevel@tonic-gate if (freemem_left != 0) {
22747c478bd9Sstevel@tonic-gate /* Return any surplus. */
22757c478bd9Sstevel@tonic-gate page_create_putback(freemem_left);
22767c478bd9Sstevel@tonic-gate freemem_left = 0;
22777c478bd9Sstevel@tonic-gate }
22787c478bd9Sstevel@tonic-gate #ifdef MEM_DEL_STATS
22797c478bd9Sstevel@tonic-gate ntick_total = (uint64_t)ddi_get_lbolt() - start_total;
22807c478bd9Sstevel@tonic-gate #endif /* MEM_DEL_STATS */
22817c478bd9Sstevel@tonic-gate MDSTAT_TOTAL(mhp, ntick_total);
22827c478bd9Sstevel@tonic-gate MDSTAT_PRINT(mhp);
22837c478bd9Sstevel@tonic-gate
22847c478bd9Sstevel@tonic-gate /*
22857c478bd9Sstevel@tonic-gate * If the memory delete was cancelled, exclusive-wanted bits must
2286db874c57Selowe * be cleared. If there are retired pages being deleted, they need
2287db874c57Selowe * to be unretired.
22887c478bd9Sstevel@tonic-gate */
22897c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
22907c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
22917c478bd9Sstevel@tonic-gate pfn_t pfn, p_end;
22927c478bd9Sstevel@tonic-gate
22937c478bd9Sstevel@tonic-gate p_end = mdsp->mds_base + mdsp->mds_npgs;
22947c478bd9Sstevel@tonic-gate for (pfn = mdsp->mds_base; pfn < p_end; pfn++) {
22957c478bd9Sstevel@tonic-gate page_t *pp;
22967c478bd9Sstevel@tonic-gate pgcnt_t bit;
22977c478bd9Sstevel@tonic-gate
22987c478bd9Sstevel@tonic-gate bit = pfn - mdsp->mds_base;
22997c478bd9Sstevel@tonic-gate if (mhp->mh_cancel) {
23007c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
23017c478bd9Sstevel@tonic-gate if (pp != NULL) {
23027c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap[bit / NBPBMW] &
23037c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) == 0) {
23047c478bd9Sstevel@tonic-gate page_lock_clr_exclwanted(pp);
23057c478bd9Sstevel@tonic-gate }
23067c478bd9Sstevel@tonic-gate }
23077c478bd9Sstevel@tonic-gate } else {
23087c478bd9Sstevel@tonic-gate pp = NULL;
23097c478bd9Sstevel@tonic-gate }
23107c478bd9Sstevel@tonic-gate if ((mdsp->mds_bitmap_retired[bit / NBPBMW] &
23117c478bd9Sstevel@tonic-gate (1 << (bit % NBPBMW))) != 0) {
23127c478bd9Sstevel@tonic-gate /* do we already have pp? */
23137c478bd9Sstevel@tonic-gate if (pp == NULL) {
23147c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn);
23157c478bd9Sstevel@tonic-gate }
23167c478bd9Sstevel@tonic-gate ASSERT(pp != NULL);
2317db874c57Selowe ASSERT(PP_RETIRED(pp));
23187c478bd9Sstevel@tonic-gate if (mhp->mh_cancel != 0) {
2319db874c57Selowe page_unlock(pp);
23207c478bd9Sstevel@tonic-gate /*
23217c478bd9Sstevel@tonic-gate * To satisfy ASSERT below in
23227c478bd9Sstevel@tonic-gate * cancel code.
23237c478bd9Sstevel@tonic-gate */
23247c478bd9Sstevel@tonic-gate mhp->mh_hold_todo++;
23257c478bd9Sstevel@tonic-gate } else {
23268b464eb8Smec (void) page_unretire_pp(pp,
23278b464eb8Smec PR_UNR_CLEAN);
23287c478bd9Sstevel@tonic-gate }
23297c478bd9Sstevel@tonic-gate }
23307c478bd9Sstevel@tonic-gate }
23317c478bd9Sstevel@tonic-gate }
23327c478bd9Sstevel@tonic-gate /*
23337c478bd9Sstevel@tonic-gate * Free retired page bitmap and collected page bitmap
23347c478bd9Sstevel@tonic-gate */
23357c478bd9Sstevel@tonic-gate for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
23367c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
23377c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap_retired != NULL);
23387c478bd9Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap_retired, MDS_BITMAPBYTES(mdsp));
23397c478bd9Sstevel@tonic-gate mdsp->mds_bitmap_retired = NULL; /* Paranoia. */
23407c478bd9Sstevel@tonic-gate ASSERT(mdsp->mds_bitmap != NULL);
23417c478bd9Sstevel@tonic-gate kmem_free(mdsp->mds_bitmap, MDS_BITMAPBYTES(mdsp));
23427c478bd9Sstevel@tonic-gate mdsp->mds_bitmap = NULL; /* Paranoia. */
23437c478bd9Sstevel@tonic-gate }
23447c478bd9Sstevel@tonic-gate
23457c478bd9Sstevel@tonic-gate /* wait for our dr aio cancel thread to exit */
23467c478bd9Sstevel@tonic-gate while (!(mhp->mh_aio_cleanup_done)) {
23477c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_BEGIN(&cprinfo);
23487c478bd9Sstevel@tonic-gate delay(drv_usectohz(DR_AIO_CLEANUP_DELAY));
23497c478bd9Sstevel@tonic-gate CALLB_CPR_SAFE_END(&cprinfo, &mhp->mh_mutex);
23507c478bd9Sstevel@tonic-gate }
23517c478bd9Sstevel@tonic-gate refused:
23527c478bd9Sstevel@tonic-gate if (mhp->mh_cancel != 0) {
23537c478bd9Sstevel@tonic-gate page_t *pp;
23547c478bd9Sstevel@tonic-gate
23557c478bd9Sstevel@tonic-gate comp_code = mhp->mh_cancel;
23567c478bd9Sstevel@tonic-gate /*
23577c478bd9Sstevel@tonic-gate * Go through list of deleted pages (mh_deleted) freeing
23587c478bd9Sstevel@tonic-gate * them.
23597c478bd9Sstevel@tonic-gate */
23607c478bd9Sstevel@tonic-gate while ((pp = mhp->mh_deleted) != NULL) {
23617c478bd9Sstevel@tonic-gate mhp->mh_deleted = pp->p_next;
23627c478bd9Sstevel@tonic-gate mhp->mh_hold_todo++;
23637c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
23647c478bd9Sstevel@tonic-gate /* Restore p_next. */
23657c478bd9Sstevel@tonic-gate pp->p_next = pp->p_prev;
23667c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) {
23677c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC,
23687c478bd9Sstevel@tonic-gate "page %p is free",
23697c478bd9Sstevel@tonic-gate (void *)pp);
23707c478bd9Sstevel@tonic-gate }
23717c478bd9Sstevel@tonic-gate page_free(pp, 1);
23727c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
23737c478bd9Sstevel@tonic-gate }
23747c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_hold_todo == mhp->mh_vm_pages);
23757c478bd9Sstevel@tonic-gate
23767c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
23777c478bd9Sstevel@tonic-gate put_availrmem(mhp->mh_vm_pages);
23787c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
23797c478bd9Sstevel@tonic-gate
23807c478bd9Sstevel@tonic-gate goto t_exit;
23817c478bd9Sstevel@tonic-gate }
23827c478bd9Sstevel@tonic-gate
23837c478bd9Sstevel@tonic-gate /*
23847c478bd9Sstevel@tonic-gate * All the pages are no longer in use and are exclusively locked.
23857c478bd9Sstevel@tonic-gate */
23867c478bd9Sstevel@tonic-gate
23877c478bd9Sstevel@tonic-gate mhp->mh_deleted = NULL;
23887c478bd9Sstevel@tonic-gate
23897c478bd9Sstevel@tonic-gate kphysm_del_cleanup(mhp);
23907c478bd9Sstevel@tonic-gate
239173347c69Smb /*
23929853d9e8SJason Beloro * mem_node_del_range needs to be after kphysm_del_cleanup so
239373347c69Smb * that the mem_node_config[] will remain intact for the cleanup.
239473347c69Smb */
239573347c69Smb for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
239673347c69Smb mdsp = mdsp->mds_next) {
23979853d9e8SJason Beloro mem_node_del_range(mdsp->mds_base,
23989853d9e8SJason Beloro mdsp->mds_base + mdsp->mds_npgs - 1);
239973347c69Smb }
2400af4c679fSSean McEnroe /* cleanup the page counters */
2401af4c679fSSean McEnroe page_ctrs_cleanup();
240273347c69Smb
24037c478bd9Sstevel@tonic-gate comp_code = KPHYSM_OK;
24047c478bd9Sstevel@tonic-gate
24057c478bd9Sstevel@tonic-gate t_exit:
24067c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24077c478bd9Sstevel@tonic-gate kphysm_setup_post_del(mhp->mh_vm_pages,
24087c478bd9Sstevel@tonic-gate (comp_code == KPHYSM_OK) ? 0 : 1);
24097c478bd9Sstevel@tonic-gate mutex_enter(&mhp->mh_mutex);
24107c478bd9Sstevel@tonic-gate
24117c478bd9Sstevel@tonic-gate early_exit:
24127c478bd9Sstevel@tonic-gate /* mhp->mh_mutex exited by CALLB_CPR_EXIT() */
24137c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_DONE;
24147c478bd9Sstevel@tonic-gate del_complete_funcp = mhp->mh_delete_complete;
24157c478bd9Sstevel@tonic-gate del_complete_arg = mhp->mh_delete_complete_arg;
24167c478bd9Sstevel@tonic-gate CALLB_CPR_EXIT(&cprinfo);
24177c478bd9Sstevel@tonic-gate (*del_complete_funcp)(del_complete_arg, comp_code);
24187c478bd9Sstevel@tonic-gate thread_exit();
24197c478bd9Sstevel@tonic-gate /*NOTREACHED*/
24207c478bd9Sstevel@tonic-gate }
24217c478bd9Sstevel@tonic-gate
24227c478bd9Sstevel@tonic-gate /*
24237c478bd9Sstevel@tonic-gate * Start the delete of the memory from the system.
24247c478bd9Sstevel@tonic-gate */
24257c478bd9Sstevel@tonic-gate int
kphysm_del_start(memhandle_t handle,void (* complete)(void *,int),void * complete_arg)24267c478bd9Sstevel@tonic-gate kphysm_del_start(
24277c478bd9Sstevel@tonic-gate memhandle_t handle,
24287c478bd9Sstevel@tonic-gate void (*complete)(void *, int),
24297c478bd9Sstevel@tonic-gate void *complete_arg)
24307c478bd9Sstevel@tonic-gate {
24317c478bd9Sstevel@tonic-gate struct mem_handle *mhp;
24327c478bd9Sstevel@tonic-gate
24337c478bd9Sstevel@tonic-gate mhp = kphysm_lookup_mem_handle(handle);
24347c478bd9Sstevel@tonic-gate if (mhp == NULL) {
24357c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
24367c478bd9Sstevel@tonic-gate }
24377c478bd9Sstevel@tonic-gate switch (mhp->mh_state) {
24387c478bd9Sstevel@tonic-gate case MHND_FREE:
24397c478bd9Sstevel@tonic-gate ASSERT(mhp->mh_state != MHND_FREE);
24407c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24417c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
24427c478bd9Sstevel@tonic-gate case MHND_INIT:
24437c478bd9Sstevel@tonic-gate break;
24447c478bd9Sstevel@tonic-gate case MHND_STARTING:
24457c478bd9Sstevel@tonic-gate case MHND_RUNNING:
24467c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24477c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
24487c478bd9Sstevel@tonic-gate case MHND_DONE:
24497c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24507c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
24517c478bd9Sstevel@tonic-gate case MHND_RELEASE:
24527c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24537c478bd9Sstevel@tonic-gate return (KPHYSM_ESEQUENCE);
24547c478bd9Sstevel@tonic-gate default:
24557c478bd9Sstevel@tonic-gate #ifdef DEBUG
24567c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_del_start(0x%p) state corrupt %d",
24577c478bd9Sstevel@tonic-gate (void *)mhp, mhp->mh_state);
24587c478bd9Sstevel@tonic-gate #endif /* DEBUG */
24597c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24607c478bd9Sstevel@tonic-gate return (KPHYSM_EHANDLE);
24617c478bd9Sstevel@tonic-gate }
24627c478bd9Sstevel@tonic-gate
24637c478bd9Sstevel@tonic-gate if (mhp->mh_transit.trl_spans == NULL) {
24647c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24657c478bd9Sstevel@tonic-gate return (KPHYSM_ENOWORK);
24667c478bd9Sstevel@tonic-gate }
24677c478bd9Sstevel@tonic-gate
24687c478bd9Sstevel@tonic-gate ASSERT(complete != NULL);
24697c478bd9Sstevel@tonic-gate mhp->mh_delete_complete = complete;
24707c478bd9Sstevel@tonic-gate mhp->mh_delete_complete_arg = complete_arg;
24717c478bd9Sstevel@tonic-gate mhp->mh_state = MHND_STARTING;
24727c478bd9Sstevel@tonic-gate /*
24737c478bd9Sstevel@tonic-gate * Release the mutex in case thread_create sleeps.
24747c478bd9Sstevel@tonic-gate */
24757c478bd9Sstevel@tonic-gate mutex_exit(&mhp->mh_mutex);
24767c478bd9Sstevel@tonic-gate
24777c478bd9Sstevel@tonic-gate /*
24787c478bd9Sstevel@tonic-gate * The "obvious" process for this thread is pageout (proc_pageout)
24797c478bd9Sstevel@tonic-gate * but this gives the thread too much power over freemem
24807c478bd9Sstevel@tonic-gate * which results in freemem starvation.
24817c478bd9Sstevel@tonic-gate */
24827c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, delete_memory_thread, mhp, 0, &p0,
24837c478bd9Sstevel@tonic-gate TS_RUN, maxclsyspri - 1);
24847c478bd9Sstevel@tonic-gate
24857c478bd9Sstevel@tonic-gate return (KPHYSM_OK);
24867c478bd9Sstevel@tonic-gate }
24877c478bd9Sstevel@tonic-gate
static kmutex_t pp_dummy_lock;	/* Protects init. of pp_dummy. */
static caddr_t pp_dummy;	/* Dummy page_t area; NULL until initialized. */
static pgcnt_t pp_dummy_npages;	/* Number of pages backing pp_dummy. */
static pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
24927c478bd9Sstevel@tonic-gate
/*
 * Initialize a range of page_t's to the same state deleted pages are
 * left in: invalid pfn, no offset, and the page lock in the "deleted"
 * state so that page_lock() on these pages will not block.
 */
static void
memseg_remap_init_pages(page_t *pages, page_t *epages)
{
	page_t *pp;

	for (pp = pages; pp < epages; pp++) {
		pp->p_pagenum = PFN_INVALID;	/* XXXX */
		pp->p_offset = (u_offset_t)-1;
		page_iolock_init(pp);
		/* Take the exclusive lock before marking the page deleted. */
		while (!page_lock(pp, SE_EXCL, (kmutex_t *)NULL, P_RECLAIM))
			continue;
		page_lock_delete(pp);
	}
}
25077c478bd9Sstevel@tonic-gate
/*
 * One-time setup of the dummy page_t area used to remap the metadata of
 * deleted memsegs.  Idempotent: pp_dummy being non-NULL marks completion,
 * and pp_dummy_lock serializes concurrent initializers.
 */
void
memseg_remap_init()
{
	mutex_enter(&pp_dummy_lock);
	if (pp_dummy == NULL) {
		uint_t dpages;
		int i;

		/*
		 * dpages starts off as the size of the structure and
		 * ends up as the minimum number of pages that will
		 * hold a whole number of page_t structures.
		 */
		dpages = sizeof (page_t);
		ASSERT(dpages != 0);
		ASSERT(dpages <= MMU_PAGESIZE);

		/* Strip factors of two: dpages becomes the odd part. */
		while ((dpages & 1) == 0)
			dpages >>= 1;

		pp_dummy_npages = dpages;
		/*
		 * Allocate pp_dummy pages directly from static_arena,
		 * since these are whole page allocations and are
		 * referenced by physical address. This also has the
		 * nice fringe benefit of hiding the memory from
		 * ::findleaks since it doesn't deal well with allocated
		 * kernel heap memory that doesn't have any mappings.
		 */
		pp_dummy = vmem_xalloc(static_arena, ptob(pp_dummy_npages),
		    PAGESIZE, 0, 0, NULL, NULL, VM_SLEEP);
		bzero(pp_dummy, ptob(pp_dummy_npages));
		ASSERT(((uintptr_t)pp_dummy & MMU_PAGEOFFSET) == 0);
		/* Record each backing pfn so remap_to_dummy() can load it. */
		pp_dummy_pfn = kmem_alloc(sizeof (*pp_dummy_pfn) *
		    pp_dummy_npages, KM_SLEEP);
		for (i = 0; i < pp_dummy_npages; i++) {
			pp_dummy_pfn[i] = hat_getpfnum(kas.a_hat,
			    &pp_dummy[MMU_PAGESIZE * i]);
			ASSERT(pp_dummy_pfn[i] != PFN_INVALID);
		}
		/*
		 * Initialize the page_t's to a known 'deleted' state
		 * that matches the state of deleted pages.
		 */
		memseg_remap_init_pages((page_t *)pp_dummy,
		    (page_t *)(pp_dummy + ptob(pp_dummy_npages)));
		/* Remove kmem mappings for the pages for safety. */
		hat_unload(kas.a_hat, pp_dummy, ptob(pp_dummy_npages),
		    HAT_UNLOAD_UNLOCK);
		/* Leave pp_dummy pointer set as flag that init is done. */
	}
	mutex_exit(&pp_dummy_lock);
}
25617c478bd9Sstevel@tonic-gate
/*
 * Remap a page-aligned range of page_t's to dummy pages.
 */
void
remap_to_dummy(caddr_t va, pgcnt_t metapgs)
{
	int phase;

	ASSERT(IS_P2ALIGNED((uint64_t)(uintptr_t)va, PAGESIZE));

	/*
	 * We may start remapping at a non-zero page offset
	 * within the dummy pages since the low/high ends
	 * of the outgoing pp's could be shared by other
	 * memsegs (see memseg_remap_meta).
	 */
	phase = btop((uint64_t)(uintptr_t)va) % pp_dummy_npages;
	/*CONSTCOND*/
	ASSERT(PAGESIZE % sizeof (page_t) || phase == 0);

	/* Walk the range, cycling through the dummy pfns. */
	while (metapgs != 0) {
		pgcnt_t n;
		int i, j;

		n = pp_dummy_npages;
		if (n > metapgs)
			n = metapgs;
		for (i = 0; i < n; i++) {
			j = (i + phase) % pp_dummy_npages;
			/* Replace the existing mapping in place (REMAP). */
			hat_devload(kas.a_hat, va, ptob(1), pp_dummy_pfn[j],
			    PROT_READ,
			    HAT_LOAD | HAT_LOAD_NOCONSIST |
			    HAT_LOAD_REMAP);
			va += ptob(1);
		}
		metapgs -= n;
	}
}
26007c478bd9Sstevel@tonic-gate
/*
 * Remap a dynamically-added memseg's page_t metadata onto the dummy
 * pages.  If the metadata does not live inside the memseg itself, the
 * work is delegated to memseg_remap_meta().
 */
static void
memseg_remap_to_dummy(struct memseg *seg)
{
	caddr_t pp;
	pgcnt_t metapgs;

	ASSERT(memseg_is_dynamic(seg));
	ASSERT(pp_dummy != NULL);


	if (!memseg_includes_meta(seg)) {
		memseg_remap_meta(seg);
		return;
	}

	pp = (caddr_t)seg->pages;
	/* Metadata spans from the memseg start up to its first data page. */
	metapgs = seg->pages_base - memseg_get_start(seg);
	ASSERT(metapgs != 0);

	/* Flag the memseg as deleted (empty range) before remapping. */
	seg->pages_end = seg->pages_base;

	remap_to_dummy(pp, metapgs);
}
26249853d9e8SJason Beloro
/*
 * Transition all the deleted pages to the deleted state so that
 * page_lock will not wait. The page_lock_delete call will
 * also wake up any waiters.
 */
static void
memseg_lock_delete_all(struct memseg *seg)
{
	page_t *pp;

	for (pp = seg->pages; pp < seg->epages; pp++) {
		pp->p_pagenum = PFN_INVALID;	/* XXXX */
		page_lock_delete(pp);
	}
}
26407c478bd9Sstevel@tonic-gate
/*
 * Final phase of a memory delete: unhook the affected memsegs from the
 * global memsegs list, retire (or leak) their page_t metadata, remove the
 * spans from the phys_avail/phys_install memlists and adjust the global
 * memory accounting.  Entered with mhp->mh_mutex held; the mutex is
 * dropped across the global updates and re-acquired before returning.
 */
static void
kphysm_del_cleanup(struct mem_handle *mhp)
{
	struct memdelspan *mdsp;
	struct memseg *seg;
	struct memseg **segpp;
	struct memseg *seglist;
	pfn_t p_end;
	uint64_t avmem;
	pgcnt_t avpgs;
	pgcnt_t npgs;

	avpgs = mhp->mh_vm_pages;

	memsegs_lock(1);

	/*
	 * remove from main segment list.
	 */
	npgs = 0;
	seglist = NULL;
	for (mdsp = mhp->mh_transit.trl_spans; mdsp != NULL;
	    mdsp = mdsp->mds_next) {
		p_end = mdsp->mds_base + mdsp->mds_npgs;
		for (segpp = &memsegs; (seg = *segpp) != NULL; ) {
			if (seg->pages_base >= p_end ||
			    seg->pages_end <= mdsp->mds_base) {
				/* Span and memseg don't overlap. */
				segpp = &((*segpp)->next);
				continue;
			}
			/* Overlapping memsegs must lie wholly in the span. */
			ASSERT(seg->pages_base >= mdsp->mds_base);
			ASSERT(seg->pages_end <= p_end);

			/* Negative delta: reduce the page counters. */
			PLCNT_MODIFY_MAX(seg->pages_base,
			    seg->pages_base - seg->pages_end);

			/* Hide the memseg from future scans. */
			hat_kpm_delmem_mseg_update(seg, segpp);
			*segpp = seg->next;
			membar_producer();	/* TODO: Needed? */
			npgs += MSEG_NPAGES(seg);

			/*
			 * Leave the deleted segment's next pointer intact
			 * in case a memsegs scanning loop is walking this
			 * segment concurrently.
			 */
			seg->lnext = seglist;
			seglist = seg;
		}
	}

	build_pfn_hash();

	ASSERT(npgs < total_pages);
	total_pages -= npgs;

	/*
	 * Recalculate the paging parameters now total_pages has changed.
	 * This will also cause the clock hands to be reset before next use.
	 */
	setupclock();

	memsegs_unlock(1);

	mutex_exit(&mhp->mh_mutex);

	/* Process each unhooked memseg collected on seglist above. */
	while ((seg = seglist) != NULL) {
		pfn_t mseg_start;
		pfn_t mseg_base, mseg_end;
		pgcnt_t mseg_npgs;
		int mlret;

		seglist = seg->lnext;

		/*
		 * Put the page_t's into the deleted state to stop
		 * cv_wait()s on the pages. When we remap, the dummy
		 * page_t's will be in the same state.
		 */
		memseg_lock_delete_all(seg);
		/*
		 * Collect up information based on pages_base and pages_end
		 * early so that we can flag early that the memseg has been
		 * deleted by setting pages_end == pages_base.
		 */
		mseg_base = seg->pages_base;
		mseg_end = seg->pages_end;
		mseg_npgs = MSEG_NPAGES(seg);
		mseg_start = memseg_get_start(seg);

		if (memseg_is_dynamic(seg)) {
			/* Remap the meta data to our special dummy area. */
			memseg_remap_to_dummy(seg);

			/* Recycle the memseg structure for later re-use. */
			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_va_avail;
			memseg_va_avail = seg;
			mutex_exit(&memseg_lists_lock);
		} else {
			/*
			 * For memory whose page_ts were allocated
			 * at boot, we need to find a new use for
			 * the page_t memory.
			 * For the moment, just leak it.
			 * (It is held in the memseg_delete_junk list.)
			 */
			seg->pages_end = seg->pages_base;

			mutex_enter(&memseg_lists_lock);
			seg->lnext = memseg_delete_junk;
			memseg_delete_junk = seg;
			mutex_exit(&memseg_lists_lock);
		}

		/* Must not use seg now as it could be re-used. */

		memlist_write_lock();

		/* Remove the deleted pages from the available-memory list. */
		mlret = memlist_delete_span(
		    (uint64_t)(mseg_base) << PAGESHIFT,
		    (uint64_t)(mseg_npgs) << PAGESHIFT,
		    &phys_avail);
		ASSERT(mlret == MEML_SPANOP_OK);

		/* And the full memseg extent from the installed list. */
		mlret = memlist_delete_span(
		    (uint64_t)(mseg_start) << PAGESHIFT,
		    (uint64_t)(mseg_end - mseg_start) <<
		    PAGESHIFT,
		    &phys_install);
		ASSERT(mlret == MEML_SPANOP_OK);
		phys_install_has_changed();

		memlist_write_unlock();
	}

	/* Recompute physmax/physinstalled from the updated memlist. */
	memlist_read_lock();
	installed_top_size(phys_install, &physmax, &physinstalled);
	memlist_read_unlock();

	mutex_enter(&freemem_lock);
	maxmem -= avpgs;
	physmem -= avpgs;
	/* availrmem is adjusted during the delete. */
	availrmem_initial -= avpgs;

	mutex_exit(&freemem_lock);

	dump_resize();

	cmn_err(CE_CONT, "?kphysm_delete: mem = %ldK "
	    "(0x%" PRIx64 ")\n",
	    physinstalled << (PAGESHIFT - 10),
	    (uint64_t)physinstalled << PAGESHIFT);

	avmem = (uint64_t)freemem << PAGESHIFT;
	cmn_err(CE_CONT, "?kphysm_delete: "
	    "avail mem = %" PRId64 "\n", avmem);

	/*
	 * Update lgroup generation number on single lgroup systems
	 */
	if (nlgrps == 1)
		lgrp_config(LGRP_CONFIG_GEN_UPDATE, 0, 0);

	/* Successfully deleted system memory */
	mutex_enter(&mhp->mh_mutex);
}
28107c478bd9Sstevel@tonic-gate
/* Count of wakeups issued for waiters on vnode-less deleted pages. */
static uint_t mdel_nullvp_waiter;

/*
 * Take a captured page out of the page hash (if it has a vnode) or wake
 * any waiters on it, then push it onto the handle's mh_deleted list and
 * decrement the remaining-work counter mh_hold_todo.
 */
static void
page_delete_collect(
	page_t *pp,
	struct mem_handle *mhp)
{
	if (pp->p_vnode) {
		page_hashout(pp, (kmutex_t *)NULL);
		/* do not do PP_SETAGED(pp); */
	} else {
		kmutex_t *sep;

		sep = page_se_mutex(pp);
		mutex_enter(sep);
		if (CV_HAS_WAITERS(&pp->p_cv)) {
			mdel_nullvp_waiter++;
			cv_broadcast(&pp->p_cv);
		}
		mutex_exit(sep);
	}
	/* Page must not be on any paging list. */
	ASSERT(pp->p_next == pp->p_prev);
	ASSERT(pp->p_next == NULL || pp->p_next == pp);
	pp->p_next = mhp->mh_deleted;
	mhp->mh_deleted = pp;
	ASSERT(mhp->mh_hold_todo != 0);
	mhp->mh_hold_todo--;
}
28397c478bd9Sstevel@tonic-gate
28407c478bd9Sstevel@tonic-gate static void
transit_list_collect(struct mem_handle * mhp,int v)28417c478bd9Sstevel@tonic-gate transit_list_collect(struct mem_handle *mhp, int v)
28427c478bd9Sstevel@tonic-gate {
28437c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
28447c478bd9Sstevel@tonic-gate
28457c478bd9Sstevel@tonic-gate trh = &transit_list_head;
28467c478bd9Sstevel@tonic-gate mutex_enter(&trh->trh_lock);
28477c478bd9Sstevel@tonic-gate mhp->mh_transit.trl_collect = v;
28487c478bd9Sstevel@tonic-gate mutex_exit(&trh->trh_lock);
28497c478bd9Sstevel@tonic-gate }
28507c478bd9Sstevel@tonic-gate
28517c478bd9Sstevel@tonic-gate static void
transit_list_insert(struct transit_list * tlp)28527c478bd9Sstevel@tonic-gate transit_list_insert(struct transit_list *tlp)
28537c478bd9Sstevel@tonic-gate {
28547c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
28557c478bd9Sstevel@tonic-gate
28567c478bd9Sstevel@tonic-gate trh = &transit_list_head;
28577c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&trh->trh_lock));
28587c478bd9Sstevel@tonic-gate tlp->trl_next = trh->trh_head;
28597c478bd9Sstevel@tonic-gate trh->trh_head = tlp;
28607c478bd9Sstevel@tonic-gate }
28617c478bd9Sstevel@tonic-gate
28627c478bd9Sstevel@tonic-gate static void
transit_list_remove(struct transit_list * tlp)28637c478bd9Sstevel@tonic-gate transit_list_remove(struct transit_list *tlp)
28647c478bd9Sstevel@tonic-gate {
28657c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
28667c478bd9Sstevel@tonic-gate struct transit_list **tlpp;
28677c478bd9Sstevel@tonic-gate
28687c478bd9Sstevel@tonic-gate trh = &transit_list_head;
28697c478bd9Sstevel@tonic-gate tlpp = &trh->trh_head;
28707c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&trh->trh_lock));
28717c478bd9Sstevel@tonic-gate while (*tlpp != NULL && *tlpp != tlp)
28727c478bd9Sstevel@tonic-gate tlpp = &(*tlpp)->trl_next;
28737c478bd9Sstevel@tonic-gate ASSERT(*tlpp != NULL);
28747c478bd9Sstevel@tonic-gate if (*tlpp == tlp)
28757c478bd9Sstevel@tonic-gate *tlpp = tlp->trl_next;
28767c478bd9Sstevel@tonic-gate tlp->trl_next = NULL;
28777c478bd9Sstevel@tonic-gate }
28787c478bd9Sstevel@tonic-gate
28797c478bd9Sstevel@tonic-gate static struct transit_list *
pfnum_to_transit_list(struct transit_list_head * trh,pfn_t pfnum)28807c478bd9Sstevel@tonic-gate pfnum_to_transit_list(struct transit_list_head *trh, pfn_t pfnum)
28817c478bd9Sstevel@tonic-gate {
28827c478bd9Sstevel@tonic-gate struct transit_list *tlp;
28837c478bd9Sstevel@tonic-gate
28847c478bd9Sstevel@tonic-gate for (tlp = trh->trh_head; tlp != NULL; tlp = tlp->trl_next) {
28857c478bd9Sstevel@tonic-gate struct memdelspan *mdsp;
28867c478bd9Sstevel@tonic-gate
28877c478bd9Sstevel@tonic-gate for (mdsp = tlp->trl_spans; mdsp != NULL;
28887c478bd9Sstevel@tonic-gate mdsp = mdsp->mds_next) {
28897c478bd9Sstevel@tonic-gate if (pfnum >= mdsp->mds_base &&
28907c478bd9Sstevel@tonic-gate pfnum < (mdsp->mds_base + mdsp->mds_npgs)) {
28917c478bd9Sstevel@tonic-gate return (tlp);
28927c478bd9Sstevel@tonic-gate }
28937c478bd9Sstevel@tonic-gate }
28947c478bd9Sstevel@tonic-gate }
28957c478bd9Sstevel@tonic-gate return (NULL);
28967c478bd9Sstevel@tonic-gate }
28977c478bd9Sstevel@tonic-gate
28987c478bd9Sstevel@tonic-gate int
pfn_is_being_deleted(pfn_t pfnum)28997c478bd9Sstevel@tonic-gate pfn_is_being_deleted(pfn_t pfnum)
29007c478bd9Sstevel@tonic-gate {
29017c478bd9Sstevel@tonic-gate struct transit_list_head *trh;
29027c478bd9Sstevel@tonic-gate struct transit_list *tlp;
29037c478bd9Sstevel@tonic-gate int ret;
29047c478bd9Sstevel@tonic-gate
29057c478bd9Sstevel@tonic-gate trh = &transit_list_head;
29067c478bd9Sstevel@tonic-gate if (trh->trh_head == NULL)
29077c478bd9Sstevel@tonic-gate return (0);
29087c478bd9Sstevel@tonic-gate
29097c478bd9Sstevel@tonic-gate mutex_enter(&trh->trh_lock);
29107c478bd9Sstevel@tonic-gate tlp = pfnum_to_transit_list(trh, pfnum);
29117c478bd9Sstevel@tonic-gate ret = (tlp != NULL && tlp->trl_collect);
29127c478bd9Sstevel@tonic-gate mutex_exit(&trh->trh_lock);
29137c478bd9Sstevel@tonic-gate
29147c478bd9Sstevel@tonic-gate return (ret);
29157c478bd9Sstevel@tonic-gate }
29167c478bd9Sstevel@tonic-gate
#ifdef MEM_DEL_STATS
extern int hz;
/*
 * Dump the per-delete statistics gathered in mhp->mh_delstat to the
 * console when mem_del_stat_print is set.  Debug-only (MEM_DEL_STATS).
 */
static void
mem_del_stat_print_func(struct mem_handle *mhp)
{
	uint64_t tmp;

	if (mem_del_stat_print) {
		printf("memory delete loop %x/%x, statistics%s\n",
		    (uint_t)mhp->mh_transit.trl_spans->mds_base,
		    (uint_t)mhp->mh_transit.trl_spans->mds_npgs,
		    (mhp->mh_cancel ? " (cancelled)" : ""));
		printf("\t%8u nloop\n", mhp->mh_delstat.nloop);
		printf("\t%8u need_free\n", mhp->mh_delstat.need_free);
		printf("\t%8u free_loop\n", mhp->mh_delstat.free_loop);
		printf("\t%8u free_low\n", mhp->mh_delstat.free_low);
		printf("\t%8u free_failed\n", mhp->mh_delstat.free_failed);
		printf("\t%8u ncheck\n", mhp->mh_delstat.ncheck);
		printf("\t%8u nopaget\n", mhp->mh_delstat.nopaget);
		printf("\t%8u lockfail\n", mhp->mh_delstat.lockfail);
		printf("\t%8u nfree\n", mhp->mh_delstat.nfree);
		printf("\t%8u nreloc\n", mhp->mh_delstat.nreloc);
		printf("\t%8u nrelocfail\n", mhp->mh_delstat.nrelocfail);
		printf("\t%8u already_done\n", mhp->mh_delstat.already_done);
		printf("\t%8u first_notfree\n", mhp->mh_delstat.first_notfree);
		printf("\t%8u npplocked\n", mhp->mh_delstat.npplocked);
		printf("\t%8u nlockreloc\n", mhp->mh_delstat.nlockreloc);
		printf("\t%8u nnorepl\n", mhp->mh_delstat.nnorepl);
		printf("\t%8u nmodreloc\n", mhp->mh_delstat.nmodreloc);
		printf("\t%8u ndestroy\n", mhp->mh_delstat.ndestroy);
		printf("\t%8u nputpage\n", mhp->mh_delstat.nputpage);
		printf("\t%8u nnoreclaim\n", mhp->mh_delstat.nnoreclaim);
		printf("\t%8u ndelay\n", mhp->mh_delstat.ndelay);
		printf("\t%8u demotefail\n", mhp->mh_delstat.demotefail);
		printf("\t%8u retired\n", mhp->mh_delstat.retired);
		printf("\t%8u toxic\n", mhp->mh_delstat.toxic);
		printf("\t%8u failing\n", mhp->mh_delstat.failing);
		printf("\t%8u modtoxic\n", mhp->mh_delstat.modtoxic);
		printf("\t%8u npplkdtoxic\n", mhp->mh_delstat.npplkdtoxic);
		printf("\t%8u gptlmodfail\n", mhp->mh_delstat.gptlmodfail);
		printf("\t%8u gptllckfail\n", mhp->mh_delstat.gptllckfail);
		/* Convert tick counts to whole seconds for display. */
		tmp = mhp->mh_delstat.nticks_total / hz; /* seconds */
		printf(
		    "\t%"PRIu64" nticks_total - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_total, tmp / 60, tmp % 60);

		tmp = mhp->mh_delstat.nticks_pgrp / hz; /* seconds */
		printf(
		    "\t%"PRIu64" nticks_pgrp - %"PRIu64" min %"PRIu64" sec\n",
		    mhp->mh_delstat.nticks_pgrp, tmp / 60, tmp % 60);
	}
}
#endif /* MEM_DEL_STATS */
29707c478bd9Sstevel@tonic-gate
/*
 * A registered client callback: the vector of entry points plus the
 * opaque argument passed back on each callback.
 */
struct mem_callback {
	kphysm_setup_vector_t *vec;	/* Client callback vector. */
	void *arg;			/* Client argument. */
};

/* Fixed-size registration table; slots are reused after unregister. */
#define	NMEMCALLBACKS		100

static struct mem_callback mem_callbacks[NMEMCALLBACKS];
static uint_t nmemcallbacks;		/* High-water mark of used slots. */
static krwlock_t mem_callback_rwlock;	/* Protects the table above. */
29817c478bd9Sstevel@tonic-gate
29827c478bd9Sstevel@tonic-gate int
kphysm_setup_func_register(kphysm_setup_vector_t * vec,void * arg)29837c478bd9Sstevel@tonic-gate kphysm_setup_func_register(kphysm_setup_vector_t *vec, void *arg)
29847c478bd9Sstevel@tonic-gate {
29857c478bd9Sstevel@tonic-gate uint_t i, found;
29867c478bd9Sstevel@tonic-gate
29877c478bd9Sstevel@tonic-gate /*
29887c478bd9Sstevel@tonic-gate * This test will become more complicated when the version must
29897c478bd9Sstevel@tonic-gate * change.
29907c478bd9Sstevel@tonic-gate */
29917c478bd9Sstevel@tonic-gate if (vec->version != KPHYSM_SETUP_VECTOR_VERSION)
29927c478bd9Sstevel@tonic-gate return (EINVAL);
29937c478bd9Sstevel@tonic-gate
29947c478bd9Sstevel@tonic-gate if (vec->post_add == NULL || vec->pre_del == NULL ||
29957c478bd9Sstevel@tonic-gate vec->post_del == NULL)
29967c478bd9Sstevel@tonic-gate return (EINVAL);
29977c478bd9Sstevel@tonic-gate
29987c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_WRITER);
29997c478bd9Sstevel@tonic-gate for (i = 0, found = 0; i < nmemcallbacks; i++) {
30007c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec == NULL && found == 0)
30017c478bd9Sstevel@tonic-gate found = i + 1;
30027c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec == vec &&
30037c478bd9Sstevel@tonic-gate mem_callbacks[i].arg == arg) {
30047c478bd9Sstevel@tonic-gate #ifdef DEBUG
30057c478bd9Sstevel@tonic-gate /* Catch this in DEBUG kernels. */
30067c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, "kphysm_setup_func_register"
30077c478bd9Sstevel@tonic-gate "(0x%p, 0x%p) duplicate registration from 0x%p",
30087c478bd9Sstevel@tonic-gate (void *)vec, arg, (void *)caller());
30097c478bd9Sstevel@tonic-gate #endif /* DEBUG */
30107c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30117c478bd9Sstevel@tonic-gate return (EEXIST);
30127c478bd9Sstevel@tonic-gate }
30137c478bd9Sstevel@tonic-gate }
30147c478bd9Sstevel@tonic-gate if (found != 0) {
30157c478bd9Sstevel@tonic-gate i = found - 1;
30167c478bd9Sstevel@tonic-gate } else {
30177c478bd9Sstevel@tonic-gate ASSERT(nmemcallbacks < NMEMCALLBACKS);
30187c478bd9Sstevel@tonic-gate if (nmemcallbacks == NMEMCALLBACKS) {
30197c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30207c478bd9Sstevel@tonic-gate return (ENOMEM);
30217c478bd9Sstevel@tonic-gate }
30227c478bd9Sstevel@tonic-gate i = nmemcallbacks++;
30237c478bd9Sstevel@tonic-gate }
30247c478bd9Sstevel@tonic-gate mem_callbacks[i].vec = vec;
30257c478bd9Sstevel@tonic-gate mem_callbacks[i].arg = arg;
30267c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30277c478bd9Sstevel@tonic-gate return (0);
30287c478bd9Sstevel@tonic-gate }
30297c478bd9Sstevel@tonic-gate
30307c478bd9Sstevel@tonic-gate void
kphysm_setup_func_unregister(kphysm_setup_vector_t * vec,void * arg)30317c478bd9Sstevel@tonic-gate kphysm_setup_func_unregister(kphysm_setup_vector_t *vec, void *arg)
30327c478bd9Sstevel@tonic-gate {
30337c478bd9Sstevel@tonic-gate uint_t i;
30347c478bd9Sstevel@tonic-gate
30357c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_WRITER);
30367c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30377c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec == vec &&
30387c478bd9Sstevel@tonic-gate mem_callbacks[i].arg == arg) {
30397c478bd9Sstevel@tonic-gate mem_callbacks[i].vec = NULL;
30407c478bd9Sstevel@tonic-gate mem_callbacks[i].arg = NULL;
30417c478bd9Sstevel@tonic-gate if (i == (nmemcallbacks - 1))
30427c478bd9Sstevel@tonic-gate nmemcallbacks--;
30437c478bd9Sstevel@tonic-gate break;
30447c478bd9Sstevel@tonic-gate }
30457c478bd9Sstevel@tonic-gate }
30467c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30477c478bd9Sstevel@tonic-gate }
30487c478bd9Sstevel@tonic-gate
30497c478bd9Sstevel@tonic-gate static void
kphysm_setup_post_add(pgcnt_t delta_pages)30507c478bd9Sstevel@tonic-gate kphysm_setup_post_add(pgcnt_t delta_pages)
30517c478bd9Sstevel@tonic-gate {
30527c478bd9Sstevel@tonic-gate uint_t i;
30537c478bd9Sstevel@tonic-gate
30547c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_READER);
30557c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30567c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) {
30577c478bd9Sstevel@tonic-gate (*mem_callbacks[i].vec->post_add)
30587c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages);
30597c478bd9Sstevel@tonic-gate }
30607c478bd9Sstevel@tonic-gate }
30617c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
30627c478bd9Sstevel@tonic-gate }
30637c478bd9Sstevel@tonic-gate
30647c478bd9Sstevel@tonic-gate /*
30657c478bd9Sstevel@tonic-gate * Note the locking between pre_del and post_del: The reader lock is held
30667c478bd9Sstevel@tonic-gate * between the two calls to stop the set of functions from changing.
30677c478bd9Sstevel@tonic-gate */
30687c478bd9Sstevel@tonic-gate
30697c478bd9Sstevel@tonic-gate static int
kphysm_setup_pre_del(pgcnt_t delta_pages)30707c478bd9Sstevel@tonic-gate kphysm_setup_pre_del(pgcnt_t delta_pages)
30717c478bd9Sstevel@tonic-gate {
30727c478bd9Sstevel@tonic-gate uint_t i;
30737c478bd9Sstevel@tonic-gate int ret;
30747c478bd9Sstevel@tonic-gate int aret;
30757c478bd9Sstevel@tonic-gate
30767c478bd9Sstevel@tonic-gate ret = 0;
30777c478bd9Sstevel@tonic-gate rw_enter(&mem_callback_rwlock, RW_READER);
30787c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30797c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) {
30807c478bd9Sstevel@tonic-gate aret = (*mem_callbacks[i].vec->pre_del)
30817c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages);
30827c478bd9Sstevel@tonic-gate ret |= aret;
30837c478bd9Sstevel@tonic-gate }
30847c478bd9Sstevel@tonic-gate }
30857c478bd9Sstevel@tonic-gate
30867c478bd9Sstevel@tonic-gate return (ret);
30877c478bd9Sstevel@tonic-gate }
30887c478bd9Sstevel@tonic-gate
30897c478bd9Sstevel@tonic-gate static void
kphysm_setup_post_del(pgcnt_t delta_pages,int cancelled)30907c478bd9Sstevel@tonic-gate kphysm_setup_post_del(pgcnt_t delta_pages, int cancelled)
30917c478bd9Sstevel@tonic-gate {
30927c478bd9Sstevel@tonic-gate uint_t i;
30937c478bd9Sstevel@tonic-gate
30947c478bd9Sstevel@tonic-gate for (i = 0; i < nmemcallbacks; i++) {
30957c478bd9Sstevel@tonic-gate if (mem_callbacks[i].vec != NULL) {
30967c478bd9Sstevel@tonic-gate (*mem_callbacks[i].vec->post_del)
30977c478bd9Sstevel@tonic-gate (mem_callbacks[i].arg, delta_pages, cancelled);
30987c478bd9Sstevel@tonic-gate }
30997c478bd9Sstevel@tonic-gate }
31007c478bd9Sstevel@tonic-gate rw_exit(&mem_callback_rwlock);
31017c478bd9Sstevel@tonic-gate }
31027c478bd9Sstevel@tonic-gate
/*
 * Split the boot-time memseg that wholly contains [base, base + npgs)
 * into up to three chained memsegs, so that the middle piece exactly
 * covers the requested range.  The original memseg is retired onto
 * memseg_edit_junk rather than freed, since outstanding references to
 * it may still exist.
 *
 * Returns 1 if the split was performed; 0 if no covering memseg was
 * found, the memseg embeds its own page_t metadata (cannot be split),
 * or the range already spans the entire memseg (nothing to split).
 */
static int
kphysm_split_memseg(
	pfn_t base,
	pgcnt_t npgs)
{
	struct memseg *seg;
	struct memseg **segpp;
	pgcnt_t size_low, size_high;
	struct memseg *seg_low, *seg_mid, *seg_high;

	/*
	 * Lock the memsegs list against other updates now
	 */
	memsegs_lock(1);

	/*
	 * Find boot time memseg that wholly covers this area.
	 */

	/* First find the memseg with page 'base' in it. */
	/* segpp tracks the predecessor link for the atomic swap below. */
	for (segpp = &memsegs; (seg = *segpp) != NULL;
	    segpp = &((*segpp)->next)) {
		if (base >= seg->pages_base && base < seg->pages_end)
			break;
	}
	if (seg == NULL) {
		memsegs_unlock(1);
		return (0);
	}
	/* A memseg carrying its own page_t metadata cannot be split. */
	if (memseg_includes_meta(seg)) {
		memsegs_unlock(1);
		return (0);
	}
	/* The range must be wholly contained in this one memseg. */
	if ((base + npgs) > seg->pages_end) {
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Work out the size of the two segments that will
	 * surround the new segment, one for low address
	 * and one for high.
	 */
	ASSERT(base >= seg->pages_base);
	size_low = base - seg->pages_base;
	ASSERT(seg->pages_end >= (base + npgs));
	size_high = seg->pages_end - (base + npgs);

	/*
	 * Sanity check.
	 */
	/* Both zero means the range IS the memseg: no split needed. */
	if ((size_low + size_high) == 0) {
		memsegs_unlock(1);
		return (0);
	}

	/*
	 * Allocate the new structures. The old memseg will not be freed
	 * as there may be a reference to it.
	 */
	seg_low = NULL;
	seg_high = NULL;

	if (size_low != 0)
		seg_low = memseg_alloc();

	/* The middle piece always exists; it covers [base, base+npgs). */
	seg_mid = memseg_alloc();

	if (size_high != 0)
		seg_high = memseg_alloc();

	/*
	 * All allocation done now.
	 */
	/* Low piece: shares seg's page_t array prefix and flags. */
	if (size_low != 0) {
		seg_low->pages = seg->pages;
		seg_low->epages = seg_low->pages + size_low;
		seg_low->pages_base = seg->pages_base;
		seg_low->pages_end = seg_low->pages_base + size_low;
		seg_low->next = seg_mid;
		seg_low->msegflags = seg->msegflags;
	}
	/* High piece: shares seg's page_t array suffix and flags. */
	if (size_high != 0) {
		seg_high->pages = seg->epages - size_high;
		seg_high->epages = seg_high->pages + size_high;
		seg_high->pages_base = seg->pages_end - size_high;
		seg_high->pages_end = seg_high->pages_base + size_high;
		seg_high->next = seg->next;
		seg_high->msegflags = seg->msegflags;
	}

	seg_mid->pages = seg->pages + size_low;
	seg_mid->pages_base = seg->pages_base + size_low;
	seg_mid->epages = seg->epages - size_high;
	seg_mid->pages_end = seg->pages_end - size_high;
	seg_mid->next = (seg_high != NULL) ? seg_high : seg->next;
	seg_mid->msegflags = seg->msegflags;

	/*
	 * Update hat_kpm specific info of all involved memsegs and
	 * allow hat_kpm specific global chain updates.
	 */
	hat_kpm_split_mseg_update(seg, segpp, seg_low, seg_mid, seg_high);

	/*
	 * At this point we have two equivalent memseg sub-chains,
	 * seg and seg_low/seg_mid/seg_high, which both chain on to
	 * the same place in the global chain. By re-writing the pointer
	 * in the previous element we switch atomically from using the old
	 * (seg) to the new.
	 */
	*segpp = (seg_low != NULL) ? seg_low : seg_mid;

	/* Ensure the new chain is visible before the hash is rebuilt. */
	membar_enter();

	build_pfn_hash();
	memsegs_unlock(1);

	/*
	 * We leave the old segment, 'seg', intact as there may be
	 * references to it. Also, as the value of total_pages has not
	 * changed and the memsegs list is effectively the same when
	 * accessed via the old or the new pointer, we do not have to
	 * cause pageout_scanner() to re-evaluate its hand pointers.
	 *
	 * We currently do not re-use or reclaim the page_t memory.
	 * If we do, then this may have to change.
	 */

	/* Park the retired memseg on the junk list for posterity. */
	mutex_enter(&memseg_lists_lock);
	seg->lnext = memseg_edit_junk;
	memseg_edit_junk = seg;
	mutex_exit(&memseg_lists_lock);

	return (1);
}
32397c478bd9Sstevel@tonic-gate
32407c478bd9Sstevel@tonic-gate /*
32417c478bd9Sstevel@tonic-gate * The sfmmu hat layer (e.g.) accesses some parts of the memseg
32427c478bd9Sstevel@tonic-gate * structure using physical addresses. Therefore a kmem_cache is
32437c478bd9Sstevel@tonic-gate * used with KMC_NOHASH to avoid page crossings within a memseg
32447c478bd9Sstevel@tonic-gate * structure. KMC_NOHASH requires that no external (outside of
32457c478bd9Sstevel@tonic-gate * slab) information is allowed. This, in turn, implies that the
32467c478bd9Sstevel@tonic-gate * cache's slabsize must be exactly a single page, since per-slab
32477c478bd9Sstevel@tonic-gate * information (e.g. the freelist for the slab) is kept at the
32487c478bd9Sstevel@tonic-gate * end of the slab, where it is easy to locate. Should be changed
32497c478bd9Sstevel@tonic-gate * when a more obvious kmem_cache interface/flag will become
32507c478bd9Sstevel@tonic-gate * available.
32517c478bd9Sstevel@tonic-gate */
32527c478bd9Sstevel@tonic-gate void
mem_config_init()32537c478bd9Sstevel@tonic-gate mem_config_init()
32547c478bd9Sstevel@tonic-gate {
32557c478bd9Sstevel@tonic-gate memseg_cache = kmem_cache_create("memseg_cache", sizeof (struct memseg),
325673347c69Smb 0, NULL, NULL, NULL, NULL, static_arena, KMC_NOHASH);
32577c478bd9Sstevel@tonic-gate }
32589853d9e8SJason Beloro
32599853d9e8SJason Beloro struct memseg *
memseg_alloc()32609853d9e8SJason Beloro memseg_alloc()
32619853d9e8SJason Beloro {
32629853d9e8SJason Beloro struct memseg *seg;
32639853d9e8SJason Beloro
32649853d9e8SJason Beloro seg = kmem_cache_alloc(memseg_cache, KM_SLEEP);
32659853d9e8SJason Beloro bzero(seg, sizeof (struct memseg));
32669853d9e8SJason Beloro
32679853d9e8SJason Beloro return (seg);
32689853d9e8SJason Beloro }
32699853d9e8SJason Beloro
32709853d9e8SJason Beloro /*
32719853d9e8SJason Beloro * Return whether the page_t memory for this memseg
32729853d9e8SJason Beloro * is included in the memseg itself.
32739853d9e8SJason Beloro */
/*
 * Return whether the page_t memory for this memseg
 * is included in the memseg itself.
 *
 * Non-zero (the MEMSEG_META_INCL bit) when the metadata is embedded;
 * such memsegs cannot be split (see kphysm_split_memseg).
 */
static int
memseg_includes_meta(struct memseg *seg)
{
	return (seg->msegflags & MEMSEG_META_INCL);
}
32799853d9e8SJason Beloro
32809853d9e8SJason Beloro pfn_t
memseg_get_start(struct memseg * seg)32819853d9e8SJason Beloro memseg_get_start(struct memseg *seg)
32829853d9e8SJason Beloro {
32839853d9e8SJason Beloro pfn_t pt_start;
32849853d9e8SJason Beloro
32859853d9e8SJason Beloro if (memseg_includes_meta(seg)) {
32869853d9e8SJason Beloro pt_start = hat_getpfnum(kas.a_hat, (caddr_t)seg->pages);
32879853d9e8SJason Beloro
32889853d9e8SJason Beloro /* Meta data is required to be at the beginning */
32899853d9e8SJason Beloro ASSERT(pt_start < seg->pages_base);
32909853d9e8SJason Beloro } else
32919853d9e8SJason Beloro pt_start = seg->pages_base;
32929853d9e8SJason Beloro
32939853d9e8SJason Beloro return (pt_start);
32949853d9e8SJason Beloro }
32959853d9e8SJason Beloro
32969853d9e8SJason Beloro /*
32979853d9e8SJason Beloro * Invalidate memseg pointers in cpu private vm data caches.
32989853d9e8SJason Beloro */
32999853d9e8SJason Beloro static void
memseg_cpu_vm_flush()33009853d9e8SJason Beloro memseg_cpu_vm_flush()
33019853d9e8SJason Beloro {
33029853d9e8SJason Beloro cpu_t *cp;
33039853d9e8SJason Beloro vm_cpu_data_t *vc;
33049853d9e8SJason Beloro
33059853d9e8SJason Beloro mutex_enter(&cpu_lock);
33060ed5c46eSJosef 'Jeff' Sipek pause_cpus(NULL, NULL);
33079853d9e8SJason Beloro
33089853d9e8SJason Beloro cp = cpu_list;
33099853d9e8SJason Beloro do {
33109853d9e8SJason Beloro vc = cp->cpu_vm_data;
33119853d9e8SJason Beloro vc->vc_pnum_memseg = NULL;
33129853d9e8SJason Beloro vc->vc_pnext_memseg = NULL;
33139853d9e8SJason Beloro
33149853d9e8SJason Beloro } while ((cp = cp->cpu_next) != cpu_list);
33159853d9e8SJason Beloro
33169853d9e8SJason Beloro start_cpus();
33179853d9e8SJason Beloro mutex_exit(&cpu_lock);
33189853d9e8SJason Beloro }
3319