xref: /illumos-gate/usr/src/uts/sun4v/os/memseg.c (revision af4c679f)
19853d9e8SJason Beloro /*
29853d9e8SJason Beloro  *
39853d9e8SJason Beloro  * CDDL HEADER START
49853d9e8SJason Beloro  *
59853d9e8SJason Beloro  * The contents of this file are subject to the terms of the
69853d9e8SJason Beloro  * Common Development and Distribution License (the "License").
79853d9e8SJason Beloro  * You may not use this file except in compliance with the License.
89853d9e8SJason Beloro  *
99853d9e8SJason Beloro  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
109853d9e8SJason Beloro  * or http://www.opensolaris.org/os/licensing.
119853d9e8SJason Beloro  * See the License for the specific language governing permissions
129853d9e8SJason Beloro  * and limitations under the License.
139853d9e8SJason Beloro  *
149853d9e8SJason Beloro  * When distributing Covered Code, include this CDDL HEADER in each
159853d9e8SJason Beloro  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
169853d9e8SJason Beloro  * If applicable, add the following below this CDDL HEADER, with the
179853d9e8SJason Beloro  * fields enclosed by brackets "[]" replaced with your own identifying
189853d9e8SJason Beloro  * information: Portions Copyright [yyyy] [name of copyright owner]
199853d9e8SJason Beloro  *
209853d9e8SJason Beloro  * CDDL HEADER END
219853d9e8SJason Beloro  */
229853d9e8SJason Beloro /*
239853d9e8SJason Beloro  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
249853d9e8SJason Beloro  * Use is subject to license terms.
259853d9e8SJason Beloro  */
269853d9e8SJason Beloro 
279853d9e8SJason Beloro #include <sys/types.h>
289853d9e8SJason Beloro #include <sys/cmn_err.h>
299853d9e8SJason Beloro #include <sys/vm.h>
309853d9e8SJason Beloro #include <sys/mman.h>
319853d9e8SJason Beloro #include <vm/vm_dep.h>
329853d9e8SJason Beloro #include <vm/seg_kmem.h>
339853d9e8SJason Beloro #include <vm/seg_kpm.h>
349853d9e8SJason Beloro #include <sys/mem_config.h>
359853d9e8SJason Beloro #include <sys/sysmacros.h>
369853d9e8SJason Beloro 
379853d9e8SJason Beloro extern pgcnt_t pp_dummy_npages;
389853d9e8SJason Beloro extern pfn_t *pp_dummy_pfn;	/* Array of dummy pfns. */
399853d9e8SJason Beloro 
409853d9e8SJason Beloro extern kmutex_t memseg_lists_lock;
419853d9e8SJason Beloro extern struct memseg *memseg_va_avail;
429853d9e8SJason Beloro extern struct memseg *memseg_alloc();
439853d9e8SJason Beloro 
449853d9e8SJason Beloro extern page_t *ppvm_base;
459853d9e8SJason Beloro extern pgcnt_t ppvm_size;
469853d9e8SJason Beloro 
479853d9e8SJason Beloro static int sun4v_memseg_debug;
489853d9e8SJason Beloro 
499853d9e8SJason Beloro extern struct memseg *memseg_reuse(pgcnt_t);
509853d9e8SJason Beloro extern void remap_to_dummy(caddr_t, pgcnt_t);
519853d9e8SJason Beloro 
/*
 * The page_t memory for incoming pages is allocated from existing memory,
 * which can create a situation where a memory addition fails because of a
 * shortage of existing memory.  To mitigate this situation
 * some memory is always reserved ahead of time for page_t allocation.
 * Each 4MB of reserved page_t's guarantees a 256MB (x64) addition without
 * page_t allocation.  The added 256MB of memory could then theoretically
 * allow an addition of 16GB.
 */
619853d9e8SJason Beloro #define	RSV_SIZE	0x40000000	/* add size with rsrvd page_t's 1G */
629853d9e8SJason Beloro 
/*
 * Debug tracing for this file.  In DEBUG kernels the message is printed
 * only when sun4v_memseg_debug is non-zero; otherwise the macro expands
 * to nothing.  The DEBUG form is wrapped in do { } while (0) so that it
 * behaves as a single statement and cannot capture a following 'else'.
 */
#ifdef	DEBUG
#define	MEMSEG_DEBUG(...)						\
	do {								\
		if (sun4v_memseg_debug)					\
			printf(__VA_ARGS__);				\
	} while (0)
#else
#define	MEMSEG_DEBUG(...)
#endif
689853d9e8SJason Beloro 
699853d9e8SJason Beloro /*
709853d9e8SJason Beloro  * The page_t's for the incoming memory are allocated from
719853d9e8SJason Beloro  * existing pages.
729853d9e8SJason Beloro  */
739853d9e8SJason Beloro /*ARGSUSED*/
749853d9e8SJason Beloro int
memseg_alloc_meta(pfn_t base,pgcnt_t npgs,void ** ptp,pgcnt_t * metap)759853d9e8SJason Beloro memseg_alloc_meta(pfn_t base, pgcnt_t npgs, void **ptp, pgcnt_t *metap)
769853d9e8SJason Beloro {
77*af4c679fSSean McEnroe 	page_t		*pp, *opp, *epp;
789853d9e8SJason Beloro 	pgcnt_t		metapgs;
79*af4c679fSSean McEnroe 	int		i;
809853d9e8SJason Beloro 	struct seg	kseg;
819853d9e8SJason Beloro 	caddr_t		vaddr;
829853d9e8SJason Beloro 
839853d9e8SJason Beloro 	/*
849853d9e8SJason Beloro 	 * Verify incoming memory is within supported DR range.
859853d9e8SJason Beloro 	 */
869853d9e8SJason Beloro 	if ((base + npgs) * sizeof (page_t) > ppvm_size)
879853d9e8SJason Beloro 		return (KPHYSM_ENOTSUP);
889853d9e8SJason Beloro 
899853d9e8SJason Beloro 	opp = pp = ppvm_base + base;
909853d9e8SJason Beloro 	epp = pp + npgs;
919853d9e8SJason Beloro 	metapgs = btopr(npgs * sizeof (page_t));
929853d9e8SJason Beloro 
939853d9e8SJason Beloro 	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
94*af4c679fSSean McEnroe 	    page_find(&mpvp, (u_offset_t)pp)) {
959853d9e8SJason Beloro 		/*
969853d9e8SJason Beloro 		 * Another memseg has page_t's in the same
979853d9e8SJason Beloro 		 * page which 'pp' resides.  This would happen
989853d9e8SJason Beloro 		 * if PAGESIZE is not an integral multiple of
999853d9e8SJason Beloro 		 * sizeof (page_t) and therefore 'pp'
1009853d9e8SJason Beloro 		 * does not start on a page boundry.
1019853d9e8SJason Beloro 		 *
1029853d9e8SJason Beloro 		 * Since the other memseg's pages_t's still
1039853d9e8SJason Beloro 		 * map valid pages, skip allocation of this page.
1049853d9e8SJason Beloro 		 * Advance 'pp' to the next page which should
1059853d9e8SJason Beloro 		 * belong only to the incoming memseg.
1069853d9e8SJason Beloro 		 *
1079853d9e8SJason Beloro 		 * If the last page_t in the current page
1089853d9e8SJason Beloro 		 * crosses a page boundary, this should still
1099853d9e8SJason Beloro 		 * work.  The first part of the page_t is
1109853d9e8SJason Beloro 		 * already allocated.  The second part of
1119853d9e8SJason Beloro 		 * the page_t will be allocated below.
1129853d9e8SJason Beloro 		 */
1139853d9e8SJason Beloro 		ASSERT(PAGESIZE % sizeof (page_t));
1149853d9e8SJason Beloro 		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
1159853d9e8SJason Beloro 		metapgs--;
1169853d9e8SJason Beloro 	}
1179853d9e8SJason Beloro 
1189853d9e8SJason Beloro 	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
119*af4c679fSSean McEnroe 	    page_find(&mpvp, (u_offset_t)epp)) {
1209853d9e8SJason Beloro 		/*
1219853d9e8SJason Beloro 		 * Another memseg has page_t's in the same
1229853d9e8SJason Beloro 		 * page which 'epp' resides.  This would happen
1239853d9e8SJason Beloro 		 * if PAGESIZE is not an integral multiple of
1249853d9e8SJason Beloro 		 * sizeof (page_t) and therefore 'epp'
1259853d9e8SJason Beloro 		 * does not start on a page boundry.
1269853d9e8SJason Beloro 		 *
1279853d9e8SJason Beloro 		 * Since the other memseg's pages_t's still
1289853d9e8SJason Beloro 		 * map valid pages, skip allocation of this page.
1299853d9e8SJason Beloro 		 */
1309853d9e8SJason Beloro 		ASSERT(PAGESIZE % sizeof (page_t));
1319853d9e8SJason Beloro 		metapgs--;
1329853d9e8SJason Beloro 	}
1339853d9e8SJason Beloro 
1349853d9e8SJason Beloro 	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
1359853d9e8SJason Beloro 
1369853d9e8SJason Beloro 	/*
1379853d9e8SJason Beloro 	 * Back metadata space with physical pages.
1389853d9e8SJason Beloro 	 */
1399853d9e8SJason Beloro 	kseg.s_as = &kas;
1409853d9e8SJason Beloro 	vaddr = (caddr_t)pp;
1419853d9e8SJason Beloro 
1429853d9e8SJason Beloro 	for (i = 0; i < metapgs; i++)
143*af4c679fSSean McEnroe 		if (page_find(&mpvp, (u_offset_t)(vaddr + i * PAGESIZE)))
1449853d9e8SJason Beloro 			panic("page_find(0x%p, %p)\n",
145*af4c679fSSean McEnroe 			    (void *)&mpvp, (void *)(vaddr + i * PAGESIZE));
1469853d9e8SJason Beloro 
1479853d9e8SJason Beloro 	/*
1489853d9e8SJason Beloro 	 * Allocate the metadata pages; these are the pages that will
1499853d9e8SJason Beloro 	 * contain the page_t's for the incoming memory.
1509853d9e8SJason Beloro 	 */
151*af4c679fSSean McEnroe 	if ((page_create_va(&mpvp, (u_offset_t)pp, ptob(metapgs),
1529853d9e8SJason Beloro 	    PG_NORELOC | PG_EXCL, &kseg, vaddr)) == NULL) {
153*af4c679fSSean McEnroe 		MEMSEG_DEBUG("memseg_alloc_meta: can't get 0x%ld metapgs",
1549853d9e8SJason Beloro 		    metapgs);
1559853d9e8SJason Beloro 		return (KPHYSM_ERESOURCE);
1569853d9e8SJason Beloro 	}
1579853d9e8SJason Beloro 
1589853d9e8SJason Beloro 	ASSERT(ptp);
1599853d9e8SJason Beloro 	ASSERT(metap);
1609853d9e8SJason Beloro 
1619853d9e8SJason Beloro 	*ptp = (void *)opp;
1629853d9e8SJason Beloro 	*metap = metapgs;
1639853d9e8SJason Beloro 
1649853d9e8SJason Beloro 	return (KPHYSM_OK);
1659853d9e8SJason Beloro }
1669853d9e8SJason Beloro 
1679853d9e8SJason Beloro void
memseg_free_meta(void * ptp,pgcnt_t metapgs)1689853d9e8SJason Beloro memseg_free_meta(void *ptp, pgcnt_t metapgs)
1699853d9e8SJason Beloro {
1709853d9e8SJason Beloro 	int i;
1719853d9e8SJason Beloro 	page_t *pp;
1729853d9e8SJason Beloro 	u_offset_t off;
1739853d9e8SJason Beloro 
1749853d9e8SJason Beloro 	if (!metapgs)
1759853d9e8SJason Beloro 		return;
1769853d9e8SJason Beloro 
1779853d9e8SJason Beloro 	off = (u_offset_t)ptp;
1789853d9e8SJason Beloro 
1799853d9e8SJason Beloro 	ASSERT(off);
1809853d9e8SJason Beloro 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
1819853d9e8SJason Beloro 
1829853d9e8SJason Beloro 	MEMSEG_DEBUG("memseg_free_meta: off=0x%lx metapgs=0x%lx\n",
1839853d9e8SJason Beloro 	    (uint64_t)off, metapgs);
1849853d9e8SJason Beloro 	/*
1859853d9e8SJason Beloro 	 * Free pages allocated during add.
1869853d9e8SJason Beloro 	 */
1879853d9e8SJason Beloro 	for (i = 0; i < metapgs; i++) {
188*af4c679fSSean McEnroe 		pp = page_find(&mpvp, off);
1899853d9e8SJason Beloro 		ASSERT(pp);
1909853d9e8SJason Beloro 		ASSERT(pp->p_szc == 0);
1919853d9e8SJason Beloro 		page_io_unlock(pp);
1929853d9e8SJason Beloro 		page_destroy(pp, 0);
1939853d9e8SJason Beloro 		off += PAGESIZE;
1949853d9e8SJason Beloro 	}
1959853d9e8SJason Beloro }
1969853d9e8SJason Beloro 
1979853d9e8SJason Beloro pfn_t
memseg_get_metapfn(void * ptp,pgcnt_t metapg)1989853d9e8SJason Beloro memseg_get_metapfn(void *ptp, pgcnt_t metapg)
1999853d9e8SJason Beloro {
2009853d9e8SJason Beloro 	page_t *pp;
2019853d9e8SJason Beloro 	u_offset_t off;
2029853d9e8SJason Beloro 
2039853d9e8SJason Beloro 	off = (u_offset_t)ptp + ptob(metapg);
2049853d9e8SJason Beloro 
2059853d9e8SJason Beloro 	ASSERT(off);
2069853d9e8SJason Beloro 	ASSERT(IS_P2ALIGNED((uint64_t)off, PAGESIZE));
2079853d9e8SJason Beloro 
208*af4c679fSSean McEnroe 	pp = page_find(&mpvp, off);
2099853d9e8SJason Beloro 	ASSERT(pp);
2109853d9e8SJason Beloro 	ASSERT(pp->p_szc == 0);
2119853d9e8SJason Beloro 	ASSERT(pp->p_pagenum != PFN_INVALID);
2129853d9e8SJason Beloro 
2139853d9e8SJason Beloro 	return (pp->p_pagenum);
2149853d9e8SJason Beloro }
2159853d9e8SJason Beloro 
2169853d9e8SJason Beloro /*
2179853d9e8SJason Beloro  * Remap a memseg's page_t's to dummy pages.  Skip the low/high
2189853d9e8SJason Beloro  * ends of the range if they are already in use.
2199853d9e8SJason Beloro  */
2209853d9e8SJason Beloro void
memseg_remap_meta(struct memseg * seg)2219853d9e8SJason Beloro memseg_remap_meta(struct memseg *seg)
2229853d9e8SJason Beloro {
2239853d9e8SJason Beloro 	int i;
2249853d9e8SJason Beloro 	u_offset_t off;
2259853d9e8SJason Beloro 	page_t *pp;
2269853d9e8SJason Beloro #if 0
2279853d9e8SJason Beloro 	page_t *epp;
2289853d9e8SJason Beloro #endif
2299853d9e8SJason Beloro 	pgcnt_t metapgs;
2309853d9e8SJason Beloro 
2319853d9e8SJason Beloro 	metapgs = btopr(MSEG_NPAGES(seg) * sizeof (page_t));
2329853d9e8SJason Beloro 	ASSERT(metapgs);
2339853d9e8SJason Beloro 	pp = seg->pages;
2349853d9e8SJason Beloro 	seg->pages_end = seg->pages_base;
2359853d9e8SJason Beloro #if 0
2369853d9e8SJason Beloro 	epp = seg->epages;
2379853d9e8SJason Beloro 
2389853d9e8SJason Beloro 	/*
2399853d9e8SJason Beloro 	 * This code cannot be tested as the kernel does not compile
2409853d9e8SJason Beloro 	 * when page_t size is changed.  It is left here as a starting
2419853d9e8SJason Beloro 	 * point if the unaligned page_t size needs to be supported.
2429853d9e8SJason Beloro 	 */
2439853d9e8SJason Beloro 
2449853d9e8SJason Beloro 	if (!IS_P2ALIGNED((uint64_t)pp, PAGESIZE) &&
245*af4c679fSSean McEnroe 	    page_find(&mpvp, (u_offset_t)(pp - 1)) && !page_deleted(pp - 1)) {
2469853d9e8SJason Beloro 		/*
2479853d9e8SJason Beloro 		 * Another memseg has page_t's in the same
2489853d9e8SJason Beloro 		 * page which 'pp' resides.  This would happen
2499853d9e8SJason Beloro 		 * if PAGESIZE is not an integral multiple of
2509853d9e8SJason Beloro 		 * sizeof (page_t) and therefore 'seg->pages'
2519853d9e8SJason Beloro 		 * does not start on a page boundry.
2529853d9e8SJason Beloro 		 *
2539853d9e8SJason Beloro 		 * Since the other memseg's pages_t's still
2549853d9e8SJason Beloro 		 * map valid pages, skip remap of this page.
2559853d9e8SJason Beloro 		 * Advance 'pp' to the next page which should
2569853d9e8SJason Beloro 		 * belong only to the outgoing memseg.
2579853d9e8SJason Beloro 		 *
2589853d9e8SJason Beloro 		 * If the last page_t in the current page
2599853d9e8SJason Beloro 		 * crosses a page boundary, this should still
2609853d9e8SJason Beloro 		 * work.  The first part of the page_t is
2619853d9e8SJason Beloro 		 * valid since memseg_lock_delete_all() has
2629853d9e8SJason Beloro 		 * been called.  The second part of the page_t
2639853d9e8SJason Beloro 		 * will be remapped to the corresponding
2649853d9e8SJason Beloro 		 * dummy page below.
2659853d9e8SJason Beloro 		 */
2669853d9e8SJason Beloro 		ASSERT(PAGESIZE % sizeof (page_t));
2679853d9e8SJason Beloro 		pp = (page_t *)P2ROUNDUP((uint64_t)pp, PAGESIZE);
2689853d9e8SJason Beloro 		metapgs--;
2699853d9e8SJason Beloro 	}
2709853d9e8SJason Beloro 
2719853d9e8SJason Beloro 	if (!IS_P2ALIGNED((uint64_t)epp, PAGESIZE) &&
272*af4c679fSSean McEnroe 	    page_find(&mpvp, (u_offset_t)epp) && !page_deleted(epp)) {
2739853d9e8SJason Beloro 		/*
2749853d9e8SJason Beloro 		 * Another memseg has page_t's in the same
2759853d9e8SJason Beloro 		 * page which 'epp' resides.  This would happen
2769853d9e8SJason Beloro 		 * if PAGESIZE is not an integral multiple of
2779853d9e8SJason Beloro 		 * sizeof (page_t) and therefore 'seg->epages'
2789853d9e8SJason Beloro 		 * does not start on a page boundry.
2799853d9e8SJason Beloro 		 *
2809853d9e8SJason Beloro 		 * Since the other memseg's pages_t's still
2819853d9e8SJason Beloro 		 * map valid pages, skip remap of this page.
2829853d9e8SJason Beloro 		 */
2839853d9e8SJason Beloro 		ASSERT(PAGESIZE % sizeof (page_t));
2849853d9e8SJason Beloro 		metapgs--;
2859853d9e8SJason Beloro 	}
2869853d9e8SJason Beloro #endif
2879853d9e8SJason Beloro 	ASSERT(IS_P2ALIGNED((uint64_t)pp, PAGESIZE));
2889853d9e8SJason Beloro 
2899853d9e8SJason Beloro 	remap_to_dummy((caddr_t)pp, metapgs);
2909853d9e8SJason Beloro 
2919853d9e8SJason Beloro 	off = (u_offset_t)pp;
2929853d9e8SJason Beloro 
293*af4c679fSSean McEnroe 	MEMSEG_DEBUG("memseg_remap_meta: off=0x%lx metapgs=0x%lx\n",
294*af4c679fSSean McEnroe 	    (uint64_t)off, metapgs);
2959853d9e8SJason Beloro 	/*
2969853d9e8SJason Beloro 	 * Free pages allocated during add.
2979853d9e8SJason Beloro 	 */
2989853d9e8SJason Beloro 	for (i = 0; i < metapgs; i++) {
299*af4c679fSSean McEnroe 		pp = page_find(&mpvp, off);
3009853d9e8SJason Beloro 		ASSERT(pp);
3019853d9e8SJason Beloro 		ASSERT(pp->p_szc == 0);
3029853d9e8SJason Beloro 		page_io_unlock(pp);
3039853d9e8SJason Beloro 		page_destroy(pp, 0);
3049853d9e8SJason Beloro 		off += PAGESIZE;
3059853d9e8SJason Beloro 	}
3069853d9e8SJason Beloro }
307