xref: /illumos-gate/usr/src/uts/i86pc/os/pmem.c (revision 2d6eb4a5)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57b93957cSeota  * Common Development and Distribution License (the "License").
67b93957cSeota  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
22*733cdf20Smrj  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
237c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate  */
257c478bd9Sstevel@tonic-gate 
267c478bd9Sstevel@tonic-gate /*
277c478bd9Sstevel@tonic-gate  * PMEM - Direct mapping physical memory pages to userland process
287c478bd9Sstevel@tonic-gate  *
297c478bd9Sstevel@tonic-gate  * Provide functions used for directly (w/o occupying kernel virtual address
307c478bd9Sstevel@tonic-gate  * space) allocating and exporting physical memory pages to userland.
317c478bd9Sstevel@tonic-gate  */
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #include <sys/types.h>
347c478bd9Sstevel@tonic-gate #include <sys/mutex.h>
357c478bd9Sstevel@tonic-gate #include <sys/sunddi.h>
367c478bd9Sstevel@tonic-gate #include <sys/ddidevmap.h>
377c478bd9Sstevel@tonic-gate #include <sys/vnode.h>
387c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
397c478bd9Sstevel@tonic-gate #include <vm/seg_dev.h>
407c478bd9Sstevel@tonic-gate #include <sys/pmem.h>
417c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h>
427c478bd9Sstevel@tonic-gate #include <sys/task.h>
437c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
447c478bd9Sstevel@tonic-gate 
457c478bd9Sstevel@tonic-gate /*
467c478bd9Sstevel@tonic-gate  * The routines in this file allocate memory which will be accessed through
477c478bd9Sstevel@tonic-gate  * the AGP GART hardware.  The GART is programmed with the PFNs for this
487c478bd9Sstevel@tonic-gate  * memory, and the only mechanism for removing these entries is by an
497c478bd9Sstevel@tonic-gate  * explicit process operation (ioctl/close of the driver, or process exit).
507c478bd9Sstevel@tonic-gate  * As such, the pages need to remain locked to ensure that they won't be
517c478bd9Sstevel@tonic-gate  * relocated or paged out.
527c478bd9Sstevel@tonic-gate  *
537c478bd9Sstevel@tonic-gate  * To prevent these locked pages from getting in the way of page
547c478bd9Sstevel@tonic-gate  * coalescing, we try to allocate large pages from the system, and carve
557c478bd9Sstevel@tonic-gate  * them up to satisfy pmem allocation requests.  This will keep the locked
567c478bd9Sstevel@tonic-gate  * pages within a constrained area of physical memory, limiting the number
577c478bd9Sstevel@tonic-gate  * of large pages that would be pinned by our locked pages.  This is, of
587c478bd9Sstevel@tonic-gate  * course, another take on the infamous kernel cage, and it has many of the
597c478bd9Sstevel@tonic-gate  * downsides of the original cage.  It also interferes with system-wide
607c478bd9Sstevel@tonic-gate  * resource management decisions, as it maintains its own pool of unused
617c478bd9Sstevel@tonic-gate  * pages which can't be easily reclaimed and used during low-memory
627c478bd9Sstevel@tonic-gate  * situations.
637c478bd9Sstevel@tonic-gate  *
647c478bd9Sstevel@tonic-gate  * The right solution is for pmem to register a callback that the VM system
657c478bd9Sstevel@tonic-gate  * could call, which would temporarily remove any GART entries for pages
667c478bd9Sstevel@tonic-gate  * that were being relocated.  This would let us leave the pages unlocked,
677c478bd9Sstevel@tonic-gate  * which would remove the need for using large pages, which would simplify
687c478bd9Sstevel@tonic-gate  * this code a great deal.  Unfortunately, the support for these callbacks
697c478bd9Sstevel@tonic-gate  * only exists on some SPARC platforms right now.
707c478bd9Sstevel@tonic-gate  *
717c478bd9Sstevel@tonic-gate  * Note that this is the *only* reason that large pages are used here.  The
727c478bd9Sstevel@tonic-gate  * GART can't perform large-page translations, and the code appropriately
737c478bd9Sstevel@tonic-gate  * falls back to using small pages if page_create_va_large() fails.
747c478bd9Sstevel@tonic-gate  */
757c478bd9Sstevel@tonic-gate 
/*
 * Acquire/release dh_lock, but only for handles created with
 * DEVMAP_ALLOW_REMAP -- dh_lock is only initialized for such handles
 * (see devmap_pmem_setup()).  NOTE(review): these macros expand to a
 * bare "if" statement; do not place them immediately before an "else".
 */
#define	HOLD_DHP_LOCK(dhp)  if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_enter(&dhp->dh_lock); }

#define	RELE_DHP_LOCK(dhp) if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) \
			{ mutex_exit(&dhp->dh_lock); }

/* True if pp was carved out of a large page (large pages have p_szc != 0). */
#define	FROM_LPG(pp) (pp->p_szc != 0)
/*
 * Index of pp within its enclosing large page.  The mask form works
 * because pmem_pgcnt (large page size / PAGESIZE) is a power of two.
 */
#define	PFIND(pp) (page_pptonum(pp) & (pmem_pgcnt - 1))

/*
 * Structs and static variables used for pmem only.
 */
/* Bookkeeping record for one large page that has been carved into small pages. */
typedef struct pmem_lpg {
	page_t	*pl_pp;		/* start pp */
	ulong_t	*pl_bitmap;	/* allocation status for each page */
	ushort_t pl_pfree;	/* this large page might be fully freed */
	struct pmem_lpg *pl_next;	/* list linkage */
	struct pmem_lpg *pl_prev;
} pmem_lpg_t;

static size_t	pmem_lpgsize;	/* the size of one large page */
static pgcnt_t	pmem_pgcnt;	/* the number of small pages in a large page */
static uint_t	pmem_lszc;	/* page size code of the large page */
/* The segment to be associated with all the allocated pages. */
static struct seg	pmem_seg;
/* Fully occupied large pages allocated for pmem. */
static pmem_lpg_t *pmem_occ_lpgs;
/* Memory pool to store residual small pages from large pages. */
static page_t	*pmem_mpool = NULL;
/* Number of small pages reside in pmem_mpool currently. */
static pgcnt_t	pmem_nmpages = 0;
/* To protect pmem_nmpages, pmem_mpool and pmem_occ_lpgs. */
kmutex_t	pmem_mutex;

/* Forward declarations for the file-local helpers defined below. */
static int lpg_isfree(pmem_lpg_t *);
static void pmem_lpg_sub(pmem_lpg_t **, pmem_lpg_t *);
static void pmem_lpg_concat(pmem_lpg_t **, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_get(pmem_lpg_t *, page_t *, pmem_lpg_t **);
static pmem_lpg_t *pmem_lpg_alloc(uint_t);
static void pmem_lpg_free(pmem_lpg_t **, pmem_lpg_t *);
static void lpg_free(page_t *spp);
static pgcnt_t mpool_break(page_t **, pgcnt_t);
static void mpool_append(page_t **, pgcnt_t);
static void lpp_break(page_t **, pgcnt_t, pgcnt_t, pmem_lpg_t *);
static void lpp_free(page_t *, pgcnt_t, pmem_lpg_t **);
static int lpp_create(page_t **, pgcnt_t, pgcnt_t *, pmem_lpg_t **,
    vnode_t *, u_offset_t *, uint_t);
static void tlist_in(page_t *, pgcnt_t, vnode_t *, u_offset_t *);
static void tlist_out(page_t *, pgcnt_t);
static int pmem_cookie_alloc(struct devmap_pmem_cookie **, pgcnt_t, uint_t);
static int pmem_lock(pgcnt_t, proc_t *p);
1277c478bd9Sstevel@tonic-gate 
1287c478bd9Sstevel@tonic-gate /*
1297c478bd9Sstevel@tonic-gate  * Called by driver devmap routine to pass physical memory mapping info to
1307c478bd9Sstevel@tonic-gate  * seg_dev framework, used only for physical memory allocated from
1317c478bd9Sstevel@tonic-gate  * devmap_pmem_alloc().
1327c478bd9Sstevel@tonic-gate  */
1337c478bd9Sstevel@tonic-gate /* ARGSUSED */
1347c478bd9Sstevel@tonic-gate int
devmap_pmem_setup(devmap_cookie_t dhc, dev_info_t *dip,
    struct devmap_callback_ctl *callbackops, devmap_pmem_cookie_t cookie,
    offset_t off, size_t len, uint_t maxprot, uint_t flags,
    ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	/* The requested [off, off + len) range must fit the cookie's pages. */
	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * Fail if setup has already been done for this dhp.
	 */
	if (dhp->dh_flags & DEVMAP_SETUP_DONE)
		return (DDI_FAILURE);

	/* maxprot may not take away any protection dh_prot already needs. */
	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported. Need to pay
	 * attention that only uncachable or write-combining is
	 * permitted for pmem.
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	if (flags & DEVMAP_MAPPING_INVALID) {
		/*
		 * If DEVMAP_MAPPING_INVALID is specified, we have to grant
		 * remap permission.
		 */
		if (!(flags & DEVMAP_ALLOW_REMAP))
			return (DDI_FAILURE);
	} else {
		dhp->dh_pcookie = (devmap_pmem_cookie_t)pcp;
		/* dh_roff is the offset inside the dh_pcookie. */
		dhp->dh_roff = ptob(btop(off));
		/* Set the cache attributes correctly */
		i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);
	}

	dhp->dh_cookie = DEVMAP_PMEM_COOKIE;
	dhp->dh_flags |= (flags & DEVMAP_SETUP_FLAGS);
	/* Round the mapping length up to a whole number of pages. */
	dhp->dh_len = ptob(btopr(len));

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);

	if (callbackops != NULL) {
		bcopy(callbackops, &dhp->dh_callbackops,
		    sizeof (struct devmap_callback_ctl));
	}

	/*
	 * Initialize dh_lock if we want to do remap.
	 */
	if (dhp->dh_flags & DEVMAP_ALLOW_REMAP) {
		mutex_init(&dhp->dh_lock, NULL, MUTEX_DEFAULT, NULL);
		dhp->dh_flags |= DEVMAP_LOCK_INITED;
	}

	dhp->dh_flags |= DEVMAP_SETUP_DONE;

	return (DDI_SUCCESS);
}
2047c478bd9Sstevel@tonic-gate 
2057c478bd9Sstevel@tonic-gate /*
2067c478bd9Sstevel@tonic-gate  * Replace existing mapping using a new cookie, mainly gets called when doing
2077c478bd9Sstevel@tonic-gate  * fork(). Should be called in associated devmap_dup(9E).
2087c478bd9Sstevel@tonic-gate  */
2097c478bd9Sstevel@tonic-gate /* ARGSUSED */
2107c478bd9Sstevel@tonic-gate int
devmap_pmem_remap(devmap_cookie_t dhc, dev_info_t *dip,
    devmap_pmem_cookie_t cookie, offset_t off, size_t len, uint_t maxprot,
    uint_t flags, ddi_device_acc_attr_t *accattrp)
{
	devmap_handle_t *dhp = (devmap_handle_t *)dhc;
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	uint_t cache_attr = IOMEM_CACHE_ATTR(flags);

	/*
	 * Return failure if setup has not been done or no remap permission
	 * has been granted during the setup.
	 */
	if ((dhp->dh_flags & DEVMAP_SETUP_DONE) == 0 ||
	    (dhp->dh_flags & DEVMAP_ALLOW_REMAP) == 0)
		return (DDI_FAILURE);

	/* No flags supported for remap yet. */
	if (flags != 0)
		return (DDI_FAILURE);

	/* maxprot may not take away any protection dh_prot already needs. */
	if ((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) != dhp->dh_prot)
		return (DDI_FAILURE);

	/* The requested [off, off + len) range must fit the new cookie. */
	if (pcp == NULL || (off + len) > ptob(pcp->dp_npages))
		return (DDI_FAILURE);

	/*
	 * Check if the cache attributes are supported. Need to pay
	 * attention that only uncachable or write-combining is
	 * permitted for pmem.
	 *
	 * NOTE(review): flags is known to be 0 at this point, so cache_attr
	 * is IOMEM_CACHE_ATTR(0) -- presumably that maps to the default
	 * (acceptable) attribute; confirm against i_ddi_check_cache_attr().
	 */
	if (i_ddi_check_cache_attr(flags) == B_FALSE ||
	    (cache_attr & (IOMEM_DATA_UNCACHED|IOMEM_DATA_UC_WR_COMBINE)) == 0)
		return (DDI_FAILURE);

	HOLD_DHP_LOCK(dhp);
	/*
	 * Unload the old mapping of pages related with this dhp, so next
	 * fault will setup the new mappings. It is in segdev_faultpage that
	 * calls hat_devload to establish the mapping. Do this while holding
	 * the dhp lock so other faults don't reestablish the mappings.
	 */
	hat_unload(dhp->dh_seg->s_as->a_hat, dhp->dh_uvaddr,
	    dhp->dh_len, HAT_UNLOAD|HAT_UNLOAD_OTHER);

	/* Set the cache attributes correctly */
	i_ddi_cacheattr_to_hatacc(cache_attr, &dhp->dh_hat_attr);

	dhp->dh_pcookie = cookie;
	dhp->dh_roff = ptob(btop(off));
	dhp->dh_len = ptob(btopr(len));

	/* Clear the large page size flag. */
	dhp->dh_flags &= ~DEVMAP_FLAG_LARGE;

	dhp->dh_maxprot = maxprot & dhp->dh_orig_maxprot;
	ASSERT((dhp->dh_prot & dhp->dh_orig_maxprot & maxprot) == dhp->dh_prot);
	RELE_DHP_LOCK(dhp);
	return (DDI_SUCCESS);
}
2717c478bd9Sstevel@tonic-gate 
2727c478bd9Sstevel@tonic-gate /*
2737c478bd9Sstevel@tonic-gate  * Directly (i.e., without occupying kernel virtual address space) allocate
2747c478bd9Sstevel@tonic-gate  * 'npages' physical memory pages for exporting to user land. The allocated
2757c478bd9Sstevel@tonic-gate  * page_t pointer will be recorded in cookie.
2767c478bd9Sstevel@tonic-gate  */
2777c478bd9Sstevel@tonic-gate int
devmap_pmem_alloc(size_t size, uint_t flags, devmap_pmem_cookie_t *cookiep)
{
	u_offset_t	pmem_off = 0;
	page_t		*pp = NULL;	/* small pages from page_create_va() */
	page_t		*lpp = NULL;	/* small pages carved from large pages */
	page_t		*tlist = NULL;	/* accumulated list of all pages */
	pgcnt_t		i = 0;
	pgcnt_t		rpages = 0;	/* pages still needed after the pool */
	pgcnt_t		lpages = 0;	/* small-page count supplied by lpp */
	pgcnt_t		tpages = 0;	/* pages taken from pmem_mpool */
	pgcnt_t		npages = btopr(size);
	pmem_lpg_t	*plp = NULL;	/* records for newly made large pages */
	struct devmap_pmem_cookie	*pcp;
	uint_t		reserved = 0;	/* page_resv() done; undo on failure */
	uint_t		locked = 0;	/* locked-memory charged; undo on failure */
	uint_t		pflags, kflags;

	*cookiep = NULL;

	/*
	 * Number larger than this will cause page_create_va() to loop
	 * infinitely.
	 */
	if (npages == 0 || npages >= total_pages / 2)
		return (DDI_FAILURE);
	/* Caller must choose exactly one of the sleep policies. */
	if ((flags & (PMEM_SLEEP | PMEM_NOSLEEP)) == 0)
		return (DDI_FAILURE);
	pflags = flags & PMEM_NOSLEEP ? PG_EXCL : PG_WAIT;
	kflags = flags & PMEM_NOSLEEP ? KM_NOSLEEP : KM_SLEEP;

	/* Allocate pmem cookie. */
	if (pmem_cookie_alloc(&pcp, npages, kflags) == DDI_FAILURE)
		return (DDI_FAILURE);
	pcp->dp_npages = npages;

	/*
	 * See if the requested memory can be locked.
	 */
	pcp->dp_proc = curproc;
	if (pmem_lock(npages, curproc) == DDI_FAILURE)
		goto alloc_fail;
	locked = 1;
	/*
	 * First, grab as many as possible from pmem_mpool. If pages in
	 * pmem_mpool are enough for this request, we are done.
	 */
	mutex_enter(&pmem_mutex);
	tpages = mpool_break(&tlist, npages);
	/* IOlock and hashin them into the new offset. */
	if (tpages)
		tlist_in(tlist, tpages, pcp->dp_vnp, &pmem_off);
	mutex_exit(&pmem_mutex);

	if (tpages == npages)
		goto done;

	rpages = npages - tpages;
	/* Quit now if memory cannot be reserved. */
	if (!page_resv(rpages, kflags))
		goto alloc_fail;
	reserved = 1;

	/* If we have large pages */
	if (pmem_lpgsize > PAGESIZE) {
		/* Try to alloc large pages first to decrease fragmentation. */
		i = (rpages + (pmem_pgcnt - 1)) / pmem_pgcnt;
		if (lpp_create(&lpp, i, &lpages, &plp, pcp->dp_vnp, &pmem_off,
		    kflags) == DDI_FAILURE)
			goto alloc_fail;
		ASSERT(lpages == 0 ? lpp == NULL : 1);
	}

	/*
	 * Pages in large pages is more than the request, put the residual
	 * pages into pmem_mpool.
	 */
	if (lpages >= rpages) {
		lpp_break(&lpp, lpages, lpages - rpages, plp);
		goto done;
	}

	/* Allocate small pages if lpp+tlist cannot satisfy the request. */
	i =  rpages - lpages;
	if ((pp = page_create_va(pcp->dp_vnp, pmem_off, ptob(i),
	    pflags, &pmem_seg, (caddr_t)(uintptr_t)pmem_off)) == NULL)
		goto alloc_fail;

done:
	/* Merge the three sources into one list: pool pages, lpp, then pp. */
	page_list_concat(&tlist, &lpp);
	page_list_concat(&tlist, &pp);
	/* Set those small pages from large pages as allocated. */
	mutex_enter(&pmem_mutex);
	pmem_lpg_concat(&pmem_occ_lpgs, &plp);
	mutex_exit(&pmem_mutex);

	/*
	 * Now tlist holds all the pages for this cookie. Record these pages in
	 * pmem cookie.
	 */
	for (pp = tlist, i = 0; i < npages; i++) {
		pcp->dp_pparray[i] = pp;
		page_io_unlock(pp);
		pp = pp->p_next;
		/* Detach the page just recorded (pp has already advanced). */
		page_sub(&tlist, pp->p_prev);
	}
	ASSERT(tlist == NULL);
	*cookiep = (devmap_pmem_cookie_t)pcp;

	return (DDI_SUCCESS);

alloc_fail:
	/* Unwind, in reverse order, whatever was acquired above. */
	DTRACE_PROBE(pmem__alloc__fail);
	/* Free large pages and the associated allocation records. */
	if (lpp)
		lpp_free(lpp, lpages / pmem_pgcnt, &plp);
	if (reserved == 1)
		page_unresv(rpages);
	/* Put those pages in tlist back into pmem_mpool. */
	if (tpages != 0) {
		mutex_enter(&pmem_mutex);
		/* IOunlock, hashout and update the allocation records. */
		tlist_out(tlist, tpages);
		mpool_append(&tlist, tpages);
		mutex_exit(&pmem_mutex);
	}
	if (locked == 1)
		i_ddi_decr_locked_memory(pcp->dp_proc, ptob(pcp->dp_npages));
	/* Freeing pmem_cookie. */
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
	return (DDI_FAILURE);
}
4117c478bd9Sstevel@tonic-gate 
4127c478bd9Sstevel@tonic-gate /*
4137c478bd9Sstevel@tonic-gate  * Free all small pages inside cookie, and return pages from large pages into
4147c478bd9Sstevel@tonic-gate  * mpool, if all the pages from one large page is in mpool, free it as a whole.
4157c478bd9Sstevel@tonic-gate  */
4167c478bd9Sstevel@tonic-gate void
devmap_pmem_free(devmap_pmem_cookie_t cookie)
{
	struct	devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t		i;
	/*
	 * NOTE(review): tpages is decremented below but never read
	 * afterwards -- it appears to be vestigial.
	 */
	pgcnt_t		tpages = 0;
	page_t		*pp;
	pmem_lpg_t 	*pl1, *plp;
	pmem_lpg_t	*pf_lpgs = NULL;	/* large pages possibly freeable */
	uint_t		npls = 0;		/* number of records in pf_lpgs */
	pmem_lpg_t *last_pl = NULL;	/* search hints for pmem_lpg_get() */
	pmem_lpg_t *plast_pl = NULL;

	ASSERT(pcp);
	mutex_enter(&pmem_mutex);
	/* Free small pages and return them to memory pool. */
	for (i = pcp->dp_npages; i > 0; i--) {
		pp = pcp->dp_pparray[i - 1];
		page_hashout(pp, NULL);
		/*
		 * Remove the mapping of this single page, this mapping is
		 * created using hat_devload() in segdev_faultpage().
		 */
		(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
		if (!FROM_LPG(pp)) {
			/* Normal small page. */
			page_free(pp, 1);
			page_unresv(1);
		} else {
			/* Small page from large pages. */
			plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
			if (plp && !(plp->pl_pfree)) {
				/*
				 * Move this record to pf_lpgs list, this large
				 * page may be able to be freed as a whole.
				 */
				pmem_lpg_sub(&pmem_occ_lpgs, plp);
				pmem_lpg_concat(&pf_lpgs, &plp);
				plp->pl_pfree = 1;
				npls++;
				last_pl = NULL;
			} else {
				/* Search in pf_lpgs list. */
				plp = pmem_lpg_get(pf_lpgs, pp, &plast_pl);
			}
			ASSERT(plp);
			/* Mark this page as free. */
			BT_SET(plp->pl_bitmap, PFIND(pp));
			/* Record this page in pmem_mpool. */
			mpool_append(&pp, 1);
		}
	}

	/*
	 * Find out the large pages whose pages have been freed, remove them
	 * from plp list, free them and the associated pmem_lpg struct.
	 */
	for (plp = pf_lpgs; npls != 0; npls--) {
		pl1 = plp;
		plp = plp->pl_next;
		if (lpg_isfree(pl1)) {
			/*
			 * Get one free large page.  Find all pages in this
			 * large page and remove them from pmem_mpool.
			 */
			lpg_free(pl1->pl_pp);
			/* Remove associated allocation records. */
			pmem_lpg_sub(&pf_lpgs, pl1);
			pmem_lpg_free(&pf_lpgs, pl1);
			tpages -= pmem_pgcnt;
		} else
			/* Still partially used; keep it as occupied. */
			pl1->pl_pfree = 0;
	}
	/* Update allocation records accordingly. */
	pmem_lpg_concat(&pmem_occ_lpgs, &pf_lpgs);
	mutex_exit(&pmem_mutex);

	/* Only credit the locked-memory charge back to the original process. */
	if (curproc == pcp->dp_proc)
		i_ddi_decr_locked_memory(curproc, ptob(pcp->dp_npages));
	kmem_free(pcp->dp_vnp, sizeof (vnode_t));
	kmem_free(pcp->dp_pparray, pcp->dp_npages * sizeof (page_t *));
	kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
}
4997c478bd9Sstevel@tonic-gate 
5007c478bd9Sstevel@tonic-gate /*
5017c478bd9Sstevel@tonic-gate  * To extract page frame number from specified range in a cookie.
5027c478bd9Sstevel@tonic-gate  */
5037c478bd9Sstevel@tonic-gate int
devmap_pmem_getpfns(devmap_pmem_cookie_t cookie, uint_t start, pgcnt_t npages,
    pfn_t *pfnarray)
{
	struct devmap_pmem_cookie *pcp = (struct devmap_pmem_cookie *)cookie;
	pgcnt_t idx;

	/* Reject a NULL cookie or a range beyond the cookie's pages. */
	if (pcp == NULL || start + npages > pcp->dp_npages)
		return (DDI_FAILURE);

	/* Copy out one frame number per page in [start, start + npages). */
	for (idx = 0; idx < npages; idx++) {
		page_t *pp = pcp->dp_pparray[start + idx];

		pfnarray[idx] = pfn_to_mfn(pp->p_pagenum);
	}

	return (DDI_SUCCESS);
}
5187c478bd9Sstevel@tonic-gate 
5197c478bd9Sstevel@tonic-gate void
pmem_init()5207c478bd9Sstevel@tonic-gate pmem_init()
5217c478bd9Sstevel@tonic-gate {
5227c478bd9Sstevel@tonic-gate 	mutex_init(&pmem_mutex, NULL, MUTEX_DEFAULT, NULL);
5237c478bd9Sstevel@tonic-gate 	pmem_lszc = MIN(1, page_num_pagesizes() - 1);
5247c478bd9Sstevel@tonic-gate 	pmem_lpgsize = page_get_pagesize(pmem_lszc);
5257c478bd9Sstevel@tonic-gate 	pmem_pgcnt = pmem_lpgsize >> PAGESHIFT;
5267c478bd9Sstevel@tonic-gate 	bzero(&pmem_seg, sizeof (struct seg));
5277c478bd9Sstevel@tonic-gate 	pmem_seg.s_as = &kas;
5287c478bd9Sstevel@tonic-gate }
5297c478bd9Sstevel@tonic-gate 
5307c478bd9Sstevel@tonic-gate /* Allocate kernel memory for one pmem cookie with n pages. */
5317c478bd9Sstevel@tonic-gate static int
pmem_cookie_alloc(struct devmap_pmem_cookie ** pcpp,pgcnt_t n,uint_t kflags)5327c478bd9Sstevel@tonic-gate pmem_cookie_alloc(struct devmap_pmem_cookie **pcpp, pgcnt_t n, uint_t kflags)
5337c478bd9Sstevel@tonic-gate {
5347c478bd9Sstevel@tonic-gate 	struct devmap_pmem_cookie *pcp;
5357c478bd9Sstevel@tonic-gate 
5367c478bd9Sstevel@tonic-gate 	if ((*pcpp = kmem_zalloc(sizeof (struct devmap_pmem_cookie),
5377c478bd9Sstevel@tonic-gate 	    kflags)) == NULL)
5387c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5397c478bd9Sstevel@tonic-gate 	pcp = *pcpp;
5407c478bd9Sstevel@tonic-gate 	if ((pcp->dp_vnp =
5417c478bd9Sstevel@tonic-gate 	    kmem_zalloc(sizeof (vnode_t), kflags)) == NULL) {
5427c478bd9Sstevel@tonic-gate 		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
5437c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5447c478bd9Sstevel@tonic-gate 	}
5457c478bd9Sstevel@tonic-gate 	if ((pcp->dp_pparray =
5467c478bd9Sstevel@tonic-gate 	    kmem_zalloc(n * sizeof (page_t *), kflags)) == NULL) {
5477c478bd9Sstevel@tonic-gate 		kmem_free(pcp->dp_vnp, sizeof (vnode_t));
5487c478bd9Sstevel@tonic-gate 		kmem_free(pcp, sizeof (struct devmap_pmem_cookie));
5497c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5507c478bd9Sstevel@tonic-gate 	}
5517c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
5527c478bd9Sstevel@tonic-gate }
5537c478bd9Sstevel@tonic-gate 
554c6939658Ssl /* Try to lock down n pages resource */
5557c478bd9Sstevel@tonic-gate static int
pmem_lock(pgcnt_t n,proc_t * p)556c6939658Ssl pmem_lock(pgcnt_t n, proc_t *p)
5577c478bd9Sstevel@tonic-gate {
558c6939658Ssl 	if (i_ddi_incr_locked_memory(p, ptob(n)) != 0) {
5597c478bd9Sstevel@tonic-gate 		return (DDI_FAILURE);
5607c478bd9Sstevel@tonic-gate 	}
5617c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
5627c478bd9Sstevel@tonic-gate }
5637c478bd9Sstevel@tonic-gate 
5647c478bd9Sstevel@tonic-gate /* To check if all the pages in a large page are freed. */
5657c478bd9Sstevel@tonic-gate static int
lpg_isfree(pmem_lpg_t * plp)5667c478bd9Sstevel@tonic-gate lpg_isfree(pmem_lpg_t *plp)
5677c478bd9Sstevel@tonic-gate {
5687c478bd9Sstevel@tonic-gate 	uint_t i;
5697c478bd9Sstevel@tonic-gate 
5707c478bd9Sstevel@tonic-gate 	for (i = 0; i < BT_BITOUL(pmem_pgcnt); i++)
5717c478bd9Sstevel@tonic-gate 		if (plp->pl_bitmap[i] != BT_ULMAXMASK)
5727c478bd9Sstevel@tonic-gate 			return (0);
5737c478bd9Sstevel@tonic-gate 	/* All 1 means all pages are freed. */
5747c478bd9Sstevel@tonic-gate 	return (1);
5757c478bd9Sstevel@tonic-gate }
5767c478bd9Sstevel@tonic-gate 
5777c478bd9Sstevel@tonic-gate /*
5787c478bd9Sstevel@tonic-gate  * Using pp to get the associated large page allocation record, searching in
5797c478bd9Sstevel@tonic-gate  * the splp linked list with *last as the heuristic pointer. Return NULL if
5807c478bd9Sstevel@tonic-gate  * not found.
5817c478bd9Sstevel@tonic-gate  */
5827c478bd9Sstevel@tonic-gate static pmem_lpg_t *
pmem_lpg_get(pmem_lpg_t * splp,page_t * pp,pmem_lpg_t ** last)5837c478bd9Sstevel@tonic-gate pmem_lpg_get(pmem_lpg_t *splp, page_t *pp, pmem_lpg_t **last)
5847c478bd9Sstevel@tonic-gate {
5857c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp;
5867c478bd9Sstevel@tonic-gate 	pgcnt_t root_pfn;
5877c478bd9Sstevel@tonic-gate 
5887c478bd9Sstevel@tonic-gate 	ASSERT(pp);
5897c478bd9Sstevel@tonic-gate 	if (splp == NULL)
5907c478bd9Sstevel@tonic-gate 		return (NULL);
5917c478bd9Sstevel@tonic-gate 	root_pfn = page_pptonum(pp) & ~(pmem_pgcnt - 1);
5927c478bd9Sstevel@tonic-gate 
5937c478bd9Sstevel@tonic-gate 	/* Try last winner first. */
5947c478bd9Sstevel@tonic-gate 	if (*last && root_pfn == page_pptonum((*last)->pl_pp))
5957c478bd9Sstevel@tonic-gate 		goto pl_found;
5967c478bd9Sstevel@tonic-gate 
5977c478bd9Sstevel@tonic-gate 	/* Else search the whole pmem_lpg list. */
5987c478bd9Sstevel@tonic-gate 	for (plp = splp; root_pfn != page_pptonum(plp->pl_pp); ) {
5997c478bd9Sstevel@tonic-gate 		plp = plp->pl_next;
6007c478bd9Sstevel@tonic-gate 		if (plp == splp) {
6017c478bd9Sstevel@tonic-gate 			plp = NULL;
6027c478bd9Sstevel@tonic-gate 			break;
6037c478bd9Sstevel@tonic-gate 		}
6047c478bd9Sstevel@tonic-gate 		ASSERT(plp->pl_pp);
6057c478bd9Sstevel@tonic-gate 	}
6067c478bd9Sstevel@tonic-gate 
6077c478bd9Sstevel@tonic-gate 	*last = plp;
6087c478bd9Sstevel@tonic-gate 
6097c478bd9Sstevel@tonic-gate pl_found:
6107c478bd9Sstevel@tonic-gate 	return (*last);
6117c478bd9Sstevel@tonic-gate }
6127c478bd9Sstevel@tonic-gate 
6137c478bd9Sstevel@tonic-gate /*
6147c478bd9Sstevel@tonic-gate  *  Remove one pmem_lpg plp from the oplpp list.
6157c478bd9Sstevel@tonic-gate  */
6167c478bd9Sstevel@tonic-gate static void
pmem_lpg_sub(pmem_lpg_t ** oplpp,pmem_lpg_t * plp)6177c478bd9Sstevel@tonic-gate pmem_lpg_sub(pmem_lpg_t **oplpp, pmem_lpg_t *plp)
6187c478bd9Sstevel@tonic-gate {
6197c478bd9Sstevel@tonic-gate 	if (*oplpp == plp)
6207c478bd9Sstevel@tonic-gate 		*oplpp = plp->pl_next;		/* go to next pmem_lpg */
6217c478bd9Sstevel@tonic-gate 
6227c478bd9Sstevel@tonic-gate 	if (*oplpp == plp)
6237c478bd9Sstevel@tonic-gate 		*oplpp = NULL;			/* pmem_lpg list is gone */
6247c478bd9Sstevel@tonic-gate 	else {
6257c478bd9Sstevel@tonic-gate 		plp->pl_prev->pl_next = plp->pl_next;
6267c478bd9Sstevel@tonic-gate 		plp->pl_next->pl_prev = plp->pl_prev;
6277c478bd9Sstevel@tonic-gate 	}
6287c478bd9Sstevel@tonic-gate 	plp->pl_prev = plp->pl_next = plp;	/* make plp a list of one */
6297c478bd9Sstevel@tonic-gate }
6307c478bd9Sstevel@tonic-gate 
6317c478bd9Sstevel@tonic-gate /*
6327c478bd9Sstevel@tonic-gate  * Concatenate page list nplpp onto the end of list plpp.
6337c478bd9Sstevel@tonic-gate  */
6347c478bd9Sstevel@tonic-gate static void
pmem_lpg_concat(pmem_lpg_t ** plpp,pmem_lpg_t ** nplpp)6357c478bd9Sstevel@tonic-gate pmem_lpg_concat(pmem_lpg_t **plpp, pmem_lpg_t **nplpp)
6367c478bd9Sstevel@tonic-gate {
6377c478bd9Sstevel@tonic-gate 	pmem_lpg_t *s1p, *s2p, *e1p, *e2p;
6387c478bd9Sstevel@tonic-gate 
6397c478bd9Sstevel@tonic-gate 	if (*nplpp == NULL) {
6407c478bd9Sstevel@tonic-gate 		return;
6417c478bd9Sstevel@tonic-gate 	}
6427c478bd9Sstevel@tonic-gate 	if (*plpp == NULL) {
6437c478bd9Sstevel@tonic-gate 		*plpp = *nplpp;
6447c478bd9Sstevel@tonic-gate 		return;
6457c478bd9Sstevel@tonic-gate 	}
6467c478bd9Sstevel@tonic-gate 	s1p = *plpp;
6477c478bd9Sstevel@tonic-gate 	e1p =  s1p->pl_prev;
6487c478bd9Sstevel@tonic-gate 	s2p = *nplpp;
6497c478bd9Sstevel@tonic-gate 	e2p = s2p->pl_prev;
6507c478bd9Sstevel@tonic-gate 	s1p->pl_prev = e2p;
6517c478bd9Sstevel@tonic-gate 	e2p->pl_next = s1p;
6527c478bd9Sstevel@tonic-gate 	e1p->pl_next = s2p;
6537c478bd9Sstevel@tonic-gate 	s2p->pl_prev = e1p;
6547c478bd9Sstevel@tonic-gate }
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate /*
6577c478bd9Sstevel@tonic-gate  * Allocate and initialize the allocation record of one large page, the init
6587c478bd9Sstevel@tonic-gate  * value is 'allocated'.
6597c478bd9Sstevel@tonic-gate  */
6607c478bd9Sstevel@tonic-gate static pmem_lpg_t *
pmem_lpg_alloc(uint_t kflags)6617c478bd9Sstevel@tonic-gate pmem_lpg_alloc(uint_t kflags)
6627c478bd9Sstevel@tonic-gate {
6637c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp;
6647c478bd9Sstevel@tonic-gate 
6657c478bd9Sstevel@tonic-gate 	ASSERT(pmem_pgcnt % BT_NBIPUL == 0);
6667c478bd9Sstevel@tonic-gate 	plp = kmem_zalloc(sizeof (pmem_lpg_t), kflags);
6677c478bd9Sstevel@tonic-gate 	if (plp == NULL)
6687c478bd9Sstevel@tonic-gate 		return (NULL);
6697c478bd9Sstevel@tonic-gate 	plp->pl_bitmap = kmem_zalloc(BT_SIZEOFMAP(pmem_pgcnt), kflags);
6707c478bd9Sstevel@tonic-gate 	if (plp->pl_bitmap == NULL) {
6717c478bd9Sstevel@tonic-gate 		kmem_free(plp, sizeof (*plp));
6727c478bd9Sstevel@tonic-gate 		return (NULL);
6737c478bd9Sstevel@tonic-gate 	}
6747c478bd9Sstevel@tonic-gate 	plp->pl_next = plp->pl_prev = plp;
6757c478bd9Sstevel@tonic-gate 	return (plp);
6767c478bd9Sstevel@tonic-gate }
6777c478bd9Sstevel@tonic-gate 
6787c478bd9Sstevel@tonic-gate /* Free one allocation record pointed by oplp. */
6797c478bd9Sstevel@tonic-gate static void
pmem_lpg_free(pmem_lpg_t ** headp,pmem_lpg_t * plp)6807c478bd9Sstevel@tonic-gate pmem_lpg_free(pmem_lpg_t **headp, pmem_lpg_t *plp)
6817c478bd9Sstevel@tonic-gate {
6827c478bd9Sstevel@tonic-gate 	if (*headp == plp)
6837c478bd9Sstevel@tonic-gate 		*headp = plp->pl_next;		/* go to next pmem_lpg_t */
6847c478bd9Sstevel@tonic-gate 
6857c478bd9Sstevel@tonic-gate 	if (*headp == plp)
6867c478bd9Sstevel@tonic-gate 		*headp = NULL;			/* this list is gone */
6877c478bd9Sstevel@tonic-gate 	else {
6887c478bd9Sstevel@tonic-gate 		plp->pl_prev->pl_next = plp->pl_next;
6897c478bd9Sstevel@tonic-gate 		plp->pl_next->pl_prev = plp->pl_prev;
6907c478bd9Sstevel@tonic-gate 	}
6917c478bd9Sstevel@tonic-gate 	kmem_free(plp->pl_bitmap, BT_SIZEOFMAP(pmem_pgcnt));
6927c478bd9Sstevel@tonic-gate 	kmem_free(plp, sizeof (*plp));
6937c478bd9Sstevel@tonic-gate }
6947c478bd9Sstevel@tonic-gate 
6957c478bd9Sstevel@tonic-gate /* Free one large page headed by spp from pmem_mpool. */
6967c478bd9Sstevel@tonic-gate static void
lpg_free(page_t * spp)6977c478bd9Sstevel@tonic-gate lpg_free(page_t *spp)
6987c478bd9Sstevel@tonic-gate {
6997c478bd9Sstevel@tonic-gate 	page_t *pp1 = spp;
7007c478bd9Sstevel@tonic-gate 	uint_t i;
7017c478bd9Sstevel@tonic-gate 
7027c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pmem_mutex));
7037c478bd9Sstevel@tonic-gate 	for (i = 0; i < pmem_pgcnt; i++) {
7047c478bd9Sstevel@tonic-gate 		/* Break pp1 from pmem_mpool. */
7057c478bd9Sstevel@tonic-gate 		page_sub(&pmem_mpool, pp1);
7067c478bd9Sstevel@tonic-gate 		pp1++;
7077c478bd9Sstevel@tonic-gate 	}
7087c478bd9Sstevel@tonic-gate 	/* Free pages in this large page. */
7097c478bd9Sstevel@tonic-gate 	page_free_pages(spp);
7107c478bd9Sstevel@tonic-gate 	page_unresv(pmem_pgcnt);
7117c478bd9Sstevel@tonic-gate 	pmem_nmpages -= pmem_pgcnt;
7127c478bd9Sstevel@tonic-gate 	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
7137c478bd9Sstevel@tonic-gate }
7147c478bd9Sstevel@tonic-gate 
7157c478bd9Sstevel@tonic-gate /* Put n pages in *ppp list back into pmem_mpool. */
7167c478bd9Sstevel@tonic-gate static void
mpool_append(page_t ** ppp,pgcnt_t n)7177c478bd9Sstevel@tonic-gate mpool_append(page_t **ppp, pgcnt_t n)
7187c478bd9Sstevel@tonic-gate {
7197c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pmem_mutex));
7207c478bd9Sstevel@tonic-gate 	/* Put back pages. */
7217c478bd9Sstevel@tonic-gate 	page_list_concat(&pmem_mpool, ppp);
7227c478bd9Sstevel@tonic-gate 	pmem_nmpages += n;
7237c478bd9Sstevel@tonic-gate 	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
7247c478bd9Sstevel@tonic-gate }
7257c478bd9Sstevel@tonic-gate 
7267c478bd9Sstevel@tonic-gate /*
7277c478bd9Sstevel@tonic-gate  * Try to grab MIN(pmem_nmpages, n) pages from pmem_mpool, put them into *ppp
7287c478bd9Sstevel@tonic-gate  * list, and return the number of grabbed pages.
7297c478bd9Sstevel@tonic-gate  */
7307c478bd9Sstevel@tonic-gate static pgcnt_t
mpool_break(page_t ** ppp,pgcnt_t n)7317c478bd9Sstevel@tonic-gate mpool_break(page_t **ppp, pgcnt_t n)
7327c478bd9Sstevel@tonic-gate {
7337c478bd9Sstevel@tonic-gate 	pgcnt_t i;
7347c478bd9Sstevel@tonic-gate 
7357c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pmem_mutex));
7367c478bd9Sstevel@tonic-gate 	/* Grab the pages. */
7377c478bd9Sstevel@tonic-gate 	i = MIN(pmem_nmpages, n);
7387c478bd9Sstevel@tonic-gate 	*ppp = pmem_mpool;
7397c478bd9Sstevel@tonic-gate 	page_list_break(ppp, &pmem_mpool, i);
7407c478bd9Sstevel@tonic-gate 	pmem_nmpages -= i;
7417c478bd9Sstevel@tonic-gate 	ASSERT((pmem_nmpages && pmem_mpool) || (!pmem_nmpages && !pmem_mpool));
7427c478bd9Sstevel@tonic-gate 	return (i);
7437c478bd9Sstevel@tonic-gate }
7447c478bd9Sstevel@tonic-gate 
7457c478bd9Sstevel@tonic-gate /*
7467c478bd9Sstevel@tonic-gate  * Create n large pages, lpages and plpp contains the number of small pages and
7477c478bd9Sstevel@tonic-gate  * allocation records list respectively.
7487c478bd9Sstevel@tonic-gate  */
7497c478bd9Sstevel@tonic-gate static int
lpp_create(page_t ** lppp,pgcnt_t n,pgcnt_t * lpages,pmem_lpg_t ** plpp,vnode_t * vnp,u_offset_t * offp,uint_t kflags)7507c478bd9Sstevel@tonic-gate lpp_create(page_t **lppp, pgcnt_t n, pgcnt_t *lpages, pmem_lpg_t **plpp,
7517c478bd9Sstevel@tonic-gate     vnode_t *vnp, u_offset_t *offp, uint_t kflags)
7527c478bd9Sstevel@tonic-gate {
7537c478bd9Sstevel@tonic-gate 	pgcnt_t i;
7547c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp;
7557c478bd9Sstevel@tonic-gate 	page_t *pp;
7567c478bd9Sstevel@tonic-gate 
7577c478bd9Sstevel@tonic-gate 	for (i = 0, *lpages = 0; i < n; i++) {
7587c478bd9Sstevel@tonic-gate 		/* Allocte one large page each time. */
7597c478bd9Sstevel@tonic-gate 		pp = page_create_va_large(vnp, *offp, pmem_lpgsize,
76002bbca18Sms 		    PG_EXCL, &pmem_seg, (caddr_t)(uintptr_t)*offp, NULL);
7617c478bd9Sstevel@tonic-gate 		if (pp == NULL)
7627c478bd9Sstevel@tonic-gate 			break;
7637c478bd9Sstevel@tonic-gate 		*offp += pmem_lpgsize;
7647c478bd9Sstevel@tonic-gate 		page_list_concat(lppp, &pp);
7657c478bd9Sstevel@tonic-gate 		*lpages += pmem_pgcnt;
7667c478bd9Sstevel@tonic-gate 		/* Add one allocation record for this large page. */
7677c478bd9Sstevel@tonic-gate 		if ((plp = pmem_lpg_alloc(kflags)) == NULL)
7687c478bd9Sstevel@tonic-gate 			return (DDI_FAILURE);
7697c478bd9Sstevel@tonic-gate 		plp->pl_pp = pp;
7707c478bd9Sstevel@tonic-gate 		pmem_lpg_concat(plpp, &plp);
7717c478bd9Sstevel@tonic-gate 	}
7727c478bd9Sstevel@tonic-gate 	return (DDI_SUCCESS);
7737c478bd9Sstevel@tonic-gate }
7747c478bd9Sstevel@tonic-gate 
7757c478bd9Sstevel@tonic-gate /*
7767c478bd9Sstevel@tonic-gate  * Break the last r small pages from the large page list *lppp (with totally n
7777c478bd9Sstevel@tonic-gate  * small pages) and put them into pmem_mpool.
7787c478bd9Sstevel@tonic-gate  */
7797c478bd9Sstevel@tonic-gate static void
lpp_break(page_t ** lppp,pgcnt_t n,pgcnt_t r,pmem_lpg_t * oplp)7807c478bd9Sstevel@tonic-gate lpp_break(page_t **lppp, pgcnt_t n, pgcnt_t r, pmem_lpg_t *oplp)
7817c478bd9Sstevel@tonic-gate {
7827c478bd9Sstevel@tonic-gate 	page_t *pp, *pp1;
7837c478bd9Sstevel@tonic-gate 	pgcnt_t i;
7847c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp;
7857c478bd9Sstevel@tonic-gate 
7867c478bd9Sstevel@tonic-gate 	if (r == 0)
7877c478bd9Sstevel@tonic-gate 		return;
7887c478bd9Sstevel@tonic-gate 	ASSERT(*lppp != NULL && r < pmem_pgcnt);
7897c478bd9Sstevel@tonic-gate 	page_list_break(lppp, &pp, n - r);
7907c478bd9Sstevel@tonic-gate 
7917c478bd9Sstevel@tonic-gate 	/* The residual should reside in the last large page.  */
7927c478bd9Sstevel@tonic-gate 	plp = oplp->pl_prev;
7937c478bd9Sstevel@tonic-gate 	/* IOunlock and hashout the residual pages. */
7947c478bd9Sstevel@tonic-gate 	for (pp1 = pp, i = 0; i < r; i++) {
7957c478bd9Sstevel@tonic-gate 		page_io_unlock(pp1);
7967c478bd9Sstevel@tonic-gate 		page_hashout(pp1, NULL);
7977c478bd9Sstevel@tonic-gate 		/* Mark this page as free. */
7987c478bd9Sstevel@tonic-gate 		BT_SET(plp->pl_bitmap, PFIND(pp1));
7997c478bd9Sstevel@tonic-gate 		pp1 = pp1->p_next;
8007c478bd9Sstevel@tonic-gate 	}
8017c478bd9Sstevel@tonic-gate 	ASSERT(pp1 == pp);
8027c478bd9Sstevel@tonic-gate 	/* Put these residual pages into memory pool. */
8037c478bd9Sstevel@tonic-gate 	mutex_enter(&pmem_mutex);
8047c478bd9Sstevel@tonic-gate 	mpool_append(&pp, r);
8057c478bd9Sstevel@tonic-gate 	mutex_exit(&pmem_mutex);
8067c478bd9Sstevel@tonic-gate }
8077c478bd9Sstevel@tonic-gate 
8087c478bd9Sstevel@tonic-gate /* Freeing large pages in lpp and the associated allocation records in plp. */
8097c478bd9Sstevel@tonic-gate static void
lpp_free(page_t * lpp,pgcnt_t lpgs,pmem_lpg_t ** plpp)8107c478bd9Sstevel@tonic-gate lpp_free(page_t *lpp, pgcnt_t lpgs, pmem_lpg_t **plpp)
8117c478bd9Sstevel@tonic-gate {
8127c478bd9Sstevel@tonic-gate 	pgcnt_t i, j;
8137c478bd9Sstevel@tonic-gate 	page_t *pp = lpp, *pp1;
8147c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp1, *plp2;
8157c478bd9Sstevel@tonic-gate 
8167c478bd9Sstevel@tonic-gate 	for (i = 0; i < lpgs; i++) {
8177c478bd9Sstevel@tonic-gate 		for (j = 0; j < pmem_pgcnt; j++) {
8187c478bd9Sstevel@tonic-gate 			/* IO unlock and hashout this small page. */
8197c478bd9Sstevel@tonic-gate 			page_io_unlock(pp);
8207c478bd9Sstevel@tonic-gate 			page_hashout(pp, NULL);
8217c478bd9Sstevel@tonic-gate 			pp1 = pp->p_next;
8227c478bd9Sstevel@tonic-gate 			pp->p_prev = pp->p_next = pp;
8237c478bd9Sstevel@tonic-gate 			pp = pp1;
8247c478bd9Sstevel@tonic-gate 		}
8257c478bd9Sstevel@tonic-gate 		/* Free one large page at one time. */
8267c478bd9Sstevel@tonic-gate 		page_free_pages(lpp);
8277c478bd9Sstevel@tonic-gate 		lpp = pp;
8287c478bd9Sstevel@tonic-gate 	}
8297c478bd9Sstevel@tonic-gate 	/* Free associate pmem large page allocation records. */
8307c478bd9Sstevel@tonic-gate 	for (plp1 = *plpp; *plpp; plp1 = plp2) {
8317c478bd9Sstevel@tonic-gate 		plp2 = plp1->pl_next;
8327c478bd9Sstevel@tonic-gate 		pmem_lpg_free(plpp, plp1);
8337c478bd9Sstevel@tonic-gate 	}
8347c478bd9Sstevel@tonic-gate }
8357c478bd9Sstevel@tonic-gate 
8367c478bd9Sstevel@tonic-gate /*
8377c478bd9Sstevel@tonic-gate  * IOlock and hashin all pages in tlist, associate them with vnode *pvnp
8387c478bd9Sstevel@tonic-gate  * and offset starting with *poffp. Update allocation records accordingly at
8397c478bd9Sstevel@tonic-gate  * the same time.
8407c478bd9Sstevel@tonic-gate  */
8417c478bd9Sstevel@tonic-gate static void
tlist_in(page_t * tlist,pgcnt_t tpages,vnode_t * pvnp,u_offset_t * poffp)8427c478bd9Sstevel@tonic-gate tlist_in(page_t *tlist, pgcnt_t tpages, vnode_t *pvnp, u_offset_t *poffp)
8437c478bd9Sstevel@tonic-gate {
8447c478bd9Sstevel@tonic-gate 	page_t *pp;
8457c478bd9Sstevel@tonic-gate 	pgcnt_t i = 0;
8467c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp, *last_pl = NULL;
8477c478bd9Sstevel@tonic-gate 
8487c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pmem_mutex));
8497c478bd9Sstevel@tonic-gate 	for (pp = tlist; i < tpages; i++) {
8507c478bd9Sstevel@tonic-gate 		ASSERT(FROM_LPG(pp));
8517c478bd9Sstevel@tonic-gate 		page_io_lock(pp);
8527c478bd9Sstevel@tonic-gate 		(void) page_hashin(pp, pvnp, *poffp, NULL);
8537c478bd9Sstevel@tonic-gate 		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
8547c478bd9Sstevel@tonic-gate 		/* Mark this page as allocated. */
8557c478bd9Sstevel@tonic-gate 		BT_CLEAR(plp->pl_bitmap, PFIND(pp));
8567c478bd9Sstevel@tonic-gate 		*poffp += PAGESIZE;
8577c478bd9Sstevel@tonic-gate 		pp = pp->p_next;
8587c478bd9Sstevel@tonic-gate 	}
8597c478bd9Sstevel@tonic-gate 	ASSERT(pp == tlist);
8607c478bd9Sstevel@tonic-gate }
8617c478bd9Sstevel@tonic-gate 
8627c478bd9Sstevel@tonic-gate /*
8637c478bd9Sstevel@tonic-gate  * IOunlock and hashout all pages in tlist, update allocation records
8647c478bd9Sstevel@tonic-gate  * accordingly at the same time.
8657c478bd9Sstevel@tonic-gate  */
8667c478bd9Sstevel@tonic-gate static void
tlist_out(page_t * tlist,pgcnt_t tpages)8677c478bd9Sstevel@tonic-gate tlist_out(page_t *tlist, pgcnt_t tpages)
8687c478bd9Sstevel@tonic-gate {
8697c478bd9Sstevel@tonic-gate 	page_t *pp;
8707c478bd9Sstevel@tonic-gate 	pgcnt_t i = 0;
8717c478bd9Sstevel@tonic-gate 	pmem_lpg_t *plp, *last_pl = NULL;
8727c478bd9Sstevel@tonic-gate 
8737c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pmem_mutex));
8747c478bd9Sstevel@tonic-gate 	for (pp = tlist; i < tpages; i++) {
8757c478bd9Sstevel@tonic-gate 		ASSERT(FROM_LPG(pp));
8767c478bd9Sstevel@tonic-gate 		page_io_unlock(pp);
8777c478bd9Sstevel@tonic-gate 		page_hashout(pp, NULL);
8787c478bd9Sstevel@tonic-gate 		plp = pmem_lpg_get(pmem_occ_lpgs, pp, &last_pl);
8797c478bd9Sstevel@tonic-gate 		/* Mark this page as free. */
8807c478bd9Sstevel@tonic-gate 		BT_SET(plp->pl_bitmap, PFIND(pp));
8817c478bd9Sstevel@tonic-gate 		pp = pp->p_next;
8827c478bd9Sstevel@tonic-gate 	}
8837c478bd9Sstevel@tonic-gate 	ASSERT(pp == tlist);
8847c478bd9Sstevel@tonic-gate }
885