17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5*0b5aa17bSmec * Common Development and Distribution License (the "License"). 6*0b5aa17bSmec * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22e21bae1bSkchow * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 277c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate /* 307c478bd9Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 317c478bd9Sstevel@tonic-gate * under license from the Regents of the University of California. 
327c478bd9Sstevel@tonic-gate */ 337c478bd9Sstevel@tonic-gate 347c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 357c478bd9Sstevel@tonic-gate 367c478bd9Sstevel@tonic-gate /* 377c478bd9Sstevel@tonic-gate * This file contains common functions to access and manage the page lists. 387c478bd9Sstevel@tonic-gate * Many of these routines originated from platform dependent modules 397c478bd9Sstevel@tonic-gate * (sun4/vm/vm_dep.c, i86pc/vm/vm_machdep.c) and modified to function in 407c478bd9Sstevel@tonic-gate * a platform independent manner. 417c478bd9Sstevel@tonic-gate * 427c478bd9Sstevel@tonic-gate * vm/vm_dep.h provides for platform specific support. 437c478bd9Sstevel@tonic-gate */ 447c478bd9Sstevel@tonic-gate 457c478bd9Sstevel@tonic-gate #include <sys/types.h> 467c478bd9Sstevel@tonic-gate #include <sys/debug.h> 477c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 487c478bd9Sstevel@tonic-gate #include <sys/systm.h> 497c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 507c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 517c478bd9Sstevel@tonic-gate #include <vm/as.h> 527c478bd9Sstevel@tonic-gate #include <vm/page.h> 537c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 547c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 557c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 567c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h> 577c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 587c478bd9Sstevel@tonic-gate #include <sys/mem_config.h> 597c478bd9Sstevel@tonic-gate #include <sys/callb.h> 607c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h> 617c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 627c478bd9Sstevel@tonic-gate 637c478bd9Sstevel@tonic-gate extern uint_t vac_colors; 647c478bd9Sstevel@tonic-gate 656061ce8aSkchow #define MAX_PRAGMA_ALIGN 128 666061ce8aSkchow 676061ce8aSkchow /* vm_cpu_data0 for the boot cpu before kmem is initialized */ 686061ce8aSkchow 696061ce8aSkchow #if L2CACHE_ALIGN_MAX <= MAX_PRAGMA_ALIGN 70affbd3ccSkchow #pragma align 
L2CACHE_ALIGN_MAX(vm_cpu_data0) 716061ce8aSkchow #else 726061ce8aSkchow #pragma align MAX_PRAGMA_ALIGN(vm_cpu_data0) 736061ce8aSkchow #endif 74affbd3ccSkchow char vm_cpu_data0[VM_CPU_DATA_PADSIZE]; 75affbd3ccSkchow 767c478bd9Sstevel@tonic-gate /* 777c478bd9Sstevel@tonic-gate * number of page colors equivalent to reqested color in page_get routines. 787c478bd9Sstevel@tonic-gate * If set, keeps large pages intact longer and keeps MPO allocation 797c478bd9Sstevel@tonic-gate * from the local mnode in favor of acquiring the 'correct' page color from 807c478bd9Sstevel@tonic-gate * a demoted large page or from a remote mnode. 817c478bd9Sstevel@tonic-gate */ 827c478bd9Sstevel@tonic-gate int colorequiv; 837c478bd9Sstevel@tonic-gate 847c478bd9Sstevel@tonic-gate /* 857c478bd9Sstevel@tonic-gate * if set, specifies the percentage of large pages that are free from within 867c478bd9Sstevel@tonic-gate * a large page region before attempting to lock those pages for 877c478bd9Sstevel@tonic-gate * page_get_contig_pages processing. 887c478bd9Sstevel@tonic-gate * 897c478bd9Sstevel@tonic-gate * Should be turned on when kpr is available when page_trylock_contig_pages 907c478bd9Sstevel@tonic-gate * can be more selective. 917c478bd9Sstevel@tonic-gate */ 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate int ptcpthreshold; 947c478bd9Sstevel@tonic-gate 957c478bd9Sstevel@tonic-gate /* 967c478bd9Sstevel@tonic-gate * Limit page get contig page search based on failure cnts in pgcpfailcnt[]. 9783f9b804Skchow * Enabled by default via pgcplimitsearch. 9883f9b804Skchow * 9983f9b804Skchow * pgcpfailcnt[] is bounded by PGCPFAILMAX (>= 1/2 of installed 10083f9b804Skchow * memory). When reached, pgcpfailcnt[] is reset to 1/2 of this upper 10183f9b804Skchow * bound. 
This upper bound range guarantees: 10283f9b804Skchow * - all large page 'slots' will be searched over time 10383f9b804Skchow * - the minimum (1) large page candidates considered on each pgcp call 10483f9b804Skchow * - count doesn't wrap around to 0 1057c478bd9Sstevel@tonic-gate */ 10683f9b804Skchow pgcnt_t pgcpfailcnt[MMU_PAGE_SIZES]; 1077c478bd9Sstevel@tonic-gate int pgcplimitsearch = 1; 1087c478bd9Sstevel@tonic-gate 10983f9b804Skchow #define PGCPFAILMAX (1 << (highbit(physinstalled) - 1)) 11083f9b804Skchow #define SETPGCPFAILCNT(szc) \ 11183f9b804Skchow if (++pgcpfailcnt[szc] >= PGCPFAILMAX) \ 11283f9b804Skchow pgcpfailcnt[szc] = PGCPFAILMAX / 2; 11383f9b804Skchow 1147c478bd9Sstevel@tonic-gate #ifdef VM_STATS 1157c478bd9Sstevel@tonic-gate struct vmm_vmstats_str vmm_vmstats; 1167c478bd9Sstevel@tonic-gate 1177c478bd9Sstevel@tonic-gate #endif /* VM_STATS */ 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate #if defined(__sparc) 1207c478bd9Sstevel@tonic-gate #define LPGCREATE 0 1217c478bd9Sstevel@tonic-gate #else 1227c478bd9Sstevel@tonic-gate /* enable page_get_contig_pages */ 1237c478bd9Sstevel@tonic-gate #define LPGCREATE 1 1247c478bd9Sstevel@tonic-gate #endif 1257c478bd9Sstevel@tonic-gate 1267c478bd9Sstevel@tonic-gate int pg_contig_disable; 1277c478bd9Sstevel@tonic-gate int pg_lpgcreate_nocage = LPGCREATE; 1287c478bd9Sstevel@tonic-gate 1297c478bd9Sstevel@tonic-gate /* 1307c478bd9Sstevel@tonic-gate * page_freelist_fill pfn flag to signify no hi pfn requirement. 
1317c478bd9Sstevel@tonic-gate */ 1327c478bd9Sstevel@tonic-gate #define PFNNULL 0 1337c478bd9Sstevel@tonic-gate 1347c478bd9Sstevel@tonic-gate /* Flags involved in promotion and demotion routines */ 1357c478bd9Sstevel@tonic-gate #define PC_FREE 0x1 /* put page on freelist */ 1367c478bd9Sstevel@tonic-gate #define PC_ALLOC 0x2 /* return page for allocation */ 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate /* 1397c478bd9Sstevel@tonic-gate * Flag for page_demote to be used with PC_FREE to denote that we don't care 1407c478bd9Sstevel@tonic-gate * what the color is as the color parameter to the function is ignored. 1417c478bd9Sstevel@tonic-gate */ 1427c478bd9Sstevel@tonic-gate #define PC_NO_COLOR (-1) 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate /* 1457c478bd9Sstevel@tonic-gate * page counters candidates info 1467c478bd9Sstevel@tonic-gate * See page_ctrs_cands comment below for more details. 1477c478bd9Sstevel@tonic-gate * fields are as follows: 1487c478bd9Sstevel@tonic-gate * pcc_pages_free: # pages which freelist coalesce can create 1497c478bd9Sstevel@tonic-gate * pcc_color_free_len: number of elements in pcc_color_free array 1507c478bd9Sstevel@tonic-gate * pcc_color_free: pointer to page free counts per color 1517c478bd9Sstevel@tonic-gate */ 1527c478bd9Sstevel@tonic-gate typedef struct pcc_info { 1537c478bd9Sstevel@tonic-gate pgcnt_t pcc_pages_free; 1547c478bd9Sstevel@tonic-gate int pcc_color_free_len; 1557c478bd9Sstevel@tonic-gate pgcnt_t *pcc_color_free; 1567c478bd9Sstevel@tonic-gate } pcc_info_t; 1577c478bd9Sstevel@tonic-gate 1587c478bd9Sstevel@tonic-gate /* 1597c478bd9Sstevel@tonic-gate * On big machines it can take a long time to check page_counters 1607c478bd9Sstevel@tonic-gate * arrays. page_ctrs_cands is a summary array whose elements are a dynamically 1617c478bd9Sstevel@tonic-gate * updated sum of all elements of the corresponding page_counters arrays. 
1627c478bd9Sstevel@tonic-gate * page_freelist_coalesce() searches page_counters only if an appropriate 1637c478bd9Sstevel@tonic-gate * element of page_ctrs_cands array is greater than 0. 1647c478bd9Sstevel@tonic-gate * 1657c478bd9Sstevel@tonic-gate * An extra dimension is used for page_ctrs_cands to spread the elements 1667c478bd9Sstevel@tonic-gate * over a few e$ cache lines to avoid serialization during the array 1677c478bd9Sstevel@tonic-gate * updates. 1687c478bd9Sstevel@tonic-gate */ 1697c478bd9Sstevel@tonic-gate #pragma align 64(page_ctrs_cands) 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate static pcc_info_t *page_ctrs_cands[NPC_MUTEX][MMU_PAGE_SIZES]; 1727c478bd9Sstevel@tonic-gate 1737c478bd9Sstevel@tonic-gate /* 1747c478bd9Sstevel@tonic-gate * Return in val the total number of free pages which can be created 1757c478bd9Sstevel@tonic-gate * for the given mnode (m) and region size (r) 1767c478bd9Sstevel@tonic-gate */ 1777c478bd9Sstevel@tonic-gate #define PGCTRS_CANDS_GETVALUE(m, r, val) { \ 1787c478bd9Sstevel@tonic-gate int i; \ 1797c478bd9Sstevel@tonic-gate val = 0; \ 1807c478bd9Sstevel@tonic-gate for (i = 0; i < NPC_MUTEX; i++) { \ 1817c478bd9Sstevel@tonic-gate val += page_ctrs_cands[i][(r)][(m)].pcc_pages_free; \ 1827c478bd9Sstevel@tonic-gate } \ 1837c478bd9Sstevel@tonic-gate } 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate /* 1867c478bd9Sstevel@tonic-gate * Return in val the total number of free pages which can be created 1877c478bd9Sstevel@tonic-gate * for the given mnode (m), region size (r), and color (c) 1887c478bd9Sstevel@tonic-gate */ 1897c478bd9Sstevel@tonic-gate #define PGCTRS_CANDS_GETVALUECOLOR(m, r, c, val) { \ 1907c478bd9Sstevel@tonic-gate int i; \ 1917c478bd9Sstevel@tonic-gate val = 0; \ 1927c478bd9Sstevel@tonic-gate ASSERT((c) < page_ctrs_cands[0][(r)][(m)].pcc_color_free_len); \ 1937c478bd9Sstevel@tonic-gate for (i = 0; i < NPC_MUTEX; i++) { \ 1947c478bd9Sstevel@tonic-gate val += 
page_ctrs_cands[i][(r)][(m)].pcc_color_free[(c)]; \ 1957c478bd9Sstevel@tonic-gate } \ 1967c478bd9Sstevel@tonic-gate } 1977c478bd9Sstevel@tonic-gate 1987c478bd9Sstevel@tonic-gate /* 1997c478bd9Sstevel@tonic-gate * We can only allow a single thread to update a counter within the physical 2007c478bd9Sstevel@tonic-gate * range of the largest supported page size. That is the finest granularity 2017c478bd9Sstevel@tonic-gate * possible since the counter values are dependent on each other 2027c478bd9Sstevel@tonic-gate * as you move accross region sizes. PP_CTR_LOCK_INDX is used to determine the 2037c478bd9Sstevel@tonic-gate * ctr_mutex lock index for a particular physical range. 2047c478bd9Sstevel@tonic-gate */ 2057c478bd9Sstevel@tonic-gate static kmutex_t *ctr_mutex[NPC_MUTEX]; 2067c478bd9Sstevel@tonic-gate 2077c478bd9Sstevel@tonic-gate #define PP_CTR_LOCK_INDX(pp) \ 2087c478bd9Sstevel@tonic-gate (((pp)->p_pagenum >> \ 2097c478bd9Sstevel@tonic-gate (PAGE_BSZS_SHIFT(mmu_page_sizes - 1))) & (NPC_MUTEX - 1)) 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate /* 2127c478bd9Sstevel@tonic-gate * Local functions prototypes. 
2137c478bd9Sstevel@tonic-gate */ 2147c478bd9Sstevel@tonic-gate 215affbd3ccSkchow void page_ctr_add(int, int, page_t *, int); 216affbd3ccSkchow void page_ctr_add_internal(int, int, page_t *, int); 217affbd3ccSkchow void page_ctr_sub(int, int, page_t *, int); 2187c478bd9Sstevel@tonic-gate uint_t page_convert_color(uchar_t, uchar_t, uint_t); 2197c478bd9Sstevel@tonic-gate void page_freelist_lock(int); 2207c478bd9Sstevel@tonic-gate void page_freelist_unlock(int); 2217c478bd9Sstevel@tonic-gate page_t *page_promote(int, pfn_t, uchar_t, int); 2227c478bd9Sstevel@tonic-gate page_t *page_demote(int, pfn_t, uchar_t, uchar_t, int, int); 2237c478bd9Sstevel@tonic-gate page_t *page_freelist_fill(uchar_t, int, int, int, pfn_t); 2247c478bd9Sstevel@tonic-gate page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); 2257c478bd9Sstevel@tonic-gate static int page_trylock_cons(page_t *pp, se_t se); 2267c478bd9Sstevel@tonic-gate 2277c478bd9Sstevel@tonic-gate #define PNUM_SIZE(szc) \ 2287c478bd9Sstevel@tonic-gate (hw_page_array[(szc)].hp_size >> hw_page_array[0].hp_shift) 2297c478bd9Sstevel@tonic-gate #define PNUM_SHIFT(szc) \ 2307c478bd9Sstevel@tonic-gate (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 2317c478bd9Sstevel@tonic-gate 2327c478bd9Sstevel@tonic-gate /* 2337c478bd9Sstevel@tonic-gate * The page_counters array below is used to keep track of free contiguous 2347c478bd9Sstevel@tonic-gate * physical memory. A hw_page_map_t will be allocated per mnode per szc. 2357c478bd9Sstevel@tonic-gate * This contains an array of counters, the size of the array, a shift value 2367c478bd9Sstevel@tonic-gate * used to convert a pagenum into a counter array index or vice versa, as 2377c478bd9Sstevel@tonic-gate * well as a cache of the last successful index to be promoted to a larger 2387c478bd9Sstevel@tonic-gate * page size. 
As an optimization, we keep track of the last successful index 2397c478bd9Sstevel@tonic-gate * to be promoted per page color for the given size region, and this is 2407c478bd9Sstevel@tonic-gate * allocated dynamically based upon the number of colors for a given 2417c478bd9Sstevel@tonic-gate * region size. 2427c478bd9Sstevel@tonic-gate * 2437c478bd9Sstevel@tonic-gate * Conceptually, the page counters are represented as: 2447c478bd9Sstevel@tonic-gate * 2457c478bd9Sstevel@tonic-gate * page_counters[region_size][mnode] 2467c478bd9Sstevel@tonic-gate * 2477c478bd9Sstevel@tonic-gate * region_size: size code of a candidate larger page made up 2487c478bd9Sstevel@tonic-gate * of contiguous free smaller pages. 2497c478bd9Sstevel@tonic-gate * 2507c478bd9Sstevel@tonic-gate * page_counters[region_size][mnode].hpm_counters[index]: 2517c478bd9Sstevel@tonic-gate * represents how many (region_size - 1) pages either 2527c478bd9Sstevel@tonic-gate * exist or can be created within the given index range. 2537c478bd9Sstevel@tonic-gate * 2547c478bd9Sstevel@tonic-gate * Let's look at a sparc example: 2557c478bd9Sstevel@tonic-gate * If we want to create a free 512k page, we look at region_size 2 2567c478bd9Sstevel@tonic-gate * for the mnode we want. We calculate the index and look at a specific 2577c478bd9Sstevel@tonic-gate * hpm_counters location. If we see 8 (FULL_REGION_CNT on sparc) at 2587c478bd9Sstevel@tonic-gate * this location, it means that 8 64k pages either exist or can be created 2597c478bd9Sstevel@tonic-gate * from 8K pages in order to make a single free 512k page at the given 2607c478bd9Sstevel@tonic-gate * index. Note that when a region is full, it will contribute to the 2617c478bd9Sstevel@tonic-gate * counts in the region above it. Thus we will not know what page 2627c478bd9Sstevel@tonic-gate * size the free pages will be which can be promoted to this new free 2637c478bd9Sstevel@tonic-gate * page unless we look at all regions below the current region. 
2647c478bd9Sstevel@tonic-gate */ 2657c478bd9Sstevel@tonic-gate 2667c478bd9Sstevel@tonic-gate /* 2677c478bd9Sstevel@tonic-gate * Note: hpmctr_t is defined in platform vm_dep.h 2687c478bd9Sstevel@tonic-gate * hw_page_map_t contains all the information needed for the page_counters 2697c478bd9Sstevel@tonic-gate * logic. The fields are as follows: 2707c478bd9Sstevel@tonic-gate * 2717c478bd9Sstevel@tonic-gate * hpm_counters: dynamically allocated array to hold counter data 2727c478bd9Sstevel@tonic-gate * hpm_entries: entries in hpm_counters 2737c478bd9Sstevel@tonic-gate * hpm_shift: shift for pnum/array index conv 2747c478bd9Sstevel@tonic-gate * hpm_base: PFN mapped to counter index 0 2757c478bd9Sstevel@tonic-gate * hpm_color_current_len: # of elements in hpm_color_current "array" below 2767c478bd9Sstevel@tonic-gate * hpm_color_current: last index in counter array for this color at 2777c478bd9Sstevel@tonic-gate * which we successfully created a large page 2787c478bd9Sstevel@tonic-gate */ 2797c478bd9Sstevel@tonic-gate typedef struct hw_page_map { 2807c478bd9Sstevel@tonic-gate hpmctr_t *hpm_counters; 2817c478bd9Sstevel@tonic-gate size_t hpm_entries; 2827c478bd9Sstevel@tonic-gate int hpm_shift; 2837c478bd9Sstevel@tonic-gate pfn_t hpm_base; 2847c478bd9Sstevel@tonic-gate size_t hpm_color_current_len; 2857c478bd9Sstevel@tonic-gate size_t *hpm_color_current; 2867c478bd9Sstevel@tonic-gate } hw_page_map_t; 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate /* 2897c478bd9Sstevel@tonic-gate * Element zero is not used, but is allocated for convenience. 2907c478bd9Sstevel@tonic-gate */ 2917c478bd9Sstevel@tonic-gate static hw_page_map_t *page_counters[MMU_PAGE_SIZES]; 2927c478bd9Sstevel@tonic-gate 2937c478bd9Sstevel@tonic-gate /* 2947c478bd9Sstevel@tonic-gate * The following macros are convenient ways to get access to the individual 2957c478bd9Sstevel@tonic-gate * elements of the page_counters arrays. 
They can be used on both 2967c478bd9Sstevel@tonic-gate * the left side and right side of equations. 2977c478bd9Sstevel@tonic-gate */ 2987c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS(mnode, rg_szc, idx) \ 2997c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_counters[(idx)]) 3007c478bd9Sstevel@tonic-gate 3017c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_COUNTERS(mnode, rg_szc) \ 3027c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_counters) 3037c478bd9Sstevel@tonic-gate 3047c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_SHIFT(mnode, rg_szc) \ 3057c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_shift) 3067c478bd9Sstevel@tonic-gate 3077c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_ENTRIES(mnode, rg_szc) \ 3087c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_entries) 3097c478bd9Sstevel@tonic-gate 3107c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_BASE(mnode, rg_szc) \ 3117c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_base) 3127c478bd9Sstevel@tonic-gate 3137c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_CURRENT_COLOR_LEN(mnode, rg_szc) \ 3147c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_color_current_len) 3157c478bd9Sstevel@tonic-gate 3167c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, rg_szc) \ 3177c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_color_current) 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_CURRENT_COLOR(mnode, rg_szc, color) \ 3207c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_color_current[(color)]) 3217c478bd9Sstevel@tonic-gate 3227c478bd9Sstevel@tonic-gate #define PNUM_TO_IDX(mnode, rg_szc, pnum) \ 3237c478bd9Sstevel@tonic-gate (((pnum) - PAGE_COUNTERS_BASE((mnode), (rg_szc))) >> \ 3247c478bd9Sstevel@tonic-gate PAGE_COUNTERS_SHIFT((mnode), (rg_szc))) 3257c478bd9Sstevel@tonic-gate 3267c478bd9Sstevel@tonic-gate #define IDX_TO_PNUM(mnode, rg_szc, index) \ 
3277c478bd9Sstevel@tonic-gate (PAGE_COUNTERS_BASE((mnode), (rg_szc)) + \ 3287c478bd9Sstevel@tonic-gate ((index) << PAGE_COUNTERS_SHIFT((mnode), (rg_szc)))) 3297c478bd9Sstevel@tonic-gate 3307c478bd9Sstevel@tonic-gate /* 3317c478bd9Sstevel@tonic-gate * Protects the hpm_counters and hpm_color_current memory from changing while 3327c478bd9Sstevel@tonic-gate * looking at page counters information. 3337c478bd9Sstevel@tonic-gate * Grab the write lock to modify what these fields point at. 3347c478bd9Sstevel@tonic-gate * Grab the read lock to prevent any pointers from changing. 3357c478bd9Sstevel@tonic-gate * The write lock can not be held during memory allocation due to a possible 3367c478bd9Sstevel@tonic-gate * recursion deadlock with trying to grab the read lock while the 3377c478bd9Sstevel@tonic-gate * write lock is already held. 3387c478bd9Sstevel@tonic-gate */ 3397c478bd9Sstevel@tonic-gate krwlock_t page_ctrs_rwlock[MAX_MEM_NODES]; 3407c478bd9Sstevel@tonic-gate 341affbd3ccSkchow 342affbd3ccSkchow /* 343affbd3ccSkchow * initialize cpu_vm_data to point at cache aligned vm_cpu_data_t. 344affbd3ccSkchow */ 345affbd3ccSkchow void 346affbd3ccSkchow cpu_vm_data_init(struct cpu *cp) 347affbd3ccSkchow { 348affbd3ccSkchow if (cp == CPU0) { 349affbd3ccSkchow cp->cpu_vm_data = (void *)&vm_cpu_data0; 350affbd3ccSkchow } else { 351affbd3ccSkchow void *kmptr; 3526061ce8aSkchow int align; 3536061ce8aSkchow size_t sz; 354affbd3ccSkchow 3556061ce8aSkchow align = (L2CACHE_ALIGN) ? 
L2CACHE_ALIGN : L2CACHE_ALIGN_MAX; 3566061ce8aSkchow sz = P2ROUNDUP(sizeof (vm_cpu_data_t), align) + align; 3576061ce8aSkchow kmptr = kmem_zalloc(sz, KM_SLEEP); 358affbd3ccSkchow cp->cpu_vm_data = (void *) P2ROUNDUP((uintptr_t)kmptr, align); 359affbd3ccSkchow ((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmptr = kmptr; 3606061ce8aSkchow ((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmsize = sz; 361affbd3ccSkchow } 362affbd3ccSkchow } 363affbd3ccSkchow 364affbd3ccSkchow /* 365affbd3ccSkchow * free cpu_vm_data 366affbd3ccSkchow */ 367affbd3ccSkchow void 368affbd3ccSkchow cpu_vm_data_destroy(struct cpu *cp) 369affbd3ccSkchow { 370affbd3ccSkchow if (cp->cpu_seqid && cp->cpu_vm_data) { 371affbd3ccSkchow ASSERT(cp != CPU0); 372affbd3ccSkchow kmem_free(((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmptr, 3736061ce8aSkchow ((vm_cpu_data_t *)cp->cpu_vm_data)->vc_kmsize); 374affbd3ccSkchow } 375affbd3ccSkchow cp->cpu_vm_data = NULL; 376affbd3ccSkchow } 377affbd3ccSkchow 378affbd3ccSkchow 3797c478bd9Sstevel@tonic-gate /* 3807c478bd9Sstevel@tonic-gate * page size to page size code 3817c478bd9Sstevel@tonic-gate */ 3827c478bd9Sstevel@tonic-gate int 3837c478bd9Sstevel@tonic-gate page_szc(size_t pagesize) 3847c478bd9Sstevel@tonic-gate { 3857c478bd9Sstevel@tonic-gate int i = 0; 3867c478bd9Sstevel@tonic-gate 3877c478bd9Sstevel@tonic-gate while (hw_page_array[i].hp_size) { 3887c478bd9Sstevel@tonic-gate if (pagesize == hw_page_array[i].hp_size) 3897c478bd9Sstevel@tonic-gate return (i); 3907c478bd9Sstevel@tonic-gate i++; 3917c478bd9Sstevel@tonic-gate } 3927c478bd9Sstevel@tonic-gate return (-1); 3937c478bd9Sstevel@tonic-gate } 3947c478bd9Sstevel@tonic-gate 3957c478bd9Sstevel@tonic-gate /* 3964abce959Smec * page size to page size code with the restriction that it be a supported 3974abce959Smec * user page size. If it's not a supported user page size, -1 will be returned. 
3987c478bd9Sstevel@tonic-gate */ 3997c478bd9Sstevel@tonic-gate int 4004abce959Smec page_szc_user_filtered(size_t pagesize) 4017c478bd9Sstevel@tonic-gate { 4027c478bd9Sstevel@tonic-gate int szc = page_szc(pagesize); 4034abce959Smec if ((szc != -1) && (SZC_2_USERSZC(szc) != -1)) { 4044abce959Smec return (szc); 4054abce959Smec } 4067c478bd9Sstevel@tonic-gate return (-1); 4077c478bd9Sstevel@tonic-gate } 4087c478bd9Sstevel@tonic-gate 4097c478bd9Sstevel@tonic-gate /* 4107c478bd9Sstevel@tonic-gate * Return how many page sizes are available for the user to use. This is 4117c478bd9Sstevel@tonic-gate * what the hardware supports and not based upon how the OS implements the 4127c478bd9Sstevel@tonic-gate * support of different page sizes. 4137c478bd9Sstevel@tonic-gate */ 4147c478bd9Sstevel@tonic-gate uint_t 4157c478bd9Sstevel@tonic-gate page_num_user_pagesizes(void) 4167c478bd9Sstevel@tonic-gate { 4177c478bd9Sstevel@tonic-gate return (mmu_exported_page_sizes); 4187c478bd9Sstevel@tonic-gate } 4197c478bd9Sstevel@tonic-gate 4207c478bd9Sstevel@tonic-gate uint_t 4217c478bd9Sstevel@tonic-gate page_num_pagesizes(void) 4227c478bd9Sstevel@tonic-gate { 4237c478bd9Sstevel@tonic-gate return (mmu_page_sizes); 4247c478bd9Sstevel@tonic-gate } 4257c478bd9Sstevel@tonic-gate 4267c478bd9Sstevel@tonic-gate /* 4277c478bd9Sstevel@tonic-gate * returns the count of the number of base pagesize pages associated with szc 4287c478bd9Sstevel@tonic-gate */ 4297c478bd9Sstevel@tonic-gate pgcnt_t 4307c478bd9Sstevel@tonic-gate page_get_pagecnt(uint_t szc) 4317c478bd9Sstevel@tonic-gate { 4327c478bd9Sstevel@tonic-gate if (szc >= mmu_page_sizes) 4337c478bd9Sstevel@tonic-gate panic("page_get_pagecnt: out of range %d", szc); 4347c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_pgcnt); 4357c478bd9Sstevel@tonic-gate } 4367c478bd9Sstevel@tonic-gate 4377c478bd9Sstevel@tonic-gate size_t 4387c478bd9Sstevel@tonic-gate page_get_pagesize(uint_t szc) 4397c478bd9Sstevel@tonic-gate { 4407c478bd9Sstevel@tonic-gate if 
(szc >= mmu_page_sizes) 4417c478bd9Sstevel@tonic-gate panic("page_get_pagesize: out of range %d", szc); 4427c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_size); 4437c478bd9Sstevel@tonic-gate } 4447c478bd9Sstevel@tonic-gate 4457c478bd9Sstevel@tonic-gate /* 4467c478bd9Sstevel@tonic-gate * Return the size of a page based upon the index passed in. An index of 4477c478bd9Sstevel@tonic-gate * zero refers to the smallest page size in the system, and as index increases 4487c478bd9Sstevel@tonic-gate * it refers to the next larger supported page size in the system. 4497c478bd9Sstevel@tonic-gate * Note that szc and userszc may not be the same due to unsupported szc's on 4507c478bd9Sstevel@tonic-gate * some systems. 4517c478bd9Sstevel@tonic-gate */ 4527c478bd9Sstevel@tonic-gate size_t 4537c478bd9Sstevel@tonic-gate page_get_user_pagesize(uint_t userszc) 4547c478bd9Sstevel@tonic-gate { 4557c478bd9Sstevel@tonic-gate uint_t szc = USERSZC_2_SZC(userszc); 4567c478bd9Sstevel@tonic-gate 4577c478bd9Sstevel@tonic-gate if (szc >= mmu_page_sizes) 4587c478bd9Sstevel@tonic-gate panic("page_get_user_pagesize: out of range %d", szc); 4597c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_size); 4607c478bd9Sstevel@tonic-gate } 4617c478bd9Sstevel@tonic-gate 4627c478bd9Sstevel@tonic-gate uint_t 4637c478bd9Sstevel@tonic-gate page_get_shift(uint_t szc) 4647c478bd9Sstevel@tonic-gate { 4657c478bd9Sstevel@tonic-gate if (szc >= mmu_page_sizes) 4667c478bd9Sstevel@tonic-gate panic("page_get_shift: out of range %d", szc); 4677c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_shift); 4687c478bd9Sstevel@tonic-gate } 4697c478bd9Sstevel@tonic-gate 4707c478bd9Sstevel@tonic-gate uint_t 4717c478bd9Sstevel@tonic-gate page_get_pagecolors(uint_t szc) 4727c478bd9Sstevel@tonic-gate { 4737c478bd9Sstevel@tonic-gate ASSERT(page_colors != 0); 4747c478bd9Sstevel@tonic-gate return (MAX(page_colors >> PAGE_BSZS_SHIFT(szc), 1)); 4757c478bd9Sstevel@tonic-gate } 4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate /* 4787c478bd9Sstevel@tonic-gate * Called by startup(). 4797c478bd9Sstevel@tonic-gate * Size up the per page size free list counters based on physmax 4807c478bd9Sstevel@tonic-gate * of each node and max_mem_nodes. 4817c478bd9Sstevel@tonic-gate */ 4827c478bd9Sstevel@tonic-gate size_t 4837c478bd9Sstevel@tonic-gate page_ctrs_sz(void) 4847c478bd9Sstevel@tonic-gate { 4857c478bd9Sstevel@tonic-gate int r; /* region size */ 4867c478bd9Sstevel@tonic-gate int mnode; 4877c478bd9Sstevel@tonic-gate uint_t ctrs_sz = 0; 4887c478bd9Sstevel@tonic-gate int i; 4897c478bd9Sstevel@tonic-gate pgcnt_t colors_per_szc[MMU_PAGE_SIZES]; 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate /* 4927c478bd9Sstevel@tonic-gate * We need to determine how many page colors there are for each 4937c478bd9Sstevel@tonic-gate * page size in order to allocate memory for any color specific 4947c478bd9Sstevel@tonic-gate * arrays. 4957c478bd9Sstevel@tonic-gate */ 4967c478bd9Sstevel@tonic-gate colors_per_szc[0] = page_colors; 4977c478bd9Sstevel@tonic-gate for (i = 1; i < mmu_page_sizes; i++) { 4987c478bd9Sstevel@tonic-gate colors_per_szc[i] = 4997c478bd9Sstevel@tonic-gate page_convert_color(0, i, page_colors - 1) + 1; 5007c478bd9Sstevel@tonic-gate } 5017c478bd9Sstevel@tonic-gate 5027c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 5037c478bd9Sstevel@tonic-gate 5047c478bd9Sstevel@tonic-gate pgcnt_t r_pgcnt; 5057c478bd9Sstevel@tonic-gate pfn_t r_base; 5067c478bd9Sstevel@tonic-gate pgcnt_t r_align; 5077c478bd9Sstevel@tonic-gate 5087c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].exists == 0) 5097c478bd9Sstevel@tonic-gate continue; 5107c478bd9Sstevel@tonic-gate 5117c478bd9Sstevel@tonic-gate /* 5127c478bd9Sstevel@tonic-gate * determine size needed for page counter arrays with 5137c478bd9Sstevel@tonic-gate * base aligned to large page size. 
5147c478bd9Sstevel@tonic-gate */ 5157c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 5167c478bd9Sstevel@tonic-gate /* add in space for hpm_counters */ 5177c478bd9Sstevel@tonic-gate r_align = page_get_pagecnt(r); 5187c478bd9Sstevel@tonic-gate r_base = mem_node_config[mnode].physbase; 5197c478bd9Sstevel@tonic-gate r_base &= ~(r_align - 1); 5207c478bd9Sstevel@tonic-gate r_pgcnt = howmany(mem_node_config[mnode].physmax - 5216bb54764Skchow r_base + 1, r_align); 5227c478bd9Sstevel@tonic-gate /* 5237c478bd9Sstevel@tonic-gate * Round up to always allocate on pointer sized 5247c478bd9Sstevel@tonic-gate * boundaries. 5257c478bd9Sstevel@tonic-gate */ 5267c478bd9Sstevel@tonic-gate ctrs_sz += P2ROUNDUP((r_pgcnt * sizeof (hpmctr_t)), 5277c478bd9Sstevel@tonic-gate sizeof (hpmctr_t *)); 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate /* add in space for hpm_color_current */ 5307c478bd9Sstevel@tonic-gate ctrs_sz += (colors_per_szc[r] * 5317c478bd9Sstevel@tonic-gate sizeof (size_t)); 5327c478bd9Sstevel@tonic-gate } 5337c478bd9Sstevel@tonic-gate } 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 5367c478bd9Sstevel@tonic-gate ctrs_sz += (max_mem_nodes * sizeof (hw_page_map_t)); 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate /* add in space for page_ctrs_cands */ 5397c478bd9Sstevel@tonic-gate ctrs_sz += NPC_MUTEX * max_mem_nodes * (sizeof (pcc_info_t)); 5407c478bd9Sstevel@tonic-gate ctrs_sz += NPC_MUTEX * max_mem_nodes * colors_per_szc[r] * 5417c478bd9Sstevel@tonic-gate sizeof (pgcnt_t); 5427c478bd9Sstevel@tonic-gate } 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate /* ctr_mutex */ 5457c478bd9Sstevel@tonic-gate ctrs_sz += (max_mem_nodes * NPC_MUTEX * sizeof (kmutex_t)); 5467c478bd9Sstevel@tonic-gate 5477c478bd9Sstevel@tonic-gate /* size for page list counts */ 5487c478bd9Sstevel@tonic-gate PLCNT_SZ(ctrs_sz); 5497c478bd9Sstevel@tonic-gate 5507c478bd9Sstevel@tonic-gate /* 
 * add some slop for roundups. page_ctrs_alloc will roundup the start
 * address of the counters to ecache_alignsize boundary for every
 * memory node.
 */
	return (ctrs_sz + max_mem_nodes * L2CACHE_ALIGN);
}

/*
 * Carve the single pre-sized allocation (sized by page_ctrs_sz()) into
 * the page counter data structures for every memory node and region
 * size: page_counters[], page_ctrs_cands[][], the per-candidate
 * pcc_color_free arrays, ctr_mutex[], and the hpm counter and color
 * arrays themselves.  alloc_base is bumped past each carved-out piece;
 * the first address past the last piece is returned so the caller can
 * keep allocating from there.
 */
caddr_t
page_ctrs_alloc(caddr_t alloc_base)
{
	int	mnode;
	int	r;		/* region size */
	int	i;
	pgcnt_t colors_per_szc[MMU_PAGE_SIZES];

	/*
	 * We need to determine how many page colors there are for each
	 * page size in order to allocate memory for any color specific
	 * arrays.
	 */
	colors_per_szc[0] = page_colors;
	for (i = 1; i < mmu_page_sizes; i++) {
		colors_per_szc[i] =
		    page_convert_color(0, i, page_colors - 1) + 1;
	}

	/* per-mnode hw_page_map_t array for each region size */
	for (r = 1; r < mmu_page_sizes; r++) {
		page_counters[r] = (hw_page_map_t *)alloc_base;
		alloc_base += (max_mem_nodes * sizeof (hw_page_map_t));
	}

	/* page_ctrs_cands */
	for (r = 1; r < mmu_page_sizes; r++) {
		for (i = 0; i < NPC_MUTEX; i++) {
			page_ctrs_cands[i][r] = (pcc_info_t *)alloc_base;
			alloc_base += max_mem_nodes * (sizeof (pcc_info_t));

		}
	}

	/* page_ctrs_cands pcc_color_free array */
	for (r = 1; r < mmu_page_sizes; r++) {
		for (i = 0; i < NPC_MUTEX; i++) {
			for (mnode = 0; mnode < max_mem_nodes; mnode++) {
				page_ctrs_cands[i][r][mnode].pcc_color_free_len
				= colors_per_szc[r];
				page_ctrs_cands[i][r][mnode].pcc_color_free =
				    (pgcnt_t *)alloc_base;
				alloc_base += colors_per_szc[r] *
				    sizeof (pgcnt_t);
			}
		}
	}

	/* ctr_mutex */
	for (i = 0; i < NPC_MUTEX; i++) {
		ctr_mutex[i] = (kmutex_t *)alloc_base;
		alloc_base += (max_mem_nodes * sizeof (kmutex_t));
	}

	/* initialize page list counts */
	PLCNT_INIT(alloc_base);

	for (mnode = 0; mnode < max_mem_nodes; mnode++) {

		pgcnt_t r_pgcnt;
		pfn_t	r_base;
		pgcnt_t r_align;
		int	r_shift;

		if (mem_node_config[mnode].exists == 0)
			continue;

		for (r = 1; r < mmu_page_sizes; r++) {
			/*
			 * the page_counters base has to be aligned to the
			 * page count of page size code r otherwise the counts
			 * will cross large page boundaries.
			 */
			r_align = page_get_pagecnt(r);
			r_base = mem_node_config[mnode].physbase;
			/* base needs to be aligned - lower to aligned value */
			r_base &= ~(r_align - 1);
			r_pgcnt = howmany(mem_node_config[mnode].physmax -
			    r_base + 1, r_align);
			r_shift = PAGE_BSZS_SHIFT(r);

			PAGE_COUNTERS_SHIFT(mnode, r) = r_shift;
			PAGE_COUNTERS_ENTRIES(mnode, r) = r_pgcnt;
			PAGE_COUNTERS_BASE(mnode, r) = r_base;
			PAGE_COUNTERS_CURRENT_COLOR_LEN(mnode, r) =
			    colors_per_szc[r];
			PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, r) =
			    (size_t *)alloc_base;
			alloc_base += (sizeof (size_t) * colors_per_szc[r]);
			/* each color's search cursor starts at itself */
			for (i = 0; i < colors_per_szc[r]; i++) {
				PAGE_COUNTERS_CURRENT_COLOR(mnode, r, i) = i;
			}
			PAGE_COUNTERS_COUNTERS(mnode, r) =
			    (hpmctr_t *)alloc_base;
			/*
			 * Round up to make alloc_base always be aligned on
			 * a pointer boundary.
			 */
			alloc_base += P2ROUNDUP((sizeof (hpmctr_t) * r_pgcnt),
			    sizeof (hpmctr_t *));

			/*
			 * Verify that PNUM_TO_IDX and IDX_TO_PNUM
			 * satisfy the identity requirement.
			 * We should be able to go from one to the other
			 * and get consistent values.
			 */
			ASSERT(PNUM_TO_IDX(mnode, r,
			    (IDX_TO_PNUM(mnode, r, 0))) == 0);
			ASSERT(IDX_TO_PNUM(mnode, r,
			    (PNUM_TO_IDX(mnode, r, r_base))) == r_base);
		}
		/*
		 * Roundup the start address of the page_counters to
		 * cache aligned boundary for every memory node.
		 * page_ctrs_sz() has added some slop for these roundups.
		 */
		alloc_base = (caddr_t)P2ROUNDUP((uintptr_t)alloc_base,
		    L2CACHE_ALIGN);
	}

	/* Initialize other page counter specific data structures. */
	for (mnode = 0; mnode < MAX_MEM_NODES; mnode++) {
		rw_init(&page_ctrs_rwlock[mnode], NULL, RW_DEFAULT, NULL);
	}

	return (alloc_base);
}

/*
 * Functions to adjust region counters for each size free list.
 * Caller is responsible to acquire the ctr_mutex lock if necessary and
 * thus can be called during startup without locks.
 */
/*
 * Bump the free-region counters for pp's mnode/mtype.  This is the
 * lock-free flavor; the caller must either hold the matching ctr_mutex
 * (see page_ctr_add()) or be single threaded (startup).
 */
/* ARGSUSED */
void
page_ctr_add_internal(int mnode, int mtype, page_t *pp, int flags)
{
	ssize_t		r;	/* region size */
	ssize_t		idx;
	pfn_t		pfnum;
	int		lckidx;

	/* caller-supplied mnode/mtype must describe pp itself */
	ASSERT(mnode == PP_2_MEM_NODE(pp));
	ASSERT(mtype == PP_2_MTYPE(pp));

	ASSERT(pp->p_szc < mmu_page_sizes);

	PLCNT_INCR(pp, mnode, mtype, pp->p_szc, flags);

	/* no counter update needed for largest page size */
	if (pp->p_szc >= mmu_page_sizes - 1) {
		return;
	}

	r = pp->p_szc + 1;
	pfnum = pp->p_pagenum;
	lckidx = PP_CTR_LOCK_INDX(pp);

	/*
	 * Increment the count of free pages for the current
	 * region. Continue looping up in region size incrementing
	 * count if the preceding region is full.
	 */
	while (r < mmu_page_sizes) {
		idx = PNUM_TO_IDX(mnode, r, pfnum);

		ASSERT(idx < PAGE_COUNTERS_ENTRIES(mnode, r));
		ASSERT(PAGE_COUNTERS(mnode, r, idx) < FULL_REGION_CNT(r));

		if (++PAGE_COUNTERS(mnode, r, idx) != FULL_REGION_CNT(r))
			break;

		/* region just became full: account it as a candidate */
		page_ctrs_cands[lckidx][r][mnode].pcc_pages_free++;
		page_ctrs_cands[lckidx][r][mnode].
		    pcc_color_free[PP_2_BIN_SZC(pp, r)]++;
		r++;
	}
}

/*
 * Locked wrapper around page_ctr_add_internal().
 */
void
page_ctr_add(int mnode, int mtype, page_t *pp, int flags)
{
	int lckidx = PP_CTR_LOCK_INDX(pp);
	kmutex_t *lock = &ctr_mutex[lckidx][mnode];

	mutex_enter(lock);
	page_ctr_add_internal(mnode, mtype, pp, flags);
	mutex_exit(lock);
}

/*
 * Inverse of page_ctr_add(): decrement the free-region counters for
 * pp's mnode/mtype under the corresponding ctr_mutex.
 */
void
page_ctr_sub(int mnode, int mtype, page_t *pp, int flags)
{
	int		lckidx;
	kmutex_t	*lock;
	ssize_t		r;	/* region size */
	ssize_t		idx;
	pfn_t		pfnum;

	ASSERT(mnode == PP_2_MEM_NODE(pp));
	ASSERT(mtype == PP_2_MTYPE(pp));

	ASSERT(pp->p_szc < mmu_page_sizes);

	PLCNT_DECR(pp, mnode, mtype, pp->p_szc, flags);

	/* no counter update needed for largest page size */
	if (pp->p_szc >= mmu_page_sizes - 1) {
		return;
	}

	r = pp->p_szc + 1;
	pfnum = pp->p_pagenum;
	lckidx = PP_CTR_LOCK_INDX(pp);
	lock = &ctr_mutex[lckidx][mnode];

	/*
	 * Decrement the count of free pages for the current
	 * region. Continue looping up in region size decrementing
	 * count if the preceding region was full.
	 */
	mutex_enter(lock);
	while (r < mmu_page_sizes) {
		idx = PNUM_TO_IDX(mnode, r, pfnum);

		ASSERT(idx < PAGE_COUNTERS_ENTRIES(mnode, r));
		ASSERT(PAGE_COUNTERS(mnode, r, idx) > 0);

		if (--PAGE_COUNTERS(mnode, r, idx) != FULL_REGION_CNT(r) - 1) {
			break;
		}
		/* region just stopped being full: drop candidate counts */
		ASSERT(page_ctrs_cands[lckidx][r][mnode].pcc_pages_free != 0);
		ASSERT(page_ctrs_cands[lckidx][r][mnode].
		    pcc_color_free[PP_2_BIN_SZC(pp, r)] != 0);

		page_ctrs_cands[lckidx][r][mnode].pcc_pages_free--;
		page_ctrs_cands[lckidx][r][mnode].
		    pcc_color_free[PP_2_BIN_SZC(pp, r)]--;
		r++;
	}
	mutex_exit(lock);
}

/*
 * Adjust page counters following a memory attach, since typically the
 * size of the array needs to change, and the PFN to counter index
 * mapping needs to change.  Returns 0 on success, ENOMEM if the
 * replacement arrays cannot be allocated.
 */
uint_t
page_ctrs_adjust(int mnode)
{
	pgcnt_t npgs;
	int	r;	/* region size */
	int	i;
	size_t	pcsz, old_csz;
	hpmctr_t *new_ctr, *old_ctr;
	pfn_t	oldbase, newbase;
	size_t	old_npgs;
	hpmctr_t *ctr_cache[MMU_PAGE_SIZES];
	size_t	size_cache[MMU_PAGE_SIZES];
	size_t	*color_cache[MMU_PAGE_SIZES];
	size_t	*old_color_array;
	pgcnt_t	colors_per_szc[MMU_PAGE_SIZES];

	newbase = mem_node_config[mnode].physbase & ~PC_BASE_ALIGN_MASK;
	npgs = roundup(mem_node_config[mnode].physmax,
	    PC_BASE_ALIGN) - newbase;

	/*
	 * We need to determine how many page colors there are for each
	 * page size in order to allocate memory for any color specific
	 * arrays.
	 */
	colors_per_szc[0] = page_colors;
	for (r = 1; r < mmu_page_sizes; r++) {
		colors_per_szc[r] =
		    page_convert_color(0, r, page_colors - 1) + 1;
	}

	/*
	 * Preallocate all of the new hpm_counters arrays as we can't
	 * hold the page_ctrs_rwlock as a writer and allocate memory.
	 * If we can't allocate all of the arrays, undo our work so far
	 * and return failure.
	 */
	for (r = 1; r < mmu_page_sizes; r++) {
		pcsz = npgs >> PAGE_BSZS_SHIFT(r);

		/* KM_NOSLEEP: fail rather than block during attach */
		ctr_cache[r] = kmem_zalloc(pcsz *
		    sizeof (hpmctr_t), KM_NOSLEEP);
		if (ctr_cache[r] == NULL) {
			/* unwind the sizes allocated on earlier iterations */
			while (--r >= 1) {
				kmem_free(ctr_cache[r],
				    size_cache[r] * sizeof (hpmctr_t));
			}
			return (ENOMEM);
		}
		size_cache[r] = pcsz;
	}
	/*
	 * Preallocate all of the new color current arrays as we can't
	 * hold the page_ctrs_rwlock as a writer and allocate memory.
	 * If we can't allocate all of the arrays, undo our work so far
	 * and return failure.
	 */
	for (r = 1; r < mmu_page_sizes; r++) {
		color_cache[r] = kmem_zalloc(sizeof (size_t) *
		    colors_per_szc[r], KM_NOSLEEP);
		if (color_cache[r] == NULL) {
			while (--r >= 1) {
				kmem_free(color_cache[r],
				    colors_per_szc[r] * sizeof (size_t));
			}
			/* also release every hpm_counters array from above */
			for (r = 1; r < mmu_page_sizes; r++) {
				kmem_free(ctr_cache[r],
				    size_cache[r] * sizeof (hpmctr_t));
			}
			return (ENOMEM);
		}
	}

	/*
	 * Grab the write lock to prevent others from walking these arrays
	 * while we are modifying them.
	 */
	rw_enter(&page_ctrs_rwlock[mnode], RW_WRITER);
	page_freelist_lock(mnode);
	for (r = 1; r < mmu_page_sizes; r++) {
		PAGE_COUNTERS_SHIFT(mnode, r) = PAGE_BSZS_SHIFT(r);
		old_ctr = PAGE_COUNTERS_COUNTERS(mnode, r);
		old_csz = PAGE_COUNTERS_ENTRIES(mnode, r);
		oldbase = PAGE_COUNTERS_BASE(mnode, r);
		old_npgs = old_csz << PAGE_COUNTERS_SHIFT(mnode, r);
		old_color_array = PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, r);

		pcsz = npgs >> PAGE_COUNTERS_SHIFT(mnode, r);
		new_ctr = ctr_cache[r];
		ctr_cache[r] = NULL;
		/* overlap test: old and new PFN ranges intersect */
		if (old_ctr != NULL &&
		    (oldbase + old_npgs > newbase) &&
		    (newbase + npgs > oldbase)) {
			/*
			 * Map the intersection of the old and new
			 * counters into the new array.
			 */
			size_t offset;
			if (newbase > oldbase) {
				offset = (newbase - oldbase) >>
				    PAGE_COUNTERS_SHIFT(mnode, r);
				bcopy(old_ctr + offset, new_ctr,
				    MIN(pcsz, (old_csz - offset)) *
				    sizeof (hpmctr_t));
			} else {
				offset = (oldbase - newbase) >>
				    PAGE_COUNTERS_SHIFT(mnode, r);
				bcopy(old_ctr, new_ctr + offset,
				    MIN(pcsz - offset, old_csz) *
				    sizeof (hpmctr_t));
			}
		}

		PAGE_COUNTERS_COUNTERS(mnode, r) = new_ctr;
		PAGE_COUNTERS_ENTRIES(mnode, r) = pcsz;
		PAGE_COUNTERS_BASE(mnode, r) = newbase;
		PAGE_COUNTERS_CURRENT_COLOR_LEN(mnode, r) = colors_per_szc[r];
		PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, r) = color_cache[r];
		color_cache[r] = NULL;
		/*
		 * for now, just reset on these events as it's probably
		 * not worthwhile to try and optimize this.
		 */
		for (i = 0; i < colors_per_szc[r]; i++) {
			PAGE_COUNTERS_CURRENT_COLOR(mnode, r, i) = i;
		}

		/*
		 * cache info for freeing out of the critical path.
		 * NOTE(review): only arrays inside [kernelheap, ekernelheap)
		 * are queued for kmem_free; presumably arrays outside that
		 * range came from the boot-time carve in page_ctrs_alloc()
		 * and must not be freed — confirm against startup path.
		 */
		if ((caddr_t)old_ctr >= kernelheap &&
		    (caddr_t)old_ctr < ekernelheap) {
			ctr_cache[r] = old_ctr;
			size_cache[r] = old_csz;
		}
		if ((caddr_t)old_color_array >= kernelheap &&
		    (caddr_t)old_color_array < ekernelheap) {
			color_cache[r] = old_color_array;
		}
		/*
		 * Verify that PNUM_TO_IDX and IDX_TO_PNUM
		 * satisfy the identity requirement.
		 * We should be able to go from one to the other
		 * and get consistent values.
		 */
		ASSERT(PNUM_TO_IDX(mnode, r,
		    (IDX_TO_PNUM(mnode, r, 0))) == 0);
		ASSERT(IDX_TO_PNUM(mnode, r,
		    (PNUM_TO_IDX(mnode, r, newbase))) == newbase);
	}
	page_freelist_unlock(mnode);
	rw_exit(&page_ctrs_rwlock[mnode]);

	/*
	 * Now that we have dropped the write lock, it is safe to free all
	 * of the memory we have cached above.
	 */
	for (r = 1; r < mmu_page_sizes; r++) {
		if (ctr_cache[r] != NULL) {
			kmem_free(ctr_cache[r],
			    size_cache[r] * sizeof (hpmctr_t));
		}
		if (color_cache[r] != NULL) {
			kmem_free(color_cache[r],
			    colors_per_szc[r] * sizeof (size_t));
		}
	}
	return (0);
}

/*
 * color contains a valid color index or bin for cur_szc.
 * Convert it to the equivalent color for new_szc: converting to a
 * smaller page size code shifts left (finer granularity, more colors),
 * converting to a larger one shifts right.
 */
uint_t
page_convert_color(uchar_t cur_szc, uchar_t new_szc, uint_t color)
{
	uint_t shift;

	if (cur_szc > new_szc) {
		shift = page_get_shift(cur_szc) - page_get_shift(new_szc);
		return (color << shift);
	} else if (cur_szc < new_szc) {
		shift = page_get_shift(new_szc) - page_get_shift(cur_szc);
		return (color >> shift);
	}
	return (color);		/* same size code: nothing to convert */
}

#ifdef DEBUG

/*
 * confirm pp is a large page corresponding to szc
 */
void
chk_lpg(page_t *pp, uchar_t szc)
{
	spgcnt_t npgs = page_get_pagecnt(pp->p_szc);
	uint_t noreloc;

	if (npgs == 1) {
		ASSERT(pp->p_szc == 0);
		ASSERT(pp->p_next == pp);
		ASSERT(pp->p_prev == pp);
		return;
	}

	ASSERT(pp->p_vpnext == pp || pp->p_vpnext == NULL);
	ASSERT(pp->p_vpprev == pp || pp->p_vpprev == NULL);

	/* pp must be the properly aligned first constituent page */
	ASSERT(IS_P2ALIGNED(pp->p_pagenum, npgs));
	ASSERT(pp->p_pagenum == (pp->p_next->p_pagenum - 1));
	ASSERT(pp->p_prev->p_pagenum == (pp->p_pagenum + (npgs - 1)));
	ASSERT(pp->p_prev == (pp + (npgs - 1)));

	/*
	 * Check list of pages.
	 */
	noreloc = PP_ISNORELOC(pp);
	while (npgs--) {
		if (npgs != 0) {
			ASSERT(pp->p_pagenum == pp->p_next->p_pagenum - 1);
			ASSERT(pp->p_next == (pp + 1));
		}
		ASSERT(pp->p_szc == szc);
		ASSERT(PP_ISFREE(pp));
		ASSERT(PP_ISAGED(pp));
		ASSERT(pp->p_vpnext == pp || pp->p_vpnext == NULL);
		ASSERT(pp->p_vpprev == pp || pp->p_vpprev == NULL);
		ASSERT(pp->p_vnode == NULL);
		/* all constituent pages share the NORELOC setting */
		ASSERT(PP_ISNORELOC(pp) == noreloc);

		pp = pp->p_next;
	}
}
#endif	/* DEBUG */

/*
 * Acquire every free list (FPC) and cache list (CPC) mutex for mnode,
 * FPC before CPC at each index; page_freelist_unlock() below releases
 * in the same index order.
 */
void
page_freelist_lock(int mnode)
{
	int i;
	for (i = 0; i < NPC_MUTEX; i++) {
		mutex_enter(FPC_MUTEX(mnode, i));
		mutex_enter(CPC_MUTEX(mnode, i));
	}
}

void
page_freelist_unlock(int mnode)
{
	int i;
	for (i = 0; i < NPC_MUTEX; i++) {
		mutex_exit(FPC_MUTEX(mnode, i));
		mutex_exit(CPC_MUTEX(mnode, i));
	}
}
/*
 * add pp to the specified page list. Defaults to head of the page list
 * unless PG_LIST_TAIL is specified.
 */
void
page_list_add(page_t *pp, int flags)
{
	page_t		**ppp;
	kmutex_t	*pcm;
	uint_t		bin, mtype;
	int		mnode;

	/* page must be exclusively held unless this is startup init */
	ASSERT(PAGE_EXCL(pp) || (flags & PG_LIST_ISINIT));
	ASSERT(PP_ISFREE(pp));
	ASSERT(!hat_page_is_mapped(pp));
	ASSERT(hat_page_getshare(pp) == 0);

	/*
	 * Large pages should be freed via page_list_add_pages().
	 */
	ASSERT(pp->p_szc == 0);

	/*
	 * Don't need to lock the freelist first here
	 * because the page isn't on the freelist yet.
	 * This means p_szc can't change on us.
	 */

	bin = PP_2_BIN(pp);
	mnode = PP_2_MEM_NODE(pp);
	mtype = PP_2_MTYPE(pp);

	if (flags & PG_LIST_ISINIT) {
		/*
		 * PG_LIST_ISINIT is set during system startup (ie. single
		 * threaded), add a page to the free list and add to the
		 * the free region counters w/o any locking
		 */
		ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype);

		/* inline version of page_add() */
		if (*ppp != NULL) {
			pp->p_next = *ppp;
			pp->p_prev = (*ppp)->p_prev;
			(*ppp)->p_prev = pp;
			pp->p_prev->p_next = pp;
		} else
			*ppp = pp;

		page_ctr_add_internal(mnode, mtype, pp, flags);
		VM_STAT_ADD(vmm_vmstats.pladd_free[0]);
	} else {
		pcm = PC_BIN_MUTEX(mnode, bin, flags);

		if (flags & PG_FREE_LIST) {
			VM_STAT_ADD(vmm_vmstats.pladd_free[0]);
			ASSERT(PP_ISAGED(pp));
			ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype);

		} else {
			VM_STAT_ADD(vmm_vmstats.pladd_cache);
			/* cache list pages are vnode-backed, page-aligned */
			ASSERT(pp->p_vnode);
			ASSERT((pp->p_offset & PAGEOFFSET) == 0);
			ppp = &PAGE_CACHELISTS(mnode, bin, mtype);
		}
		mutex_enter(pcm);
		page_add(ppp, pp);

		/* advance the head past pp so it ends up at the tail */
		if (flags & PG_LIST_TAIL)
			*ppp = (*ppp)->p_next;
		/*
		 * Add counters before releasing pcm mutex to avoid a race with
		 * page_freelist_coalesce and page_freelist_fill.
		 */
		page_ctr_add(mnode, mtype, pp, flags);
		mutex_exit(pcm);
	}


#if defined(__sparc)
	if (PP_ISNORELOC(pp)) {
		kcage_freemem_add(1);
	}
#endif
	/*
	 * It is up to the caller to unlock the page!
	 */
	ASSERT(PAGE_EXCL(pp) || (flags & PG_LIST_ISINIT));
}


#ifdef __sparc
/*
 * This routine is only used by kcage_init during system startup.
 * It performs the function of page_list_sub/PP_SETNORELOC/page_list_add
 * without the overhead of taking locks and updating counters.
 */
void
page_list_noreloc_startup(page_t *pp)
{
	page_t		**ppp;
	uint_t		bin;
	int		mnode;
	int		mtype;
	int		flags = 0;

	/*
	 * If this is a large page on the freelist then
	 * break it up into smaller pages.
	 */
	if (pp->p_szc != 0)
		page_boot_demote(pp);

	/*
	 * Get list page is currently on.
	 */
	bin = PP_2_BIN(pp);
	mnode = PP_2_MEM_NODE(pp);
	mtype = PP_2_MTYPE(pp);
	/* page must still be relocatable; cage conversion happens below */
	ASSERT(mtype == MTYPE_RELOC);
	ASSERT(pp->p_szc == 0);

	if (PP_ISAGED(pp)) {
		ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype);
		flags |= PG_FREE_LIST;
	} else {
		ppp = &PAGE_CACHELISTS(mnode, bin, mtype);
		flags |= PG_CACHE_LIST;
	}

	ASSERT(*ppp != NULL);

	/*
	 * Delete page from current list.
	 */
	if (*ppp == pp)
		*ppp = pp->p_next;		/* go to next page */
	if (*ppp == pp) {
		*ppp = NULL;			/* page list is gone */
	} else {
		pp->p_prev->p_next = pp->p_next;
		pp->p_next->p_prev = pp->p_prev;
	}

	/* LINTED */
	PLCNT_DECR(pp, mnode, mtype, 0, flags);

	/*
	 * Set no reloc for cage initted pages.
12097c478bd9Sstevel@tonic-gate */ 12107c478bd9Sstevel@tonic-gate PP_SETNORELOC(pp); 12117c478bd9Sstevel@tonic-gate 12127c478bd9Sstevel@tonic-gate mtype = PP_2_MTYPE(pp); 12137c478bd9Sstevel@tonic-gate ASSERT(mtype == MTYPE_NORELOC); 12147c478bd9Sstevel@tonic-gate 12157c478bd9Sstevel@tonic-gate /* 12167c478bd9Sstevel@tonic-gate * Get new list for page. 12177c478bd9Sstevel@tonic-gate */ 12187c478bd9Sstevel@tonic-gate if (PP_ISAGED(pp)) { 12197c478bd9Sstevel@tonic-gate ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); 12207c478bd9Sstevel@tonic-gate } else { 12217c478bd9Sstevel@tonic-gate ppp = &PAGE_CACHELISTS(mnode, bin, mtype); 12227c478bd9Sstevel@tonic-gate } 12237c478bd9Sstevel@tonic-gate 12247c478bd9Sstevel@tonic-gate /* 12257c478bd9Sstevel@tonic-gate * Insert page on new list. 12267c478bd9Sstevel@tonic-gate */ 12277c478bd9Sstevel@tonic-gate if (*ppp == NULL) { 12287c478bd9Sstevel@tonic-gate *ppp = pp; 12297c478bd9Sstevel@tonic-gate pp->p_next = pp->p_prev = pp; 12307c478bd9Sstevel@tonic-gate } else { 12317c478bd9Sstevel@tonic-gate pp->p_next = *ppp; 12327c478bd9Sstevel@tonic-gate pp->p_prev = (*ppp)->p_prev; 12337c478bd9Sstevel@tonic-gate (*ppp)->p_prev = pp; 12347c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp; 12357c478bd9Sstevel@tonic-gate } 12367c478bd9Sstevel@tonic-gate 12377c478bd9Sstevel@tonic-gate /* LINTED */ 1238affbd3ccSkchow PLCNT_INCR(pp, mnode, mtype, 0, flags); 12397c478bd9Sstevel@tonic-gate 12407c478bd9Sstevel@tonic-gate /* 12417c478bd9Sstevel@tonic-gate * Update cage freemem counter 12427c478bd9Sstevel@tonic-gate */ 12437c478bd9Sstevel@tonic-gate atomic_add_long(&kcage_freemem, 1); 12447c478bd9Sstevel@tonic-gate } 12457c478bd9Sstevel@tonic-gate #else /* __sparc */ 12467c478bd9Sstevel@tonic-gate 12477c478bd9Sstevel@tonic-gate /* ARGSUSED */ 12487c478bd9Sstevel@tonic-gate void 12497c478bd9Sstevel@tonic-gate page_list_noreloc_startup(page_t *pp) 12507c478bd9Sstevel@tonic-gate { 12517c478bd9Sstevel@tonic-gate panic("page_list_noreloc_startup: should be 
here only for sparc"); 12527c478bd9Sstevel@tonic-gate } 12537c478bd9Sstevel@tonic-gate #endif 12547c478bd9Sstevel@tonic-gate 12557c478bd9Sstevel@tonic-gate void 12567c478bd9Sstevel@tonic-gate page_list_add_pages(page_t *pp, int flags) 12577c478bd9Sstevel@tonic-gate { 12587c478bd9Sstevel@tonic-gate kmutex_t *pcm; 12597c478bd9Sstevel@tonic-gate pgcnt_t pgcnt; 12607c478bd9Sstevel@tonic-gate uint_t bin, mtype, i; 12617c478bd9Sstevel@tonic-gate int mnode; 12627c478bd9Sstevel@tonic-gate 12637c478bd9Sstevel@tonic-gate /* default to freelist/head */ 12647c478bd9Sstevel@tonic-gate ASSERT((flags & (PG_CACHE_LIST | PG_LIST_TAIL)) == 0); 12657c478bd9Sstevel@tonic-gate 12667c478bd9Sstevel@tonic-gate CHK_LPG(pp, pp->p_szc); 1267affbd3ccSkchow VM_STAT_ADD(vmm_vmstats.pladd_free[pp->p_szc]); 12687c478bd9Sstevel@tonic-gate 12697c478bd9Sstevel@tonic-gate bin = PP_2_BIN(pp); 12707c478bd9Sstevel@tonic-gate mnode = PP_2_MEM_NODE(pp); 12717c478bd9Sstevel@tonic-gate mtype = PP_2_MTYPE(pp); 12727c478bd9Sstevel@tonic-gate 12737c478bd9Sstevel@tonic-gate if (flags & PG_LIST_ISINIT) { 12747c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == mmu_page_sizes - 1); 12757c478bd9Sstevel@tonic-gate page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp); 12767c478bd9Sstevel@tonic-gate ASSERT(!PP_ISNORELOC(pp)); 1277affbd3ccSkchow PLCNT_INCR(pp, mnode, mtype, pp->p_szc, flags); 12787c478bd9Sstevel@tonic-gate } else { 12797c478bd9Sstevel@tonic-gate 12807c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc != 0 && pp->p_szc < mmu_page_sizes); 12817c478bd9Sstevel@tonic-gate 12827c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); 12837c478bd9Sstevel@tonic-gate 12847c478bd9Sstevel@tonic-gate mutex_enter(pcm); 12857c478bd9Sstevel@tonic-gate page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp); 1286affbd3ccSkchow page_ctr_add(mnode, mtype, pp, PG_FREE_LIST); 12877c478bd9Sstevel@tonic-gate mutex_exit(pcm); 12887c478bd9Sstevel@tonic-gate 12897c478bd9Sstevel@tonic-gate pgcnt = 
page_get_pagecnt(pp->p_szc); 12907c478bd9Sstevel@tonic-gate #if defined(__sparc) 12917c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) 12927c478bd9Sstevel@tonic-gate kcage_freemem_add(pgcnt); 12937c478bd9Sstevel@tonic-gate #endif 12947c478bd9Sstevel@tonic-gate for (i = 0; i < pgcnt; i++, pp++) 1295db874c57Selowe page_unlock_noretire(pp); 12967c478bd9Sstevel@tonic-gate } 12977c478bd9Sstevel@tonic-gate } 12987c478bd9Sstevel@tonic-gate 12997c478bd9Sstevel@tonic-gate /* 13007c478bd9Sstevel@tonic-gate * During boot, need to demote a large page to base 13017c478bd9Sstevel@tonic-gate * pagesize pages for seg_kmem for use in boot_alloc() 13027c478bd9Sstevel@tonic-gate */ 13037c478bd9Sstevel@tonic-gate void 13047c478bd9Sstevel@tonic-gate page_boot_demote(page_t *pp) 13057c478bd9Sstevel@tonic-gate { 13067c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc != 0); 13077c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 13087c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 13097c478bd9Sstevel@tonic-gate 13107c478bd9Sstevel@tonic-gate (void) page_demote(PP_2_MEM_NODE(pp), 13117c478bd9Sstevel@tonic-gate PFN_BASE(pp->p_pagenum, pp->p_szc), pp->p_szc, 0, PC_NO_COLOR, 13127c478bd9Sstevel@tonic-gate PC_FREE); 13137c478bd9Sstevel@tonic-gate 13147c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 13157c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 13167c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 13177c478bd9Sstevel@tonic-gate } 13187c478bd9Sstevel@tonic-gate 13197c478bd9Sstevel@tonic-gate /* 13207c478bd9Sstevel@tonic-gate * Take a particular page off of whatever freelist the page 13217c478bd9Sstevel@tonic-gate * is claimed to be on. 13227c478bd9Sstevel@tonic-gate * 13237c478bd9Sstevel@tonic-gate * NOTE: Only used for PAGESIZE pages. 
*/
void
page_list_sub(page_t *pp, int flags)
{
	int		bin;
	uint_t		mtype;
	int		mnode;
	kmutex_t	*pcm;
	page_t		**ppp;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(PP_ISFREE(pp));

	/*
	 * The p_szc field can only be changed by page_promote()
	 * and page_demote(). Only free pages can be promoted and
	 * demoted and the free list MUST be locked during these
	 * operations. So to prevent a race in page_list_sub()
	 * between computing which bin of the freelist lock to
	 * grab and actually grabing the lock we check again that
	 * the bin we locked is still the correct one. Notice that
	 * the p_szc field could have actually changed on us but
	 * if the bin happens to still be the same we are safe.
	 */
try_again:
	bin = PP_2_BIN(pp);
	mnode = PP_2_MEM_NODE(pp);
	pcm = PC_BIN_MUTEX(mnode, bin, flags);
	mutex_enter(pcm);
	if (PP_2_BIN(pp) != bin) {
		/* bin changed under us; retry with the new bin's mutex */
		mutex_exit(pcm);
		goto try_again;
	}
	mtype = PP_2_MTYPE(pp);

	if (flags & PG_FREE_LIST) {
		VM_STAT_ADD(vmm_vmstats.plsub_free[0]);
		ASSERT(PP_ISAGED(pp));
		ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype);
	} else {
		VM_STAT_ADD(vmm_vmstats.plsub_cache);
		ASSERT(!PP_ISAGED(pp));
		ppp = &PAGE_CACHELISTS(mnode, bin, mtype);
	}

	/*
	 * Common PAGESIZE case.
	 *
	 * Note that we locked the freelist. This prevents
	 * any page promotion/demotion operations. Therefore
	 * the p_szc will not change until we drop pcm mutex.
	 */
	if (pp->p_szc == 0) {
		page_sub(ppp, pp);
		/*
		 * Subtract counters before releasing pcm mutex
		 * to avoid race with page_freelist_coalesce.
		 */
		page_ctr_sub(mnode, mtype, pp, flags);
		mutex_exit(pcm);

#if defined(__sparc)
		if (PP_ISNORELOC(pp)) {
			kcage_freemem_sub(1);
		}
#endif
		return;
	}

	/*
	 * Large pages on the cache list are not supported.
	 */
	if (flags & PG_CACHE_LIST)
		panic("page_list_sub: large page on cachelist");

	/*
	 * Slow but rare.
	 *
	 * Somebody wants this particular page which is part
	 * of a large page. In this case we just demote the page
	 * if it's on the freelist.
	 *
	 * We have to drop pcm before locking the entire freelist.
	 * Once we have re-locked the freelist check to make sure
	 * the page hasn't already been demoted or completely
	 * freed.
	 */
	mutex_exit(pcm);
	page_freelist_lock(mnode);
	if (pp->p_szc != 0) {
		/*
		 * Large page is on freelist.
		 */
		(void) page_demote(mnode, PFN_BASE(pp->p_pagenum, pp->p_szc),
		    pp->p_szc, 0, PC_NO_COLOR, PC_FREE);
	}
	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));
	ASSERT(pp->p_szc == 0);

	/*
	 * Subtract counters before releasing pcm mutex
	 * to avoid race with page_freelist_coalesce.
	 */
	bin = PP_2_BIN(pp);
	mtype = PP_2_MTYPE(pp);
	ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype);

	page_sub(ppp, pp);
	page_ctr_sub(mnode, mtype, pp, flags);
	page_freelist_unlock(mnode);

#if defined(__sparc)
	if (PP_ISNORELOC(pp)) {
		kcage_freemem_sub(1);
	}
#endif
}

/*
 * Remove a free page that may be part of a large page.  If the page
 * (or the large page containing it) is bigger than szc, it is first
 * demoted to szc under the per-mnode freelist lock.  Freelist
 * (PG_FREE_LIST) pages only.
 */
void
page_list_sub_pages(page_t *pp, uint_t szc)
{
	kmutex_t *pcm;
	uint_t	bin, mtype;
	int	mnode;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));

	/*
	 * See comment in page_list_sub().
	 */
try_again:
	bin = PP_2_BIN(pp);
	mnode = PP_2_MEM_NODE(pp);
	pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
	mutex_enter(pcm);
	if (PP_2_BIN(pp) != bin) {
		mutex_exit(pcm);
		goto try_again;
	}

	/*
	 * If we're called with a page larger than szc or it got
	 * promoted above szc before we locked the freelist then
	 * drop pcm and re-lock entire freelist. If page still larger
	 * than szc then demote it.
	 */
	if (pp->p_szc > szc) {
		mutex_exit(pcm);
		pcm = NULL;
		page_freelist_lock(mnode);
		if (pp->p_szc > szc) {
			VM_STAT_ADD(vmm_vmstats.plsubpages_szcbig);
			(void) page_demote(mnode,
			    PFN_BASE(pp->p_pagenum, pp->p_szc),
			    pp->p_szc, szc, PC_NO_COLOR, PC_FREE);
		}
		bin = PP_2_BIN(pp);
	}
	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));
	ASSERT(pp->p_szc <= szc);
	ASSERT(pp == PP_PAGEROOT(pp));

	VM_STAT_ADD(vmm_vmstats.plsub_free[pp->p_szc]);

	mtype = PP_2_MTYPE(pp);
	if (pp->p_szc != 0) {
		page_vpsub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
		CHK_LPG(pp, pp->p_szc);
	} else {
		VM_STAT_ADD(vmm_vmstats.plsubpages_szc0);
		page_sub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
	}
	page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST);

	/* pcm == NULL means we took the whole-freelist lock above */
	if (pcm != NULL) {
		mutex_exit(pcm);
	} else {
		page_freelist_unlock(mnode);
	}

#if defined(__sparc)
	if (PP_ISNORELOC(pp)) {
		pgcnt_t	pgcnt;

		pgcnt = page_get_pagecnt(pp->p_szc);
		kcage_freemem_sub(pgcnt);
	}
#endif
}

/*
 * Add the page to the front of a linked list of pages
 * using the p_next & p_prev pointers for the list.
 * The caller is responsible for protecting the list pointers.
15227c478bd9Sstevel@tonic-gate */ 15237c478bd9Sstevel@tonic-gate void 15247c478bd9Sstevel@tonic-gate mach_page_add(page_t **ppp, page_t *pp) 15257c478bd9Sstevel@tonic-gate { 15267c478bd9Sstevel@tonic-gate if (*ppp == NULL) { 15277c478bd9Sstevel@tonic-gate pp->p_next = pp->p_prev = pp; 15287c478bd9Sstevel@tonic-gate } else { 15297c478bd9Sstevel@tonic-gate pp->p_next = *ppp; 15307c478bd9Sstevel@tonic-gate pp->p_prev = (*ppp)->p_prev; 15317c478bd9Sstevel@tonic-gate (*ppp)->p_prev = pp; 15327c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp; 15337c478bd9Sstevel@tonic-gate } 15347c478bd9Sstevel@tonic-gate *ppp = pp; 15357c478bd9Sstevel@tonic-gate } 15367c478bd9Sstevel@tonic-gate 15377c478bd9Sstevel@tonic-gate /* 15387c478bd9Sstevel@tonic-gate * Remove this page from a linked list of pages 15397c478bd9Sstevel@tonic-gate * using the p_next & p_prev pointers for the list. 15407c478bd9Sstevel@tonic-gate * 15417c478bd9Sstevel@tonic-gate * The caller is responsible for protecting the list pointers. 
15427c478bd9Sstevel@tonic-gate */ 15437c478bd9Sstevel@tonic-gate void 15447c478bd9Sstevel@tonic-gate mach_page_sub(page_t **ppp, page_t *pp) 15457c478bd9Sstevel@tonic-gate { 15467c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 15477c478bd9Sstevel@tonic-gate 15487c478bd9Sstevel@tonic-gate if (*ppp == NULL || pp == NULL) 15497c478bd9Sstevel@tonic-gate panic("mach_page_sub"); 15507c478bd9Sstevel@tonic-gate 15517c478bd9Sstevel@tonic-gate if (*ppp == pp) 15527c478bd9Sstevel@tonic-gate *ppp = pp->p_next; /* go to next page */ 15537c478bd9Sstevel@tonic-gate 15547c478bd9Sstevel@tonic-gate if (*ppp == pp) 15557c478bd9Sstevel@tonic-gate *ppp = NULL; /* page list is gone */ 15567c478bd9Sstevel@tonic-gate else { 15577c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp->p_next; 15587c478bd9Sstevel@tonic-gate pp->p_next->p_prev = pp->p_prev; 15597c478bd9Sstevel@tonic-gate } 15607c478bd9Sstevel@tonic-gate pp->p_prev = pp->p_next = pp; /* make pp a list of one */ 15617c478bd9Sstevel@tonic-gate } 15627c478bd9Sstevel@tonic-gate 15637c478bd9Sstevel@tonic-gate /* 15647c478bd9Sstevel@tonic-gate * Routine fsflush uses to gradually coalesce the free list into larger pages. 
15657c478bd9Sstevel@tonic-gate */ 15667c478bd9Sstevel@tonic-gate void 15677c478bd9Sstevel@tonic-gate page_promote_size(page_t *pp, uint_t cur_szc) 15687c478bd9Sstevel@tonic-gate { 15697c478bd9Sstevel@tonic-gate pfn_t pfn; 15707c478bd9Sstevel@tonic-gate int mnode; 15717c478bd9Sstevel@tonic-gate int idx; 15727c478bd9Sstevel@tonic-gate int new_szc = cur_szc + 1; 15737c478bd9Sstevel@tonic-gate int full = FULL_REGION_CNT(new_szc); 15747c478bd9Sstevel@tonic-gate 15757c478bd9Sstevel@tonic-gate pfn = page_pptonum(pp); 15767c478bd9Sstevel@tonic-gate mnode = PFN_2_MEM_NODE(pfn); 15777c478bd9Sstevel@tonic-gate 15787c478bd9Sstevel@tonic-gate page_freelist_lock(mnode); 15797c478bd9Sstevel@tonic-gate 15807c478bd9Sstevel@tonic-gate idx = PNUM_TO_IDX(mnode, new_szc, pfn); 15817c478bd9Sstevel@tonic-gate if (PAGE_COUNTERS(mnode, new_szc, idx) == full) 15827c478bd9Sstevel@tonic-gate (void) page_promote(mnode, pfn, new_szc, PC_FREE); 15837c478bd9Sstevel@tonic-gate 15847c478bd9Sstevel@tonic-gate page_freelist_unlock(mnode); 15857c478bd9Sstevel@tonic-gate } 15867c478bd9Sstevel@tonic-gate 15877c478bd9Sstevel@tonic-gate static uint_t page_promote_err; 15887c478bd9Sstevel@tonic-gate static uint_t page_promote_noreloc_err; 15897c478bd9Sstevel@tonic-gate 15907c478bd9Sstevel@tonic-gate /* 15917c478bd9Sstevel@tonic-gate * Create a single larger page (of szc new_szc) from smaller contiguous pages 15927c478bd9Sstevel@tonic-gate * for the given mnode starting at pfnum. Pages involved are on the freelist 15937c478bd9Sstevel@tonic-gate * before the call and may be returned to the caller if requested, otherwise 15947c478bd9Sstevel@tonic-gate * they will be placed back on the freelist. 15957c478bd9Sstevel@tonic-gate * If flags is PC_ALLOC, then the large page will be returned to the user in 15967c478bd9Sstevel@tonic-gate * a state which is consistent with a page being taken off the freelist. 
If 15977c478bd9Sstevel@tonic-gate * we failed to lock the new large page, then we will return NULL to the 15987c478bd9Sstevel@tonic-gate * caller and put the large page on the freelist instead. 15997c478bd9Sstevel@tonic-gate * If flags is PC_FREE, then the large page will be placed on the freelist, 16007c478bd9Sstevel@tonic-gate * and NULL will be returned. 16017c478bd9Sstevel@tonic-gate * The caller is responsible for locking the freelist as well as any other 16027c478bd9Sstevel@tonic-gate * accounting which needs to be done for a returned page. 16037c478bd9Sstevel@tonic-gate * 16047c478bd9Sstevel@tonic-gate * RFE: For performance pass in pp instead of pfnum so 16057c478bd9Sstevel@tonic-gate * we can avoid excessive calls to page_numtopp_nolock(). 16067c478bd9Sstevel@tonic-gate * This would depend on an assumption that all contiguous 16077c478bd9Sstevel@tonic-gate * pages are in the same memseg so we can just add/dec 16087c478bd9Sstevel@tonic-gate * our pp. 16097c478bd9Sstevel@tonic-gate * 16107c478bd9Sstevel@tonic-gate * Lock ordering: 16117c478bd9Sstevel@tonic-gate * 16127c478bd9Sstevel@tonic-gate * There is a potential but rare deadlock situation 16137c478bd9Sstevel@tonic-gate * for page promotion and demotion operations. 
The problem 16147c478bd9Sstevel@tonic-gate * is there are two paths into the freelist manager and 16157c478bd9Sstevel@tonic-gate * they have different lock orders: 16167c478bd9Sstevel@tonic-gate * 16177c478bd9Sstevel@tonic-gate * page_create() 16187c478bd9Sstevel@tonic-gate * lock freelist 16197c478bd9Sstevel@tonic-gate * page_lock(EXCL) 16207c478bd9Sstevel@tonic-gate * unlock freelist 16217c478bd9Sstevel@tonic-gate * return 16227c478bd9Sstevel@tonic-gate * caller drops page_lock 16237c478bd9Sstevel@tonic-gate * 16247c478bd9Sstevel@tonic-gate * page_free() and page_reclaim() 16257c478bd9Sstevel@tonic-gate * caller grabs page_lock(EXCL) 16267c478bd9Sstevel@tonic-gate * 16277c478bd9Sstevel@tonic-gate * lock freelist 16287c478bd9Sstevel@tonic-gate * unlock freelist 16297c478bd9Sstevel@tonic-gate * drop page_lock 16307c478bd9Sstevel@tonic-gate * 16317c478bd9Sstevel@tonic-gate * What prevents a thread in page_create() from deadlocking 16327c478bd9Sstevel@tonic-gate * with a thread freeing or reclaiming the same page is the 16337c478bd9Sstevel@tonic-gate * page_trylock() in page_get_freelist(). If the trylock fails 16347c478bd9Sstevel@tonic-gate * it skips the page. 16357c478bd9Sstevel@tonic-gate * 16367c478bd9Sstevel@tonic-gate * The lock ordering for promotion and demotion is the same as 16377c478bd9Sstevel@tonic-gate * for page_create(). Since the same deadlock could occur during 16387c478bd9Sstevel@tonic-gate * page promotion and freeing or reclaiming of a page on the 16397c478bd9Sstevel@tonic-gate * cache list we might have to fail the operation and undo what 16407c478bd9Sstevel@tonic-gate * have done so far. Again this is rare. 
*/
page_t *
page_promote(int mnode, pfn_t pfnum, uchar_t new_szc, int flags)
{
	page_t	*pp, *pplist, *tpp, *start_pp;
	pgcnt_t	new_npgs, npgs;
	uint_t	bin;
	pgcnt_t	tmpnpgs, pages_left;
	uint_t	mtype;
	uint_t	noreloc;
	uint_t	i;
	int	which_list;
	ulong_t	index;
	kmutex_t *phm;

	/*
	 * General algorithm:
	 * Find the starting page
	 * Walk each page struct removing it from the freelist,
	 * and linking it to all the other pages removed.
	 * Once all pages are off the freelist,
	 * walk the list, modifying p_szc to new_szc and what
	 * ever other info needs to be done to create a large free page.
	 * According to the flags, either return the page or put it
	 * on the freelist.
	 */

	start_pp = page_numtopp_nolock(pfnum);
	ASSERT(start_pp && (start_pp->p_pagenum == pfnum));
	new_npgs = page_get_pagecnt(new_szc);
	ASSERT(IS_P2ALIGNED(pfnum, new_npgs));

	/*
	 * Loop through smaller pages to confirm that all pages
	 * give the same result for PP_ISNORELOC().
	 * We can check this reliably here as the protocol for setting
	 * P_NORELOC requires pages to be taken off the free list first.
	 */
	for (i = 0, pp = start_pp; i < new_npgs; i++, pp++) {
		if (pp == start_pp) {
			/* First page, set requirement. */
			noreloc = PP_ISNORELOC(pp);
		} else if (noreloc != PP_ISNORELOC(pp)) {
			/* Mixed NORELOC/RELOC region: refuse to promote. */
			page_promote_noreloc_err++;
			page_promote_err++;
			return (NULL);
		}
	}

	pages_left = new_npgs;
	pplist = NULL;
	pp = start_pp;

	/* Loop around coalescing the smaller pages into a big page. */
	while (pages_left) {
		/*
		 * Remove from the freelist.
		 */
		ASSERT(PP_ISFREE(pp));
		bin = PP_2_BIN(pp);
		ASSERT(mnode == PP_2_MEM_NODE(pp));
		mtype = PP_2_MTYPE(pp);
		if (PP_ISAGED(pp)) {

			/*
			 * PG_FREE_LIST
			 */
			if (pp->p_szc) {
				page_vpsub(&PAGE_FREELISTS(mnode,
				    pp->p_szc, bin, mtype), pp);
			} else {
				mach_page_sub(&PAGE_FREELISTS(mnode, 0,
				    bin, mtype), pp);
			}
			which_list = PG_FREE_LIST;
		} else {
			ASSERT(pp->p_szc == 0);

			/*
			 * PG_CACHE_LIST
			 *
			 * Since this page comes from the
			 * cachelist, we must destroy the
			 * vnode association.
			 */
			if (!page_trylock(pp, SE_EXCL)) {
				goto fail_promote;
			}

			/*
			 * We need to be careful not to deadlock
			 * with another thread in page_lookup().
			 * The page_lookup() thread could be holding
			 * the same phm that we need if the two
			 * pages happen to hash to the same phm lock.
			 * At this point we have locked the entire
			 * freelist and page_lookup() could be trying
			 * to grab a freelist lock.
			 */
			index = PAGE_HASH_FUNC(pp->p_vnode, pp->p_offset);
			phm = PAGE_HASH_MUTEX(index);
			if (!mutex_tryenter(phm)) {
				page_unlock_noretire(pp);
				goto fail_promote;
			}

			mach_page_sub(&PAGE_CACHELISTS(mnode, bin, mtype), pp);
			page_hashout(pp, phm);
			mutex_exit(phm);
			PP_SETAGED(pp);
			page_unlock_noretire(pp);
			which_list = PG_CACHE_LIST;
		}
		page_ctr_sub(mnode, mtype, pp, which_list);

		/*
		 * Concatenate the smaller page(s) onto
		 * the large page list.
		 */
		tmpnpgs = npgs = page_get_pagecnt(pp->p_szc);
		pages_left -= npgs;
		tpp = pp;
		while (npgs--) {
			tpp->p_szc = new_szc;
			tpp = tpp->p_next;
		}
		page_list_concat(&pplist, &pp);
		pp += tmpnpgs;
	}
	CHK_LPG(pplist, new_szc);

	/*
	 * return the page to the user if requested
	 * in the properly locked state.
	 */
	if (flags == PC_ALLOC && (page_trylock_cons(pplist, SE_EXCL))) {
		return (pplist);
	}

	/*
	 * Otherwise place the new large page on the freelist
	 */
	bin = PP_2_BIN(pplist);
	mnode = PP_2_MEM_NODE(pplist);
	mtype = PP_2_MTYPE(pplist);
	page_vpadd(&PAGE_FREELISTS(mnode, new_szc, bin, mtype), pplist);

	page_ctr_add(mnode, mtype, pplist, PG_FREE_LIST);
	return (NULL);

fail_promote:
	/*
	 * A thread must have still been freeing or
	 * reclaiming the page on the cachelist.
	 * To prevent a deadlock undo what we have
	 * done sofar and return failure. This
	 * situation can only happen while promoting
	 * PAGESIZE pages.
	 */
	page_promote_err++;
	while (pplist) {
		pp = pplist;
		mach_page_sub(&pplist, pp);
		pp->p_szc = 0;
		bin = PP_2_BIN(pp);
		mtype = PP_2_MTYPE(pp);
		mach_page_add(&PAGE_FREELISTS(mnode, 0, bin, mtype), pp);
		page_ctr_add(mnode, mtype, pp, PG_FREE_LIST);
	}
	return (NULL);

}

/*
 * Break up a large page into smaller size pages.
 * Pages involved are on the freelist before the call and may
 * be returned to the caller if requested, otherwise they will
 * be placed back on the freelist.
 * The caller is responsible for locking the freelist as well as any other
 * accounting which needs to be done for a returned page.
 * If flags is not PC_ALLOC, the color argument is ignored, and thus
 * technically, any value may be passed in but PC_NO_COLOR is the standard
 * which should be followed for clarity's sake.
 */
page_t *
page_demote(int mnode, pfn_t pfnum, uchar_t cur_szc, uchar_t new_szc,
    int color, int flags)
{
	page_t	*pp, *pplist, *npplist;
	pgcnt_t	npgs, n;
	uint_t	bin;
	uint_t	mtype;
	page_t	*ret_pp = NULL;	/* page handed back when flags == PC_ALLOC */

	ASSERT(cur_szc != 0);
	ASSERT(new_szc < cur_szc);

	/* pfnum identifies the base constituent page of the large page */
	pplist = page_numtopp_nolock(pfnum);
	ASSERT(pplist != NULL);

	ASSERT(pplist->p_szc == cur_szc);

	/* Pull the whole large page off its freelist and update counters */
	bin = PP_2_BIN(pplist);
	ASSERT(mnode == PP_2_MEM_NODE(pplist));
	mtype = PP_2_MTYPE(pplist);
	page_vpsub(&PAGE_FREELISTS(mnode, cur_szc, bin, mtype), pplist);

	CHK_LPG(pplist, cur_szc);
	page_ctr_sub(mnode, mtype, pplist, PG_FREE_LIST);

	/*
	 * Number of PAGESIZE pages for smaller new_szc
	 * page.
	 */
	npgs = page_get_pagecnt(new_szc);

	while (pplist) {
		pp = pplist;

		ASSERT(pp->p_szc == cur_szc);

		/*
		 * We either break it up into PAGESIZE pages or larger.
		 */
		if (npgs == 1) {	/* PAGESIZE case */
			mach_page_sub(&pplist, pp);
			ASSERT(pp->p_szc == cur_szc);
			ASSERT(new_szc == 0);
			ASSERT(mnode == PP_2_MEM_NODE(pp));
			pp->p_szc = new_szc;
			bin = PP_2_BIN(pp);
			/*
			 * If this piece has the requested color and the
			 * caller asked for a page (PC_ALLOC), try to lock
			 * it and hand it back; otherwise put it on the
			 * PAGESIZE freelist.
			 */
			if ((bin == color) && (flags == PC_ALLOC) &&
			    (ret_pp == NULL) &&
			    page_trylock_cons(pp, SE_EXCL)) {
				ret_pp = pp;
			} else {
				mtype = PP_2_MTYPE(pp);
				mach_page_add(&PAGE_FREELISTS(mnode, 0, bin,
				    mtype), pp);
				page_ctr_add(mnode, mtype, pp, PG_FREE_LIST);
			}
		} else {

			/*
			 * Break down into smaller lists of pages.
			 */
			page_list_break(&pplist, &npplist, npgs);

			/* Retag every constituent page with the new size */
			pp = pplist;
			n = npgs;
			while (n--) {
				ASSERT(pp->p_szc == cur_szc);
				pp->p_szc = new_szc;
				pp = pp->p_next;
			}

			CHK_LPG(pplist, new_szc);

			bin = PP_2_BIN(pplist);
			ASSERT(mnode == PP_2_MEM_NODE(pp));
			/* Same hand-back-or-refile decision as above */
			if ((bin == color) && (flags == PC_ALLOC) &&
			    (ret_pp == NULL) &&
			    page_trylock_cons(pp, SE_EXCL)) {
				ret_pp = pp;
			} else {
				mtype = PP_2_MTYPE(pp);
				page_vpadd(&PAGE_FREELISTS(mnode, new_szc,
				    bin, mtype), pplist);

				page_ctr_add(mnode, mtype, pplist,
				    PG_FREE_LIST);
			}
			pplist = npplist;
		}
	}
	return (ret_pp);
}

/*
 * When set non-zero, page_freelist_coalesce() and
 * page_freelist_coalesce_all() return immediately without doing any work.
 */
int mpss_coalesce_disable = 0;

/*
 * Coalesce free pages into a page of the given szc and color if possible.
 * Return the pointer to the page created, otherwise, return NULL.
 */
static page_t *
page_freelist_coalesce(int mnode, uchar_t szc, int color)
{
	int	r;		/* region size */
	int	idx, full, i;
	pfn_t	pfnum;
	size_t	len;
	size_t	buckets_to_check;
	pgcnt_t	cands;		/* estimated count of candidate regions */
	page_t	*ret_pp;
	int	color_stride;

	VM_STAT_ADD(vmm_vmstats.page_ctrs_coalesce);

	if (mpss_coalesce_disable) {
		return (NULL);
	}

	r = szc;
	/* Bail out early if the counters say no region of this color is full */
	PGCTRS_CANDS_GETVALUECOLOR(mnode, r, color, cands);
	if (cands == 0) {
		VM_STAT_ADD(vmm_vmstats.page_ctrs_cands_skip);
		return (NULL);
	}
	full = FULL_REGION_CNT(r);
	/* Distance between successive counter entries of the same color */
	color_stride = (szc) ? page_convert_color(0, szc, page_colors - 1) + 1 :
	    page_colors;

	/* Prevent page_counters dynamic memory from being freed */
	rw_enter(&page_ctrs_rwlock[mnode], RW_READER);
	len = PAGE_COUNTERS_ENTRIES(mnode, r);
	buckets_to_check = len / color_stride;
	/* Resume the scan where the last successful coalesce left off */
	idx = PAGE_COUNTERS_CURRENT_COLOR(mnode, r, color);
	ASSERT((idx % color_stride) == color);
	idx += color_stride;
	if (idx >= len)
		idx = color;
	for (i = 0; i < buckets_to_check; i++) {
		if (PAGE_COUNTERS(mnode, r, idx) == full) {
			pfnum = IDX_TO_PNUM(mnode, r, idx);
			ASSERT(pfnum >= mem_node_config[mnode].physbase &&
			    pfnum < mem_node_config[mnode].physmax);
			/*
			 * RFE: For performance maybe we can do something less
			 * brutal than locking the entire freelist. So far
			 * this doesn't seem to be a performance problem?
			 */
			page_freelist_lock(mnode);
			/* Re-check under the lock; the region may have raced */
			if (PAGE_COUNTERS(mnode, r, idx) != full) {
				VM_STAT_ADD(vmm_vmstats.page_ctrs_changed);
				goto skip_this_one;
			}
			ret_pp = page_promote(mnode, pfnum, r, PC_ALLOC);
			if (ret_pp != NULL) {
				PAGE_COUNTERS_CURRENT_COLOR(mnode, r, color) =
				    idx;
				page_freelist_unlock(mnode);
				rw_exit(&page_ctrs_rwlock[mnode]);
#if defined(__sparc)
				/* Keep the cage free-memory count accurate */
				if (PP_ISNORELOC(ret_pp)) {
					pgcnt_t npgs;

					npgs = page_get_pagecnt(ret_pp->p_szc);
					kcage_freemem_sub(npgs);
				}
#endif
				return (ret_pp);
			}
skip_this_one:
			page_freelist_unlock(mnode);
			/*
			 * No point looking for another page if we've
			 * already tried all of the ones that
			 * page_ctr_cands indicated. Stash off where we left
			 * off.
			 * Note: this is not exact since we don't hold the
			 * page_freelist_locks before we initially get the
			 * value of cands for performance reasons, but should
			 * be a decent approximation.
			 */
			if (--cands == 0) {
				PAGE_COUNTERS_CURRENT_COLOR(mnode, r, color) =
				    idx;
				break;
			}
		}
		idx += color_stride;
		if (idx >= len)
			idx = color;
	}
	rw_exit(&page_ctrs_rwlock[mnode]);
	VM_STAT_ADD(vmm_vmstats.page_ctrs_failed);
	return (NULL);
}

/*
 * For the given mnode, promote as many small pages to large pages as possible.
 */
void
page_freelist_coalesce_all(int mnode)
{
	int	r;		/* region size */
	int	idx, full;
	pfn_t	pfnum;
	size_t	len;

	VM_STAT_ADD(vmm_vmstats.page_ctrs_coalesce_all);

	if (mpss_coalesce_disable) {
		return;
	}

	/*
	 * Lock the entire freelist and coalesce what we can.
	 *
	 * Always promote to the largest page possible
	 * first to reduce the number of page promotions.
	 */
	rw_enter(&page_ctrs_rwlock[mnode], RW_READER);
	page_freelist_lock(mnode);
	for (r = mmu_page_sizes - 1; r > 0; r--) {
		pgcnt_t cands;

		PGCTRS_CANDS_GETVALUE(mnode, r, cands);
		if (cands == 0) {
			VM_STAT_ADD(vmm_vmstats.page_ctrs_cands_skip_all);
			continue;
		}

		full = FULL_REGION_CNT(r);
		len = PAGE_COUNTERS_ENTRIES(mnode, r);

		/* Promote every region whose counter shows it fully free */
		for (idx = 0; idx < len; idx++) {
			if (PAGE_COUNTERS(mnode, r, idx) == full) {
				pfnum = IDX_TO_PNUM(mnode, r, idx);
				ASSERT(pfnum >=
				    mem_node_config[mnode].physbase &&
				    pfnum <
				    mem_node_config[mnode].physmax);
				(void) page_promote(mnode, pfnum, r, PC_FREE);
			}
		}
	}
	page_freelist_unlock(mnode);
	rw_exit(&page_ctrs_rwlock[mnode]);
}

/*
 * This is where all policies for moving pages around
 * to different page size free lists is implemented.
 * Returns 1 on success, 0 on failure.
 *
 * So far these are the priorities for this algorithm in descending
 * order:
 *
 *	1) When servicing a request try to do so with a free page
 *	   from next size up. Helps defer fragmentation as long
 *	   as possible.
 *
 *	2) Page coalesce on demand. Only when a freelist
 *	   larger than PAGESIZE is empty and step 1
 *	   will not work since all larger size lists are
 *	   also empty.
 *
 * If pfnhi is non-zero, search for large page with pfn range less than pfnhi.
 */
page_t *
page_freelist_fill(uchar_t szc, int color, int mnode, int mtype, pfn_t pfnhi)
{
	uchar_t nszc = szc + 1;
	int	bin;
	page_t	*pp, *firstpp;
	page_t	*ret_pp = NULL;

	ASSERT(szc < mmu_page_sizes);

	VM_STAT_ADD(vmm_vmstats.pff_req[szc]);
	/*
	 * First try to break up a larger page to fill
	 * current size freelist.
	 */
	while (nszc < mmu_page_sizes) {
		/*
		 * If page found then demote it.
		 */
		bin = page_convert_color(szc, nszc, color);
		if (PAGE_FREELISTS(mnode, nszc, bin, mtype)) {
			page_freelist_lock(mnode);
			firstpp = pp = PAGE_FREELISTS(mnode, nszc, bin, mtype);

			/*
			 * If pfnhi is not PFNNULL, look for large page below
			 * pfnhi. PFNNULL signifies no pfn requirement.
			 */
			if (pfnhi != PFNNULL && pp->p_pagenum >= pfnhi) {
				/* walk the circular list for a qualifying pfn */
				do {
					pp = pp->p_vpnext;
					if (pp == firstpp) {
						pp = NULL;
						break;
					}
				} while (pp->p_pagenum >= pfnhi);
			}
			if (pp) {
				ASSERT(pp->p_szc == nszc);
				VM_STAT_ADD(vmm_vmstats.pff_demote[nszc]);
				ret_pp = page_demote(mnode, pp->p_pagenum,
				    pp->p_szc, szc, color, PC_ALLOC);
				if (ret_pp) {
					page_freelist_unlock(mnode);
#if defined(__sparc)
					/* keep cage free count accurate */
					if (PP_ISNORELOC(ret_pp)) {
						pgcnt_t npgs;

						npgs = page_get_pagecnt(
						    ret_pp->p_szc);
						kcage_freemem_sub(npgs);
					}
#endif
					return (ret_pp);
				}
			}
			page_freelist_unlock(mnode);
		}
		nszc++;
	}

	/*
	 * Ok that didn't work. Time to coalesce.
	 */
	if (szc != 0) {
		ret_pp = page_freelist_coalesce(mnode, szc, color);
		VM_STAT_COND_ADD(ret_pp, vmm_vmstats.pff_coalok[szc]);
	}

	return (ret_pp);
}

/*
 * Helper routine used only by the freelist code to lock
 * a page. If the page is a large page then it succeeds in
 * locking all the constituent pages or none at all.
 * Returns 1 on success, 0 on failure.
 */
static int
page_trylock_cons(page_t *pp, se_t se)
{
	page_t	*tpp, *first_pp = pp;

	/*
	 * Fail if can't lock first or only page.
	 */
	if (!page_trylock(pp, se)) {
		return (0);
	}

	/*
	 * PAGESIZE: common case.
	 */
	if (pp->p_szc == 0) {
		return (1);
	}

	/*
	 * Large page case.
	 */
	tpp = pp->p_next;
	while (tpp != pp) {
		if (!page_trylock(tpp, se)) {
			/*
			 * On failure unlock what we
			 * have locked so far.
			 */
			while (first_pp != tpp) {
				page_unlock_noretire(first_pp);
				first_pp = first_pp->p_next;
			}
			return (0);
		}
		tpp = tpp->p_next;
	}
	return (1);
}

/*
 * Attempt to take a page of size code 'szc' off mnode's free lists,
 * starting with color bin 'bin' and cycling through alternate bins
 * (and, via MTYPE_NEXT, alternate mtypes) as 'flags' permit.  Falls
 * back to page_freelist_fill() to demote or coalesce pages of other
 * sizes.  On success the page is returned locked SE_EXCL (via
 * page_trylock_cons); on failure NULL is returned.
 */
page_t *
page_get_mnode_freelist(int mnode, uint_t bin, int mtype, uchar_t szc,
    uint_t flags)
{
	kmutex_t	*pcm;
	int		i, fill_tried, fill_marker;
	page_t		*pp, *first_pp;
	uint_t		bin_marker;
	int		colors, cpucolors;
	uchar_t		nszc;
	uint_t		nszc_color_shift;
	int		nwaybins = 0, nwaycnt;

	ASSERT(szc < mmu_page_sizes);

	VM_STAT_ADD(vmm_vmstats.pgmf_alloc[szc]);

	MTYPE_START(mnode, mtype, flags);
	if (mtype < 0) {	/* mnode does not have memory in mtype range */
		VM_STAT_ADD(vmm_vmstats.pgmf_allocempty[szc]);
		return (NULL);
	}

	/*
	 * Set how many physical colors for this page size.
	 */
	colors = (szc) ? page_convert_color(0, szc, page_colors - 1) + 1 :
	    page_colors;

	nszc = MIN(szc + 1, mmu_page_sizes - 1);
	nszc_color_shift = page_get_shift(nszc) - page_get_shift(szc);

	/* cpu_page_colors is non-zero if a page color may be in > 1 bin */
	cpucolors = cpu_page_colors;

	/*
	 * adjust cpucolors to possibly check additional 'equivalent' bins
	 * to try to minimize fragmentation of large pages by delaying calls
	 * to page_freelist_fill.
	 */
	if (colorequiv > 1) {
		int equivcolors = colors / colorequiv;

		if (equivcolors && (cpucolors == 0 || equivcolors < cpucolors))
			cpucolors = equivcolors;
	}

	ASSERT(colors <= page_colors);
	ASSERT(colors);
	ASSERT((colors & (colors - 1)) == 0);

	ASSERT(bin < colors);

	/*
	 * Only hold one freelist lock at a time, that way we
	 * can start anywhere and not have to worry about lock
	 * ordering.
	 */
big_try_again:
	fill_tried = 0;
	nwaycnt = 0;
	for (i = 0; i <= colors; i++) {
try_again:
		ASSERT(bin < colors);
		if (PAGE_FREELISTS(mnode, szc, bin, mtype)) {
			pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
			mutex_enter(pcm);
			pp = PAGE_FREELISTS(mnode, szc, bin, mtype);
			if (pp != NULL) {
				/*
				 * These were set before the page
				 * was put on the free list,
				 * they must still be set.
				 */
				ASSERT(PP_ISFREE(pp));
				ASSERT(PP_ISAGED(pp));
				ASSERT(pp->p_vnode == NULL);
				ASSERT(pp->p_hash == NULL);
				ASSERT(pp->p_offset == (u_offset_t)-1);
				ASSERT(pp->p_szc == szc);
				ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode);

				/*
				 * Walk down the hash chain.
				 * 8k pages are linked on p_next
				 * and p_prev fields. Large pages
				 * are a contiguous group of
				 * constituent pages linked together
				 * on their p_next and p_prev fields.
				 * The large pages are linked together
				 * on the hash chain using p_vpnext
				 * p_vpprev of the base constituent
				 * page of each large page.
				 */
				first_pp = pp;
				while (!page_trylock_cons(pp, SE_EXCL)) {
					if (szc == 0) {
						pp = pp->p_next;
					} else {
						pp = pp->p_vpnext;
					}

					ASSERT(PP_ISFREE(pp));
					ASSERT(PP_ISAGED(pp));
					ASSERT(pp->p_vnode == NULL);
					ASSERT(pp->p_hash == NULL);
					ASSERT(pp->p_offset == (u_offset_t)-1);
					ASSERT(pp->p_szc == szc);
					ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) ==
					    mnode);

					/* wrapped around: every page busy */
					if (pp == first_pp) {
						pp = NULL;
						break;
					}
				}

				if (pp) {
					ASSERT(mtype == PP_2_MTYPE(pp));
					ASSERT(pp->p_szc == szc);
					if (szc == 0) {
						page_sub(&PAGE_FREELISTS(mnode,
						    szc, bin, mtype), pp);
					} else {
						page_vpsub(&PAGE_FREELISTS(
						    mnode, szc, bin, mtype),
						    pp);
						CHK_LPG(pp, szc);
					}
					page_ctr_sub(mnode, mtype, pp,
					    PG_FREE_LIST);

					if ((PP_ISFREE(pp) == 0) ||
					    (PP_ISAGED(pp) == 0))
						panic("free page is not. pp %p",
						    (void *)pp);
					mutex_exit(pcm);

#if defined(__sparc)
					ASSERT(!kcage_on || PP_ISNORELOC(pp) ||
					    (flags & PG_NORELOC) == 0);

					if (PP_ISNORELOC(pp)) {
						pgcnt_t	npgs;

						npgs = page_get_pagecnt(szc);
						kcage_freemem_sub(npgs);
					}
#endif
					VM_STAT_ADD(vmm_vmstats.
					    pgmf_allocok[szc]);
					return (pp);
				}
			}
			mutex_exit(pcm);
		}

		/*
		 * Wow! The initial bin is empty.
		 * If specific color is needed, check if page color may be
		 * in other bins. cpucolors is:
		 *	0	if the colors for this cpu is equal to
		 *		page_colors. This means that pages with a
		 *		particular color are in a single bin.
		 *	-1	if colors of cpus (cheetah+) are heterogenous.
		 *		Need to first determine the colors for the
		 *		current cpu.
		 *	>0	colors of all cpus are homogenous and
		 *		< page_colors
		 */

		if ((flags & PG_MATCH_COLOR) && (cpucolors != 0)) {
			if (!nwaybins) {
				/*
				 * cpucolors is negative if ecache setsizes
				 * are heterogenous. determine colors for this
				 * particular cpu.
				 */
				if (cpucolors < 0) {
					cpucolors = CPUSETSIZE() / MMU_PAGESIZE;
					ASSERT(cpucolors > 0);
					nwaybins = colors / cpucolors;
				} else {
					nwaybins = colors / cpucolors;
					ASSERT(szc > 0 || nwaybins > 1);
				}
				if (nwaybins < 2)
					cpucolors = 0;
			}

			/* try the next 'equivalent' bin for this color */
			if (cpucolors && (nwaycnt + 1 <= nwaybins)) {
				nwaycnt++;
				bin = (bin + (colors / nwaybins)) &
				    (colors - 1);
				if (nwaycnt < nwaybins) {
					goto try_again;
				}
			}
			/* back to initial color if fall-thru */
		}

		/*
		 * color bins are all empty if color match. Try and satisfy
		 * the request by breaking up or coalescing pages from
		 * a different size freelist of the correct color that
		 * satisfies the ORIGINAL color requested. If that
		 * fails then try pages of the same size but different
		 * colors assuming we are not called with
		 * PG_MATCH_COLOR.
		 */
		if (!fill_tried) {
			fill_tried = 1;
			fill_marker = bin >> nszc_color_shift;
			pp = page_freelist_fill(szc, bin, mnode, mtype,
			    PFNNULL);
			if (pp != NULL) {
				return (pp);
			}
		}

		if (flags & PG_MATCH_COLOR)
			break;

		/*
		 * Select next color bin to try.
		 */
		if (szc == 0) {
			/*
			 * PAGESIZE page case.
			 */
			if (i == 0) {
				bin = (bin + BIN_STEP) & page_colors_mask;
				bin_marker = bin;
			} else {
				bin = (bin + vac_colors) & page_colors_mask;
				if (bin == bin_marker) {
					bin = (bin + 1) & page_colors_mask;
					bin_marker = bin;
				}
			}
		} else {
			/*
			 * Large page case.
			 */
			bin = (bin + 1) & (colors - 1);
		}
		/*
		 * If bin advanced to the next color bin of the
		 * next larger pagesize, there is a chance the fill
		 * could succeed.
		 */
		if (fill_marker != (bin >> nszc_color_shift))
			fill_tried = 0;
	}

	/* if allowed, cycle through additional mtypes */
	MTYPE_NEXT(mnode, mtype, flags);
	if (mtype >= 0)
		goto big_try_again;

	VM_STAT_ADD(vmm_vmstats.pgmf_allocfailed[szc]);

	return (NULL);
}


/*
 * Returns the count of free pages for 'pp' with size code 'szc'.
24757c478bd9Sstevel@tonic-gate * Note: This function does not return an exact value as the page freelist 24767c478bd9Sstevel@tonic-gate * locks are not held and thus the values in the page_counters may be 24777c478bd9Sstevel@tonic-gate * changing as we walk through the data. 24787c478bd9Sstevel@tonic-gate */ 24797c478bd9Sstevel@tonic-gate static int 24807c478bd9Sstevel@tonic-gate page_freecnt(int mnode, page_t *pp, uchar_t szc) 24817c478bd9Sstevel@tonic-gate { 24827c478bd9Sstevel@tonic-gate pgcnt_t pgfree; 24837c478bd9Sstevel@tonic-gate pgcnt_t cnt; 24847c478bd9Sstevel@tonic-gate ssize_t r = szc; /* region size */ 24857c478bd9Sstevel@tonic-gate ssize_t idx; 24867c478bd9Sstevel@tonic-gate int i; 24877c478bd9Sstevel@tonic-gate int full, range; 24887c478bd9Sstevel@tonic-gate 24897c478bd9Sstevel@tonic-gate /* Make sure pagenum passed in is aligned properly */ 24907c478bd9Sstevel@tonic-gate ASSERT((pp->p_pagenum & (PNUM_SIZE(szc) - 1)) == 0); 24917c478bd9Sstevel@tonic-gate ASSERT(szc > 0); 24927c478bd9Sstevel@tonic-gate 24937c478bd9Sstevel@tonic-gate /* Prevent page_counters dynamic memory from being freed */ 24947c478bd9Sstevel@tonic-gate rw_enter(&page_ctrs_rwlock[mnode], RW_READER); 24957c478bd9Sstevel@tonic-gate idx = PNUM_TO_IDX(mnode, r, pp->p_pagenum); 24967c478bd9Sstevel@tonic-gate cnt = PAGE_COUNTERS(mnode, r, idx); 24977c478bd9Sstevel@tonic-gate pgfree = cnt << PNUM_SHIFT(r - 1); 24987c478bd9Sstevel@tonic-gate range = FULL_REGION_CNT(szc); 24997c478bd9Sstevel@tonic-gate 25007c478bd9Sstevel@tonic-gate /* Check for completely full region */ 25017c478bd9Sstevel@tonic-gate if (cnt == range) { 25027c478bd9Sstevel@tonic-gate rw_exit(&page_ctrs_rwlock[mnode]); 25037c478bd9Sstevel@tonic-gate return (pgfree); 25047c478bd9Sstevel@tonic-gate } 25057c478bd9Sstevel@tonic-gate 25067c478bd9Sstevel@tonic-gate while (--r > 0) { 25077c478bd9Sstevel@tonic-gate idx = PNUM_TO_IDX(mnode, r, pp->p_pagenum); 25087c478bd9Sstevel@tonic-gate full = FULL_REGION_CNT(r); 
25097c478bd9Sstevel@tonic-gate for (i = 0; i < range; i++, idx++) { 25107c478bd9Sstevel@tonic-gate cnt = PAGE_COUNTERS(mnode, r, idx); 25117c478bd9Sstevel@tonic-gate /* 25127c478bd9Sstevel@tonic-gate * If cnt here is full, that means we have already 25137c478bd9Sstevel@tonic-gate * accounted for these pages earlier. 25147c478bd9Sstevel@tonic-gate */ 25157c478bd9Sstevel@tonic-gate if (cnt != full) { 25167c478bd9Sstevel@tonic-gate pgfree += (cnt << PNUM_SHIFT(r - 1)); 25177c478bd9Sstevel@tonic-gate } 25187c478bd9Sstevel@tonic-gate } 25197c478bd9Sstevel@tonic-gate range *= full; 25207c478bd9Sstevel@tonic-gate } 25217c478bd9Sstevel@tonic-gate rw_exit(&page_ctrs_rwlock[mnode]); 25227c478bd9Sstevel@tonic-gate return (pgfree); 25237c478bd9Sstevel@tonic-gate } 25247c478bd9Sstevel@tonic-gate 25257c478bd9Sstevel@tonic-gate /* 25267c478bd9Sstevel@tonic-gate * Called from page_geti_contig_pages to exclusively lock constituent pages 25277c478bd9Sstevel@tonic-gate * starting from 'spp' for page size code 'szc'. 25287c478bd9Sstevel@tonic-gate * 25297c478bd9Sstevel@tonic-gate * If 'ptcpthreshold' is set, the number of free pages needed in the 'szc' 25307c478bd9Sstevel@tonic-gate * region needs to be greater than or equal to the threshold. 
 */
static int
page_trylock_contig_pages(int mnode, page_t *spp, uchar_t szc, int flags)
{
	pgcnt_t	pgcnt = PNUM_SIZE(szc);
	pgcnt_t pgfree, i;
	page_t *pp;

	VM_STAT_ADD(vmm_vmstats.ptcp[szc]);

	/* hi-priority callers (PGI_PGCPHIPRI) skip the free-count check */
	if ((ptcpthreshold == 0) || (flags & PGI_PGCPHIPRI))
		goto skipptcpcheck;
	/*
	 * check if there are sufficient free pages available before attempting
	 * to trylock. Count is approximate as page counters can change.
	 */
	pgfree = page_freecnt(mnode, spp, szc);

	/* attempt to trylock if there are sufficient already free pages */
	if (pgfree < pgcnt/ptcpthreshold) {
		VM_STAT_ADD(vmm_vmstats.ptcpfreethresh[szc]);
		return (0);
	}

skipptcpcheck:

	/* try to exclusively lock every constituent page in order */
	for (i = 0; i < pgcnt; i++) {
		pp = &spp[i];
		if (!page_trylock(pp, SE_EXCL)) {
			VM_STAT_ADD(vmm_vmstats.ptcpfailexcl[szc]);
			/*
			 * Roll back: drop the locks acquired so far.
			 * i is unsigned, so the loop stops when the
			 * decrement wraps to (pgcnt_t)-1.
			 */
			while (--i != (pgcnt_t)-1) {
				pp = &spp[i];
				ASSERT(PAGE_EXCL(pp));
				page_unlock_noretire(pp);
			}
			return (0);
		}
		ASSERT(spp[i].p_pagenum == spp->p_pagenum + i);
		/*
		 * Give up if the page is in use as part of an equal or
		 * larger large page; the code asserts this can only be
		 * seen on the first constituent page (i == 0).
		 */
		if ((pp->p_szc > szc || (szc && pp->p_szc == szc)) &&
		    !PP_ISFREE(pp)) {
			VM_STAT_ADD(vmm_vmstats.ptcpfailszc[szc]);
			ASSERT(i == 0);
			page_unlock_noretire(pp);
			return (0);
		}
		if (PP_ISNORELOC(pp)) {
			VM_STAT_ADD(vmm_vmstats.ptcpfailcage[szc]);
			/*
			 * Kernel cage page found: unlock pages 0..i
			 * (including the current one) and fail.
			 */
			while (i != (pgcnt_t)-1) {
				pp = &spp[i];
				ASSERT(PAGE_EXCL(pp));
				page_unlock_noretire(pp);
				i--;
			}
			return (0);
		}
	}
	/* all constituent pages are now locked SE_EXCL */
	VM_STAT_ADD(vmm_vmstats.ptcpok[szc]);
	return (1);
}

/*
 * Claim large page pointed to by 'pp'. 'pp' is the starting set
 * of 'szc' constituent pages that had been locked exclusively previously.
 * Will attempt to relocate constituent pages in use.
 */
static page_t *
page_claim_contig_pages(page_t *pp, uchar_t szc, int flags)
{
	spgcnt_t pgcnt, npgs, i;
	page_t *targpp, *rpp, *hpp;
	page_t *replpp = NULL;
	page_t *pplist = NULL;

	ASSERT(pp != NULL);

	pgcnt = page_get_pagecnt(szc);
	/* walk the constituent pages, moving each onto 'pplist' */
	while (pgcnt) {
		ASSERT(PAGE_EXCL(pp));
		ASSERT(!PP_ISNORELOC(pp));
		if (PP_ISFREE(pp)) {
			/*
			 * If this is a PG_FREE_LIST page then its
			 * size code can change underneath us due to
			 * page promotion or demotion. As an optimzation
			 * use page_list_sub_pages() instead of
			 * page_list_sub().
			 */
			if (PP_ISAGED(pp)) {
				page_list_sub_pages(pp, szc);
				if (pp->p_szc == szc) {
					/* whole 'szc' page came off free */
					return (pp);
				}
				ASSERT(pp->p_szc < szc);
				npgs = page_get_pagecnt(pp->p_szc);
				hpp = pp;
				/* promote constituents to the target szc */
				for (i = 0; i < npgs; i++, pp++) {
					pp->p_szc = szc;
				}
				page_list_concat(&pplist, &hpp);
				pgcnt -= npgs;
				continue;
			}
			/* free cachelist page: hash it out and age it */
			ASSERT(!PP_ISAGED(pp));
			ASSERT(pp->p_szc == 0);
			page_list_sub(pp, PG_CACHE_LIST);
			page_hashout(pp, NULL);
			PP_SETAGED(pp);
			pp->p_szc = szc;
			page_list_concat(&pplist, &pp);
			pp++;
			pgcnt--;
			continue;
		}
		/* page is in use: needs relocation to a replacement page */
		npgs = page_get_pagecnt(pp->p_szc);

		/*
		 * page_create_wait freemem accounting done by caller of
		 * page_get_freelist and not necessary to call it prior to
		 * calling page_get_replacement_page.
		 *
		 * page_get_replacement_page can call page_get_contig_pages
		 * to acquire a large page (szc > 0); the replacement must be
		 * smaller than the contig page size to avoid looping or
		 * szc == 0 and PGI_PGCPSZC0 is set.
		 */
		if (pp->p_szc < szc || (szc == 0 && (flags & PGI_PGCPSZC0))) {
			replpp = page_get_replacement_page(pp, NULL, 0);
			if (replpp) {
				npgs = page_get_pagecnt(pp->p_szc);
				ASSERT(npgs <= pgcnt);
				targpp = pp;
			}
		}

		/*
		 * If replacement is NULL or do_page_relocate fails, fail
		 * coalescing of pages.
		 */
		if (replpp == NULL || (do_page_relocate(&targpp, &replpp, 0,
		    &npgs, NULL) != 0)) {
			/*
			 * Unlock un-processed target list
			 */
			while (pgcnt--) {
				ASSERT(PAGE_EXCL(pp));
				page_unlock_noretire(pp);
				pp++;
			}
			/*
			 * Free the processed target list.
			 */
			while (pplist) {
				pp = pplist;
				page_sub(&pplist, pp);
				ASSERT(PAGE_EXCL(pp));
				ASSERT(pp->p_szc == szc);
				ASSERT(PP_ISFREE(pp));
				ASSERT(PP_ISAGED(pp));
				pp->p_szc = 0;
				page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
				page_unlock_noretire(pp);
			}

			if (replpp != NULL)
				page_free_replacement_page(replpp);

			return (NULL);
		}
		ASSERT(pp == targpp);

		/* LINTED */
		ASSERT(hpp = pp);	/* That's right, it's an assignment */

		pp += npgs;
		pgcnt -= npgs;

		/*
		 * Relocation succeeded: mark the vacated target pages free
		 * and aged at the target szc, and release the now-populated
		 * replacement pages.
		 */
		while (npgs--) {
			ASSERT(PAGE_EXCL(targpp));
			ASSERT(!PP_ISFREE(targpp));
			ASSERT(!PP_ISNORELOC(targpp));
			PP_SETFREE(targpp);
			ASSERT(PP_ISAGED(targpp));
			ASSERT(targpp->p_szc < szc || (szc == 0 &&
			    (flags & PGI_PGCPSZC0)));
			targpp->p_szc = szc;
			targpp = targpp->p_next;

			rpp = replpp;
			ASSERT(rpp != NULL);
			page_sub(&replpp, rpp);
			ASSERT(PAGE_EXCL(rpp));
			ASSERT(!PP_ISFREE(rpp));
			page_unlock_noretire(rpp);
		}
		ASSERT(targpp == hpp);
		ASSERT(replpp == NULL);
		page_list_concat(&pplist, &targpp);
	}
	CHK_LPG(pplist, szc);
	return (pplist);
}

/*
 * Trim kernel cage from pfnlo-pfnhi and store result in lo-hi. Return code
 * of 0 means nothing left after trim.
 */

int
trimkcage(struct memseg *mseg, pfn_t *lo, pfn_t *hi, pfn_t pfnlo, pfn_t pfnhi)
{
	pfn_t kcagepfn;
	int decr;
	int rc = 0;

	if (PP_ISNORELOC(mseg->pages)) {
		if (PP_ISNORELOC(mseg->epages - 1) == 0) {

			/* lower part of this mseg inside kernel cage */
			decr = kcage_current_pfn(&kcagepfn);

			/* kernel cage may have transitioned past mseg */
			if (kcagepfn >= mseg->pages_base &&
			    kcagepfn < mseg->pages_end) {
				/* cage grows upward here (decr == 0) */
				ASSERT(decr == 0);
				*lo = kcagepfn;
				*hi = MIN(pfnhi,
				    (mseg->pages_end - 1));
				rc = 1;
			}
		}
		/* else entire mseg in the cage */
	} else {
		if (PP_ISNORELOC(mseg->epages - 1)) {

			/* upper part of this mseg inside kernel cage */
			decr = kcage_current_pfn(&kcagepfn);

			/* kernel cage may have transitioned past mseg */
			if (kcagepfn >= mseg->pages_base &&
			    kcagepfn < mseg->pages_end) {
				/* cage grows downward here (decr != 0) */
				ASSERT(decr);
				*hi = kcagepfn;
				*lo = MAX(pfnlo, mseg->pages_base);
				rc = 1;
			}
		} else {
			/* entire mseg outside of kernel cage */
			*lo = MAX(pfnlo, mseg->pages_base);
			*hi = MIN(pfnhi, (mseg->pages_end - 1));
			rc = 1;
		}
	}
	return (rc);
}

/*
 * called from page_get_contig_pages to search 'pfnlo' thru 'pfnhi' to "claim" a
 * page with size code 'szc'. Claiming such a page requires acquiring
 * exclusive locks on all constituent pages (page_trylock_contig_pages),
 * relocating pages in use and concatenating these constituent pages into a
 * large page.
 *
 * The page lists do not have such a large page and page_freelist_fill has
 * already failed to demote larger pages and/or coalesce smaller free pages.
 *
 * 'flags' may specify PG_COLOR_MATCH which would limit the search of large
 * pages with the same color as 'bin'.
 *
 * 'pfnflag' specifies the subset of the pfn range to search.
 */

static page_t *
page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
    pfn_t pfnlo, pfn_t pfnhi, pgcnt_t pfnflag)
{
	struct memseg *mseg;
	pgcnt_t	szcpgcnt = page_get_pagecnt(szc);
	pgcnt_t szcpgmask = szcpgcnt - 1;
	pfn_t	randpfn;
	page_t *pp, *randpp, *endpp;
	uint_t colors;
	pfn_t hi, lo;
	uint_t skip;

	ASSERT(szc != 0 || (flags & PGI_PGCPSZC0));

	/* range too small to ever hold one 'szc' page */
	if ((pfnhi - pfnlo) + 1 < szcpgcnt)
		return (NULL);

	ASSERT(szc < mmu_page_sizes);

	colors = (szc) ? page_convert_color(0, szc, page_colors - 1) + 1 :
	    page_colors;

	ASSERT(bin < colors);

	/*
	 * trim the pfn range to search based on pfnflag. pfnflag is set
	 * when there have been previous page_get_contig_page failures to
	 * limit the search.
	 *
	 * The high bit in pfnflag specifies the number of 'slots' in the
	 * pfn range and the remainder of pfnflag specifies which slot.
	 * For example, a value of 1010b would mean the second slot of
	 * the pfn range that has been divided into 8 slots.
	 */
	if (pfnflag > 1) {
		int slots = 1 << (highbit(pfnflag) - 1);
		int slotid = pfnflag & (slots - 1);
		pgcnt_t szcpages;
		int slotlen;

		/* align the range to 'szc' page boundaries first */
		pfnlo = P2ROUNDUP(pfnlo, szcpgcnt);
		pfnhi = pfnhi & ~(szcpgcnt - 1);

		szcpages = ((pfnhi - pfnlo) + 1) / szcpgcnt;
		slotlen = howmany(szcpages, slots);
		pfnlo = pfnlo + (((slotid * slotlen) % szcpages) * szcpgcnt);
		ASSERT(pfnlo < pfnhi);
		if (pfnhi > pfnlo + (slotlen * szcpgcnt))
			pfnhi = pfnlo + (slotlen * szcpgcnt);
	}

	/* keep the memseg list stable while scanning */
	memsegs_lock(0);

	/*
	 * loop through memsegs to look for contig page candidates
	 */

	for (mseg = memsegs; mseg != NULL; mseg = mseg->next) {
		if (pfnhi < mseg->pages_base || pfnlo >= mseg->pages_end) {
			/* no overlap */
			continue;
		}

		if (mseg->pages_end - mseg->pages_base < szcpgcnt)
			/* mseg too small */
			continue;

		/* trim off kernel cage pages from pfn range */
		if (kcage_on) {
			if (trimkcage(mseg, &lo, &hi, pfnlo, pfnhi) == 0)
				continue;
		} else {
			lo = MAX(pfnlo, mseg->pages_base);
			hi = MIN(pfnhi, (mseg->pages_end - 1));
		}

		/* round to szcpgcnt boundaries */
		lo = P2ROUNDUP(lo, szcpgcnt);
		hi = hi & ~(szcpgcnt - 1);

		if (hi <= lo)
			continue;

		/*
		 * set lo to point to the pfn for the desired bin. Large
		 * page sizes may only have a single page color
		 */
		if ((colors > 1) && (flags & PG_MATCH_COLOR)) {
			uint_t lobin;

			/*
			 * factor in colorequiv to check additional
			 * 'equivalent' bins.
			 */
			if (colorequiv > 1 && colors > colorequiv)
				colors = colors / colorequiv;

			/* determine bin that lo currently points to */
			lobin = (lo & ((szcpgcnt * colors) - 1)) / szcpgcnt;

			/*
			 * set lo to point at appropriate color and set skip
			 * to arrive at the next szc page of the same color.
			 */
			lo += ((bin - lobin) & (colors - 1)) * szcpgcnt;

			skip = colors * szcpgcnt;
		} else {
			/* check all pages starting from lo */
			skip = szcpgcnt;
		}
		if (hi <= lo)
			/* mseg cannot satisfy color request */
			continue;

		/* randomly choose a point between lo and hi to begin search */

		randpfn = (pfn_t)GETTICK();
		randpfn = ((randpfn % (hi - lo)) + lo) & ~(skip - 1);
		randpp = mseg->pages + (randpfn - mseg->pages_base);

		ASSERT(randpp->p_pagenum == randpfn);

		pp = randpp;
		endpp = mseg->pages + (hi - mseg->pages_base);

		ASSERT(randpp + szcpgcnt <= endpp);

		/*
		 * circular scan from the random start point, stepping
		 * 'skip' pages at a time, wrapping at 'endpp' back to
		 * 'lo', until we return to the start.
		 */
		do {
			ASSERT(!(pp->p_pagenum & szcpgmask));
			ASSERT((flags & PG_MATCH_COLOR) == 0 ||
			    colorequiv > 1 ||
			    PP_2_BIN(pp) == bin);
			if (page_trylock_contig_pages(mnode, pp, szc, flags)) {
				/* pages unlocked by page_claim on failure */
				if (page_claim_contig_pages(pp, szc, flags)) {
					memsegs_unlock(0);
					return (pp);
				}
			}

			pp += skip;
			if (pp >= endpp) {
				/* start from the beginning */
				pp = mseg->pages + (lo - mseg->pages_base);
				ASSERT(pp->p_pagenum == lo);
				ASSERT(pp + szcpgcnt <= endpp);
			}
		} while (pp != randpp);
	}
	memsegs_unlock(0);
	return (NULL);
}


/*
 * controlling routine that searches through physical memory in an attempt to
 * claim a large page based on the input parameters that could not be found
 * on the page free lists.
 *
 * calls page_geti_contig_pages with an initial pfn range from the mnode
 * and mtype. page_geti_contig_pages will trim off the parts of the pfn range
 * that overlaps with the kernel cage or does not match the requested page
 * color if PG_MATCH_COLOR is set. Since this search is very expensive,
 * page_geti_contig_pages may further limit the search range based on
 * previous failure counts (pgcpfailcnt[]).
 *
 * for PGI_PGCPSZC0 requests, page_get_contig_pages will relocate a base
 * pagesize page that satisfies mtype.
 */
page_t *
page_get_contig_pages(int mnode, uint_t bin, int mtype, uchar_t szc,
    uint_t flags)
{
	pfn_t		pfnlo, pfnhi;	/* contig pages pfn range */
	page_t		*pp;
	pgcnt_t		pfnflag = 0;	/* no limit on search if 0 */

	VM_STAT_ADD(vmm_vmstats.pgcp_alloc[szc]);

	/* no allocations from cage */
	flags |= PGI_NOCAGE;

	/* LINTED */
	MTYPE_START(mnode, mtype, flags);
	if (mtype < 0) {	/* mnode does not have memory in mtype range */
		VM_STAT_ADD(vmm_vmstats.pgcp_allocempty[szc]);
		return (NULL);
	}

	ASSERT(szc > 0 || (flags & PGI_PGCPSZC0));

	/* do not limit search and ignore color if hi pri */

	if (pgcplimitsearch && ((flags & PGI_PGCPHIPRI) == 0))
		pfnflag = pgcpfailcnt[szc];

	/* remove color match to improve chances */

	if (flags & PGI_PGCPHIPRI || pfnflag)
		flags &= ~PG_MATCH_COLOR;

	/* try each mtype the mnode has until a page is claimed */
	do {
		/* get pfn range based on mnode and mtype */
		MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi);

		ASSERT(pfnhi >= pfnlo);

		pp = page_geti_contig_pages(mnode, bin, szc, flags,
		    pfnlo, pfnhi, pfnflag);

		if (pp != NULL) {
			/* success: widen the next limited search */
			pfnflag = pgcpfailcnt[szc];
			if (pfnflag) {
				/* double the search size */
				pgcpfailcnt[szc] = pfnflag >> 1;
			}
			VM_STAT_ADD(vmm_vmstats.pgcp_allocok[szc]);
			return (pp);
		}
		MTYPE_NEXT(mnode, mtype, flags);
	} while (mtype >= 0);

	VM_STAT_ADD(vmm_vmstats.pgcp_allocfailed[szc]);
	return (NULL);
}


/*
 * Find the `best' page on the freelist for this (vp,off) (as,vaddr) pair.
 *
 * Does its own locking and accounting.
 * If PG_MATCH_COLOR is set, then NULL will be returned if there are no
 * pages of the proper color even if there are pages of a different color.
 *
 * Finds a page, removes it, THEN locks it.
 */

/*ARGSUSED*/
page_t *
page_get_freelist(struct vnode *vp, u_offset_t off, struct seg *seg,
	caddr_t vaddr, size_t size, uint_t flags, struct lgrp *lgrp)
{
	struct as	*as = seg->s_as;
	page_t		*pp = NULL;
	ulong_t		bin;
	uchar_t		szc;
	int		mnode;
	int		mtype;
	page_t		*(*page_get_func)(int, uint_t, int, uchar_t, uint_t);
	lgrp_mnode_cookie_t	lgrp_cookie;

	/* start with the ordinary per-mnode freelist allocator */
	page_get_func = page_get_mnode_freelist;

	/*
	 * If we aren't passed a specific lgroup, or passed a freed lgrp
	 * assume we wish to allocate near to the current thread's home.
	 */
	if (!LGRP_EXISTS(lgrp))
		lgrp = lgrp_home_lgrp();

	if (kcage_on) {
		if ((flags & (PG_NORELOC | PG_PANIC)) == PG_NORELOC &&
		    kcage_freemem < kcage_throttlefree + btop(size) &&
		    curthread != kcage_cageout_thread) {
			/*
			 * Set a "reserve" of kcage_throttlefree pages for
			 * PG_PANIC and cageout thread allocations.
			 *
			 * Everybody else has to serialize in
			 * page_create_get_something() to get a cage page, so
			 * that we don't deadlock cageout!
			 */
			return (NULL);
		}
	} else {
		/* cage is off: demand relocatable pages, skip the cage */
		flags &= ~PG_NORELOC;
		flags |= PGI_NOCAGE;
	}

	/* LINTED */
	MTYPE_INIT(mtype, vp, vaddr, flags, size);

	/*
	 * Convert size to page size code.
	 */
	if ((szc = page_szc(size)) == (uchar_t)-1)
		panic("page_get_freelist: illegal page size request");
	ASSERT(szc < mmu_page_sizes);

	VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc]);

	/* LINTED */
	AS_2_BIN(as, seg, vp, vaddr, bin);

	/* bin is for base pagesize color - convert if larger pagesize. */
	if (szc)
		bin = page_convert_color(0, szc, bin);

	/*
	 * Try to get a local page first, but try remote if we can't
	 * get a page of the right color.
	 */
pgretry:
	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		pp = page_get_func(mnode, bin, mtype, szc, flags);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgf_allocok[szc]);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
	}
	ASSERT(pp == NULL);

	/*
	 * for non-SZC0 PAGESIZE requests, check cachelist before checking
	 * remote free lists.  Caller expected to call page_get_cachelist
	 * which will check local cache lists and remote free lists.
	 */
	if (szc == 0 && ((flags & PGI_PGCPSZC0) == 0)) {
		VM_STAT_ADD(vmm_vmstats.pgf_allocdeferred);
		return (NULL);
	}

	ASSERT(szc > 0 || (flags & PGI_PGCPSZC0));

	lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);

	/*
	 * Try to get a non-local freelist page.
	 */
	LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		pp = page_get_func(mnode, bin, mtype, szc, flags);
		if (pp != NULL) {
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			VM_STAT_ADD(vmm_vmstats.pgf_allocokrem[szc]);
			return (pp);
		}
	}

	ASSERT(pp == NULL);

	/*
	 * when the cage is off chances are page_get_contig_pages() will fail
	 * to lock a large page chunk therefore when the cage is off it's not
	 * called by default.  this can be changed via /etc/system.
	 *
	 * page_get_contig_pages() also called to acquire a base pagesize page
	 * for page_create_get_something().
	 */
	if (!(flags & PG_NORELOC) && (pg_contig_disable == 0) &&
	    (kcage_on || pg_lpgcreate_nocage || szc == 0) &&
	    (page_get_func != page_get_contig_pages)) {

		/* retry the whole lgroup walk with the contig allocator */
		VM_STAT_ADD(vmm_vmstats.pgf_allocretry[szc]);
		page_get_func = page_get_contig_pages;
		goto pgretry;
	}

	if (pgcplimitsearch && page_get_func == page_get_contig_pages)
		SETPGCPFAILCNT(szc);

	VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc]);
	return (NULL);
}

/*
 * Find the `best' page on the cachelist for this (vp,off) (as,vaddr) pair.
 *
 * Does its own locking.
 * If PG_MATCH_COLOR is set, then NULL will be returned if there are no
 * pages of the proper color even if there are pages of a different color.
 * Otherwise, scan the bins for ones with pages.  For each bin with pages,
 * try to lock one of them.  If no page can be locked, try the
 * next bin.  Return NULL if a page can not be found and locked.
 *
 * Finds a page, tries to lock it, then removes it.
 */

/*ARGSUSED*/
page_t *
page_get_cachelist(struct vnode *vp, u_offset_t off, struct seg *seg,
	caddr_t vaddr, uint_t flags, struct lgrp *lgrp)
{
	page_t		*pp;
	struct as	*as = seg->s_as;
	ulong_t		bin;
	/*LINTED*/
	int		mnode;
	int		mtype;
	lgrp_mnode_cookie_t	lgrp_cookie;

	/*
	 * If we aren't passed a specific lgroup, or passed a freed lgrp
	 * assume we wish to allocate near to the current thread's home.
	 */
	if (!LGRP_EXISTS(lgrp))
		lgrp = lgrp_home_lgrp();

	if (!kcage_on) {
		/* cage is off: demand relocatable pages, skip the cage */
		flags &= ~PG_NORELOC;
		flags |= PGI_NOCAGE;
	}

	if ((flags & (PG_NORELOC | PG_PANIC | PG_PUSHPAGE)) == PG_NORELOC &&
	    kcage_freemem <= kcage_throttlefree) {
		/*
		 * Reserve kcage_throttlefree pages for critical kernel
		 * threads.
		 *
		 * Everybody else has to go to page_create_get_something()
		 * to get a cage page, so we don't deadlock cageout.
		 */
		return (NULL);
	}

	/* LINTED */
	AS_2_BIN(as, seg, vp, vaddr, bin);

	ASSERT(bin <= page_colors_mask);

	/* LINTED */
	MTYPE_INIT(mtype, vp, vaddr, flags, MMU_PAGESIZE);

	VM_STAT_ADD(vmm_vmstats.pgc_alloc);

	/*
	 * Try local cachelists first
	 */
	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		pp = page_get_mnode_cachelist(bin, flags, mnode, mtype);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgc_allocok);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
	}

	lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);

	/*
	 * Try freelists/cachelists that are farther away
	 * This is our only chance to allocate remote pages for PAGESIZE
	 * requests.
	 */
	LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		pp = page_get_mnode_freelist(mnode, bin, mtype,
		    0, flags);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgc_allocokdeferred);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
		pp = page_get_mnode_cachelist(bin, flags, mnode, mtype);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgc_allocokrem);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
	}

	VM_STAT_ADD(vmm_vmstats.pgc_allocfailed);
	return (NULL);
}

/*
 * Scan the cachelists of one memory node (mnode) for a free page of the
 * requested color (bin) within the memory type range (mtype).
 */
page_t *
page_get_mnode_cachelist(uint_t bin, uint_t flags, int mnode, int mtype)
{
	kmutex_t	*pcm;
	int		i;
	page_t		*pp;
	page_t		*first_pp;
	uint_t		bin_marker;
	int		nwaybins, nwaycnt;
	int		cpucolors;

	VM_STAT_ADD(vmm_vmstats.pgmc_alloc);

	/* LINTED */
	MTYPE_START(mnode, mtype, flags);
	if (mtype < 0) {	/* mnode does not have memory in mtype range */
		VM_STAT_ADD(vmm_vmstats.pgmc_allocempty);
		return (NULL);
	}

	nwaybins = 0;
	cpucolors = cpu_page_colors;
	/*
	 * adjust cpucolors to possibly check additional 'equivalent' bins
	 * to try to minimize fragmentation of large pages by delaying calls
	 * to page_freelist_fill.
	 */
	if (colorequiv > 1) {
		int equivcolors = page_colors / colorequiv;

		if (equivcolors && (cpucolors == 0 || equivcolors < cpucolors))
			cpucolors = equivcolors;
	}

	/*
	 * Only hold one cachelist lock at a time, that way we
	 * can start anywhere and not have to worry about lock
	 * ordering.
	 */

big_try_again:
	nwaycnt = 0;
	for (i = 0; i <= page_colors; i++) {
		if (PAGE_CACHELISTS(mnode, bin, mtype)) {
			pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST);
			mutex_enter(pcm);
			/* re-read the list head now that we hold the lock */
			pp = PAGE_CACHELISTS(mnode, bin, mtype);
			if (pp != NULL) {
				first_pp = pp;
				ASSERT(pp->p_vnode);
				ASSERT(PP_ISAGED(pp) == 0);
				ASSERT(pp->p_szc == 0);
				ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode);
				while (!page_trylock(pp, SE_EXCL)) {
					pp = pp->p_next;
					ASSERT(pp->p_szc == 0);
					if (pp == first_pp) {
						/*
						 * We have searched the
						 * complete list!
						 * And all of them (might
						 * only be one) are locked.
						 * This can happen since
						 * these pages can also be
						 * found via the hash list.
						 * When found via the hash
						 * list, they are locked
						 * first, then removed.
						 * We give up to let the
						 * other thread run.
						 */
						pp = NULL;
						break;
					}
					ASSERT(pp->p_vnode);
					ASSERT(PP_ISFREE(pp));
					ASSERT(PP_ISAGED(pp) == 0);
					ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) ==
					    mnode);
				}

				if (pp) {
					page_t	**ppp;
					/*
					 * Found and locked a page.
					 * Pull it off the list.
					 */
					ASSERT(mtype == PP_2_MTYPE(pp));
					ppp = &PAGE_CACHELISTS(mnode, bin,
					    mtype);
					page_sub(ppp, pp);
					/*
					 * Subtract counters before releasing
					 * pcm mutex to avoid a race with
					 * page_freelist_coalesce and
					 * page_freelist_fill.
					 */
					page_ctr_sub(mnode, mtype, pp,
					    PG_CACHE_LIST);
					mutex_exit(pcm);
					ASSERT(pp->p_vnode);
					ASSERT(PP_ISAGED(pp) == 0);
#if defined(__sparc)
					ASSERT(!kcage_on ||
					    (flags & PG_NORELOC) == 0 ||
					    PP_ISNORELOC(pp));
					if (PP_ISNORELOC(pp)) {
						kcage_freemem_sub(1);
					}
#endif
					VM_STAT_ADD(vmm_vmstats.
					    pgmc_allocok);
					return (pp);
				}
			}
			mutex_exit(pcm);
		}

		/*
		 * Wow! The initial bin is empty or no page in the bin could
		 * be locked.
		 *
		 * If specific color is needed, check if page color may be in
		 * other bins.
		 */
		if ((flags & PG_MATCH_COLOR) && (cpucolors != 0)) {
			if (!nwaybins) {
				/* lazily compute the equivalent-bin stride */
				if (cpucolors < 0) {
					cpucolors = CPUSETSIZE() / MMU_PAGESIZE;
					ASSERT(cpucolors > 0);
					nwaybins = page_colors / cpucolors;
					if (nwaybins < 2)
						cpucolors = 0;
				} else {
					nwaybins = page_colors / cpucolors;
					ASSERT(nwaybins > 1);
				}
			}

			if (++nwaycnt >= nwaybins) {
				/* tried every equivalent bin; give up */
				break;
			}
			bin = (bin + (page_colors / nwaybins)) &
			    page_colors_mask;
			continue;
		}

		/* no color constraint: step through the remaining bins */
		if (i == 0) {
			bin = (bin + BIN_STEP) & page_colors_mask;
			bin_marker = bin;
		} else {
			bin = (bin + vac_colors) &
			    page_colors_mask;
			if (bin == bin_marker) {
				bin = (bin + 1) & page_colors_mask;
				bin_marker = bin;
			}
		}
	}

	/* this mtype is exhausted; move on to the next one, if any */
	MTYPE_NEXT(mnode, mtype, flags);
	if (mtype >= 0)
		goto big_try_again;

	VM_STAT_ADD(vmm_vmstats.pgmc_allocfailed);
	return (NULL);
}

#ifdef DEBUG
#define	REPL_PAGE_STATS
#endif /* DEBUG */

#ifdef REPL_PAGE_STATS
/* event counters for page_get_replacement_page() (DEBUG kernels only) */
struct repl_page_stats {
	uint_t	ngets;
	uint_t	ngets_noreloc;
	uint_t	npgr_noreloc;
	uint_t	nnopage_first;
	uint_t	nnopage;
	uint_t	nhashout;
	uint_t	nnofree;
	uint_t	nnext_pp;
} repl_page_stats;
#define	REPL_STAT_INCR(v)	atomic_add_32(&repl_page_stats.v, 1)
#else /* REPL_PAGE_STATS */
#define	REPL_STAT_INCR(v)
#endif /* REPL_PAGE_STATS */

/* NOTE(review): not referenced in this chunk; presumably a tunable — confirm */
int	pgrppgcp;

/*
 * The freemem accounting must be done by the caller.
34857c478bd9Sstevel@tonic-gate * First we try to get a replacement page of the same size as like_pp, 34867c478bd9Sstevel@tonic-gate * if that is not possible, then we just get a set of discontiguous 34877c478bd9Sstevel@tonic-gate * PAGESIZE pages. 34887c478bd9Sstevel@tonic-gate */ 34897c478bd9Sstevel@tonic-gate page_t * 34902dae3fb5Sjjc page_get_replacement_page(page_t *orig_like_pp, struct lgrp *lgrp_target, 34917c478bd9Sstevel@tonic-gate uint_t pgrflags) 34927c478bd9Sstevel@tonic-gate { 34937c478bd9Sstevel@tonic-gate page_t *like_pp; 34947c478bd9Sstevel@tonic-gate page_t *pp, *pplist; 34957c478bd9Sstevel@tonic-gate page_t *pl = NULL; 34967c478bd9Sstevel@tonic-gate ulong_t bin; 34977c478bd9Sstevel@tonic-gate int mnode, page_mnode; 34987c478bd9Sstevel@tonic-gate int szc; 34997c478bd9Sstevel@tonic-gate spgcnt_t npgs, pg_cnt; 35007c478bd9Sstevel@tonic-gate pfn_t pfnum; 35017c478bd9Sstevel@tonic-gate int mtype; 35027c478bd9Sstevel@tonic-gate int flags = 0; 35037c478bd9Sstevel@tonic-gate lgrp_mnode_cookie_t lgrp_cookie; 35042dae3fb5Sjjc lgrp_t *lgrp; 35057c478bd9Sstevel@tonic-gate 35067c478bd9Sstevel@tonic-gate REPL_STAT_INCR(ngets); 35077c478bd9Sstevel@tonic-gate like_pp = orig_like_pp; 35087c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(like_pp)); 35097c478bd9Sstevel@tonic-gate 35107c478bd9Sstevel@tonic-gate szc = like_pp->p_szc; 35117c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(szc); 35127c478bd9Sstevel@tonic-gate /* 35137c478bd9Sstevel@tonic-gate * Now we reset like_pp to the base page_t. 35147c478bd9Sstevel@tonic-gate * That way, we won't walk past the end of this 'szc' page. 
35157c478bd9Sstevel@tonic-gate */ 35167c478bd9Sstevel@tonic-gate pfnum = PFN_BASE(like_pp->p_pagenum, szc); 35177c478bd9Sstevel@tonic-gate like_pp = page_numtopp_nolock(pfnum); 35187c478bd9Sstevel@tonic-gate ASSERT(like_pp->p_szc == szc); 35197c478bd9Sstevel@tonic-gate 35207c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(like_pp)) { 35217c478bd9Sstevel@tonic-gate ASSERT(kcage_on); 35227c478bd9Sstevel@tonic-gate REPL_STAT_INCR(ngets_noreloc); 35237c478bd9Sstevel@tonic-gate flags = PGI_RELOCONLY; 35247c478bd9Sstevel@tonic-gate } else if (pgrflags & PGR_NORELOC) { 35257c478bd9Sstevel@tonic-gate ASSERT(kcage_on); 35267c478bd9Sstevel@tonic-gate REPL_STAT_INCR(npgr_noreloc); 35277c478bd9Sstevel@tonic-gate flags = PG_NORELOC; 35287c478bd9Sstevel@tonic-gate } 35297c478bd9Sstevel@tonic-gate 35307c478bd9Sstevel@tonic-gate /* 35317c478bd9Sstevel@tonic-gate * Kernel pages must always be replaced with the same size 35327c478bd9Sstevel@tonic-gate * pages, since we cannot properly handle demotion of kernel 35337c478bd9Sstevel@tonic-gate * pages. 35347c478bd9Sstevel@tonic-gate */ 35357c478bd9Sstevel@tonic-gate if (like_pp->p_vnode == &kvp) 35367c478bd9Sstevel@tonic-gate pgrflags |= PGR_SAMESZC; 35377c478bd9Sstevel@tonic-gate 35387c478bd9Sstevel@tonic-gate /* LINTED */ 353907ad560dSkchow MTYPE_PGR_INIT(mtype, flags, like_pp, page_mnode, npgs); 35407c478bd9Sstevel@tonic-gate 35417c478bd9Sstevel@tonic-gate while (npgs) { 35427c478bd9Sstevel@tonic-gate pplist = NULL; 35437c478bd9Sstevel@tonic-gate for (;;) { 35447c478bd9Sstevel@tonic-gate pg_cnt = page_get_pagecnt(szc); 35457c478bd9Sstevel@tonic-gate bin = PP_2_BIN(like_pp); 35467c478bd9Sstevel@tonic-gate ASSERT(like_pp->p_szc == orig_like_pp->p_szc); 35477c478bd9Sstevel@tonic-gate ASSERT(pg_cnt <= npgs); 35487c478bd9Sstevel@tonic-gate 35497c478bd9Sstevel@tonic-gate /* 35507c478bd9Sstevel@tonic-gate * If an lgroup was specified, try to get the 35517c478bd9Sstevel@tonic-gate * page from that lgroup. 
35522dae3fb5Sjjc * NOTE: Must be careful with code below because 35532dae3fb5Sjjc * lgroup may disappear and reappear since there 35542dae3fb5Sjjc * is no locking for lgroup here. 35557c478bd9Sstevel@tonic-gate */ 35562dae3fb5Sjjc if (LGRP_EXISTS(lgrp_target)) { 35572dae3fb5Sjjc /* 35582dae3fb5Sjjc * Keep local variable for lgroup separate 35592dae3fb5Sjjc * from lgroup argument since this code should 35602dae3fb5Sjjc * only be exercised when lgroup argument 35612dae3fb5Sjjc * exists.... 35622dae3fb5Sjjc */ 35632dae3fb5Sjjc lgrp = lgrp_target; 35642dae3fb5Sjjc 35657c478bd9Sstevel@tonic-gate /* Try the lgroup's freelists first */ 35667c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, 35677c478bd9Sstevel@tonic-gate LGRP_SRCH_LOCAL); 35687c478bd9Sstevel@tonic-gate while ((pplist == NULL) && 35697c478bd9Sstevel@tonic-gate (mnode = lgrp_memnode_choose(&lgrp_cookie)) 35707c478bd9Sstevel@tonic-gate != -1) { 35717c478bd9Sstevel@tonic-gate pplist = page_get_mnode_freelist( 35727c478bd9Sstevel@tonic-gate mnode, bin, mtype, szc, 35737c478bd9Sstevel@tonic-gate flags); 35747c478bd9Sstevel@tonic-gate } 35757c478bd9Sstevel@tonic-gate 35767c478bd9Sstevel@tonic-gate /* 35777c478bd9Sstevel@tonic-gate * Now try it's cachelists if this is a 35787c478bd9Sstevel@tonic-gate * small page. Don't need to do it for 35797c478bd9Sstevel@tonic-gate * larger ones since page_freelist_coalesce() 35807c478bd9Sstevel@tonic-gate * already failed. 
35817c478bd9Sstevel@tonic-gate */ 35827c478bd9Sstevel@tonic-gate if (pplist != NULL || szc != 0) 35837c478bd9Sstevel@tonic-gate break; 35847c478bd9Sstevel@tonic-gate 35857c478bd9Sstevel@tonic-gate /* Now try it's cachelists */ 35867c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, 35877c478bd9Sstevel@tonic-gate LGRP_SRCH_LOCAL); 35887c478bd9Sstevel@tonic-gate 35897c478bd9Sstevel@tonic-gate while ((pplist == NULL) && 35907c478bd9Sstevel@tonic-gate (mnode = lgrp_memnode_choose(&lgrp_cookie)) 35917c478bd9Sstevel@tonic-gate != -1) { 35927c478bd9Sstevel@tonic-gate pplist = page_get_mnode_cachelist( 35937c478bd9Sstevel@tonic-gate bin, flags, mnode, mtype); 35947c478bd9Sstevel@tonic-gate } 35957c478bd9Sstevel@tonic-gate if (pplist != NULL) { 35967c478bd9Sstevel@tonic-gate page_hashout(pplist, NULL); 35977c478bd9Sstevel@tonic-gate PP_SETAGED(pplist); 35987c478bd9Sstevel@tonic-gate REPL_STAT_INCR(nhashout); 35997c478bd9Sstevel@tonic-gate break; 36007c478bd9Sstevel@tonic-gate } 36017c478bd9Sstevel@tonic-gate /* Done looking in this lgroup. Bail out. */ 36027c478bd9Sstevel@tonic-gate break; 36037c478bd9Sstevel@tonic-gate } 36047c478bd9Sstevel@tonic-gate 36057c478bd9Sstevel@tonic-gate /* 36062dae3fb5Sjjc * No lgroup was specified (or lgroup was removed by 36072dae3fb5Sjjc * DR, so just try to get the page as close to 36082dae3fb5Sjjc * like_pp's mnode as possible. 36097c478bd9Sstevel@tonic-gate * First try the local freelist... 36107c478bd9Sstevel@tonic-gate */ 36117c478bd9Sstevel@tonic-gate mnode = PP_2_MEM_NODE(like_pp); 36127c478bd9Sstevel@tonic-gate pplist = page_get_mnode_freelist(mnode, bin, 36137c478bd9Sstevel@tonic-gate mtype, szc, flags); 36147c478bd9Sstevel@tonic-gate if (pplist != NULL) 36157c478bd9Sstevel@tonic-gate break; 36167c478bd9Sstevel@tonic-gate 36177c478bd9Sstevel@tonic-gate REPL_STAT_INCR(nnofree); 36187c478bd9Sstevel@tonic-gate 36197c478bd9Sstevel@tonic-gate /* 36207c478bd9Sstevel@tonic-gate * ...then the local cachelist. 
Don't need to do it for 36217c478bd9Sstevel@tonic-gate * larger pages cause page_freelist_coalesce() already 36227c478bd9Sstevel@tonic-gate * failed there anyway. 36237c478bd9Sstevel@tonic-gate */ 36247c478bd9Sstevel@tonic-gate if (szc == 0) { 36257c478bd9Sstevel@tonic-gate pplist = page_get_mnode_cachelist(bin, flags, 36267c478bd9Sstevel@tonic-gate mnode, mtype); 36277c478bd9Sstevel@tonic-gate if (pplist != NULL) { 36287c478bd9Sstevel@tonic-gate page_hashout(pplist, NULL); 36297c478bd9Sstevel@tonic-gate PP_SETAGED(pplist); 36307c478bd9Sstevel@tonic-gate REPL_STAT_INCR(nhashout); 36317c478bd9Sstevel@tonic-gate break; 36327c478bd9Sstevel@tonic-gate } 36337c478bd9Sstevel@tonic-gate } 36347c478bd9Sstevel@tonic-gate 36357c478bd9Sstevel@tonic-gate /* Now try remote freelists */ 36367c478bd9Sstevel@tonic-gate page_mnode = mnode; 36377c478bd9Sstevel@tonic-gate lgrp = 36387c478bd9Sstevel@tonic-gate lgrp_hand_to_lgrp(MEM_NODE_2_LGRPHAND(page_mnode)); 36397c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, 36407c478bd9Sstevel@tonic-gate LGRP_SRCH_HIER); 36417c478bd9Sstevel@tonic-gate while (pplist == NULL && 36427c478bd9Sstevel@tonic-gate (mnode = lgrp_memnode_choose(&lgrp_cookie)) 36437c478bd9Sstevel@tonic-gate != -1) { 36447c478bd9Sstevel@tonic-gate /* 36457c478bd9Sstevel@tonic-gate * Skip local mnode. 
36467c478bd9Sstevel@tonic-gate */ 36477c478bd9Sstevel@tonic-gate if ((mnode == page_mnode) || 36487c478bd9Sstevel@tonic-gate (mem_node_config[mnode].exists == 0)) 36497c478bd9Sstevel@tonic-gate continue; 36507c478bd9Sstevel@tonic-gate 36517c478bd9Sstevel@tonic-gate pplist = page_get_mnode_freelist(mnode, 36527c478bd9Sstevel@tonic-gate bin, mtype, szc, flags); 36537c478bd9Sstevel@tonic-gate } 36547c478bd9Sstevel@tonic-gate 36557c478bd9Sstevel@tonic-gate if (pplist != NULL) 36567c478bd9Sstevel@tonic-gate break; 36577c478bd9Sstevel@tonic-gate 36587c478bd9Sstevel@tonic-gate 36597c478bd9Sstevel@tonic-gate /* Now try remote cachelists */ 36607c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, 36617c478bd9Sstevel@tonic-gate LGRP_SRCH_HIER); 36627c478bd9Sstevel@tonic-gate while (pplist == NULL && szc == 0) { 36637c478bd9Sstevel@tonic-gate mnode = lgrp_memnode_choose(&lgrp_cookie); 36647c478bd9Sstevel@tonic-gate if (mnode == -1) 36657c478bd9Sstevel@tonic-gate break; 36667c478bd9Sstevel@tonic-gate /* 36677c478bd9Sstevel@tonic-gate * Skip local mnode. 
36687c478bd9Sstevel@tonic-gate */ 36697c478bd9Sstevel@tonic-gate if ((mnode == page_mnode) || 36707c478bd9Sstevel@tonic-gate (mem_node_config[mnode].exists == 0)) 36717c478bd9Sstevel@tonic-gate continue; 36727c478bd9Sstevel@tonic-gate 36737c478bd9Sstevel@tonic-gate pplist = page_get_mnode_cachelist(bin, 36747c478bd9Sstevel@tonic-gate flags, mnode, mtype); 36757c478bd9Sstevel@tonic-gate 36767c478bd9Sstevel@tonic-gate if (pplist != NULL) { 36777c478bd9Sstevel@tonic-gate page_hashout(pplist, NULL); 36787c478bd9Sstevel@tonic-gate PP_SETAGED(pplist); 36797c478bd9Sstevel@tonic-gate REPL_STAT_INCR(nhashout); 36807c478bd9Sstevel@tonic-gate break; 36817c478bd9Sstevel@tonic-gate } 36827c478bd9Sstevel@tonic-gate } 36837c478bd9Sstevel@tonic-gate 36847c478bd9Sstevel@tonic-gate /* 36857c478bd9Sstevel@tonic-gate * Break out of while loop under the following cases: 36867c478bd9Sstevel@tonic-gate * - If we successfully got a page. 36877c478bd9Sstevel@tonic-gate * - If pgrflags specified only returning a specific 36887c478bd9Sstevel@tonic-gate * page size and we could not find that page size. 36897c478bd9Sstevel@tonic-gate * - If we could not satisfy the request with PAGESIZE 36907c478bd9Sstevel@tonic-gate * or larger pages. 
36917c478bd9Sstevel@tonic-gate */ 36927c478bd9Sstevel@tonic-gate if (pplist != NULL || szc == 0) 36937c478bd9Sstevel@tonic-gate break; 36947c478bd9Sstevel@tonic-gate 36957c478bd9Sstevel@tonic-gate if ((pgrflags & PGR_SAMESZC) || pgrppgcp) { 36967c478bd9Sstevel@tonic-gate /* try to find contig page */ 36977c478bd9Sstevel@tonic-gate 36987c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, 36997c478bd9Sstevel@tonic-gate LGRP_SRCH_HIER); 37007c478bd9Sstevel@tonic-gate 37017c478bd9Sstevel@tonic-gate while ((pplist == NULL) && 37027c478bd9Sstevel@tonic-gate (mnode = 37037c478bd9Sstevel@tonic-gate lgrp_memnode_choose(&lgrp_cookie)) 37047c478bd9Sstevel@tonic-gate != -1) { 37057c478bd9Sstevel@tonic-gate pplist = page_get_contig_pages( 37067c478bd9Sstevel@tonic-gate mnode, bin, mtype, szc, 37077c478bd9Sstevel@tonic-gate flags | PGI_PGCPHIPRI); 37087c478bd9Sstevel@tonic-gate } 37097c478bd9Sstevel@tonic-gate break; 37107c478bd9Sstevel@tonic-gate } 37117c478bd9Sstevel@tonic-gate 37127c478bd9Sstevel@tonic-gate /* 37137c478bd9Sstevel@tonic-gate * The correct thing to do here is try the next 37147c478bd9Sstevel@tonic-gate * page size down using szc--. Due to a bug 37157c478bd9Sstevel@tonic-gate * with the processing of HAT_RELOAD_SHARE 37167c478bd9Sstevel@tonic-gate * where the sfmmu_ttecnt arrays of all 37177c478bd9Sstevel@tonic-gate * hats sharing an ISM segment don't get updated, 37187c478bd9Sstevel@tonic-gate * using intermediate size pages for relocation 37197c478bd9Sstevel@tonic-gate * can lead to continuous page faults. 
37207c478bd9Sstevel@tonic-gate */ 37217c478bd9Sstevel@tonic-gate szc = 0; 37227c478bd9Sstevel@tonic-gate } 37237c478bd9Sstevel@tonic-gate 37247c478bd9Sstevel@tonic-gate if (pplist != NULL) { 37257c478bd9Sstevel@tonic-gate DTRACE_PROBE4(page__get, 37267c478bd9Sstevel@tonic-gate lgrp_t *, lgrp, 37277c478bd9Sstevel@tonic-gate int, mnode, 37287c478bd9Sstevel@tonic-gate ulong_t, bin, 37297c478bd9Sstevel@tonic-gate uint_t, flags); 37307c478bd9Sstevel@tonic-gate 37317c478bd9Sstevel@tonic-gate while (pplist != NULL && pg_cnt--) { 37327c478bd9Sstevel@tonic-gate ASSERT(pplist != NULL); 37337c478bd9Sstevel@tonic-gate pp = pplist; 37347c478bd9Sstevel@tonic-gate page_sub(&pplist, pp); 37357c478bd9Sstevel@tonic-gate PP_CLRFREE(pp); 37367c478bd9Sstevel@tonic-gate PP_CLRAGED(pp); 37377c478bd9Sstevel@tonic-gate page_list_concat(&pl, &pp); 37387c478bd9Sstevel@tonic-gate npgs--; 37397c478bd9Sstevel@tonic-gate like_pp = like_pp + 1; 37407c478bd9Sstevel@tonic-gate REPL_STAT_INCR(nnext_pp); 37417c478bd9Sstevel@tonic-gate } 37427c478bd9Sstevel@tonic-gate ASSERT(pg_cnt == 0); 37437c478bd9Sstevel@tonic-gate } else { 37447c478bd9Sstevel@tonic-gate break; 37457c478bd9Sstevel@tonic-gate } 37467c478bd9Sstevel@tonic-gate } 37477c478bd9Sstevel@tonic-gate 37487c478bd9Sstevel@tonic-gate if (npgs) { 37497c478bd9Sstevel@tonic-gate /* 37507c478bd9Sstevel@tonic-gate * We were unable to allocate the necessary number 37517c478bd9Sstevel@tonic-gate * of pages. 37527c478bd9Sstevel@tonic-gate * We need to free up any pl. 
		 */
		REPL_STAT_INCR(nnopage);
		page_free_replacement_page(pl);
		return (NULL);
	} else {
		return (pl);
	}
}

/*
 * Demote a free large page to its constituent pages.
 *
 * The caller must hold the page lock on pp, and pp must be a free
 * large page (0 < p_szc < mmu_page_sizes).  On return the page has
 * been demoted to base pages (p_szc == 0).
 */
void
page_demote_free_pages(page_t *pp)
{

	int mnode;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(PP_ISFREE(pp));
	ASSERT(pp->p_szc != 0 && pp->p_szc < mmu_page_sizes);

	/* Find the memory node this page belongs to. */
	mnode = PP_2_MEM_NODE(pp);
	page_freelist_lock(mnode);
	/*
	 * Re-check p_szc under the per-mnode freelist lock; presumably
	 * the page could have been demoted by another thread before the
	 * lock was acquired -- NOTE(review): confirm this race is the
	 * reason for the double check.
	 */
	if (pp->p_szc != 0) {
		/*
		 * Demote starting from the large page's base pfn
		 * (PFN_BASE aligns p_pagenum down to the p_szc
		 * boundary); no color constraint, free-list variant.
		 */
		(void) page_demote(mnode, PFN_BASE(pp->p_pagenum,
		    pp->p_szc), pp->p_szc, 0, PC_NO_COLOR, PC_FREE);
	}
	page_freelist_unlock(mnode);
	ASSERT(pp->p_szc == 0);
}