17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate /* 237c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 287c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 327c478bd9Sstevel@tonic-gate * under license from the Regents of the University of California. 337c478bd9Sstevel@tonic-gate */ 347c478bd9Sstevel@tonic-gate 357c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 367c478bd9Sstevel@tonic-gate 377c478bd9Sstevel@tonic-gate /* 387c478bd9Sstevel@tonic-gate * This file contains common functions to access and manage the page lists. 397c478bd9Sstevel@tonic-gate * Many of these routines originated from platform dependent modules 407c478bd9Sstevel@tonic-gate * (sun4/vm/vm_dep.c, i86pc/vm/vm_machdep.c) and modified to function in 417c478bd9Sstevel@tonic-gate * a platform independent manner. 427c478bd9Sstevel@tonic-gate * 437c478bd9Sstevel@tonic-gate * vm/vm_dep.h provides for platform specific support. 
447c478bd9Sstevel@tonic-gate */ 457c478bd9Sstevel@tonic-gate 467c478bd9Sstevel@tonic-gate #include <sys/types.h> 477c478bd9Sstevel@tonic-gate #include <sys/debug.h> 487c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 497c478bd9Sstevel@tonic-gate #include <sys/systm.h> 507c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 517c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 527c478bd9Sstevel@tonic-gate #include <vm/as.h> 537c478bd9Sstevel@tonic-gate #include <vm/page.h> 547c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 557c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 567c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 577c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h> 587c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 597c478bd9Sstevel@tonic-gate #include <sys/mem_config.h> 607c478bd9Sstevel@tonic-gate #include <sys/callb.h> 617c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h> 627c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 637c478bd9Sstevel@tonic-gate 647c478bd9Sstevel@tonic-gate extern uint_t vac_colors; 657c478bd9Sstevel@tonic-gate 667c478bd9Sstevel@tonic-gate /* 677c478bd9Sstevel@tonic-gate * number of page colors equivalent to reqested color in page_get routines. 687c478bd9Sstevel@tonic-gate * If set, keeps large pages intact longer and keeps MPO allocation 697c478bd9Sstevel@tonic-gate * from the local mnode in favor of acquiring the 'correct' page color from 707c478bd9Sstevel@tonic-gate * a demoted large page or from a remote mnode. 717c478bd9Sstevel@tonic-gate */ 727c478bd9Sstevel@tonic-gate int colorequiv; 737c478bd9Sstevel@tonic-gate 747c478bd9Sstevel@tonic-gate /* 757c478bd9Sstevel@tonic-gate * if set, specifies the percentage of large pages that are free from within 767c478bd9Sstevel@tonic-gate * a large page region before attempting to lock those pages for 777c478bd9Sstevel@tonic-gate * page_get_contig_pages processing. 
787c478bd9Sstevel@tonic-gate * 797c478bd9Sstevel@tonic-gate * Should be turned on when kpr is available when page_trylock_contig_pages 807c478bd9Sstevel@tonic-gate * can be more selective. 817c478bd9Sstevel@tonic-gate */ 827c478bd9Sstevel@tonic-gate 837c478bd9Sstevel@tonic-gate int ptcpthreshold; 847c478bd9Sstevel@tonic-gate 857c478bd9Sstevel@tonic-gate /* 867c478bd9Sstevel@tonic-gate * Limit page get contig page search based on failure cnts in pgcpfailcnt[]. 877c478bd9Sstevel@tonic-gate * use slot 0 (base page size unused) to enable or disable limiting search. 887c478bd9Sstevel@tonic-gate * Enabled by default. 897c478bd9Sstevel@tonic-gate */ 907c478bd9Sstevel@tonic-gate int pgcpfailcnt[MMU_PAGE_SIZES]; 917c478bd9Sstevel@tonic-gate int pgcplimitsearch = 1; 927c478bd9Sstevel@tonic-gate 937c478bd9Sstevel@tonic-gate #ifdef VM_STATS 947c478bd9Sstevel@tonic-gate struct vmm_vmstats_str vmm_vmstats; 957c478bd9Sstevel@tonic-gate 967c478bd9Sstevel@tonic-gate #endif /* VM_STATS */ 977c478bd9Sstevel@tonic-gate 987c478bd9Sstevel@tonic-gate #if defined(__sparc) 997c478bd9Sstevel@tonic-gate #define LPGCREATE 0 1007c478bd9Sstevel@tonic-gate #else 1017c478bd9Sstevel@tonic-gate /* enable page_get_contig_pages */ 1027c478bd9Sstevel@tonic-gate #define LPGCREATE 1 1037c478bd9Sstevel@tonic-gate #endif 1047c478bd9Sstevel@tonic-gate 1057c478bd9Sstevel@tonic-gate int pg_contig_disable; 1067c478bd9Sstevel@tonic-gate int pg_lpgcreate_nocage = LPGCREATE; 1077c478bd9Sstevel@tonic-gate 1087c478bd9Sstevel@tonic-gate /* 1097c478bd9Sstevel@tonic-gate * page_freelist_fill pfn flag to signify no hi pfn requirement. 
1107c478bd9Sstevel@tonic-gate */ 1117c478bd9Sstevel@tonic-gate #define PFNNULL 0 1127c478bd9Sstevel@tonic-gate 1137c478bd9Sstevel@tonic-gate /* Flags involved in promotion and demotion routines */ 1147c478bd9Sstevel@tonic-gate #define PC_FREE 0x1 /* put page on freelist */ 1157c478bd9Sstevel@tonic-gate #define PC_ALLOC 0x2 /* return page for allocation */ 1167c478bd9Sstevel@tonic-gate 1177c478bd9Sstevel@tonic-gate /* 1187c478bd9Sstevel@tonic-gate * Flag for page_demote to be used with PC_FREE to denote that we don't care 1197c478bd9Sstevel@tonic-gate * what the color is as the color parameter to the function is ignored. 1207c478bd9Sstevel@tonic-gate */ 1217c478bd9Sstevel@tonic-gate #define PC_NO_COLOR (-1) 1227c478bd9Sstevel@tonic-gate 1237c478bd9Sstevel@tonic-gate /* 1247c478bd9Sstevel@tonic-gate * page counters candidates info 1257c478bd9Sstevel@tonic-gate * See page_ctrs_cands comment below for more details. 1267c478bd9Sstevel@tonic-gate * fields are as follows: 1277c478bd9Sstevel@tonic-gate * pcc_pages_free: # pages which freelist coalesce can create 1287c478bd9Sstevel@tonic-gate * pcc_color_free_len: number of elements in pcc_color_free array 1297c478bd9Sstevel@tonic-gate * pcc_color_free: pointer to page free counts per color 1307c478bd9Sstevel@tonic-gate */ 1317c478bd9Sstevel@tonic-gate typedef struct pcc_info { 1327c478bd9Sstevel@tonic-gate pgcnt_t pcc_pages_free; 1337c478bd9Sstevel@tonic-gate int pcc_color_free_len; 1347c478bd9Sstevel@tonic-gate pgcnt_t *pcc_color_free; 1357c478bd9Sstevel@tonic-gate } pcc_info_t; 1367c478bd9Sstevel@tonic-gate 1377c478bd9Sstevel@tonic-gate /* 1387c478bd9Sstevel@tonic-gate * On big machines it can take a long time to check page_counters 1397c478bd9Sstevel@tonic-gate * arrays. page_ctrs_cands is a summary array whose elements are a dynamically 1407c478bd9Sstevel@tonic-gate * updated sum of all elements of the corresponding page_counters arrays. 
1417c478bd9Sstevel@tonic-gate * page_freelist_coalesce() searches page_counters only if an appropriate 1427c478bd9Sstevel@tonic-gate * element of page_ctrs_cands array is greater than 0. 1437c478bd9Sstevel@tonic-gate * 1447c478bd9Sstevel@tonic-gate * An extra dimension is used for page_ctrs_cands to spread the elements 1457c478bd9Sstevel@tonic-gate * over a few e$ cache lines to avoid serialization during the array 1467c478bd9Sstevel@tonic-gate * updates. 1477c478bd9Sstevel@tonic-gate */ 1487c478bd9Sstevel@tonic-gate #pragma align 64(page_ctrs_cands) 1497c478bd9Sstevel@tonic-gate 1507c478bd9Sstevel@tonic-gate static pcc_info_t *page_ctrs_cands[NPC_MUTEX][MMU_PAGE_SIZES]; 1517c478bd9Sstevel@tonic-gate 1527c478bd9Sstevel@tonic-gate /* 1537c478bd9Sstevel@tonic-gate * Return in val the total number of free pages which can be created 1547c478bd9Sstevel@tonic-gate * for the given mnode (m) and region size (r) 1557c478bd9Sstevel@tonic-gate */ 1567c478bd9Sstevel@tonic-gate #define PGCTRS_CANDS_GETVALUE(m, r, val) { \ 1577c478bd9Sstevel@tonic-gate int i; \ 1587c478bd9Sstevel@tonic-gate val = 0; \ 1597c478bd9Sstevel@tonic-gate for (i = 0; i < NPC_MUTEX; i++) { \ 1607c478bd9Sstevel@tonic-gate val += page_ctrs_cands[i][(r)][(m)].pcc_pages_free; \ 1617c478bd9Sstevel@tonic-gate } \ 1627c478bd9Sstevel@tonic-gate } 1637c478bd9Sstevel@tonic-gate 1647c478bd9Sstevel@tonic-gate /* 1657c478bd9Sstevel@tonic-gate * Return in val the total number of free pages which can be created 1667c478bd9Sstevel@tonic-gate * for the given mnode (m), region size (r), and color (c) 1677c478bd9Sstevel@tonic-gate */ 1687c478bd9Sstevel@tonic-gate #define PGCTRS_CANDS_GETVALUECOLOR(m, r, c, val) { \ 1697c478bd9Sstevel@tonic-gate int i; \ 1707c478bd9Sstevel@tonic-gate val = 0; \ 1717c478bd9Sstevel@tonic-gate ASSERT((c) < page_ctrs_cands[0][(r)][(m)].pcc_color_free_len); \ 1727c478bd9Sstevel@tonic-gate for (i = 0; i < NPC_MUTEX; i++) { \ 1737c478bd9Sstevel@tonic-gate val += 
page_ctrs_cands[i][(r)][(m)].pcc_color_free[(c)]; \ 1747c478bd9Sstevel@tonic-gate } \ 1757c478bd9Sstevel@tonic-gate } 1767c478bd9Sstevel@tonic-gate 1777c478bd9Sstevel@tonic-gate /* 1787c478bd9Sstevel@tonic-gate * We can only allow a single thread to update a counter within the physical 1797c478bd9Sstevel@tonic-gate * range of the largest supported page size. That is the finest granularity 1807c478bd9Sstevel@tonic-gate * possible since the counter values are dependent on each other 1817c478bd9Sstevel@tonic-gate * as you move accross region sizes. PP_CTR_LOCK_INDX is used to determine the 1827c478bd9Sstevel@tonic-gate * ctr_mutex lock index for a particular physical range. 1837c478bd9Sstevel@tonic-gate */ 1847c478bd9Sstevel@tonic-gate static kmutex_t *ctr_mutex[NPC_MUTEX]; 1857c478bd9Sstevel@tonic-gate 1867c478bd9Sstevel@tonic-gate #define PP_CTR_LOCK_INDX(pp) \ 1877c478bd9Sstevel@tonic-gate (((pp)->p_pagenum >> \ 1887c478bd9Sstevel@tonic-gate (PAGE_BSZS_SHIFT(mmu_page_sizes - 1))) & (NPC_MUTEX - 1)) 1897c478bd9Sstevel@tonic-gate 1907c478bd9Sstevel@tonic-gate /* 1917c478bd9Sstevel@tonic-gate * Local functions prototypes. 
1927c478bd9Sstevel@tonic-gate */ 1937c478bd9Sstevel@tonic-gate 1947c478bd9Sstevel@tonic-gate void page_ctr_add(page_t *, int); 1957c478bd9Sstevel@tonic-gate void page_ctr_add_internal(int, page_t *, int); 1967c478bd9Sstevel@tonic-gate void page_ctr_sub(page_t *, int); 1977c478bd9Sstevel@tonic-gate uint_t page_convert_color(uchar_t, uchar_t, uint_t); 1987c478bd9Sstevel@tonic-gate void page_freelist_lock(int); 1997c478bd9Sstevel@tonic-gate void page_freelist_unlock(int); 2007c478bd9Sstevel@tonic-gate page_t *page_promote(int, pfn_t, uchar_t, int); 2017c478bd9Sstevel@tonic-gate page_t *page_demote(int, pfn_t, uchar_t, uchar_t, int, int); 2027c478bd9Sstevel@tonic-gate page_t *page_freelist_fill(uchar_t, int, int, int, pfn_t); 2037c478bd9Sstevel@tonic-gate page_t *page_get_mnode_cachelist(uint_t, uint_t, int, int); 2047c478bd9Sstevel@tonic-gate static int page_trylock_cons(page_t *pp, se_t se); 2057c478bd9Sstevel@tonic-gate 2067c478bd9Sstevel@tonic-gate #define PNUM_SIZE(szc) \ 2077c478bd9Sstevel@tonic-gate (hw_page_array[(szc)].hp_size >> hw_page_array[0].hp_shift) 2087c478bd9Sstevel@tonic-gate #define PNUM_SHIFT(szc) \ 2097c478bd9Sstevel@tonic-gate (hw_page_array[(szc)].hp_shift - hw_page_array[0].hp_shift) 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate /* 2127c478bd9Sstevel@tonic-gate * The page_counters array below is used to keep track of free contiguous 2137c478bd9Sstevel@tonic-gate * physical memory. A hw_page_map_t will be allocated per mnode per szc. 2147c478bd9Sstevel@tonic-gate * This contains an array of counters, the size of the array, a shift value 2157c478bd9Sstevel@tonic-gate * used to convert a pagenum into a counter array index or vice versa, as 2167c478bd9Sstevel@tonic-gate * well as a cache of the last successful index to be promoted to a larger 2177c478bd9Sstevel@tonic-gate * page size. 
As an optimization, we keep track of the last successful index 2187c478bd9Sstevel@tonic-gate * to be promoted per page color for the given size region, and this is 2197c478bd9Sstevel@tonic-gate * allocated dynamically based upon the number of colors for a given 2207c478bd9Sstevel@tonic-gate * region size. 2217c478bd9Sstevel@tonic-gate * 2227c478bd9Sstevel@tonic-gate * Conceptually, the page counters are represented as: 2237c478bd9Sstevel@tonic-gate * 2247c478bd9Sstevel@tonic-gate * page_counters[region_size][mnode] 2257c478bd9Sstevel@tonic-gate * 2267c478bd9Sstevel@tonic-gate * region_size: size code of a candidate larger page made up 2277c478bd9Sstevel@tonic-gate * of contiguous free smaller pages. 2287c478bd9Sstevel@tonic-gate * 2297c478bd9Sstevel@tonic-gate * page_counters[region_size][mnode].hpm_counters[index]: 2307c478bd9Sstevel@tonic-gate * represents how many (region_size - 1) pages either 2317c478bd9Sstevel@tonic-gate * exist or can be created within the given index range. 2327c478bd9Sstevel@tonic-gate * 2337c478bd9Sstevel@tonic-gate * Let's look at a sparc example: 2347c478bd9Sstevel@tonic-gate * If we want to create a free 512k page, we look at region_size 2 2357c478bd9Sstevel@tonic-gate * for the mnode we want. We calculate the index and look at a specific 2367c478bd9Sstevel@tonic-gate * hpm_counters location. If we see 8 (FULL_REGION_CNT on sparc) at 2377c478bd9Sstevel@tonic-gate * this location, it means that 8 64k pages either exist or can be created 2387c478bd9Sstevel@tonic-gate * from 8K pages in order to make a single free 512k page at the given 2397c478bd9Sstevel@tonic-gate * index. Note that when a region is full, it will contribute to the 2407c478bd9Sstevel@tonic-gate * counts in the region above it. Thus we will not know what page 2417c478bd9Sstevel@tonic-gate * size the free pages will be which can be promoted to this new free 2427c478bd9Sstevel@tonic-gate * page unless we look at all regions below the current region. 
2437c478bd9Sstevel@tonic-gate */ 2447c478bd9Sstevel@tonic-gate 2457c478bd9Sstevel@tonic-gate /* 2467c478bd9Sstevel@tonic-gate * Note: hpmctr_t is defined in platform vm_dep.h 2477c478bd9Sstevel@tonic-gate * hw_page_map_t contains all the information needed for the page_counters 2487c478bd9Sstevel@tonic-gate * logic. The fields are as follows: 2497c478bd9Sstevel@tonic-gate * 2507c478bd9Sstevel@tonic-gate * hpm_counters: dynamically allocated array to hold counter data 2517c478bd9Sstevel@tonic-gate * hpm_entries: entries in hpm_counters 2527c478bd9Sstevel@tonic-gate * hpm_shift: shift for pnum/array index conv 2537c478bd9Sstevel@tonic-gate * hpm_base: PFN mapped to counter index 0 2547c478bd9Sstevel@tonic-gate * hpm_color_current_len: # of elements in hpm_color_current "array" below 2557c478bd9Sstevel@tonic-gate * hpm_color_current: last index in counter array for this color at 2567c478bd9Sstevel@tonic-gate * which we successfully created a large page 2577c478bd9Sstevel@tonic-gate */ 2587c478bd9Sstevel@tonic-gate typedef struct hw_page_map { 2597c478bd9Sstevel@tonic-gate hpmctr_t *hpm_counters; 2607c478bd9Sstevel@tonic-gate size_t hpm_entries; 2617c478bd9Sstevel@tonic-gate int hpm_shift; 2627c478bd9Sstevel@tonic-gate pfn_t hpm_base; 2637c478bd9Sstevel@tonic-gate size_t hpm_color_current_len; 2647c478bd9Sstevel@tonic-gate size_t *hpm_color_current; 2657c478bd9Sstevel@tonic-gate } hw_page_map_t; 2667c478bd9Sstevel@tonic-gate 2677c478bd9Sstevel@tonic-gate /* 2687c478bd9Sstevel@tonic-gate * Element zero is not used, but is allocated for convenience. 2697c478bd9Sstevel@tonic-gate */ 2707c478bd9Sstevel@tonic-gate static hw_page_map_t *page_counters[MMU_PAGE_SIZES]; 2717c478bd9Sstevel@tonic-gate 2727c478bd9Sstevel@tonic-gate /* 2737c478bd9Sstevel@tonic-gate * The following macros are convenient ways to get access to the individual 2747c478bd9Sstevel@tonic-gate * elements of the page_counters arrays. 
They can be used on both 2757c478bd9Sstevel@tonic-gate * the left side and right side of equations. 2767c478bd9Sstevel@tonic-gate */ 2777c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS(mnode, rg_szc, idx) \ 2787c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_counters[(idx)]) 2797c478bd9Sstevel@tonic-gate 2807c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_COUNTERS(mnode, rg_szc) \ 2817c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_counters) 2827c478bd9Sstevel@tonic-gate 2837c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_SHIFT(mnode, rg_szc) \ 2847c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_shift) 2857c478bd9Sstevel@tonic-gate 2867c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_ENTRIES(mnode, rg_szc) \ 2877c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_entries) 2887c478bd9Sstevel@tonic-gate 2897c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_BASE(mnode, rg_szc) \ 2907c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_base) 2917c478bd9Sstevel@tonic-gate 2927c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_CURRENT_COLOR_LEN(mnode, rg_szc) \ 2937c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_color_current_len) 2947c478bd9Sstevel@tonic-gate 2957c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, rg_szc) \ 2967c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_color_current) 2977c478bd9Sstevel@tonic-gate 2987c478bd9Sstevel@tonic-gate #define PAGE_COUNTERS_CURRENT_COLOR(mnode, rg_szc, color) \ 2997c478bd9Sstevel@tonic-gate (page_counters[(rg_szc)][(mnode)].hpm_color_current[(color)]) 3007c478bd9Sstevel@tonic-gate 3017c478bd9Sstevel@tonic-gate #define PNUM_TO_IDX(mnode, rg_szc, pnum) \ 3027c478bd9Sstevel@tonic-gate (((pnum) - PAGE_COUNTERS_BASE((mnode), (rg_szc))) >> \ 3037c478bd9Sstevel@tonic-gate PAGE_COUNTERS_SHIFT((mnode), (rg_szc))) 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate #define IDX_TO_PNUM(mnode, rg_szc, index) \ 
3067c478bd9Sstevel@tonic-gate (PAGE_COUNTERS_BASE((mnode), (rg_szc)) + \ 3077c478bd9Sstevel@tonic-gate ((index) << PAGE_COUNTERS_SHIFT((mnode), (rg_szc)))) 3087c478bd9Sstevel@tonic-gate 3097c478bd9Sstevel@tonic-gate /* 3107c478bd9Sstevel@tonic-gate * Protects the hpm_counters and hpm_color_current memory from changing while 3117c478bd9Sstevel@tonic-gate * looking at page counters information. 3127c478bd9Sstevel@tonic-gate * Grab the write lock to modify what these fields point at. 3137c478bd9Sstevel@tonic-gate * Grab the read lock to prevent any pointers from changing. 3147c478bd9Sstevel@tonic-gate * The write lock can not be held during memory allocation due to a possible 3157c478bd9Sstevel@tonic-gate * recursion deadlock with trying to grab the read lock while the 3167c478bd9Sstevel@tonic-gate * write lock is already held. 3177c478bd9Sstevel@tonic-gate */ 3187c478bd9Sstevel@tonic-gate krwlock_t page_ctrs_rwlock[MAX_MEM_NODES]; 3197c478bd9Sstevel@tonic-gate 3207c478bd9Sstevel@tonic-gate /* 3217c478bd9Sstevel@tonic-gate * page size to page size code 3227c478bd9Sstevel@tonic-gate */ 3237c478bd9Sstevel@tonic-gate int 3247c478bd9Sstevel@tonic-gate page_szc(size_t pagesize) 3257c478bd9Sstevel@tonic-gate { 3267c478bd9Sstevel@tonic-gate int i = 0; 3277c478bd9Sstevel@tonic-gate 3287c478bd9Sstevel@tonic-gate while (hw_page_array[i].hp_size) { 3297c478bd9Sstevel@tonic-gate if (pagesize == hw_page_array[i].hp_size) 3307c478bd9Sstevel@tonic-gate return (i); 3317c478bd9Sstevel@tonic-gate i++; 3327c478bd9Sstevel@tonic-gate } 3337c478bd9Sstevel@tonic-gate return (-1); 3347c478bd9Sstevel@tonic-gate } 3357c478bd9Sstevel@tonic-gate 3367c478bd9Sstevel@tonic-gate /* 337*4abce959Smec * page size to page size code with the restriction that it be a supported 338*4abce959Smec * user page size. If it's not a supported user page size, -1 will be returned. 
3397c478bd9Sstevel@tonic-gate */ 3407c478bd9Sstevel@tonic-gate int 341*4abce959Smec page_szc_user_filtered(size_t pagesize) 3427c478bd9Sstevel@tonic-gate { 3437c478bd9Sstevel@tonic-gate int szc = page_szc(pagesize); 344*4abce959Smec if ((szc != -1) && (SZC_2_USERSZC(szc) != -1)) { 345*4abce959Smec return (szc); 346*4abce959Smec } 3477c478bd9Sstevel@tonic-gate return (-1); 3487c478bd9Sstevel@tonic-gate } 3497c478bd9Sstevel@tonic-gate 3507c478bd9Sstevel@tonic-gate /* 3517c478bd9Sstevel@tonic-gate * Return how many page sizes are available for the user to use. This is 3527c478bd9Sstevel@tonic-gate * what the hardware supports and not based upon how the OS implements the 3537c478bd9Sstevel@tonic-gate * support of different page sizes. 3547c478bd9Sstevel@tonic-gate */ 3557c478bd9Sstevel@tonic-gate uint_t 3567c478bd9Sstevel@tonic-gate page_num_user_pagesizes(void) 3577c478bd9Sstevel@tonic-gate { 3587c478bd9Sstevel@tonic-gate return (mmu_exported_page_sizes); 3597c478bd9Sstevel@tonic-gate } 3607c478bd9Sstevel@tonic-gate 3617c478bd9Sstevel@tonic-gate uint_t 3627c478bd9Sstevel@tonic-gate page_num_pagesizes(void) 3637c478bd9Sstevel@tonic-gate { 3647c478bd9Sstevel@tonic-gate return (mmu_page_sizes); 3657c478bd9Sstevel@tonic-gate } 3667c478bd9Sstevel@tonic-gate 3677c478bd9Sstevel@tonic-gate /* 3687c478bd9Sstevel@tonic-gate * returns the count of the number of base pagesize pages associated with szc 3697c478bd9Sstevel@tonic-gate */ 3707c478bd9Sstevel@tonic-gate pgcnt_t 3717c478bd9Sstevel@tonic-gate page_get_pagecnt(uint_t szc) 3727c478bd9Sstevel@tonic-gate { 3737c478bd9Sstevel@tonic-gate if (szc >= mmu_page_sizes) 3747c478bd9Sstevel@tonic-gate panic("page_get_pagecnt: out of range %d", szc); 3757c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_pgcnt); 3767c478bd9Sstevel@tonic-gate } 3777c478bd9Sstevel@tonic-gate 3787c478bd9Sstevel@tonic-gate size_t 3797c478bd9Sstevel@tonic-gate page_get_pagesize(uint_t szc) 3807c478bd9Sstevel@tonic-gate { 3817c478bd9Sstevel@tonic-gate if 
(szc >= mmu_page_sizes) 3827c478bd9Sstevel@tonic-gate panic("page_get_pagesize: out of range %d", szc); 3837c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_size); 3847c478bd9Sstevel@tonic-gate } 3857c478bd9Sstevel@tonic-gate 3867c478bd9Sstevel@tonic-gate /* 3877c478bd9Sstevel@tonic-gate * Return the size of a page based upon the index passed in. An index of 3887c478bd9Sstevel@tonic-gate * zero refers to the smallest page size in the system, and as index increases 3897c478bd9Sstevel@tonic-gate * it refers to the next larger supported page size in the system. 3907c478bd9Sstevel@tonic-gate * Note that szc and userszc may not be the same due to unsupported szc's on 3917c478bd9Sstevel@tonic-gate * some systems. 3927c478bd9Sstevel@tonic-gate */ 3937c478bd9Sstevel@tonic-gate size_t 3947c478bd9Sstevel@tonic-gate page_get_user_pagesize(uint_t userszc) 3957c478bd9Sstevel@tonic-gate { 3967c478bd9Sstevel@tonic-gate uint_t szc = USERSZC_2_SZC(userszc); 3977c478bd9Sstevel@tonic-gate 3987c478bd9Sstevel@tonic-gate if (szc >= mmu_page_sizes) 3997c478bd9Sstevel@tonic-gate panic("page_get_user_pagesize: out of range %d", szc); 4007c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_size); 4017c478bd9Sstevel@tonic-gate } 4027c478bd9Sstevel@tonic-gate 4037c478bd9Sstevel@tonic-gate uint_t 4047c478bd9Sstevel@tonic-gate page_get_shift(uint_t szc) 4057c478bd9Sstevel@tonic-gate { 4067c478bd9Sstevel@tonic-gate if (szc >= mmu_page_sizes) 4077c478bd9Sstevel@tonic-gate panic("page_get_shift: out of range %d", szc); 4087c478bd9Sstevel@tonic-gate return (hw_page_array[szc].hp_shift); 4097c478bd9Sstevel@tonic-gate } 4107c478bd9Sstevel@tonic-gate 4117c478bd9Sstevel@tonic-gate uint_t 4127c478bd9Sstevel@tonic-gate page_get_pagecolors(uint_t szc) 4137c478bd9Sstevel@tonic-gate { 4147c478bd9Sstevel@tonic-gate ASSERT(page_colors != 0); 4157c478bd9Sstevel@tonic-gate return (MAX(page_colors >> PAGE_BSZS_SHIFT(szc), 1)); 4167c478bd9Sstevel@tonic-gate } 4177c478bd9Sstevel@tonic-gate 
4187c478bd9Sstevel@tonic-gate /* 4197c478bd9Sstevel@tonic-gate * Called by startup(). 4207c478bd9Sstevel@tonic-gate * Size up the per page size free list counters based on physmax 4217c478bd9Sstevel@tonic-gate * of each node and max_mem_nodes. 4227c478bd9Sstevel@tonic-gate */ 4237c478bd9Sstevel@tonic-gate size_t 4247c478bd9Sstevel@tonic-gate page_ctrs_sz(void) 4257c478bd9Sstevel@tonic-gate { 4267c478bd9Sstevel@tonic-gate int r; /* region size */ 4277c478bd9Sstevel@tonic-gate int mnode; 4287c478bd9Sstevel@tonic-gate uint_t ctrs_sz = 0; 4297c478bd9Sstevel@tonic-gate int i; 4307c478bd9Sstevel@tonic-gate pgcnt_t colors_per_szc[MMU_PAGE_SIZES]; 4317c478bd9Sstevel@tonic-gate 4327c478bd9Sstevel@tonic-gate /* 4337c478bd9Sstevel@tonic-gate * We need to determine how many page colors there are for each 4347c478bd9Sstevel@tonic-gate * page size in order to allocate memory for any color specific 4357c478bd9Sstevel@tonic-gate * arrays. 4367c478bd9Sstevel@tonic-gate */ 4377c478bd9Sstevel@tonic-gate colors_per_szc[0] = page_colors; 4387c478bd9Sstevel@tonic-gate for (i = 1; i < mmu_page_sizes; i++) { 4397c478bd9Sstevel@tonic-gate colors_per_szc[i] = 4407c478bd9Sstevel@tonic-gate page_convert_color(0, i, page_colors - 1) + 1; 4417c478bd9Sstevel@tonic-gate } 4427c478bd9Sstevel@tonic-gate 4437c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 4447c478bd9Sstevel@tonic-gate 4457c478bd9Sstevel@tonic-gate pgcnt_t r_pgcnt; 4467c478bd9Sstevel@tonic-gate pfn_t r_base; 4477c478bd9Sstevel@tonic-gate pgcnt_t r_align; 4487c478bd9Sstevel@tonic-gate 4497c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].exists == 0) 4507c478bd9Sstevel@tonic-gate continue; 4517c478bd9Sstevel@tonic-gate 4527c478bd9Sstevel@tonic-gate /* 4537c478bd9Sstevel@tonic-gate * determine size needed for page counter arrays with 4547c478bd9Sstevel@tonic-gate * base aligned to large page size. 
4557c478bd9Sstevel@tonic-gate */ 4567c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 4577c478bd9Sstevel@tonic-gate /* add in space for hpm_counters */ 4587c478bd9Sstevel@tonic-gate r_align = page_get_pagecnt(r); 4597c478bd9Sstevel@tonic-gate r_base = mem_node_config[mnode].physbase; 4607c478bd9Sstevel@tonic-gate r_base &= ~(r_align - 1); 4617c478bd9Sstevel@tonic-gate r_pgcnt = howmany(mem_node_config[mnode].physmax - 4627c478bd9Sstevel@tonic-gate r_base, r_align); 4637c478bd9Sstevel@tonic-gate /* 4647c478bd9Sstevel@tonic-gate * Round up to always allocate on pointer sized 4657c478bd9Sstevel@tonic-gate * boundaries. 4667c478bd9Sstevel@tonic-gate */ 4677c478bd9Sstevel@tonic-gate ctrs_sz += P2ROUNDUP((r_pgcnt * sizeof (hpmctr_t)), 4687c478bd9Sstevel@tonic-gate sizeof (hpmctr_t *)); 4697c478bd9Sstevel@tonic-gate 4707c478bd9Sstevel@tonic-gate /* add in space for hpm_color_current */ 4717c478bd9Sstevel@tonic-gate ctrs_sz += (colors_per_szc[r] * 4727c478bd9Sstevel@tonic-gate sizeof (size_t)); 4737c478bd9Sstevel@tonic-gate } 4747c478bd9Sstevel@tonic-gate } 4757c478bd9Sstevel@tonic-gate 4767c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 4777c478bd9Sstevel@tonic-gate ctrs_sz += (max_mem_nodes * sizeof (hw_page_map_t)); 4787c478bd9Sstevel@tonic-gate 4797c478bd9Sstevel@tonic-gate /* add in space for page_ctrs_cands */ 4807c478bd9Sstevel@tonic-gate ctrs_sz += NPC_MUTEX * max_mem_nodes * (sizeof (pcc_info_t)); 4817c478bd9Sstevel@tonic-gate ctrs_sz += NPC_MUTEX * max_mem_nodes * colors_per_szc[r] * 4827c478bd9Sstevel@tonic-gate sizeof (pgcnt_t); 4837c478bd9Sstevel@tonic-gate } 4847c478bd9Sstevel@tonic-gate 4857c478bd9Sstevel@tonic-gate /* ctr_mutex */ 4867c478bd9Sstevel@tonic-gate ctrs_sz += (max_mem_nodes * NPC_MUTEX * sizeof (kmutex_t)); 4877c478bd9Sstevel@tonic-gate 4887c478bd9Sstevel@tonic-gate /* size for page list counts */ 4897c478bd9Sstevel@tonic-gate PLCNT_SZ(ctrs_sz); 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate /* 
4927c478bd9Sstevel@tonic-gate * add some slop for roundups. page_ctrs_alloc will roundup the start 4937c478bd9Sstevel@tonic-gate * address of the counters to ecache_alignsize boundary for every 4947c478bd9Sstevel@tonic-gate * memory node. 4957c478bd9Sstevel@tonic-gate */ 4967c478bd9Sstevel@tonic-gate return (ctrs_sz + max_mem_nodes * L2CACHE_ALIGN); 4977c478bd9Sstevel@tonic-gate } 4987c478bd9Sstevel@tonic-gate 4997c478bd9Sstevel@tonic-gate caddr_t 5007c478bd9Sstevel@tonic-gate page_ctrs_alloc(caddr_t alloc_base) 5017c478bd9Sstevel@tonic-gate { 5027c478bd9Sstevel@tonic-gate int mnode; 5037c478bd9Sstevel@tonic-gate int r; /* region size */ 5047c478bd9Sstevel@tonic-gate int i; 5057c478bd9Sstevel@tonic-gate pgcnt_t colors_per_szc[MMU_PAGE_SIZES]; 5067c478bd9Sstevel@tonic-gate 5077c478bd9Sstevel@tonic-gate /* 5087c478bd9Sstevel@tonic-gate * We need to determine how many page colors there are for each 5097c478bd9Sstevel@tonic-gate * page size in order to allocate memory for any color specific 5107c478bd9Sstevel@tonic-gate * arrays. 
5117c478bd9Sstevel@tonic-gate */ 5127c478bd9Sstevel@tonic-gate colors_per_szc[0] = page_colors; 5137c478bd9Sstevel@tonic-gate for (i = 1; i < mmu_page_sizes; i++) { 5147c478bd9Sstevel@tonic-gate colors_per_szc[i] = 5157c478bd9Sstevel@tonic-gate page_convert_color(0, i, page_colors - 1) + 1; 5167c478bd9Sstevel@tonic-gate } 5177c478bd9Sstevel@tonic-gate 5187c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 5197c478bd9Sstevel@tonic-gate page_counters[r] = (hw_page_map_t *)alloc_base; 5207c478bd9Sstevel@tonic-gate alloc_base += (max_mem_nodes * sizeof (hw_page_map_t)); 5217c478bd9Sstevel@tonic-gate } 5227c478bd9Sstevel@tonic-gate 5237c478bd9Sstevel@tonic-gate /* page_ctrs_cands */ 5247c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 5257c478bd9Sstevel@tonic-gate for (i = 0; i < NPC_MUTEX; i++) { 5267c478bd9Sstevel@tonic-gate page_ctrs_cands[i][r] = (pcc_info_t *)alloc_base; 5277c478bd9Sstevel@tonic-gate alloc_base += max_mem_nodes * (sizeof (pcc_info_t)); 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate } 5307c478bd9Sstevel@tonic-gate } 5317c478bd9Sstevel@tonic-gate 5327c478bd9Sstevel@tonic-gate /* page_ctrs_cands pcc_color_free array */ 5337c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 5347c478bd9Sstevel@tonic-gate for (i = 0; i < NPC_MUTEX; i++) { 5357c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 5367c478bd9Sstevel@tonic-gate page_ctrs_cands[i][r][mnode].pcc_color_free_len 5377c478bd9Sstevel@tonic-gate = colors_per_szc[r]; 5387c478bd9Sstevel@tonic-gate page_ctrs_cands[i][r][mnode].pcc_color_free = 5397c478bd9Sstevel@tonic-gate (pgcnt_t *)alloc_base; 5407c478bd9Sstevel@tonic-gate alloc_base += colors_per_szc[r] * 5417c478bd9Sstevel@tonic-gate sizeof (pgcnt_t); 5427c478bd9Sstevel@tonic-gate } 5437c478bd9Sstevel@tonic-gate } 5447c478bd9Sstevel@tonic-gate } 5457c478bd9Sstevel@tonic-gate 5467c478bd9Sstevel@tonic-gate /* ctr_mutex */ 5477c478bd9Sstevel@tonic-gate for (i = 0; i < 
NPC_MUTEX; i++) { 5487c478bd9Sstevel@tonic-gate ctr_mutex[i] = (kmutex_t *)alloc_base; 5497c478bd9Sstevel@tonic-gate alloc_base += (max_mem_nodes * sizeof (kmutex_t)); 5507c478bd9Sstevel@tonic-gate } 5517c478bd9Sstevel@tonic-gate 5527c478bd9Sstevel@tonic-gate /* initialize page list counts */ 5537c478bd9Sstevel@tonic-gate PLCNT_INIT(alloc_base); 5547c478bd9Sstevel@tonic-gate 5557c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 5567c478bd9Sstevel@tonic-gate 5577c478bd9Sstevel@tonic-gate pgcnt_t r_pgcnt; 5587c478bd9Sstevel@tonic-gate pfn_t r_base; 5597c478bd9Sstevel@tonic-gate pgcnt_t r_align; 5607c478bd9Sstevel@tonic-gate int r_shift; 5617c478bd9Sstevel@tonic-gate 5627c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].exists == 0) 5637c478bd9Sstevel@tonic-gate continue; 5647c478bd9Sstevel@tonic-gate 5657c478bd9Sstevel@tonic-gate for (r = 1; r < mmu_page_sizes; r++) { 5667c478bd9Sstevel@tonic-gate /* 5677c478bd9Sstevel@tonic-gate * the page_counters base has to be aligned to the 5687c478bd9Sstevel@tonic-gate * page count of page size code r otherwise the counts 5697c478bd9Sstevel@tonic-gate * will cross large page boundaries. 
5707c478bd9Sstevel@tonic-gate */ 5717c478bd9Sstevel@tonic-gate r_align = page_get_pagecnt(r); 5727c478bd9Sstevel@tonic-gate r_base = mem_node_config[mnode].physbase; 5737c478bd9Sstevel@tonic-gate /* base needs to be aligned - lower to aligned value */ 5747c478bd9Sstevel@tonic-gate r_base &= ~(r_align - 1); 5757c478bd9Sstevel@tonic-gate r_pgcnt = howmany(mem_node_config[mnode].physmax - 5767c478bd9Sstevel@tonic-gate r_base, r_align); 5777c478bd9Sstevel@tonic-gate r_shift = PAGE_BSZS_SHIFT(r); 5787c478bd9Sstevel@tonic-gate 5797c478bd9Sstevel@tonic-gate PAGE_COUNTERS_SHIFT(mnode, r) = r_shift; 5807c478bd9Sstevel@tonic-gate PAGE_COUNTERS_ENTRIES(mnode, r) = r_pgcnt; 5817c478bd9Sstevel@tonic-gate PAGE_COUNTERS_BASE(mnode, r) = r_base; 5827c478bd9Sstevel@tonic-gate PAGE_COUNTERS_CURRENT_COLOR_LEN(mnode, r) = 5837c478bd9Sstevel@tonic-gate colors_per_szc[r]; 5847c478bd9Sstevel@tonic-gate PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, r) = 5857c478bd9Sstevel@tonic-gate (size_t *)alloc_base; 5867c478bd9Sstevel@tonic-gate alloc_base += (sizeof (size_t) * colors_per_szc[r]); 5877c478bd9Sstevel@tonic-gate for (i = 0; i < colors_per_szc[r]; i++) { 5887c478bd9Sstevel@tonic-gate PAGE_COUNTERS_CURRENT_COLOR(mnode, r, i) = i; 5897c478bd9Sstevel@tonic-gate } 5907c478bd9Sstevel@tonic-gate PAGE_COUNTERS_COUNTERS(mnode, r) = 5917c478bd9Sstevel@tonic-gate (hpmctr_t *)alloc_base; 5927c478bd9Sstevel@tonic-gate /* 5937c478bd9Sstevel@tonic-gate * Round up to make alloc_base always be aligned on 5947c478bd9Sstevel@tonic-gate * a pointer boundary. 5957c478bd9Sstevel@tonic-gate */ 5967c478bd9Sstevel@tonic-gate alloc_base += P2ROUNDUP((sizeof (hpmctr_t) * r_pgcnt), 5977c478bd9Sstevel@tonic-gate sizeof (hpmctr_t *)); 5987c478bd9Sstevel@tonic-gate 5997c478bd9Sstevel@tonic-gate /* 6007c478bd9Sstevel@tonic-gate * Verify that PNUM_TO_IDX and IDX_TO_PNUM 6017c478bd9Sstevel@tonic-gate * satisfy the identity requirement. 
6027c478bd9Sstevel@tonic-gate * We should be able to go from one to the other 6037c478bd9Sstevel@tonic-gate * and get consistent values. 6047c478bd9Sstevel@tonic-gate */ 6057c478bd9Sstevel@tonic-gate ASSERT(PNUM_TO_IDX(mnode, r, 6067c478bd9Sstevel@tonic-gate (IDX_TO_PNUM(mnode, r, 0))) == 0); 6077c478bd9Sstevel@tonic-gate ASSERT(IDX_TO_PNUM(mnode, r, 6087c478bd9Sstevel@tonic-gate (PNUM_TO_IDX(mnode, r, r_base))) == r_base); 6097c478bd9Sstevel@tonic-gate } 6107c478bd9Sstevel@tonic-gate /* 6117c478bd9Sstevel@tonic-gate * Roundup the start address of the page_counters to 6127c478bd9Sstevel@tonic-gate * cache aligned boundary for every memory node. 6137c478bd9Sstevel@tonic-gate * page_ctrs_sz() has added some slop for these roundups. 6147c478bd9Sstevel@tonic-gate */ 6157c478bd9Sstevel@tonic-gate alloc_base = (caddr_t)P2ROUNDUP((uintptr_t)alloc_base, 6167c478bd9Sstevel@tonic-gate L2CACHE_ALIGN); 6177c478bd9Sstevel@tonic-gate } 6187c478bd9Sstevel@tonic-gate 6197c478bd9Sstevel@tonic-gate /* Initialize other page counter specific data structures. */ 6207c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < MAX_MEM_NODES; mnode++) { 6217c478bd9Sstevel@tonic-gate rw_init(&page_ctrs_rwlock[mnode], NULL, RW_DEFAULT, NULL); 6227c478bd9Sstevel@tonic-gate } 6237c478bd9Sstevel@tonic-gate 6247c478bd9Sstevel@tonic-gate return (alloc_base); 6257c478bd9Sstevel@tonic-gate } 6267c478bd9Sstevel@tonic-gate 6277c478bd9Sstevel@tonic-gate /* 6287c478bd9Sstevel@tonic-gate * Functions to adjust region counters for each size free list. 6297c478bd9Sstevel@tonic-gate * Caller is responsible to acquire the ctr_mutex lock if necessary and 6307c478bd9Sstevel@tonic-gate * thus can be called during startup without locks. 
 */
/* ARGSUSED */
void
page_ctr_add_internal(int mnode, page_t *pp, int flags)
{
	ssize_t	r;		/* region size */
	ssize_t idx;
	pfn_t	pfnum;
	int	lckidx;

	ASSERT(pp->p_szc < mmu_page_sizes);

	PLCNT_INCR(pp, mnode, pp->p_szc, flags);

	/* no counter update needed for largest page size */
	if (pp->p_szc >= mmu_page_sizes - 1) {
		return;
	}

	/* start at the region one size code larger than the page itself */
	r = pp->p_szc + 1;
	pfnum = pp->p_pagenum;
	lckidx = PP_CTR_LOCK_INDX(pp);

	/*
	 * Increment the count of free pages for the current
	 * region. Continue looping up in region size incrementing
	 * count if the preceding region is full.
	 */
	while (r < mmu_page_sizes) {
		idx = PNUM_TO_IDX(mnode, r, pfnum);

		ASSERT(idx < PAGE_COUNTERS_ENTRIES(mnode, r));
		ASSERT(PAGE_COUNTERS(mnode, r, idx) < FULL_REGION_CNT(r));

		if (++PAGE_COUNTERS(mnode, r, idx) != FULL_REGION_CNT(r))
			break;

		/*
		 * Region became full: it is now a coalescing candidate,
		 * so record it in page_ctrs_cands and keep walking up.
		 */
		page_ctrs_cands[lckidx][r][mnode].pcc_pages_free++;
		page_ctrs_cands[lckidx][r][mnode].
		    pcc_color_free[PP_2_BIN_SZC(pp, r)]++;
		r++;
	}
}

/*
 * Add pp to the region counters while holding the per-lock-index,
 * per-memory-node ctr_mutex.  Locked wrapper around
 * page_ctr_add_internal().
 */
void
page_ctr_add(page_t *pp, int flags)
{
	int lckidx = PP_CTR_LOCK_INDX(pp);
	int mnode = PP_2_MEM_NODE(pp);
	kmutex_t *lock = &ctr_mutex[lckidx][mnode];

	mutex_enter(lock);
	page_ctr_add_internal(mnode, pp, flags);
	mutex_exit(lock);
}

/*
 * Remove pp from the region counters (inverse of page_ctr_add),
 * taking the same ctr_mutex to serialize counter updates.
 */
void
page_ctr_sub(page_t *pp, int flags)
{
	int		lckidx;
	int		mnode = PP_2_MEM_NODE(pp);
	kmutex_t	*lock;
	ssize_t		r;	/* region size */
	ssize_t		idx;
	pfn_t		pfnum;

	ASSERT(pp->p_szc < mmu_page_sizes);

	PLCNT_DECR(pp, mnode, pp->p_szc, flags);

	/* no counter update needed for largest page size */
	if (pp->p_szc >= mmu_page_sizes - 1) {
		return;
	}

	r = pp->p_szc + 1;
	pfnum = pp->p_pagenum;
	lckidx = PP_CTR_LOCK_INDX(pp);
	lock = &ctr_mutex[lckidx][mnode];

	/*
	 * Decrement the count of free pages for the current
	 * region. Continue looping up in region size decrementing
	 * count if the preceding region was full.
	 */
	mutex_enter(lock);
	while (r < mmu_page_sizes) {
		idx = PNUM_TO_IDX(mnode, r, pfnum);

		ASSERT(idx < PAGE_COUNTERS_ENTRIES(mnode, r));
		ASSERT(PAGE_COUNTERS(mnode, r, idx) > 0);

		/*
		 * If the region was not full before this decrement, the
		 * larger regions were not counted as candidates either,
		 * so stop walking up.
		 */
		if (--PAGE_COUNTERS(mnode, r, idx) != FULL_REGION_CNT(r) - 1) {
			break;
		}
		ASSERT(page_ctrs_cands[lckidx][r][mnode].pcc_pages_free != 0);
		ASSERT(page_ctrs_cands[lckidx][r][mnode].
		    pcc_color_free[PP_2_BIN_SZC(pp, r)] != 0);

		page_ctrs_cands[lckidx][r][mnode].pcc_pages_free--;
		page_ctrs_cands[lckidx][r][mnode].
		    pcc_color_free[PP_2_BIN_SZC(pp, r)]--;
		r++;
	}
	mutex_exit(lock);
}

/*
 * Adjust page counters following a memory attach, since typically the
 * size of the array needs to change, and the PFN to counter index
 * mapping needs to change.
 *
 * Returns 0 on success or ENOMEM if the replacement arrays could not
 * be allocated.
 */
uint_t
page_ctrs_adjust(int mnode)
{
	pgcnt_t npgs;
	int	r;		/* region size */
	int	i;
	size_t	pcsz, old_csz;
	hpmctr_t *new_ctr, *old_ctr;
	pfn_t	oldbase, newbase;
	size_t	old_npgs;
	hpmctr_t *ctr_cache[MMU_PAGE_SIZES];
	size_t	size_cache[MMU_PAGE_SIZES];
	size_t	*color_cache[MMU_PAGE_SIZES];
	size_t	*old_color_array;
	pgcnt_t	colors_per_szc[MMU_PAGE_SIZES];

	newbase = mem_node_config[mnode].physbase & ~PC_BASE_ALIGN_MASK;
	npgs = roundup(mem_node_config[mnode].physmax,
	    PC_BASE_ALIGN) - newbase;

	/*
	 * We need to determine how many page colors there are for each
	 * page size in order to allocate memory for any color specific
	 * arrays.
	 */
	colors_per_szc[0] = page_colors;
	for (r = 1; r < mmu_page_sizes; r++) {
		colors_per_szc[r] =
		    page_convert_color(0, r, page_colors - 1) + 1;
	}

	/*
	 * Preallocate all of the new hpm_counters arrays as we can't
	 * hold the page_ctrs_rwlock as a writer and allocate memory.
	 * If we can't allocate all of the arrays, undo our work so far
	 * and return failure.
	 */
	for (r = 1; r < mmu_page_sizes; r++) {
		pcsz = npgs >> PAGE_BSZS_SHIFT(r);

		ctr_cache[r] = kmem_zalloc(pcsz *
		    sizeof (hpmctr_t), KM_NOSLEEP);
		if (ctr_cache[r] == NULL) {
			/* free the arrays allocated so far and bail */
			while (--r >= 1) {
				kmem_free(ctr_cache[r],
				    size_cache[r] * sizeof (hpmctr_t));
			}
			return (ENOMEM);
		}
		size_cache[r] = pcsz;
	}
	/*
	 * Preallocate all of the new color current arrays as we can't
	 * hold the page_ctrs_rwlock as a writer and allocate memory.
	 * If we can't allocate all of the arrays, undo our work so far
	 * and return failure.
	 */
	for (r = 1; r < mmu_page_sizes; r++) {
		color_cache[r] = kmem_zalloc(sizeof (size_t) *
		    colors_per_szc[r], KM_NOSLEEP);
		if (color_cache[r] == NULL) {
			/* undo the color arrays, then all counter arrays */
			while (--r >= 1) {
				kmem_free(color_cache[r],
				    colors_per_szc[r] * sizeof (size_t));
			}
			for (r = 1; r < mmu_page_sizes; r++) {
				kmem_free(ctr_cache[r],
				    size_cache[r] * sizeof (hpmctr_t));
			}
			return (ENOMEM);
		}
	}

	/*
	 * Grab the write lock to prevent others from walking these arrays
	 * while we are modifying them.
	 */
	rw_enter(&page_ctrs_rwlock[mnode], RW_WRITER);
	page_freelist_lock(mnode);
	for (r = 1; r < mmu_page_sizes; r++) {
		PAGE_COUNTERS_SHIFT(mnode, r) = PAGE_BSZS_SHIFT(r);
		old_ctr = PAGE_COUNTERS_COUNTERS(mnode, r);
		old_csz = PAGE_COUNTERS_ENTRIES(mnode, r);
		oldbase = PAGE_COUNTERS_BASE(mnode, r);
		old_npgs = old_csz << PAGE_COUNTERS_SHIFT(mnode, r);
		old_color_array = PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, r);

		pcsz = npgs >> PAGE_COUNTERS_SHIFT(mnode, r);
		new_ctr = ctr_cache[r];
		ctr_cache[r] = NULL;
		if (old_ctr != NULL &&
		    (oldbase + old_npgs > newbase) &&
		    (newbase + npgs > oldbase)) {
			/*
			 * Map the intersection of the old and new
			 * counters into the new array.
			 */
			size_t offset;
			if (newbase > oldbase) {
				offset = (newbase - oldbase) >>
				    PAGE_COUNTERS_SHIFT(mnode, r);
				bcopy(old_ctr + offset, new_ctr,
				    MIN(pcsz, (old_csz - offset)) *
				    sizeof (hpmctr_t));
			} else {
				offset = (oldbase - newbase) >>
				    PAGE_COUNTERS_SHIFT(mnode, r);
				bcopy(old_ctr, new_ctr + offset,
				    MIN(pcsz - offset, old_csz) *
				    sizeof (hpmctr_t));
			}
		}

		PAGE_COUNTERS_COUNTERS(mnode, r) = new_ctr;
		PAGE_COUNTERS_ENTRIES(mnode, r) = pcsz;
		PAGE_COUNTERS_BASE(mnode, r) = newbase;
		PAGE_COUNTERS_CURRENT_COLOR_LEN(mnode, r) = colors_per_szc[r];
		PAGE_COUNTERS_CURRENT_COLOR_ARRAY(mnode, r) = color_cache[r];
		color_cache[r] = NULL;
		/*
		 * for now, just reset on these events as it's probably
		 * not worthwhile to try and optimize this.
		 */
		for (i = 0; i < colors_per_szc[r]; i++) {
			PAGE_COUNTERS_CURRENT_COLOR(mnode, r, i) = i;
		}

		/*
		 * cache info for freeing out of the critical path.
		 * NOTE(review): the kernelheap range check presumably
		 * filters out arrays carved out at boot that were never
		 * kmem-allocated and so must not be kmem_free'd — confirm.
		 */
		if ((caddr_t)old_ctr >= kernelheap &&
		    (caddr_t)old_ctr < ekernelheap) {
			ctr_cache[r] = old_ctr;
			size_cache[r] = old_csz;
		}
		if ((caddr_t)old_color_array >= kernelheap &&
		    (caddr_t)old_color_array < ekernelheap) {
			color_cache[r] = old_color_array;
		}
		/*
		 * Verify that PNUM_TO_IDX and IDX_TO_PNUM
		 * satisfy the identity requirement.
		 * We should be able to go from one to the other
		 * and get consistent values.
		 */
		ASSERT(PNUM_TO_IDX(mnode, r,
		    (IDX_TO_PNUM(mnode, r, 0))) == 0);
		ASSERT(IDX_TO_PNUM(mnode, r,
		    (PNUM_TO_IDX(mnode, r, newbase))) == newbase);
	}
	page_freelist_unlock(mnode);
	rw_exit(&page_ctrs_rwlock[mnode]);

	/*
	 * Now that we have dropped the write lock, it is safe to free all
	 * of the memory we have cached above.
	 */
	for (r = 1; r < mmu_page_sizes; r++) {
		if (ctr_cache[r] != NULL) {
			kmem_free(ctr_cache[r],
			    size_cache[r] * sizeof (hpmctr_t));
		}
		if (color_cache[r] != NULL) {
			kmem_free(color_cache[r],
			    colors_per_szc[r] * sizeof (size_t));
		}
	}
	return (0);
}

/*
 * color contains a valid color index or bin for cur_szc.
 * Rescale it to the equivalent color for new_szc by shifting with
 * the difference of the two page-size shifts.
 */
uint_t
page_convert_color(uchar_t cur_szc, uchar_t new_szc, uint_t color)
{
	uint_t shift;

	if (cur_szc > new_szc) {
		shift = page_get_shift(cur_szc) - page_get_shift(new_szc);
		return (color << shift);
	} else if (cur_szc < new_szc) {
		shift = page_get_shift(new_szc) - page_get_shift(cur_szc);
		return (color >> shift);
	}
	return (color);
}

#ifdef DEBUG

/*
 * confirm pp is a large page corresponding to szc
 */
void
chk_lpg(page_t *pp, uchar_t szc)
{
	spgcnt_t npgs = page_get_pagecnt(pp->p_szc);
	uint_t noreloc;

	if (npgs == 1) {
		ASSERT(pp->p_szc == 0);
		ASSERT(pp->p_next == pp);
		ASSERT(pp->p_prev == pp);
		return;
	}

	ASSERT(pp->p_vpnext == pp || pp->p_vpnext == NULL);
	ASSERT(pp->p_vpprev == pp || pp->p_vpprev == NULL);

	/* first constituent page must be aligned and linked in pfn order */
	ASSERT(IS_P2ALIGNED(pp->p_pagenum, npgs));
	ASSERT(pp->p_pagenum == (pp->p_next->p_pagenum - 1));
	ASSERT(pp->p_prev->p_pagenum == (pp->p_pagenum + (npgs - 1)));
	ASSERT(pp->p_prev == (pp + (npgs - 1)));

	/*
	 * Check list of pages.
	 */
	noreloc = PP_ISNORELOC(pp);
	while (npgs--) {
		if (npgs != 0) {
			ASSERT(pp->p_pagenum == pp->p_next->p_pagenum - 1);
			ASSERT(pp->p_next == (pp + 1));
		}
		ASSERT(pp->p_szc == szc);
		ASSERT(PP_ISFREE(pp));
		ASSERT(PP_ISAGED(pp));
		ASSERT(pp->p_vpnext == pp || pp->p_vpnext == NULL);
		ASSERT(pp->p_vpprev == pp || pp->p_vpprev == NULL);
		ASSERT(pp->p_vnode == NULL);
		ASSERT(PP_ISNORELOC(pp) == noreloc);

		pp = pp->p_next;
	}
}
#endif /* DEBUG */

/*
 * Acquire every FPC/CPC mutex pair for the given memory node.
 */
void
page_freelist_lock(int mnode)
{
	int i;
	for (i = 0; i < NPC_MUTEX; i++) {
		mutex_enter(FPC_MUTEX(mnode, i));
		mutex_enter(CPC_MUTEX(mnode, i));
	}
}

/*
 * Release every FPC/CPC mutex pair for the given memory node.
 */
void
page_freelist_unlock(int mnode)
{
	int i;
	for (i = 0; i < NPC_MUTEX; i++) {
		mutex_exit(FPC_MUTEX(mnode, i));
		mutex_exit(CPC_MUTEX(mnode, i));
	}
}

/*
 *
add pp to the specified page list. Defaults to head of the page list 9997c478bd9Sstevel@tonic-gate * unless PG_LIST_TAIL is specified. 10007c478bd9Sstevel@tonic-gate */ 10017c478bd9Sstevel@tonic-gate void 10027c478bd9Sstevel@tonic-gate page_list_add(page_t *pp, int flags) 10037c478bd9Sstevel@tonic-gate { 10047c478bd9Sstevel@tonic-gate page_t **ppp; 10057c478bd9Sstevel@tonic-gate kmutex_t *pcm; 10067c478bd9Sstevel@tonic-gate uint_t bin, mtype; 10077c478bd9Sstevel@tonic-gate int mnode; 10087c478bd9Sstevel@tonic-gate 10097c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp) || (flags & PG_LIST_ISINIT)); 10107c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 10117c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(pp)); 10127c478bd9Sstevel@tonic-gate ASSERT(hat_page_getshare(pp) == 0); 10137c478bd9Sstevel@tonic-gate 10147c478bd9Sstevel@tonic-gate /* 10157c478bd9Sstevel@tonic-gate * Large pages should be freed via page_list_add_pages(). 10167c478bd9Sstevel@tonic-gate */ 10177c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 10187c478bd9Sstevel@tonic-gate 10197c478bd9Sstevel@tonic-gate /* 10207c478bd9Sstevel@tonic-gate * Don't need to lock the freelist first here 10217c478bd9Sstevel@tonic-gate * because the page isn't on the freelist yet. 10227c478bd9Sstevel@tonic-gate * This means p_szc can't change on us. 10237c478bd9Sstevel@tonic-gate */ 10247c478bd9Sstevel@tonic-gate 10257c478bd9Sstevel@tonic-gate bin = PP_2_BIN(pp); 10267c478bd9Sstevel@tonic-gate mnode = PP_2_MEM_NODE(pp); 10277c478bd9Sstevel@tonic-gate mtype = PP_2_MTYPE(pp); 10287c478bd9Sstevel@tonic-gate 10297c478bd9Sstevel@tonic-gate if (flags & PG_LIST_ISINIT) { 10307c478bd9Sstevel@tonic-gate /* 10317c478bd9Sstevel@tonic-gate * PG_LIST_ISINIT is set during system startup (ie. 
single 10327c478bd9Sstevel@tonic-gate * threaded), add a page to the free list and add to the 10337c478bd9Sstevel@tonic-gate * the free region counters w/o any locking 10347c478bd9Sstevel@tonic-gate */ 10357c478bd9Sstevel@tonic-gate ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); 10367c478bd9Sstevel@tonic-gate 10377c478bd9Sstevel@tonic-gate /* inline version of page_add() */ 10387c478bd9Sstevel@tonic-gate if (*ppp != NULL) { 10397c478bd9Sstevel@tonic-gate pp->p_next = *ppp; 10407c478bd9Sstevel@tonic-gate pp->p_prev = (*ppp)->p_prev; 10417c478bd9Sstevel@tonic-gate (*ppp)->p_prev = pp; 10427c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp; 10437c478bd9Sstevel@tonic-gate } else 10447c478bd9Sstevel@tonic-gate *ppp = pp; 10457c478bd9Sstevel@tonic-gate 10467c478bd9Sstevel@tonic-gate page_ctr_add_internal(mnode, pp, flags); 10477c478bd9Sstevel@tonic-gate } else { 10487c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, flags); 10497c478bd9Sstevel@tonic-gate 10507c478bd9Sstevel@tonic-gate if (flags & PG_FREE_LIST) { 10517c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 10527c478bd9Sstevel@tonic-gate ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); 10537c478bd9Sstevel@tonic-gate 10547c478bd9Sstevel@tonic-gate } else { 10557c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode); 10567c478bd9Sstevel@tonic-gate ASSERT((pp->p_offset & PAGEOFFSET) == 0); 10577c478bd9Sstevel@tonic-gate ppp = &PAGE_CACHELISTS(mnode, bin, mtype); 10587c478bd9Sstevel@tonic-gate } 10597c478bd9Sstevel@tonic-gate mutex_enter(pcm); 10607c478bd9Sstevel@tonic-gate page_add(ppp, pp); 10617c478bd9Sstevel@tonic-gate 10627c478bd9Sstevel@tonic-gate if (flags & PG_LIST_TAIL) 10637c478bd9Sstevel@tonic-gate *ppp = (*ppp)->p_next; 10647c478bd9Sstevel@tonic-gate /* 10657c478bd9Sstevel@tonic-gate * Add counters before releasing pcm mutex to avoid a race with 10667c478bd9Sstevel@tonic-gate * page_freelist_coalesce and page_freelist_fill. 
10677c478bd9Sstevel@tonic-gate */ 10687c478bd9Sstevel@tonic-gate page_ctr_add(pp, flags); 10697c478bd9Sstevel@tonic-gate mutex_exit(pcm); 10707c478bd9Sstevel@tonic-gate } 10717c478bd9Sstevel@tonic-gate 10727c478bd9Sstevel@tonic-gate 10737c478bd9Sstevel@tonic-gate #if defined(__sparc) 10747c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 10757c478bd9Sstevel@tonic-gate kcage_freemem_add(1); 10767c478bd9Sstevel@tonic-gate } 10777c478bd9Sstevel@tonic-gate #endif 10787c478bd9Sstevel@tonic-gate /* 10797c478bd9Sstevel@tonic-gate * It is up to the caller to unlock the page! 10807c478bd9Sstevel@tonic-gate */ 10817c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp) || (flags & PG_LIST_ISINIT)); 10827c478bd9Sstevel@tonic-gate } 10837c478bd9Sstevel@tonic-gate 10847c478bd9Sstevel@tonic-gate 10857c478bd9Sstevel@tonic-gate #ifdef __sparc 10867c478bd9Sstevel@tonic-gate /* 10877c478bd9Sstevel@tonic-gate * This routine is only used by kcage_init during system startup. 10887c478bd9Sstevel@tonic-gate * It performs the function of page_list_sub/PP_SETNORELOC/page_list_add 10897c478bd9Sstevel@tonic-gate * without the overhead of taking locks and updating counters. 10907c478bd9Sstevel@tonic-gate */ 10917c478bd9Sstevel@tonic-gate void 10927c478bd9Sstevel@tonic-gate page_list_noreloc_startup(page_t *pp) 10937c478bd9Sstevel@tonic-gate { 10947c478bd9Sstevel@tonic-gate page_t **ppp; 10957c478bd9Sstevel@tonic-gate uint_t bin; 10967c478bd9Sstevel@tonic-gate int mnode; 10977c478bd9Sstevel@tonic-gate int mtype; 10987c478bd9Sstevel@tonic-gate int flags = PG_LIST_ISCAGE; 10997c478bd9Sstevel@tonic-gate 11007c478bd9Sstevel@tonic-gate /* 11017c478bd9Sstevel@tonic-gate * If this is a large page on the freelist then 11027c478bd9Sstevel@tonic-gate * break it up into smaller pages. 
11037c478bd9Sstevel@tonic-gate */ 11047c478bd9Sstevel@tonic-gate if (pp->p_szc != 0) 11057c478bd9Sstevel@tonic-gate page_boot_demote(pp); 11067c478bd9Sstevel@tonic-gate 11077c478bd9Sstevel@tonic-gate /* 11087c478bd9Sstevel@tonic-gate * Get list page is currently on. 11097c478bd9Sstevel@tonic-gate */ 11107c478bd9Sstevel@tonic-gate bin = PP_2_BIN(pp); 11117c478bd9Sstevel@tonic-gate mnode = PP_2_MEM_NODE(pp); 11127c478bd9Sstevel@tonic-gate mtype = PP_2_MTYPE(pp); 11137c478bd9Sstevel@tonic-gate ASSERT(mtype == MTYPE_RELOC); 11147c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 11157c478bd9Sstevel@tonic-gate 11167c478bd9Sstevel@tonic-gate if (PP_ISAGED(pp)) { 11177c478bd9Sstevel@tonic-gate ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); 11187c478bd9Sstevel@tonic-gate flags |= PG_FREE_LIST; 11197c478bd9Sstevel@tonic-gate } else { 11207c478bd9Sstevel@tonic-gate ppp = &PAGE_CACHELISTS(mnode, bin, mtype); 11217c478bd9Sstevel@tonic-gate flags |= PG_CACHE_LIST; 11227c478bd9Sstevel@tonic-gate } 11237c478bd9Sstevel@tonic-gate 11247c478bd9Sstevel@tonic-gate ASSERT(*ppp != NULL); 11257c478bd9Sstevel@tonic-gate 11267c478bd9Sstevel@tonic-gate /* 11277c478bd9Sstevel@tonic-gate * Delete page from current list. 11287c478bd9Sstevel@tonic-gate */ 11297c478bd9Sstevel@tonic-gate if (*ppp == pp) 11307c478bd9Sstevel@tonic-gate *ppp = pp->p_next; /* go to next page */ 11317c478bd9Sstevel@tonic-gate if (*ppp == pp) { 11327c478bd9Sstevel@tonic-gate *ppp = NULL; /* page list is gone */ 11337c478bd9Sstevel@tonic-gate } else { 11347c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp->p_next; 11357c478bd9Sstevel@tonic-gate pp->p_next->p_prev = pp->p_prev; 11367c478bd9Sstevel@tonic-gate } 11377c478bd9Sstevel@tonic-gate 11387c478bd9Sstevel@tonic-gate /* LINTED */ 11397c478bd9Sstevel@tonic-gate PLCNT_DECR(pp, mnode, 0, flags); 11407c478bd9Sstevel@tonic-gate 11417c478bd9Sstevel@tonic-gate /* 11427c478bd9Sstevel@tonic-gate * Set no reloc for cage initted pages. 
11437c478bd9Sstevel@tonic-gate */ 11447c478bd9Sstevel@tonic-gate PP_SETNORELOC(pp); 11457c478bd9Sstevel@tonic-gate 11467c478bd9Sstevel@tonic-gate mtype = PP_2_MTYPE(pp); 11477c478bd9Sstevel@tonic-gate ASSERT(mtype == MTYPE_NORELOC); 11487c478bd9Sstevel@tonic-gate 11497c478bd9Sstevel@tonic-gate /* 11507c478bd9Sstevel@tonic-gate * Get new list for page. 11517c478bd9Sstevel@tonic-gate */ 11527c478bd9Sstevel@tonic-gate if (PP_ISAGED(pp)) { 11537c478bd9Sstevel@tonic-gate ppp = &PAGE_FREELISTS(mnode, 0, bin, mtype); 11547c478bd9Sstevel@tonic-gate } else { 11557c478bd9Sstevel@tonic-gate ppp = &PAGE_CACHELISTS(mnode, bin, mtype); 11567c478bd9Sstevel@tonic-gate } 11577c478bd9Sstevel@tonic-gate 11587c478bd9Sstevel@tonic-gate /* 11597c478bd9Sstevel@tonic-gate * Insert page on new list. 11607c478bd9Sstevel@tonic-gate */ 11617c478bd9Sstevel@tonic-gate if (*ppp == NULL) { 11627c478bd9Sstevel@tonic-gate *ppp = pp; 11637c478bd9Sstevel@tonic-gate pp->p_next = pp->p_prev = pp; 11647c478bd9Sstevel@tonic-gate } else { 11657c478bd9Sstevel@tonic-gate pp->p_next = *ppp; 11667c478bd9Sstevel@tonic-gate pp->p_prev = (*ppp)->p_prev; 11677c478bd9Sstevel@tonic-gate (*ppp)->p_prev = pp; 11687c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp; 11697c478bd9Sstevel@tonic-gate } 11707c478bd9Sstevel@tonic-gate 11717c478bd9Sstevel@tonic-gate /* LINTED */ 11727c478bd9Sstevel@tonic-gate PLCNT_INCR(pp, mnode, 0, flags); 11737c478bd9Sstevel@tonic-gate 11747c478bd9Sstevel@tonic-gate /* 11757c478bd9Sstevel@tonic-gate * Update cage freemem counter 11767c478bd9Sstevel@tonic-gate */ 11777c478bd9Sstevel@tonic-gate atomic_add_long(&kcage_freemem, 1); 11787c478bd9Sstevel@tonic-gate } 11797c478bd9Sstevel@tonic-gate #else /* __sparc */ 11807c478bd9Sstevel@tonic-gate 11817c478bd9Sstevel@tonic-gate /* ARGSUSED */ 11827c478bd9Sstevel@tonic-gate void 11837c478bd9Sstevel@tonic-gate page_list_noreloc_startup(page_t *pp) 11847c478bd9Sstevel@tonic-gate { 11857c478bd9Sstevel@tonic-gate panic("page_list_noreloc_startup: 
    should be here only for sparc");
}
#endif

/*
 * Add a large free page (pp .. pp+pgcnt-1) to the free list.
 *
 * With PG_LIST_ISINIT (boot-time initialization) the maximum-size page is
 * inserted without taking the bin mutex and without page_ctr_add();
 * otherwise the page is inserted under the per-bin freelist mutex and
 * every constituent page is then unlocked.  Only freelist/head insertion
 * is supported (asserted below).
 */
void
page_list_add_pages(page_t *pp, int flags)
{
	kmutex_t *pcm;
	pgcnt_t	pgcnt;
	uint_t	bin, mtype, i;
	int	mnode;

	/* default to freelist/head */
	ASSERT((flags & (PG_CACHE_LIST | PG_LIST_TAIL)) == 0);

	CHK_LPG(pp, pp->p_szc);
	VM_STAT_ADD(vmm_vmstats.pc_list_add_pages[pp->p_szc]);

	bin = PP_2_BIN(pp);
	mnode = PP_2_MEM_NODE(pp);
	mtype = PP_2_MTYPE(pp);

	if (flags & PG_LIST_ISINIT) {
		/*
		 * Startup insert: single-threaded, so no bin mutex is
		 * taken; only the PLCNT counter is updated.
		 */
		ASSERT(pp->p_szc == mmu_page_sizes - 1);
		page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
		ASSERT(!PP_ISNORELOC(pp));
		PLCNT_INCR(pp, mnode, pp->p_szc, flags);
	} else {

		ASSERT(pp->p_szc != 0 && pp->p_szc < mmu_page_sizes);

		pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);

		mutex_enter(pcm);
		page_vpadd(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
		page_ctr_add(pp, PG_FREE_LIST);
		mutex_exit(pcm);

		pgcnt = page_get_pagecnt(pp->p_szc);
#if defined(__sparc)
		/* Keep the kernel cage free-memory count in sync. */
		if (PP_ISNORELOC(pp))
			kcage_freemem_add(pgcnt);
#endif
		/* Drop the exclusive lock held on each constituent page. */
		for (i = 0; i < pgcnt; i++, pp++)
			page_unlock(pp);
	}
}

/*
 * During boot, need to demote a large page to base
 * pagesize pages for seg_kmem for use in boot_alloc()
 */
void
page_boot_demote(page_t *pp)
{
	ASSERT(pp->p_szc != 0);
	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));

	/* Demote all the way down to PAGESIZE pages (new_szc == 0). */
	(void) page_demote(PP_2_MEM_NODE(pp),
	    PFN_BASE(pp->p_pagenum, pp->p_szc), pp->p_szc, 0, PC_NO_COLOR,
	    PC_FREE);

	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));
	ASSERT(pp->p_szc == 0);
}

/*
 * Take a particular page off of whatever freelist the page
 * is claimed to be on.
 *
 * NOTE: Only used for PAGESIZE pages.
 */
void
page_list_sub(page_t *pp, int flags)
{
	int		bin;
	uint_t		mtype;
	int		mnode;
	kmutex_t	*pcm;
	page_t		**ppp;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(PP_ISFREE(pp));

	/*
	 * The p_szc field can only be changed by page_promote()
	 * and page_demote(). Only free pages can be promoted and
	 * demoted and the free list MUST be locked during these
	 * operations. So to prevent a race in page_list_sub()
	 * between computing which bin of the freelist lock to
	 * grab and actually grabbing the lock we check again that
	 * the bin we locked is still the correct one. Notice that
	 * the p_szc field could have actually changed on us but
	 * if the bin happens to still be the same we are safe.
	 */
try_again:
	bin = PP_2_BIN(pp);
	mnode = PP_2_MEM_NODE(pp);
	pcm = PC_BIN_MUTEX(mnode, bin, flags);
	mutex_enter(pcm);
	if (PP_2_BIN(pp) != bin) {
		/* Bin changed before we got the lock; retry with new bin. */
		mutex_exit(pcm);
		goto try_again;
	}
	mtype = PP_2_MTYPE(pp);

	if (flags & PG_FREE_LIST) {
		ASSERT(PP_ISAGED(pp));
		ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype);
	} else {
		ASSERT(!PP_ISAGED(pp));
		ppp = &PAGE_CACHELISTS(mnode, bin, mtype);
	}

	/*
	 * Common PAGESIZE case.
	 *
	 * Note that we locked the freelist. This prevents
	 * any page promotion/demotion operations. Therefore
	 * the p_szc will not change until we drop pcm mutex.
	 */
	if (pp->p_szc == 0) {
		page_sub(ppp, pp);
		/*
		 * Subtract counters before releasing pcm mutex
		 * to avoid race with page_freelist_coalesce.
		 */
		page_ctr_sub(pp, flags);
		mutex_exit(pcm);

#if defined(__sparc)
		if (PP_ISNORELOC(pp)) {
			kcage_freemem_sub(1);
		}
#endif
		return;
	}

	/*
	 * Large pages on the cache list are not supported.
	 */
	if (flags & PG_CACHE_LIST)
		panic("page_list_sub: large page on cachelist");

	/*
	 * Slow but rare.
	 *
	 * Somebody wants this particular page which is part
	 * of a large page. In this case we just demote the page
	 * if it's on the freelist.
	 *
	 * We have to drop pcm before locking the entire freelist.
	 * Once we have re-locked the freelist check to make sure
	 * the page hasn't already been demoted or completely
	 * freed.
	 */
	mutex_exit(pcm);
	page_freelist_lock(mnode);
	if (pp->p_szc != 0) {
		/*
		 * Large page is on freelist.
		 */
		(void) page_demote(mnode, PFN_BASE(pp->p_pagenum, pp->p_szc),
		    pp->p_szc, 0, PC_NO_COLOR, PC_FREE);
	}
	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));
	ASSERT(pp->p_szc == 0);

	/*
	 * Subtract counters before releasing the freelist lock
	 * to avoid race with page_freelist_coalesce.
	 */
	bin = PP_2_BIN(pp);
	mtype = PP_2_MTYPE(pp);
	ppp = &PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype);

	page_sub(ppp, pp);
	page_ctr_sub(pp, flags);
	page_freelist_unlock(mnode);

#if defined(__sparc)
	if (PP_ISNORELOC(pp)) {
		kcage_freemem_sub(1);
	}
#endif
}

/*
 * Remove a free page of size szc (or demote a larger one down to szc
 * first) from the freelist.  Caller holds the page exclusively locked.
 */
void
page_list_sub_pages(page_t *pp, uint_t szc)
{
	kmutex_t *pcm;
	uint_t	bin, mtype;
	int	mnode;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));

	/*
	 * See comment in page_list_sub().
	 */
try_again:
	bin = PP_2_BIN(pp);
	mnode = PP_2_MEM_NODE(pp);
	pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
	mutex_enter(pcm);
	if (PP_2_BIN(pp) != bin) {
		mutex_exit(pcm);
		goto try_again;
	}

	VM_STAT_ADD(vmm_vmstats.pc_list_sub_pages1[pp->p_szc]);

	/*
	 * If we're called with a page larger than szc or it got
	 * promoted above szc before we locked the freelist then
	 * drop pcm and re-lock entire freelist. If page still larger
	 * than szc then demote it.
	 */
	if (pp->p_szc > szc) {
		VM_STAT_ADD(vmm_vmstats.pc_list_sub_pages2[pp->p_szc]);
		mutex_exit(pcm);
		pcm = NULL;
		page_freelist_lock(mnode);
		if (pp->p_szc > szc) {
			VM_STAT_ADD(vmm_vmstats.pc_list_sub_pages3[pp->p_szc]);
			(void) page_demote(mnode,
			    PFN_BASE(pp->p_pagenum, pp->p_szc),
			    pp->p_szc, szc, PC_NO_COLOR, PC_FREE);
		}
		bin = PP_2_BIN(pp);
	}
	ASSERT(PP_ISFREE(pp));
	ASSERT(PP_ISAGED(pp));
	ASSERT(pp->p_szc <= szc);
	ASSERT(pp == PP_PAGEROOT(pp));

	mtype = PP_2_MTYPE(pp);
	if (pp->p_szc != 0) {
		page_vpsub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
		CHK_LPG(pp, pp->p_szc);
	} else {
		page_sub(&PAGE_FREELISTS(mnode, pp->p_szc, bin, mtype), pp);
	}
	page_ctr_sub(pp, PG_FREE_LIST);

	/* pcm == NULL means we took the slow path and hold the big lock. */
	if (pcm != NULL) {
		mutex_exit(pcm);
	} else {
		page_freelist_unlock(mnode);
	}

#if defined(__sparc)
	if (PP_ISNORELOC(pp)) {
		pgcnt_t	pgcnt;

		pgcnt = page_get_pagecnt(pp->p_szc);
		kcage_freemem_sub(pgcnt);
	}
#endif
}

/*
 * Add the page to the front of a linked list of pages
 * using the p_next & p_prev pointers for the list.
 * The caller is responsible for protecting the list pointers.
 */
void
mach_page_add(page_t **ppp, page_t *pp)
{
	if (*ppp == NULL) {
		/* Empty list: page becomes a circular list of one. */
		pp->p_next = pp->p_prev = pp;
	} else {
		pp->p_next = *ppp;
		pp->p_prev = (*ppp)->p_prev;
		(*ppp)->p_prev = pp;
		pp->p_prev->p_next = pp;
	}
	*ppp = pp;
}

/*
 * Remove this page from a linked list of pages
 * using the p_next & p_prev pointers for the list.
 *
 * The caller is responsible for protecting the list pointers.
 */
void
mach_page_sub(page_t **ppp, page_t *pp)
{
	ASSERT(PP_ISFREE(pp));

	if (*ppp == NULL || pp == NULL)
		panic("mach_page_sub");

	if (*ppp == pp)
		*ppp = pp->p_next;		/* go to next page */

	if (*ppp == pp)
		*ppp = NULL;			/* page list is gone */
	else {
		pp->p_prev->p_next = pp->p_next;
		pp->p_next->p_prev = pp->p_prev;
	}
	pp->p_prev = pp->p_next = pp;		/* make pp a list of one */
}

/*
 * Routine fsflush uses to gradually coalesce the free list into larger pages.
 */
void
page_promote_size(page_t *pp, uint_t cur_szc)
{
	pfn_t	pfn;
	int	mnode;
	int	idx;
	int	new_szc = cur_szc + 1;
	int	full = FULL_REGION_CNT(new_szc);

	pfn = page_pptonum(pp);
	mnode = PFN_2_MEM_NODE(pfn);

	page_freelist_lock(mnode);

	/*
	 * Promote only if the region's free-page counter has reached
	 * FULL_REGION_CNT(new_szc), i.e. every constituent page is free.
	 */
	idx = PNUM_TO_IDX(mnode, new_szc, pfn);
	if (PAGE_COUNTERS(mnode, new_szc, idx) == full)
		(void) page_promote(mnode, pfn, new_szc, PC_FREE);

	page_freelist_unlock(mnode);
}

/* Failure counters for page_promote(), kept for observability. */
static uint_t page_promote_err;
static uint_t page_promote_noreloc_err;

/*
 * Create a single larger page (of szc new_szc) from smaller contiguous pages
 * for the given mnode starting at pfnum. Pages involved are on the freelist
 * before the call and may be returned to the caller if requested, otherwise
 * they will be placed back on the freelist.
 * If flags is PC_ALLOC, then the large page will be returned to the user in
 * a state which is consistent with a page being taken off the freelist.
 If
 * we failed to lock the new large page, then we will return NULL to the
 * caller and put the large page on the freelist instead.
 * If flags is PC_FREE, then the large page will be placed on the freelist,
 * and NULL will be returned.
 * The caller is responsible for locking the freelist as well as any other
 * accounting which needs to be done for a returned page.
 *
 * RFE: For performance pass in pp instead of pfnum so
 *	we can avoid excessive calls to page_numtopp_nolock().
 *	This would depend on an assumption that all contiguous
 *	pages are in the same memseg so we can just add/dec
 *	our pp.
 *
 * Lock ordering:
 *
 *	There is a potential but rare deadlock situation
 *	for page promotion and demotion operations. The problem
 *	is there are two paths into the freelist manager and
 *	they have different lock orders:
 *
 *	page_create()
 *		lock freelist
 *		page_lock(EXCL)
 *		unlock freelist
 *		return
 *		caller drops page_lock
 *
 *	page_free() and page_reclaim()
 *		caller grabs page_lock(EXCL)
 *
 *		lock freelist
 *		unlock freelist
 *		drop page_lock
 *
 *	What prevents a thread in page_create() from deadlocking
 *	with a thread freeing or reclaiming the same page is the
 *	page_trylock() in page_get_freelist(). If the trylock fails
 *	it skips the page.
 *
 *	The lock ordering for promotion and demotion is the same as
 *	for page_create(). Since the same deadlock could occur during
 *	page promotion and freeing or reclaiming of a page on the
 *	cache list we might have to fail the operation and undo what
 *	we have done so far. Again this is rare.
 */
page_t *
page_promote(int mnode, pfn_t pfnum, uchar_t new_szc, int flags)
{
	page_t	*pp, *pplist, *tpp, *start_pp;
	pgcnt_t	new_npgs, npgs;
	uint_t	bin;
	pgcnt_t	tmpnpgs, pages_left;
	uint_t	mtype;
	uint_t	noreloc;
	uint_t	i;
	int	which_list;
	ulong_t	index;
	kmutex_t *phm;

	/*
	 * General algorithm:
	 * Find the starting page
	 * Walk each page struct removing it from the freelist,
	 * and linking it to all the other pages removed.
	 * Once all pages are off the freelist,
	 * walk the list, modifying p_szc to new_szc and what
	 * ever other info needs to be done to create a large free page.
	 * According to the flags, either return the page or put it
	 * on the freelist.
	 */

	start_pp = page_numtopp_nolock(pfnum);
	ASSERT(start_pp && (start_pp->p_pagenum == pfnum));
	new_npgs = page_get_pagecnt(new_szc);
	ASSERT(IS_P2ALIGNED(pfnum, new_npgs));

	/*
	 * Loop through smaller pages to confirm that all pages
	 * give the same result for PP_ISNORELOC().
	 * We can check this reliably here as the protocol for setting
	 * P_NORELOC requires pages to be taken off the free list first.
	 */
	for (i = 0, pp = start_pp; i < new_npgs; i++, pp++) {
		if (pp == start_pp) {
			/* First page, set requirement. */
			noreloc = PP_ISNORELOC(pp);
		} else if (noreloc != PP_ISNORELOC(pp)) {
			/* Mixed NORELOC/relocatable region: refuse. */
			page_promote_noreloc_err++;
			page_promote_err++;
			return (NULL);
		}
	}

	pages_left = new_npgs;
	pplist = NULL;
	pp = start_pp;

	/* Loop around coalescing the smaller pages into a big page. */
	while (pages_left) {
		/*
		 * Remove from the freelist.
		 */
		ASSERT(PP_ISFREE(pp));
		bin = PP_2_BIN(pp);
		ASSERT(mnode == PP_2_MEM_NODE(pp));
		mtype = PP_2_MTYPE(pp);
		if (PP_ISAGED(pp)) {

			/*
			 * PG_FREE_LIST
			 */
			if (pp->p_szc) {
				page_vpsub(&PAGE_FREELISTS(mnode,
				    pp->p_szc, bin, mtype), pp);
			} else {
				mach_page_sub(&PAGE_FREELISTS(mnode, 0,
				    bin, mtype), pp);
			}
			which_list = PG_FREE_LIST;
		} else {
			ASSERT(pp->p_szc == 0);

			/*
			 * PG_CACHE_LIST
			 *
			 * Since this page comes from the
			 * cachelist, we must destroy the
			 * vnode association.
			 */
			if (!page_trylock(pp, SE_EXCL)) {
				goto fail_promote;
			}

			/*
			 * We need to be careful not to deadlock
			 * with another thread in page_lookup().
			 * The page_lookup() thread could be holding
			 * the same phm that we need if the two
			 * pages happen to hash to the same phm lock.
			 * At this point we have locked the entire
			 * freelist and page_lookup() could be trying
			 * to grab a freelist lock.
			 */
			index = PAGE_HASH_FUNC(pp->p_vnode, pp->p_offset);
			phm = PAGE_HASH_MUTEX(index);
			if (!mutex_tryenter(phm)) {
				page_unlock(pp);
				goto fail_promote;
			}

			mach_page_sub(&PAGE_CACHELISTS(mnode, bin, mtype), pp);
			page_hashout(pp, phm);
			mutex_exit(phm);
			PP_SETAGED(pp);
			page_unlock(pp);
			which_list = PG_CACHE_LIST;
		}
		page_ctr_sub(pp, which_list);

		/*
		 * Concatenate the smaller page(s) onto
		 * the large page list.
		 */
		tmpnpgs = npgs = page_get_pagecnt(pp->p_szc);
		pages_left -= npgs;
		tpp = pp;
		while (npgs--) {
			tpp->p_szc = new_szc;
			tpp = tpp->p_next;
		}
		page_list_concat(&pplist, &pp);
		pp += tmpnpgs;
	}
	CHK_LPG(pplist, new_szc);

	/*
	 * return the page to the user if requested
	 * in the properly locked state.
	 */
	if (flags == PC_ALLOC && (page_trylock_cons(pplist, SE_EXCL))) {
		return (pplist);
	}

	/*
	 * Otherwise place the new large page on the freelist
	 */
	bin = PP_2_BIN(pplist);
	mnode = PP_2_MEM_NODE(pplist);
	mtype = PP_2_MTYPE(pplist);
	page_vpadd(&PAGE_FREELISTS(mnode, new_szc, bin, mtype), pplist);

	page_ctr_add(pplist, PG_FREE_LIST);
	return (NULL);

fail_promote:
	/*
	 * A thread must have still been freeing or
	 * reclaiming the page on the cachelist.
	 * To prevent a deadlock undo what we have
	 * done so far and return failure. This
	 * situation can only happen while promoting
	 * PAGESIZE pages.
	 */
	page_promote_err++;
	while (pplist) {
		pp = pplist;
		mach_page_sub(&pplist, pp);
		pp->p_szc = 0;
		bin = PP_2_BIN(pp);
		mtype = PP_2_MTYPE(pp);
		mach_page_add(&PAGE_FREELISTS(mnode, 0, bin, mtype), pp);
		page_ctr_add(pp, PG_FREE_LIST);
	}
	return (NULL);

}

/*
 * Break up a large page into smaller size pages.
 * Pages involved are on the freelist before the call and may
 * be returned to the caller if requested, otherwise they will
 * be placed back on the freelist.
 * The caller is responsible for locking the freelist as well as any other
 * accounting which needs to be done for a returned page.
 * If flags is not PC_ALLOC, the color argument is ignored, and thus
 * technically, any value may be passed in but PC_NO_COLOR is the standard
 * which should be followed for clarity's sake.
 */
page_t *
page_demote(int mnode, pfn_t pfnum, uchar_t cur_szc, uchar_t new_szc,
    int color, int flags)
{
	page_t	*pp, *pplist, *npplist;
	pgcnt_t	npgs, n;
	uint_t	bin;
	uint_t	mtype;
	page_t	*ret_pp = NULL;

	ASSERT(cur_szc != 0);
	ASSERT(new_szc < cur_szc);

	pplist = page_numtopp_nolock(pfnum);
	ASSERT(pplist != NULL);

	ASSERT(pplist->p_szc == cur_szc);

	/* Pull the whole large page off its freelist first. */
	bin = PP_2_BIN(pplist);
	ASSERT(mnode == PP_2_MEM_NODE(pplist));
	mtype = PP_2_MTYPE(pplist);
	page_vpsub(&PAGE_FREELISTS(mnode, cur_szc, bin, mtype), pplist);

	CHK_LPG(pplist, cur_szc);
	page_ctr_sub(pplist, PG_FREE_LIST);

	/*
	 * Number of PAGESIZE pages for smaller new_szc
	 * page.
	 */
	npgs = page_get_pagecnt(new_szc);

	while (pplist) {
		pp = pplist;

		ASSERT(pp->p_szc == cur_szc);

		/*
		 * We either break it up into PAGESIZE pages or larger.
		 */
		if (npgs == 1) {	/* PAGESIZE case */
			mach_page_sub(&pplist, pp);
			ASSERT(pp->p_szc == cur_szc);
			ASSERT(new_szc == 0);
			ASSERT(mnode == PP_2_MEM_NODE(pp));
			pp->p_szc = new_szc;
			bin = PP_2_BIN(pp);
			/*
			 * Hand the matching-color page back to the caller
			 * (PC_ALLOC, at most once); otherwise refree it.
			 */
			if ((bin == color) && (flags == PC_ALLOC) &&
			    (ret_pp == NULL) &&
			    page_trylock_cons(pp, SE_EXCL)) {
				ret_pp = pp;
			} else {
				mtype = PP_2_MTYPE(pp);
				mach_page_add(&PAGE_FREELISTS(mnode, 0, bin,
				    mtype), pp);
				page_ctr_add(pp, PG_FREE_LIST);
			}
		} else {

			/*
			 * Break down into smaller lists of pages.
			 */
			page_list_break(&pplist, &npplist, npgs);

			pp = pplist;
			n = npgs;
			while (n--) {
				ASSERT(pp->p_szc == cur_szc);
				pp->p_szc = new_szc;
				pp = pp->p_next;
			}

			CHK_LPG(pplist, new_szc);

			bin = PP_2_BIN(pplist);
			ASSERT(mnode == PP_2_MEM_NODE(pp));
			if ((bin == color) && (flags == PC_ALLOC) &&
			    (ret_pp == NULL) &&
			    page_trylock_cons(pp, SE_EXCL)) {
				ret_pp = pp;
			} else {
				mtype = PP_2_MTYPE(pp);
				page_vpadd(&PAGE_FREELISTS(mnode, new_szc,
				    bin, mtype), pplist);

				page_ctr_add(pplist, PG_FREE_LIST);
			}
			pplist = npplist;
		}
	}
	return (ret_pp);
}

/*
 * Tunable; NOTE(review): non-zero appears to disable MPSS freelist
 * coalescing — confirm against page_freelist_coalesce() below.
 */
int mpss_coalesce_disable = 0;

/*
 * Coalesce free pages into a page of the given szc and color if possible.
 * Return the pointer to the page created, otherwise, return NULL.
18557c478bd9Sstevel@tonic-gate */ 18567c478bd9Sstevel@tonic-gate static page_t * 18577c478bd9Sstevel@tonic-gate page_freelist_coalesce(int mnode, uchar_t szc, int color) 18587c478bd9Sstevel@tonic-gate { 18597c478bd9Sstevel@tonic-gate int r; /* region size */ 18607c478bd9Sstevel@tonic-gate int idx, full, i; 18617c478bd9Sstevel@tonic-gate pfn_t pfnum; 18627c478bd9Sstevel@tonic-gate size_t len; 18637c478bd9Sstevel@tonic-gate size_t buckets_to_check; 18647c478bd9Sstevel@tonic-gate pgcnt_t cands; 18657c478bd9Sstevel@tonic-gate page_t *ret_pp; 18667c478bd9Sstevel@tonic-gate int color_stride; 18677c478bd9Sstevel@tonic-gate 18687c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.page_ctrs_coalesce); 18697c478bd9Sstevel@tonic-gate 18707c478bd9Sstevel@tonic-gate if (mpss_coalesce_disable) { 18717c478bd9Sstevel@tonic-gate return (NULL); 18727c478bd9Sstevel@tonic-gate } 18737c478bd9Sstevel@tonic-gate 18747c478bd9Sstevel@tonic-gate r = szc; 18757c478bd9Sstevel@tonic-gate PGCTRS_CANDS_GETVALUECOLOR(mnode, r, color, cands); 18767c478bd9Sstevel@tonic-gate if (cands == 0) { 18777c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.page_ctrs_cands_skip); 18787c478bd9Sstevel@tonic-gate return (NULL); 18797c478bd9Sstevel@tonic-gate } 18807c478bd9Sstevel@tonic-gate full = FULL_REGION_CNT(r); 18817c478bd9Sstevel@tonic-gate color_stride = (szc) ? 
page_convert_color(0, szc, page_colors - 1) + 1 : 18827c478bd9Sstevel@tonic-gate page_colors; 18837c478bd9Sstevel@tonic-gate 18847c478bd9Sstevel@tonic-gate /* Prevent page_counters dynamic memory from being freed */ 18857c478bd9Sstevel@tonic-gate rw_enter(&page_ctrs_rwlock[mnode], RW_READER); 18867c478bd9Sstevel@tonic-gate len = PAGE_COUNTERS_ENTRIES(mnode, r); 18877c478bd9Sstevel@tonic-gate buckets_to_check = len / color_stride; 18887c478bd9Sstevel@tonic-gate idx = PAGE_COUNTERS_CURRENT_COLOR(mnode, r, color); 18897c478bd9Sstevel@tonic-gate ASSERT((idx % color_stride) == color); 18907c478bd9Sstevel@tonic-gate idx += color_stride; 18917c478bd9Sstevel@tonic-gate if (idx >= len) 18927c478bd9Sstevel@tonic-gate idx = color; 18937c478bd9Sstevel@tonic-gate for (i = 0; i < buckets_to_check; i++) { 18947c478bd9Sstevel@tonic-gate if (PAGE_COUNTERS(mnode, r, idx) == full) { 18957c478bd9Sstevel@tonic-gate pfnum = IDX_TO_PNUM(mnode, r, idx); 18967c478bd9Sstevel@tonic-gate ASSERT(pfnum >= mem_node_config[mnode].physbase && 18977c478bd9Sstevel@tonic-gate pfnum < mem_node_config[mnode].physmax); 18987c478bd9Sstevel@tonic-gate /* 18997c478bd9Sstevel@tonic-gate * RFE: For performance maybe we can do something less 19007c478bd9Sstevel@tonic-gate * brutal than locking the entire freelist. So far 19017c478bd9Sstevel@tonic-gate * this doesn't seem to be a performance problem? 
19027c478bd9Sstevel@tonic-gate */ 19037c478bd9Sstevel@tonic-gate page_freelist_lock(mnode); 19047c478bd9Sstevel@tonic-gate if (PAGE_COUNTERS(mnode, r, idx) != full) { 19057c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.page_ctrs_changed); 19067c478bd9Sstevel@tonic-gate goto skip_this_one; 19077c478bd9Sstevel@tonic-gate } 19087c478bd9Sstevel@tonic-gate ret_pp = page_promote(mnode, pfnum, r, PC_ALLOC); 19097c478bd9Sstevel@tonic-gate if (ret_pp != NULL) { 19107c478bd9Sstevel@tonic-gate PAGE_COUNTERS_CURRENT_COLOR(mnode, r, color) = 19117c478bd9Sstevel@tonic-gate idx; 19127c478bd9Sstevel@tonic-gate page_freelist_unlock(mnode); 19137c478bd9Sstevel@tonic-gate rw_exit(&page_ctrs_rwlock[mnode]); 19147c478bd9Sstevel@tonic-gate #if defined(__sparc) 19157c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(ret_pp)) { 19167c478bd9Sstevel@tonic-gate pgcnt_t npgs; 19177c478bd9Sstevel@tonic-gate 19187c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(ret_pp->p_szc); 19197c478bd9Sstevel@tonic-gate kcage_freemem_sub(npgs); 19207c478bd9Sstevel@tonic-gate } 19217c478bd9Sstevel@tonic-gate #endif 19227c478bd9Sstevel@tonic-gate return (ret_pp); 19237c478bd9Sstevel@tonic-gate } 19247c478bd9Sstevel@tonic-gate skip_this_one: 19257c478bd9Sstevel@tonic-gate page_freelist_unlock(mnode); 19267c478bd9Sstevel@tonic-gate /* 19277c478bd9Sstevel@tonic-gate * No point looking for another page if we've 19287c478bd9Sstevel@tonic-gate * already tried all of the ones that 19297c478bd9Sstevel@tonic-gate * page_ctr_cands indicated. Stash off where we left 19307c478bd9Sstevel@tonic-gate * off. 19317c478bd9Sstevel@tonic-gate * Note: this is not exact since we don't hold the 19327c478bd9Sstevel@tonic-gate * page_freelist_locks before we initially get the 19337c478bd9Sstevel@tonic-gate * value of cands for performance reasons, but should 19347c478bd9Sstevel@tonic-gate * be a decent approximation. 
19357c478bd9Sstevel@tonic-gate */ 19367c478bd9Sstevel@tonic-gate if (--cands == 0) { 19377c478bd9Sstevel@tonic-gate PAGE_COUNTERS_CURRENT_COLOR(mnode, r, color) = 19387c478bd9Sstevel@tonic-gate idx; 19397c478bd9Sstevel@tonic-gate break; 19407c478bd9Sstevel@tonic-gate } 19417c478bd9Sstevel@tonic-gate } 19427c478bd9Sstevel@tonic-gate idx += color_stride; 19437c478bd9Sstevel@tonic-gate if (idx >= len) 19447c478bd9Sstevel@tonic-gate idx = color; 19457c478bd9Sstevel@tonic-gate } 19467c478bd9Sstevel@tonic-gate rw_exit(&page_ctrs_rwlock[mnode]); 19477c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.page_ctrs_failed); 19487c478bd9Sstevel@tonic-gate return (NULL); 19497c478bd9Sstevel@tonic-gate } 19507c478bd9Sstevel@tonic-gate 19517c478bd9Sstevel@tonic-gate /* 19527c478bd9Sstevel@tonic-gate * For the given mnode, promote as many small pages to large pages as possible. 19537c478bd9Sstevel@tonic-gate */ 19547c478bd9Sstevel@tonic-gate void 19557c478bd9Sstevel@tonic-gate page_freelist_coalesce_all(int mnode) 19567c478bd9Sstevel@tonic-gate { 19577c478bd9Sstevel@tonic-gate int r; /* region size */ 19587c478bd9Sstevel@tonic-gate int idx, full; 19597c478bd9Sstevel@tonic-gate pfn_t pfnum; 19607c478bd9Sstevel@tonic-gate size_t len; 19617c478bd9Sstevel@tonic-gate 19627c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.page_ctrs_coalesce_all); 19637c478bd9Sstevel@tonic-gate 19647c478bd9Sstevel@tonic-gate if (mpss_coalesce_disable) { 19657c478bd9Sstevel@tonic-gate return; 19667c478bd9Sstevel@tonic-gate } 19677c478bd9Sstevel@tonic-gate 19687c478bd9Sstevel@tonic-gate /* 19697c478bd9Sstevel@tonic-gate * Lock the entire freelist and coalesce what we can. 19707c478bd9Sstevel@tonic-gate * 19717c478bd9Sstevel@tonic-gate * Always promote to the largest page possible 19727c478bd9Sstevel@tonic-gate * first to reduce the number of page promotions. 
19737c478bd9Sstevel@tonic-gate */ 19747c478bd9Sstevel@tonic-gate rw_enter(&page_ctrs_rwlock[mnode], RW_READER); 19757c478bd9Sstevel@tonic-gate page_freelist_lock(mnode); 19767c478bd9Sstevel@tonic-gate for (r = mmu_page_sizes - 1; r > 0; r--) { 19777c478bd9Sstevel@tonic-gate pgcnt_t cands; 19787c478bd9Sstevel@tonic-gate 19797c478bd9Sstevel@tonic-gate PGCTRS_CANDS_GETVALUE(mnode, r, cands); 19807c478bd9Sstevel@tonic-gate if (cands == 0) { 19817c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.page_ctrs_cands_skip_all); 19827c478bd9Sstevel@tonic-gate continue; 19837c478bd9Sstevel@tonic-gate } 19847c478bd9Sstevel@tonic-gate 19857c478bd9Sstevel@tonic-gate full = FULL_REGION_CNT(r); 19867c478bd9Sstevel@tonic-gate len = PAGE_COUNTERS_ENTRIES(mnode, r); 19877c478bd9Sstevel@tonic-gate 19887c478bd9Sstevel@tonic-gate for (idx = 0; idx < len; idx++) { 19897c478bd9Sstevel@tonic-gate if (PAGE_COUNTERS(mnode, r, idx) == full) { 19907c478bd9Sstevel@tonic-gate pfnum = IDX_TO_PNUM(mnode, r, idx); 19917c478bd9Sstevel@tonic-gate ASSERT(pfnum >= 19927c478bd9Sstevel@tonic-gate mem_node_config[mnode].physbase && 19937c478bd9Sstevel@tonic-gate pfnum < 19947c478bd9Sstevel@tonic-gate mem_node_config[mnode].physmax); 19957c478bd9Sstevel@tonic-gate (void) page_promote(mnode, pfnum, r, PC_FREE); 19967c478bd9Sstevel@tonic-gate } 19977c478bd9Sstevel@tonic-gate } 19987c478bd9Sstevel@tonic-gate } 19997c478bd9Sstevel@tonic-gate page_freelist_unlock(mnode); 20007c478bd9Sstevel@tonic-gate rw_exit(&page_ctrs_rwlock[mnode]); 20017c478bd9Sstevel@tonic-gate } 20027c478bd9Sstevel@tonic-gate 20037c478bd9Sstevel@tonic-gate /* 20047c478bd9Sstevel@tonic-gate * This is where all polices for moving pages around 20057c478bd9Sstevel@tonic-gate * to different page size free lists is implemented. 20067c478bd9Sstevel@tonic-gate * Returns 1 on success, 0 on failure. 
20077c478bd9Sstevel@tonic-gate * 20087c478bd9Sstevel@tonic-gate * So far these are the priorities for this algorithm in descending 20097c478bd9Sstevel@tonic-gate * order: 20107c478bd9Sstevel@tonic-gate * 20117c478bd9Sstevel@tonic-gate * 1) When servicing a request try to do so with a free page 20127c478bd9Sstevel@tonic-gate * from next size up. Helps defer fragmentation as long 20137c478bd9Sstevel@tonic-gate * as possible. 20147c478bd9Sstevel@tonic-gate * 20157c478bd9Sstevel@tonic-gate * 2) Page coalesce on demand. Only when a freelist 20167c478bd9Sstevel@tonic-gate * larger than PAGESIZE is empty and step 1 20177c478bd9Sstevel@tonic-gate * will not work since all larger size lists are 20187c478bd9Sstevel@tonic-gate * also empty. 20197c478bd9Sstevel@tonic-gate * 20207c478bd9Sstevel@tonic-gate * If pfnhi is non-zero, search for large page with pfn range less than pfnhi. 20217c478bd9Sstevel@tonic-gate */ 20227c478bd9Sstevel@tonic-gate page_t * 20237c478bd9Sstevel@tonic-gate page_freelist_fill(uchar_t szc, int color, int mnode, int mtype, pfn_t pfnhi) 20247c478bd9Sstevel@tonic-gate { 20257c478bd9Sstevel@tonic-gate uchar_t nszc = szc + 1; 20267c478bd9Sstevel@tonic-gate int bin; 20277c478bd9Sstevel@tonic-gate page_t *pp, *firstpp; 20287c478bd9Sstevel@tonic-gate page_t *ret_pp = NULL; 20297c478bd9Sstevel@tonic-gate 20307c478bd9Sstevel@tonic-gate ASSERT(szc < mmu_page_sizes); 20317c478bd9Sstevel@tonic-gate 20327c478bd9Sstevel@tonic-gate /* 20337c478bd9Sstevel@tonic-gate * First try to break up a larger page to fill 20347c478bd9Sstevel@tonic-gate * current size freelist. 20357c478bd9Sstevel@tonic-gate */ 20367c478bd9Sstevel@tonic-gate while (nszc < mmu_page_sizes) { 20377c478bd9Sstevel@tonic-gate /* 20387c478bd9Sstevel@tonic-gate * If page found then demote it. 
20397c478bd9Sstevel@tonic-gate */ 20407c478bd9Sstevel@tonic-gate bin = page_convert_color(szc, nszc, color); 20417c478bd9Sstevel@tonic-gate if (PAGE_FREELISTS(mnode, nszc, bin, mtype)) { 20427c478bd9Sstevel@tonic-gate page_freelist_lock(mnode); 20437c478bd9Sstevel@tonic-gate firstpp = pp = PAGE_FREELISTS(mnode, nszc, bin, mtype); 20447c478bd9Sstevel@tonic-gate 20457c478bd9Sstevel@tonic-gate /* 20467c478bd9Sstevel@tonic-gate * If pfnhi is not PFNNULL, look for large page below 20477c478bd9Sstevel@tonic-gate * pfnhi. PFNNULL signifies no pfn requirement. 20487c478bd9Sstevel@tonic-gate */ 20497c478bd9Sstevel@tonic-gate if (pfnhi != PFNNULL && pp->p_pagenum >= pfnhi) { 20507c478bd9Sstevel@tonic-gate do { 20517c478bd9Sstevel@tonic-gate pp = pp->p_vpnext; 20527c478bd9Sstevel@tonic-gate if (pp == firstpp) { 20537c478bd9Sstevel@tonic-gate pp = NULL; 20547c478bd9Sstevel@tonic-gate break; 20557c478bd9Sstevel@tonic-gate } 20567c478bd9Sstevel@tonic-gate } while (pp->p_pagenum >= pfnhi); 20577c478bd9Sstevel@tonic-gate } 20587c478bd9Sstevel@tonic-gate if (pp) { 20597c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == nszc); 20607c478bd9Sstevel@tonic-gate ret_pp = page_demote(mnode, pp->p_pagenum, 20617c478bd9Sstevel@tonic-gate pp->p_szc, szc, color, PC_ALLOC); 20627c478bd9Sstevel@tonic-gate if (ret_pp) { 20637c478bd9Sstevel@tonic-gate page_freelist_unlock(mnode); 20647c478bd9Sstevel@tonic-gate #if defined(__sparc) 20657c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(ret_pp)) { 20667c478bd9Sstevel@tonic-gate pgcnt_t npgs; 20677c478bd9Sstevel@tonic-gate 20687c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt( 20697c478bd9Sstevel@tonic-gate ret_pp->p_szc); 20707c478bd9Sstevel@tonic-gate kcage_freemem_sub(npgs); 20717c478bd9Sstevel@tonic-gate } 20727c478bd9Sstevel@tonic-gate #endif 20737c478bd9Sstevel@tonic-gate return (ret_pp); 20747c478bd9Sstevel@tonic-gate } 20757c478bd9Sstevel@tonic-gate } 20767c478bd9Sstevel@tonic-gate page_freelist_unlock(mnode); 20777c478bd9Sstevel@tonic-gate } 
20787c478bd9Sstevel@tonic-gate nszc++; 20797c478bd9Sstevel@tonic-gate } 20807c478bd9Sstevel@tonic-gate 20817c478bd9Sstevel@tonic-gate /* 20827c478bd9Sstevel@tonic-gate * Ok that didn't work. Time to coalesce. 20837c478bd9Sstevel@tonic-gate */ 20847c478bd9Sstevel@tonic-gate if (szc != 0) { 20857c478bd9Sstevel@tonic-gate ret_pp = page_freelist_coalesce(mnode, szc, color); 20867c478bd9Sstevel@tonic-gate } 20877c478bd9Sstevel@tonic-gate 20887c478bd9Sstevel@tonic-gate return (ret_pp); 20897c478bd9Sstevel@tonic-gate } 20907c478bd9Sstevel@tonic-gate 20917c478bd9Sstevel@tonic-gate /* 20927c478bd9Sstevel@tonic-gate * Helper routine used only by the freelist code to lock 20937c478bd9Sstevel@tonic-gate * a page. If the page is a large page then it succeeds in 20947c478bd9Sstevel@tonic-gate * locking all the constituent pages or none at all. 20957c478bd9Sstevel@tonic-gate * Returns 1 on sucess, 0 on failure. 20967c478bd9Sstevel@tonic-gate */ 20977c478bd9Sstevel@tonic-gate static int 20987c478bd9Sstevel@tonic-gate page_trylock_cons(page_t *pp, se_t se) 20997c478bd9Sstevel@tonic-gate { 21007c478bd9Sstevel@tonic-gate page_t *tpp, *first_pp = pp; 21017c478bd9Sstevel@tonic-gate 21027c478bd9Sstevel@tonic-gate /* 21037c478bd9Sstevel@tonic-gate * Fail if can't lock first or only page. 21047c478bd9Sstevel@tonic-gate */ 21057c478bd9Sstevel@tonic-gate if (!page_trylock(pp, se)) { 21067c478bd9Sstevel@tonic-gate return (0); 21077c478bd9Sstevel@tonic-gate } 21087c478bd9Sstevel@tonic-gate 21097c478bd9Sstevel@tonic-gate /* 21107c478bd9Sstevel@tonic-gate * PAGESIZE: common case. 21117c478bd9Sstevel@tonic-gate */ 21127c478bd9Sstevel@tonic-gate if (pp->p_szc == 0) { 21137c478bd9Sstevel@tonic-gate return (1); 21147c478bd9Sstevel@tonic-gate } 21157c478bd9Sstevel@tonic-gate 21167c478bd9Sstevel@tonic-gate /* 21177c478bd9Sstevel@tonic-gate * Large page case. 
21187c478bd9Sstevel@tonic-gate */ 21197c478bd9Sstevel@tonic-gate tpp = pp->p_next; 21207c478bd9Sstevel@tonic-gate while (tpp != pp) { 21217c478bd9Sstevel@tonic-gate if (!page_trylock(tpp, se)) { 21227c478bd9Sstevel@tonic-gate /* 21237c478bd9Sstevel@tonic-gate * On failure unlock what we 21247c478bd9Sstevel@tonic-gate * have locked so far. 21257c478bd9Sstevel@tonic-gate */ 21267c478bd9Sstevel@tonic-gate while (first_pp != tpp) { 21277c478bd9Sstevel@tonic-gate page_unlock(first_pp); 21287c478bd9Sstevel@tonic-gate first_pp = first_pp->p_next; 21297c478bd9Sstevel@tonic-gate } 21307c478bd9Sstevel@tonic-gate return (0); 21317c478bd9Sstevel@tonic-gate } 21327c478bd9Sstevel@tonic-gate tpp = tpp->p_next; 21337c478bd9Sstevel@tonic-gate } 21347c478bd9Sstevel@tonic-gate return (1); 21357c478bd9Sstevel@tonic-gate } 21367c478bd9Sstevel@tonic-gate 21377c478bd9Sstevel@tonic-gate page_t * 21387c478bd9Sstevel@tonic-gate page_get_mnode_freelist(int mnode, uint_t bin, int mtype, uchar_t szc, 21397c478bd9Sstevel@tonic-gate uint_t flags) 21407c478bd9Sstevel@tonic-gate { 21417c478bd9Sstevel@tonic-gate kmutex_t *pcm; 21427c478bd9Sstevel@tonic-gate int i, fill_tried, fill_marker; 21437c478bd9Sstevel@tonic-gate page_t *pp, *first_pp; 21447c478bd9Sstevel@tonic-gate uint_t bin_marker; 21457c478bd9Sstevel@tonic-gate int colors, cpucolors; 21467c478bd9Sstevel@tonic-gate uchar_t nszc; 21477c478bd9Sstevel@tonic-gate uint_t nszc_color_shift; 21487c478bd9Sstevel@tonic-gate int nwaybins = 0, nwaycnt; 21497c478bd9Sstevel@tonic-gate 21507c478bd9Sstevel@tonic-gate ASSERT(szc < mmu_page_sizes); 21517c478bd9Sstevel@tonic-gate 21527c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.pgmf_alloc[szc]); 21537c478bd9Sstevel@tonic-gate 21547c478bd9Sstevel@tonic-gate /* LINTED */ 21557c478bd9Sstevel@tonic-gate MTYPE_START(mnode, mtype, flags); 21567c478bd9Sstevel@tonic-gate if (mtype < 0) { /* mnode foes not have memory in mtype range */ 21577c478bd9Sstevel@tonic-gate 
VM_STAT_ADD(vmm_vmstats.pgmf_allocempty[szc]); 21587c478bd9Sstevel@tonic-gate return (NULL); 21597c478bd9Sstevel@tonic-gate } 21607c478bd9Sstevel@tonic-gate 21617c478bd9Sstevel@tonic-gate /* 21627c478bd9Sstevel@tonic-gate * Set how many physical colors for this page size. 21637c478bd9Sstevel@tonic-gate */ 21647c478bd9Sstevel@tonic-gate colors = (szc) ? page_convert_color(0, szc, page_colors - 1) + 1 : 21657c478bd9Sstevel@tonic-gate page_colors; 21667c478bd9Sstevel@tonic-gate 21677c478bd9Sstevel@tonic-gate nszc = MIN(szc + 1, mmu_page_sizes - 1); 21687c478bd9Sstevel@tonic-gate nszc_color_shift = page_get_shift(nszc) - page_get_shift(szc); 21697c478bd9Sstevel@tonic-gate 21707c478bd9Sstevel@tonic-gate /* cpu_page_colors is non-zero if a page color may be in > 1 bin */ 21717c478bd9Sstevel@tonic-gate cpucolors = cpu_page_colors; 21727c478bd9Sstevel@tonic-gate 21737c478bd9Sstevel@tonic-gate /* 21747c478bd9Sstevel@tonic-gate * adjust cpucolors to possibly check additional 'equivalent' bins 21757c478bd9Sstevel@tonic-gate * to try to minimize fragmentation of large pages by delaying calls 21767c478bd9Sstevel@tonic-gate * to page_freelist_fill. 
21777c478bd9Sstevel@tonic-gate */ 21787c478bd9Sstevel@tonic-gate if (colorequiv > 1) { 21797c478bd9Sstevel@tonic-gate int equivcolors = colors / colorequiv; 21807c478bd9Sstevel@tonic-gate 21817c478bd9Sstevel@tonic-gate if (equivcolors && (cpucolors == 0 || equivcolors < cpucolors)) 21827c478bd9Sstevel@tonic-gate cpucolors = equivcolors; 21837c478bd9Sstevel@tonic-gate } 21847c478bd9Sstevel@tonic-gate 21857c478bd9Sstevel@tonic-gate ASSERT(colors <= page_colors); 21867c478bd9Sstevel@tonic-gate ASSERT(colors); 21877c478bd9Sstevel@tonic-gate ASSERT((colors & (colors - 1)) == 0); 21887c478bd9Sstevel@tonic-gate 21897c478bd9Sstevel@tonic-gate ASSERT(bin < colors); 21907c478bd9Sstevel@tonic-gate 21917c478bd9Sstevel@tonic-gate /* 21927c478bd9Sstevel@tonic-gate * Only hold one freelist lock at a time, that way we 21937c478bd9Sstevel@tonic-gate * can start anywhere and not have to worry about lock 21947c478bd9Sstevel@tonic-gate * ordering. 21957c478bd9Sstevel@tonic-gate */ 21967c478bd9Sstevel@tonic-gate big_try_again: 21977c478bd9Sstevel@tonic-gate fill_tried = 0; 21987c478bd9Sstevel@tonic-gate nwaycnt = 0; 21997c478bd9Sstevel@tonic-gate for (i = 0; i <= colors; i++) { 22007c478bd9Sstevel@tonic-gate try_again: 22017c478bd9Sstevel@tonic-gate ASSERT(bin < colors); 22027c478bd9Sstevel@tonic-gate if (PAGE_FREELISTS(mnode, szc, bin, mtype)) { 22037c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); 22047c478bd9Sstevel@tonic-gate mutex_enter(pcm); 22057c478bd9Sstevel@tonic-gate pp = PAGE_FREELISTS(mnode, szc, bin, mtype); 22067c478bd9Sstevel@tonic-gate if (pp != NULL) { 22077c478bd9Sstevel@tonic-gate /* 22087c478bd9Sstevel@tonic-gate * These were set before the page 22097c478bd9Sstevel@tonic-gate * was put on the free list, 22107c478bd9Sstevel@tonic-gate * they must still be set. 
22117c478bd9Sstevel@tonic-gate */ 22127c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 22137c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 22147c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL); 22157c478bd9Sstevel@tonic-gate ASSERT(pp->p_hash == NULL); 22167c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1); 22177c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 22187c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 22197c478bd9Sstevel@tonic-gate 22207c478bd9Sstevel@tonic-gate /* 22217c478bd9Sstevel@tonic-gate * Walk down the hash chain. 22227c478bd9Sstevel@tonic-gate * 8k pages are linked on p_next 22237c478bd9Sstevel@tonic-gate * and p_prev fields. Large pages 22247c478bd9Sstevel@tonic-gate * are a contiguous group of 22257c478bd9Sstevel@tonic-gate * constituent pages linked together 22267c478bd9Sstevel@tonic-gate * on their p_next and p_prev fields. 22277c478bd9Sstevel@tonic-gate * The large pages are linked together 22287c478bd9Sstevel@tonic-gate * on the hash chain using p_vpnext 22297c478bd9Sstevel@tonic-gate * p_vpprev of the base constituent 22307c478bd9Sstevel@tonic-gate * page of each large page. 
22317c478bd9Sstevel@tonic-gate */ 22327c478bd9Sstevel@tonic-gate first_pp = pp; 22337c478bd9Sstevel@tonic-gate while (!page_trylock_cons(pp, SE_EXCL)) { 22347c478bd9Sstevel@tonic-gate if (szc == 0) { 22357c478bd9Sstevel@tonic-gate pp = pp->p_next; 22367c478bd9Sstevel@tonic-gate } else { 22377c478bd9Sstevel@tonic-gate pp = pp->p_vpnext; 22387c478bd9Sstevel@tonic-gate } 22397c478bd9Sstevel@tonic-gate 22407c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 22417c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 22427c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL); 22437c478bd9Sstevel@tonic-gate ASSERT(pp->p_hash == NULL); 22447c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1); 22457c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 22467c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == 22477c478bd9Sstevel@tonic-gate mnode); 22487c478bd9Sstevel@tonic-gate 22497c478bd9Sstevel@tonic-gate if (pp == first_pp) { 22507c478bd9Sstevel@tonic-gate pp = NULL; 22517c478bd9Sstevel@tonic-gate break; 22527c478bd9Sstevel@tonic-gate } 22537c478bd9Sstevel@tonic-gate } 22547c478bd9Sstevel@tonic-gate 22557c478bd9Sstevel@tonic-gate if (pp) { 22567c478bd9Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 22577c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 22587c478bd9Sstevel@tonic-gate if (szc == 0) { 22597c478bd9Sstevel@tonic-gate page_sub(&PAGE_FREELISTS(mnode, 22607c478bd9Sstevel@tonic-gate szc, bin, mtype), pp); 22617c478bd9Sstevel@tonic-gate } else { 22627c478bd9Sstevel@tonic-gate page_vpsub(&PAGE_FREELISTS( 22637c478bd9Sstevel@tonic-gate mnode, szc, bin, mtype), 22647c478bd9Sstevel@tonic-gate pp); 22657c478bd9Sstevel@tonic-gate CHK_LPG(pp, szc); 22667c478bd9Sstevel@tonic-gate } 22677c478bd9Sstevel@tonic-gate page_ctr_sub(pp, PG_FREE_LIST); 22687c478bd9Sstevel@tonic-gate 22697c478bd9Sstevel@tonic-gate if ((PP_ISFREE(pp) == 0) || 22707c478bd9Sstevel@tonic-gate (PP_ISAGED(pp) == 0)) 22717c478bd9Sstevel@tonic-gate panic("free page is not. 
pp %p", 22727c478bd9Sstevel@tonic-gate (void *)pp); 22737c478bd9Sstevel@tonic-gate mutex_exit(pcm); 22747c478bd9Sstevel@tonic-gate 22757c478bd9Sstevel@tonic-gate #if defined(__sparc) 22767c478bd9Sstevel@tonic-gate ASSERT(!kcage_on || PP_ISNORELOC(pp) || 22777c478bd9Sstevel@tonic-gate (flags & PG_NORELOC) == 0); 22787c478bd9Sstevel@tonic-gate 22797c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(pp)) { 22807c478bd9Sstevel@tonic-gate pgcnt_t npgs; 22817c478bd9Sstevel@tonic-gate 22827c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(szc); 22837c478bd9Sstevel@tonic-gate kcage_freemem_sub(npgs); 22847c478bd9Sstevel@tonic-gate } 22857c478bd9Sstevel@tonic-gate #endif 22867c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats. 22877c478bd9Sstevel@tonic-gate pgmf_allocok[szc]); 22887c478bd9Sstevel@tonic-gate return (pp); 22897c478bd9Sstevel@tonic-gate } 22907c478bd9Sstevel@tonic-gate } 22917c478bd9Sstevel@tonic-gate mutex_exit(pcm); 22927c478bd9Sstevel@tonic-gate } 22937c478bd9Sstevel@tonic-gate 22947c478bd9Sstevel@tonic-gate /* 22957c478bd9Sstevel@tonic-gate * Wow! The initial bin is empty. 22967c478bd9Sstevel@tonic-gate * If specific color is needed, check if page color may be 22977c478bd9Sstevel@tonic-gate * in other bins. cpucolors is: 22987c478bd9Sstevel@tonic-gate * 0 if the colors for this cpu is equal to page_colors. 22997c478bd9Sstevel@tonic-gate * This means that pages with a particular color are in a 23007c478bd9Sstevel@tonic-gate * single bin. 23017c478bd9Sstevel@tonic-gate * -1 if colors of cpus (cheetah+) are heterogenous. Need to 23027c478bd9Sstevel@tonic-gate * first determine the colors for the current cpu. 
23037c478bd9Sstevel@tonic-gate * >0 colors of all cpus are homogenous and < page_colors 23047c478bd9Sstevel@tonic-gate */ 23057c478bd9Sstevel@tonic-gate 23067c478bd9Sstevel@tonic-gate if ((flags & PG_MATCH_COLOR) && (cpucolors != 0)) { 23077c478bd9Sstevel@tonic-gate if (!nwaybins) { 23087c478bd9Sstevel@tonic-gate /* 23097c478bd9Sstevel@tonic-gate * cpucolors is negative if ecache setsizes 23107c478bd9Sstevel@tonic-gate * are heterogenous. determine colors for this 23117c478bd9Sstevel@tonic-gate * particular cpu. 23127c478bd9Sstevel@tonic-gate */ 23137c478bd9Sstevel@tonic-gate if (cpucolors < 0) { 23147c478bd9Sstevel@tonic-gate cpucolors = CPUSETSIZE() / MMU_PAGESIZE; 23157c478bd9Sstevel@tonic-gate ASSERT(cpucolors > 0); 23167c478bd9Sstevel@tonic-gate nwaybins = colors / cpucolors; 23177c478bd9Sstevel@tonic-gate } else { 23187c478bd9Sstevel@tonic-gate nwaybins = colors / cpucolors; 23197c478bd9Sstevel@tonic-gate ASSERT(szc > 0 || nwaybins > 1); 23207c478bd9Sstevel@tonic-gate } 23217c478bd9Sstevel@tonic-gate if (nwaybins < 2) 23227c478bd9Sstevel@tonic-gate cpucolors = 0; 23237c478bd9Sstevel@tonic-gate } 23247c478bd9Sstevel@tonic-gate 23257c478bd9Sstevel@tonic-gate if (cpucolors && (nwaycnt + 1 <= nwaybins)) { 23267c478bd9Sstevel@tonic-gate nwaycnt++; 23277c478bd9Sstevel@tonic-gate bin = (bin + (colors / nwaybins)) & 23287c478bd9Sstevel@tonic-gate (colors - 1); 23297c478bd9Sstevel@tonic-gate if (nwaycnt < nwaybins) { 23307c478bd9Sstevel@tonic-gate goto try_again; 23317c478bd9Sstevel@tonic-gate } 23327c478bd9Sstevel@tonic-gate } 23337c478bd9Sstevel@tonic-gate /* back to initial color if fall-thru */ 23347c478bd9Sstevel@tonic-gate } 23357c478bd9Sstevel@tonic-gate 23367c478bd9Sstevel@tonic-gate /* 23377c478bd9Sstevel@tonic-gate * color bins are all empty if color match. 
Try and satisfy 23387c478bd9Sstevel@tonic-gate * the request by breaking up or coalescing pages from 23397c478bd9Sstevel@tonic-gate * a different size freelist of the correct color that 23407c478bd9Sstevel@tonic-gate * satisfies the ORIGINAL color requested. If that 23417c478bd9Sstevel@tonic-gate * fails then try pages of the same size but different 23427c478bd9Sstevel@tonic-gate * colors assuming we are not called with 23437c478bd9Sstevel@tonic-gate * PG_MATCH_COLOR. 23447c478bd9Sstevel@tonic-gate */ 23457c478bd9Sstevel@tonic-gate if (!fill_tried) { 23467c478bd9Sstevel@tonic-gate fill_tried = 1; 23477c478bd9Sstevel@tonic-gate fill_marker = bin >> nszc_color_shift; 23487c478bd9Sstevel@tonic-gate pp = page_freelist_fill(szc, bin, mnode, mtype, 23497c478bd9Sstevel@tonic-gate PFNNULL); 23507c478bd9Sstevel@tonic-gate if (pp != NULL) { 23517c478bd9Sstevel@tonic-gate return (pp); 23527c478bd9Sstevel@tonic-gate } 23537c478bd9Sstevel@tonic-gate } 23547c478bd9Sstevel@tonic-gate 23557c478bd9Sstevel@tonic-gate if (flags & PG_MATCH_COLOR) 23567c478bd9Sstevel@tonic-gate break; 23577c478bd9Sstevel@tonic-gate 23587c478bd9Sstevel@tonic-gate /* 23597c478bd9Sstevel@tonic-gate * Select next color bin to try. 23607c478bd9Sstevel@tonic-gate */ 23617c478bd9Sstevel@tonic-gate if (szc == 0) { 23627c478bd9Sstevel@tonic-gate /* 23637c478bd9Sstevel@tonic-gate * PAGESIZE page case. 
23647c478bd9Sstevel@tonic-gate */ 23657c478bd9Sstevel@tonic-gate if (i == 0) { 23667c478bd9Sstevel@tonic-gate bin = (bin + BIN_STEP) & page_colors_mask; 23677c478bd9Sstevel@tonic-gate bin_marker = bin; 23687c478bd9Sstevel@tonic-gate } else { 23697c478bd9Sstevel@tonic-gate bin = (bin + vac_colors) & page_colors_mask; 23707c478bd9Sstevel@tonic-gate if (bin == bin_marker) { 23717c478bd9Sstevel@tonic-gate bin = (bin + 1) & page_colors_mask; 23727c478bd9Sstevel@tonic-gate bin_marker = bin; 23737c478bd9Sstevel@tonic-gate } 23747c478bd9Sstevel@tonic-gate } 23757c478bd9Sstevel@tonic-gate } else { 23767c478bd9Sstevel@tonic-gate /* 23777c478bd9Sstevel@tonic-gate * Large page case. 23787c478bd9Sstevel@tonic-gate */ 23797c478bd9Sstevel@tonic-gate bin = (bin + 1) & (colors - 1); 23807c478bd9Sstevel@tonic-gate } 23817c478bd9Sstevel@tonic-gate /* 23827c478bd9Sstevel@tonic-gate * If bin advanced to the next color bin of the 23837c478bd9Sstevel@tonic-gate * next larger pagesize, there is a chance the fill 23847c478bd9Sstevel@tonic-gate * could succeed. 23857c478bd9Sstevel@tonic-gate */ 23867c478bd9Sstevel@tonic-gate if (fill_marker != (bin >> nszc_color_shift)) 23877c478bd9Sstevel@tonic-gate fill_tried = 0; 23887c478bd9Sstevel@tonic-gate } 23897c478bd9Sstevel@tonic-gate 23907c478bd9Sstevel@tonic-gate #if defined(__sparc) 23917c478bd9Sstevel@tonic-gate if (!(flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) && 23927c478bd9Sstevel@tonic-gate (kcage_freemem >= kcage_lotsfree)) { 23937c478bd9Sstevel@tonic-gate /* 23947c478bd9Sstevel@tonic-gate * The Cage is ON and with plenty of free mem, and 23957c478bd9Sstevel@tonic-gate * we're willing to check for a NORELOC page if we 23967c478bd9Sstevel@tonic-gate * couldn't find a RELOC page, so spin again. 
23977c478bd9Sstevel@tonic-gate */ 23987c478bd9Sstevel@tonic-gate flags |= PG_NORELOC; 23997c478bd9Sstevel@tonic-gate mtype = MTYPE_NORELOC; 24007c478bd9Sstevel@tonic-gate goto big_try_again; 24017c478bd9Sstevel@tonic-gate } 24027c478bd9Sstevel@tonic-gate #else 24037c478bd9Sstevel@tonic-gate if (flags & PGI_MT_RANGE) { 24047c478bd9Sstevel@tonic-gate /* cycle through range of mtypes */ 24057c478bd9Sstevel@tonic-gate MTYPE_NEXT(mnode, mtype, flags); 24067c478bd9Sstevel@tonic-gate if (mtype >= 0) 24077c478bd9Sstevel@tonic-gate goto big_try_again; 24087c478bd9Sstevel@tonic-gate } 24097c478bd9Sstevel@tonic-gate #endif 24107c478bd9Sstevel@tonic-gate VM_STAT_ADD(vmm_vmstats.pgmf_allocfailed[szc]); 24117c478bd9Sstevel@tonic-gate 24127c478bd9Sstevel@tonic-gate return (NULL); 24137c478bd9Sstevel@tonic-gate } 24147c478bd9Sstevel@tonic-gate 24157c478bd9Sstevel@tonic-gate 24167c478bd9Sstevel@tonic-gate /* 24177c478bd9Sstevel@tonic-gate * Returns the count of free pages for 'pp' with size code 'szc'. 24187c478bd9Sstevel@tonic-gate * Note: This function does not return an exact value as the page freelist 24197c478bd9Sstevel@tonic-gate * locks are not held and thus the values in the page_counters may be 24207c478bd9Sstevel@tonic-gate * changing as we walk through the data. 
 */
static int
page_freecnt(int mnode, page_t *pp, uchar_t szc)
{
	pgcnt_t	pgfree;
	pgcnt_t cnt;
	ssize_t	r = szc;	/* region size */
	ssize_t	idx;
	int	i;
	int	full, range;

	/* Make sure pagenum passed in is aligned properly */
	ASSERT((pp->p_pagenum & (PNUM_SIZE(szc) - 1)) == 0);
	ASSERT(szc > 0);

	/* Prevent page_counters dynamic memory from being freed */
	rw_enter(&page_ctrs_rwlock[mnode], RW_READER);
	idx = PNUM_TO_IDX(mnode, r, pp->p_pagenum);
	cnt = PAGE_COUNTERS(mnode, r, idx);
	pgfree = cnt << PNUM_SHIFT(r - 1);
	range = FULL_REGION_CNT(szc);

	/* Check for completely full region */
	if (cnt == range) {
		rw_exit(&page_ctrs_rwlock[mnode]);
		return (pgfree);
	}

	/*
	 * Walk down the region sizes below 'szc', summing the free pages
	 * recorded at each smaller region size.  'range' tracks how many
	 * counters at level 'r' fall inside the original 'szc' region.
	 */
	while (--r > 0) {
		idx = PNUM_TO_IDX(mnode, r, pp->p_pagenum);
		full = FULL_REGION_CNT(r);
		for (i = 0; i < range; i++, idx++) {
			cnt = PAGE_COUNTERS(mnode, r, idx);
			/*
			 * If cnt here is full, that means we have already
			 * accounted for these pages earlier.
			 */
			if (cnt != full) {
				pgfree += (cnt << PNUM_SHIFT(r - 1));
			}
		}
		range *= full;
	}
	rw_exit(&page_ctrs_rwlock[mnode]);
	/*
	 * NOTE(review): pgfree is pgcnt_t but the return type is int —
	 * presumably counts for a single szc region always fit; confirm.
	 */
	return (pgfree);
}

/*
 * Called from page_geti_contig_pages to exclusively lock constituent pages
 * starting from 'spp' for page size code 'szc'.
 *
 * If 'ptcpthreshold' is set, the number of free pages needed in the 'szc'
 * region needs to be greater than or equal to the threshold.
 *
 * Returns 1 with all constituent pages locked SE_EXCL on success; returns 0
 * on failure with no pages left locked (any pages locked before the failing
 * one are backed out and unlocked).
 */
static int
page_trylock_contig_pages(int mnode, page_t *spp, uchar_t szc, int flags)
{
	pgcnt_t	pgcnt = PNUM_SIZE(szc);
	pgcnt_t pgfree, i;
	page_t *pp;

	VM_STAT_ADD(vmm_vmstats.ptcp[szc]);


	/* high-priority callers skip the free-count heuristic */
	if ((ptcpthreshold == 0) || (flags & PGI_PGCPHIPRI))
		goto skipptcpcheck;
	/*
	 * check if there are sufficient free pages available before attempting
	 * to trylock. Count is approximate as page counters can change.
	 */
	pgfree = page_freecnt(mnode, spp, szc);

	/* attempt to trylock if there are sufficient already free pages */
	if (pgfree < pgcnt/ptcpthreshold) {
		VM_STAT_ADD(vmm_vmstats.ptcpfreethresh[szc]);
		return (0);
	}

skipptcpcheck:

	for (i = 0; i < pgcnt; i++) {
		pp = &spp[i];
		if (!page_trylock(pp, SE_EXCL)) {
			VM_STAT_ADD(vmm_vmstats.ptcpfailexcl[szc]);
			/*
			 * Back out: i is unsigned, so the loop ends when
			 * the decrement wraps to (pgcnt_t)-1.
			 */
			while (--i != (pgcnt_t)-1) {
				pp = &spp[i];
				ASSERT(PAGE_EXCL(pp));
				page_unlock(pp);
			}
			return (0);
		}
		ASSERT(spp[i].p_pagenum == spp->p_pagenum + i);
		/*
		 * Fail if a constituent page is in use at a size >= the
		 * requested size (such a page cannot be claimed here).
		 */
		if ((pp->p_szc > szc || (szc && pp->p_szc == szc)) &&
		    !PP_ISFREE(pp)) {
			VM_STAT_ADD(vmm_vmstats.ptcpfailszc[szc]);
			ASSERT(i == 0);
			page_unlock(pp);
			return (0);
		}
		if (PP_ISNORELOC(pp)) {
			VM_STAT_ADD(vmm_vmstats.ptcpfailcage[szc]);
			/* back out everything locked so far, including i */
			while (i != (pgcnt_t)-1) {
				pp = &spp[i];
				ASSERT(PAGE_EXCL(pp));
				page_unlock(pp);
				i--;
			}
			return (0);
		}
	}
	VM_STAT_ADD(vmm_vmstats.ptcpok[szc]);
	return (1);
}

/*
 * Claim large page pointed to by 'pp'. 'pp' is the starting set
 * of 'szc' constituent pages that had been locked exclusively previously.
 * Will attempt to relocate constituent pages in use.
 *
 * Returns the list of claimed (now free, aged) pages on success, or NULL
 * on failure with all target pages unlocked and processed pages returned
 * to the free list.
 */
static page_t *
page_claim_contig_pages(page_t *pp, uchar_t szc, int flags)
{
	spgcnt_t pgcnt, npgs, i;
	page_t *targpp, *rpp, *hpp;
	page_t *replpp = NULL;
	page_t *pplist = NULL;

	ASSERT(pp != NULL);

	pgcnt = page_get_pagecnt(szc);
	while (pgcnt) {
		ASSERT(PAGE_EXCL(pp));
		ASSERT(!PP_ISNORELOC(pp));
		if (PP_ISFREE(pp)) {
			/*
			 * If this is a PG_FREE_LIST page then its
			 * size code can change underneath us due to
			 * page promotion or demotion. As an optimization
			 * use page_list_sub_pages() instead of
			 * page_list_sub().
			 */
			if (PP_ISAGED(pp)) {
				page_list_sub_pages(pp, szc);
				if (pp->p_szc == szc) {
					return (pp);
				}
				ASSERT(pp->p_szc < szc);
				npgs = page_get_pagecnt(pp->p_szc);
				hpp = pp;
				for (i = 0; i < npgs; i++, pp++) {
					pp->p_szc = szc;
				}
				page_list_concat(&pplist, &hpp);
				pgcnt -= npgs;
				continue;
			}
			/* cachelist page: hash it out and age it */
			ASSERT(!PP_ISAGED(pp));
			ASSERT(pp->p_szc == 0);
			page_list_sub(pp, PG_CACHE_LIST);
			page_hashout(pp, NULL);
			PP_SETAGED(pp);
			pp->p_szc = szc;
			page_list_concat(&pplist, &pp);
			pp++;
			pgcnt--;
			continue;
		}
		npgs = page_get_pagecnt(pp->p_szc);

		/*
		 * page_create_wait freemem accounting done by caller of
		 * page_get_freelist and not necessary to call it prior to
		 * calling page_get_replacement_page.
		 *
		 * page_get_replacement_page can call page_get_contig_pages
		 * to acquire a large page (szc > 0); the replacement must be
		 * smaller than the contig page size to avoid looping or
		 * szc == 0 and PGI_PGCPSZC0 is set.
		 */
		if (pp->p_szc < szc || (szc == 0 && (flags & PGI_PGCPSZC0))) {
			replpp = page_get_replacement_page(pp, NULL, 0);
			if (replpp) {
				npgs = page_get_pagecnt(pp->p_szc);
				ASSERT(npgs <= pgcnt);
				targpp = pp;
			}
		}

		/*
		 * If replacement is NULL or do_page_relocate fails, fail
		 * coalescing of pages.
		 */
		if (replpp == NULL || (do_page_relocate(&targpp, &replpp, 0,
		    &npgs, NULL) != 0)) {
			/*
			 * Unlock un-processed target list
			 */
			while (pgcnt--) {
				ASSERT(PAGE_EXCL(pp));
				page_unlock(pp);
				pp++;
			}
			/*
			 * Free the processed target list.
			 */
			while (pplist) {
				pp = pplist;
				page_sub(&pplist, pp);
				ASSERT(PAGE_EXCL(pp));
				ASSERT(pp->p_szc == szc);
				ASSERT(PP_ISFREE(pp));
				ASSERT(PP_ISAGED(pp));
				pp->p_szc = 0;
				page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
				page_unlock(pp);
			}

			if (replpp != NULL)
				page_free_replacement_page(replpp);

			return (NULL);
		}
		ASSERT(pp == targpp);

		/* LINTED */
		ASSERT(hpp = pp);	/* That's right, it's an assignment */

		pp += npgs;
		pgcnt -= npgs;

		/*
		 * Mark each relocated target page free/aged at the new size
		 * and release the corresponding replacement pages.
		 */
		while (npgs--) {
			ASSERT(PAGE_EXCL(targpp));
			ASSERT(!PP_ISFREE(targpp));
			ASSERT(!PP_ISNORELOC(targpp));
			PP_SETFREE(targpp);
			ASSERT(PP_ISAGED(targpp));
			ASSERT(targpp->p_szc < szc || (szc == 0 &&
			    (flags & PGI_PGCPSZC0)));
			targpp->p_szc = szc;
			targpp = targpp->p_next;

			rpp = replpp;
			ASSERT(rpp != NULL);
			page_sub(&replpp, rpp);
			ASSERT(PAGE_EXCL(rpp));
			ASSERT(!PP_ISFREE(rpp));
			page_unlock(rpp);
		}
		ASSERT(targpp == hpp);
		ASSERT(replpp == NULL);
		page_list_concat(&pplist, &targpp);
	}
	CHK_LPG(pplist, szc);
	return (pplist);
}

/*
 * Trim kernel cage from pfnlo-pfnhi and store result in lo-hi. Return code
 * of 0 means nothing left after trim.
 */

int
trimkcage(struct memseg *mseg, pfn_t *lo, pfn_t *hi, pfn_t pfnlo, pfn_t pfnhi)
{
	pfn_t kcagepfn;
	int decr;
	int rc = 0;

	if (PP_ISNORELOC(mseg->pages)) {
		if (PP_ISNORELOC(mseg->epages - 1) == 0) {

			/* lower part of this mseg inside kernel cage */
			decr = kcage_current_pfn(&kcagepfn);

			/* kernel cage may have transitioned past mseg */
			if (kcagepfn >= mseg->pages_base &&
			    kcagepfn < mseg->pages_end) {
				/* cage grows upward here, per the ASSERT */
				ASSERT(decr == 0);
				*lo = kcagepfn;
				*hi = MIN(pfnhi,
				    (mseg->pages_end - 1));
				rc = 1;
			}
		}
		/* else entire mseg in the cage */
	} else {
		if (PP_ISNORELOC(mseg->epages - 1)) {

			/* upper part of this mseg inside kernel cage */
			decr = kcage_current_pfn(&kcagepfn);

			/* kernel cage may have transitioned past mseg */
			if (kcagepfn >= mseg->pages_base &&
			    kcagepfn < mseg->pages_end) {
				/* cage grows downward here, per the ASSERT */
				ASSERT(decr);
				*hi = kcagepfn;
				*lo = MAX(pfnlo, mseg->pages_base);
				rc = 1;
			}
		} else {
			/* entire mseg outside of kernel cage */
			*lo = MAX(pfnlo, mseg->pages_base);
			*hi = MIN(pfnhi, (mseg->pages_end - 1));
			rc = 1;
		}
	}
	return (rc);
}

/*
 * called from page_get_contig_pages to search 'pfnlo' thru 'pfnhi' to "claim" a
 * page with size code 'szc'. Claiming such a page requires acquiring
 * exclusive locks on all constituent pages (page_trylock_contig_pages),
 * relocating pages in use and concatenating these constituent pages into a
 * large page.
 *
 * The page lists do not have such a large page and page_freelist_fill has
 * already failed to demote larger pages and/or coalesce smaller free pages.
 *
 * 'flags' may specify PG_MATCH_COLOR which would limit the search to large
 * pages with the same color as 'bin'.
 *
 * 'pfnflag' specifies the subset of the pfn range to search.
 */


static page_t *
page_geti_contig_pages(int mnode, uint_t bin, uchar_t szc, int flags,
    pfn_t pfnlo, pfn_t pfnhi, int pfnflag)
{
	struct memseg *mseg;
	pgcnt_t	szcpgcnt = page_get_pagecnt(szc);
	pgcnt_t szcpgmask = szcpgcnt - 1;
	pfn_t	randpfn;
	page_t *pp, *randpp, *endpp;
	uint_t colors;
	pfn_t hi, lo;
	uint_t skip;

	ASSERT(szc != 0 || (flags & PGI_PGCPSZC0));

	/* range too small to hold even one szc page */
	if ((pfnhi - pfnlo) + 1 < szcpgcnt)
		return (NULL);

	ASSERT(szc < mmu_page_sizes);

	colors = (szc) ? page_convert_color(0, szc, page_colors - 1) + 1 :
	    page_colors;

	ASSERT(bin < colors);

	/*
	 * trim the pfn range to search based on pfnflag. pfnflag is set
	 * when there have been previous page_get_contig_page failures to
	 * limit the search.
	 *
	 * The high bit in pfnflag specifies the number of 'slots' in the
	 * pfn range and the remainder of pfnflag specifies which slot.
	 * For example, a value of 1010b would mean the second slot of
	 * the pfn range that has been divided into 8 slots.
	 */
	if (pfnflag > 1) {
		int slots = 1 << (highbit(pfnflag) - 1);
		int slotid = pfnflag & (slots - 1);
		pgcnt_t szcpages;
		int slotlen;

		/* align the endpoints to szc-page boundaries */
		pfnlo = P2ROUNDUP(pfnlo, szcpgcnt);
		pfnhi = pfnhi & ~(szcpgcnt - 1);

		szcpages = ((pfnhi - pfnlo) + 1) / szcpgcnt;
		slotlen = howmany(szcpages, slots);
		pfnlo = pfnlo + (((slotid * slotlen) % szcpages) * szcpgcnt);
		ASSERT(pfnlo < pfnhi);
		if (pfnhi > pfnlo + (slotlen * szcpgcnt))
			pfnhi = pfnlo + (slotlen * szcpgcnt);
	}

	memsegs_lock(0);

	/*
	 * loop through memsegs to look for contig page candidates
	 */

	for (mseg = memsegs; mseg != NULL; mseg = mseg->next) {
		if (pfnhi < mseg->pages_base || pfnlo >= mseg->pages_end) {
			/* no overlap */
			continue;
		}

		if (mseg->pages_end - mseg->pages_base < szcpgcnt)
			/* mseg too small */
			continue;

		/* trim off kernel cage pages from pfn range */
		if (kcage_on) {
			if (trimkcage(mseg, &lo, &hi, pfnlo, pfnhi) == 0)
				continue;
		} else {
			lo = MAX(pfnlo, mseg->pages_base);
			hi = MIN(pfnhi, (mseg->pages_end - 1));
		}

		/* round to szcpgcnt boundaries */
		lo = P2ROUNDUP(lo, szcpgcnt);
		hi = hi & ~(szcpgcnt - 1);

		if (hi <= lo)
			continue;

		/*
		 * set lo to point to the pfn for the desired bin. Large
		 * page sizes may only have a single page color
		 */
		if ((colors > 1) && (flags & PG_MATCH_COLOR)) {
			uint_t lobin;

			/*
			 * factor in colorequiv to check additional
			 * 'equivalent' bins.
			 */
			if (colorequiv > 1 && colors > colorequiv)
				colors = colors / colorequiv;

			/* determine bin that lo currently points to */
			lobin = (lo & ((szcpgcnt * colors) - 1)) / szcpgcnt;

			/*
			 * set lo to point at appropriate color and set skip
			 * to arrive at the next szc page of the same color.
			 */
			lo += ((bin - lobin) & (colors - 1)) * szcpgcnt;

			skip = colors * szcpgcnt;
		} else {
			/* check all pages starting from lo */
			skip = szcpgcnt;
		}
		if (hi <= lo)
			/* mseg cannot satisfy color request */
			continue;

		/* randomly choose a point between lo and hi to begin search */

		randpfn = (pfn_t)GETTICK();
		randpfn = ((randpfn % (hi - lo)) + lo) & ~(skip - 1);
		randpp = mseg->pages + (randpfn - mseg->pages_base);

		ASSERT(randpp->p_pagenum == randpfn);

		pp = randpp;
		endpp =  mseg->pages + (hi - mseg->pages_base);

		ASSERT(randpp + szcpgcnt <= endpp);

		/*
		 * Circular scan from the random start point, stepping by
		 * 'skip' so each candidate keeps the requested color.
		 */
		do {
			ASSERT(!(pp->p_pagenum & szcpgmask));
			ASSERT((flags & PG_MATCH_COLOR) == 0 ||
			    colorequiv > 1 ||
			    PP_2_BIN(pp) == bin);
			if (page_trylock_contig_pages(mnode, pp, szc, flags)) {
				/* pages unlocked by page_claim on failure */
				if (page_claim_contig_pages(pp, szc, flags)) {
					memsegs_unlock(0);
					return (pp);
				}
			}

			pp += skip;
			if (pp >= endpp) {
				/* start from the beginning */
				pp = mseg->pages + (lo - mseg->pages_base);
				ASSERT(pp->p_pagenum == lo);
				ASSERT(pp + szcpgcnt <= endpp);
			}
		} while (pp != randpp);
	}
	memsegs_unlock(0);
	return (NULL);
}


/*
 * controlling routine that searches through physical memory in an attempt to
 * claim a large page based on the input parameters, since no page of the
 * requested size could be found on the page free lists.
 *
 * calls page_geti_contig_pages with an initial pfn range from the mnode
 * and mtype. page_geti_contig_pages will trim off the parts of the pfn range
 * that overlaps with the kernel cage or does not match the requested page
 * color if PG_MATCH_COLOR is set. Since this search is very expensive,
 * page_geti_contig_pages may further limit the search range based on
 * previous failure counts (pgcpfailcnt[]).
 *
 * for PGI_PGCPSZC0 requests, page_get_contig_pages will relocate a base
 * pagesize page that satisfies mtype.
 */
page_t *
page_get_contig_pages(int mnode, uint_t bin, int mtype, uchar_t szc,
    uint_t flags)
{
	pfn_t		pfnlo, pfnhi;	/* contig pages pfn range */
	page_t		*pp;
	int		pfnflag = 0;	/* no limit on search if 0 */

	VM_STAT_ADD(vmm_vmstats.pgcp_alloc[szc]);

	/* LINTED */
	MTYPE_START(mnode, mtype, flags);
	if (mtype < 0) {	/* mnode does not have memory in mtype range */
		VM_STAT_ADD(vmm_vmstats.pgcp_allocempty[szc]);
		return (NULL);
	}

	ASSERT(szc > 0 || (flags & PGI_PGCPSZC0));

	/* do not limit search and ignore color if hi pri */

	if (pgcplimitsearch && ((flags & PGI_PGCPHIPRI) == 0))
		pfnflag = pgcpfailcnt[szc];

	/* remove color match to improve chances */

	if (flags & PGI_PGCPHIPRI || pfnflag)
		flags &= ~PG_MATCH_COLOR;

	do {
		/* get pfn range based on mnode and mtype */
		MNODETYPE_2_PFN(mnode, mtype, pfnlo, pfnhi);

		ASSERT(pfnhi >= pfnlo);

		pp = page_geti_contig_pages(mnode, bin, szc, flags,
		    pfnlo, pfnhi, pfnflag);

		if (pp != NULL) {
			/*
			 * Success: halve the failure count, which doubles
			 * the fraction of the pfn range searched next time.
			 */
			pfnflag = pgcpfailcnt[szc];
			if (pfnflag) {
				/* double the search size */
				pgcpfailcnt[szc] = pfnflag >> 1;
			}
			VM_STAT_ADD(vmm_vmstats.pgcp_allocok[szc]);
			return (pp);
		}
		/* LINTED */
	} while ((flags & PGI_MT_RANGE) &&
	    (MTYPE_NEXT(mnode, mtype, flags) >= 0));

	VM_STAT_ADD(vmm_vmstats.pgcp_allocfailed[szc]);
	return (NULL);
}


/*
 * Find the `best' page on the freelist for this (vp,off) (as,vaddr) pair.
 *
 * Does its own locking and accounting.
 * If PG_MATCH_COLOR is set, then NULL will be returned if there are no
 * pages of the proper color even if there are pages of a different color.
 *
 * Finds a page, removes it, THEN locks it.
 */

/*ARGSUSED*/
page_t *
page_get_freelist(struct vnode *vp, u_offset_t off, struct seg *seg,
	caddr_t vaddr, size_t size, uint_t flags, struct lgrp *lgrp)
{
	struct as	*as = seg->s_as;
	page_t		*pp = NULL;
	ulong_t		bin;
	uchar_t		szc;
	int		mnode;
	int		mtype;
	page_t		*(*page_get_func)(int, uint_t, int, uchar_t, uint_t);
	lgrp_mnode_cookie_t	lgrp_cookie;

	/*
	 * First pass uses the ordinary per-mnode freelists; on failure we
	 * may retry below with page_get_contig_pages (see pgretry).
	 */
	page_get_func = page_get_mnode_freelist;

	/*
	 * If we aren't passed a specific lgroup, or passed a freed lgrp
	 * assume we wish to allocate near to the current thread's home.
	 */
	if (!LGRP_EXISTS(lgrp))
		lgrp = lgrp_home_lgrp();

	if (kcage_on) {
		if ((flags & (PG_NORELOC | PG_PANIC)) == PG_NORELOC &&
		    kcage_freemem < kcage_throttlefree + btop(size) &&
		    curthread != kcage_cageout_thread) {
			/*
			 * Set a "reserve" of kcage_throttlefree pages for
			 * PG_PANIC and cageout thread allocations.
			 *
			 * Everybody else has to serialize in
			 * page_create_get_something() to get a cage page, so
			 * that we don't deadlock cageout!
			 */
			return (NULL);
		}
	} else {
		/* Cage is off: NORELOC is meaningless, mark it explicitly. */
		flags &= ~PG_NORELOC;
		flags |= PGI_NOCAGE;
	}

	/* LINTED */
	MTYPE_INIT(mtype, vp, vaddr, flags);

	/*
	 * Convert size to page size code.
	 */
	if ((szc = page_szc(size)) == (uchar_t)-1)
		panic("page_get_freelist: illegal page size request");
	ASSERT(szc < mmu_page_sizes);

	VM_STAT_ADD(vmm_vmstats.pgf_alloc[szc]);

	/* LINTED */
	AS_2_BIN(as, seg, vp, vaddr, bin);

	/* bin is for base pagesize color - convert if larger pagesize. */
	if (szc)
		bin = page_convert_color(0, szc, bin);

	/*
	 * Try to get a local page first, but try remote if we can't
	 * get a page of the right color.
	 */
pgretry:
	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		pp = page_get_func(mnode, bin, mtype, szc, flags);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgf_allocok[szc]);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
	}
	ASSERT(pp == NULL);

	/*
	 * for non-SZC0 PAGESIZE requests, check cachelist before checking
	 * remote free lists.  Caller expected to call page_get_cachelist which
	 * will check local cache lists and remote free lists.
	 */
	if (szc == 0 && ((flags & PGI_PGCPSZC0) == 0)) {
		VM_STAT_ADD(vmm_vmstats.pgf_allocdeferred);
		return (NULL);
	}

	ASSERT(szc > 0 || (flags & PGI_PGCPSZC0));

	lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);

	/*
	 * Try to get a non-local freelist page.
	 */
	LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		pp = page_get_func(mnode, bin, mtype, szc, flags);
		if (pp != NULL) {
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			VM_STAT_ADD(vmm_vmstats.pgf_allocokrem[szc]);
			return (pp);
		}
	}

	ASSERT(pp == NULL);

	/*
	 * when the cage is off chances are page_get_contig_pages() will fail
	 * to lock a large page chunk therefore when the cage is off it's not
	 * called by default.  this can be changed via /etc/system.
	 *
	 * page_get_contig_pages() also called to acquire a base pagesize page
	 * for page_create_get_something().
	 */
	if (!(flags & PG_NORELOC) && (pg_contig_disable == 0) &&
	    (kcage_on || pg_lpgcreate_nocage || szc == 0) &&
	    (page_get_func != page_get_contig_pages)) {

		/* Second (and last) pass: retry with the contig allocator. */
		VM_STAT_ADD(vmm_vmstats.pgf_allocretry[szc]);
		page_get_func = page_get_contig_pages;
		goto pgretry;
	}

	/*
	 * Record the contig-allocation failure; pgcpfailcnt[] is consulted
	 * by the contig path to narrow subsequent pfn searches (and is
	 * decayed again on a later success).
	 */
	if (pgcplimitsearch && page_get_func == page_get_contig_pages)
		pgcpfailcnt[szc]++;

	VM_STAT_ADD(vmm_vmstats.pgf_allocfailed[szc]);
	return (NULL);
}

/*
 * Find the `best' page on the cachelist for this (vp,off) (as,vaddr) pair.
 *
 * Does its own locking.
 * If PG_MATCH_COLOR is set, then NULL will be returned if there are no
 * pages of the proper color even if there are pages of a different color.
 * Otherwise, scan the bins for ones with pages.  For each bin with pages,
 * try to lock one of them.  If no page can be locked, try the
 * next bin.  Return NULL if a page can not be found and locked.
 *
 * Finds a page, tries to lock it, then removes it.
 */

/*ARGSUSED*/
page_t *
page_get_cachelist(struct vnode *vp, u_offset_t off, struct seg *seg,
	caddr_t vaddr, uint_t flags, struct lgrp *lgrp)
{
	page_t		*pp;
	struct as	*as = seg->s_as;
	ulong_t		bin;
	/*LINTED*/
	int		mnode;
	int		mtype;
	lgrp_mnode_cookie_t	lgrp_cookie;

	/*
	 * If we aren't passed a specific lgroup, or passed a freed lgrp
	 * assume we wish to allocate near to the current thread's home.
	 */
	if (!LGRP_EXISTS(lgrp))
		lgrp = lgrp_home_lgrp();

	if (!kcage_on) {
		/* Cage is off: NORELOC is meaningless, mark it explicitly. */
		flags &= ~PG_NORELOC;
		flags |= PGI_NOCAGE;
	}

	if ((flags & (PG_NORELOC | PG_PANIC | PG_PUSHPAGE)) == PG_NORELOC &&
	    kcage_freemem <= kcage_throttlefree) {
		/*
		 * Reserve kcage_throttlefree pages for critical kernel
		 * threads.
		 *
		 * Everybody else has to go to page_create_get_something()
		 * to get a cage page, so we don't deadlock cageout.
		 */
		return (NULL);
	}

	/* LINTED */
	AS_2_BIN(as, seg, vp, vaddr, bin);

	ASSERT(bin <= page_colors_mask);

	/* LINTED */
	MTYPE_INIT(mtype, vp, vaddr, flags);

	VM_STAT_ADD(vmm_vmstats.pgc_alloc);

	/*
	 * Try local cachelists first
	 */
	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_LOCAL);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		pp = page_get_mnode_cachelist(bin, flags, mnode, mtype);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgc_allocok);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
	}

	lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);

	/*
	 * Try freelists/cachelists that are farther away
	 * This is our only chance to allocate remote pages for PAGESIZE
	 * requests.
	 */
	LGRP_MNODE_COOKIE_UPGRADE(lgrp_cookie);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		/* Prefer the remote freelist (szc 0) over its cachelist. */
		pp = page_get_mnode_freelist(mnode, bin, mtype,
		    0, flags);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgc_allocokdeferred);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
		pp = page_get_mnode_cachelist(bin, flags, mnode, mtype);
		if (pp != NULL) {
			VM_STAT_ADD(vmm_vmstats.pgc_allocokrem);
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);
			return (pp);
		}
	}

	VM_STAT_ADD(vmm_vmstats.pgc_allocfailed);
	return (NULL);
}

/*
 * Scan the cachelists of a single memory node for a free page of the
 * requested color (bin) and memory type, trylock it, pull it off the
 * list and return it locked SE_EXCL; NULL if nothing could be found
 * and locked.  Only one cachelist mutex is held at a time, so the scan
 * can start at any bin without lock-ordering concerns.
 */
page_t *
page_get_mnode_cachelist(uint_t bin, uint_t flags, int mnode, int mtype)
{
	kmutex_t	*pcm;
	int		i;
	page_t		*pp;
	page_t		*first_pp;
	uint_t		bin_marker;
	int		nwaybins, nwaycnt;
	int		cpucolors;

	VM_STAT_ADD(vmm_vmstats.pgmc_alloc);

	/* LINTED */
	MTYPE_START(mnode, mtype, flags);
	if (mtype < 0) {	/* mnode does not have memory in mtype range */
		VM_STAT_ADD(vmm_vmstats.pgmc_allocempty);
		return (NULL);
	}

	nwaybins = 0;
	cpucolors = cpu_page_colors;
	/*
	 * adjust cpucolors to possibly check additional 'equivalent' bins
	 * to try to minimize fragmentation of large pages by delaying calls
	 * to page_freelist_fill.
	 */
	if (colorequiv > 1) {
		int equivcolors = page_colors / colorequiv;

		if (equivcolors && (cpucolors == 0 || equivcolors < cpucolors))
			cpucolors = equivcolors;
	}

	/*
	 * Only hold one cachelist lock at a time, that way we
	 * can start anywhere and not have to worry about lock
	 * ordering.
	 */

big_try_again:
	nwaycnt = 0;
	for (i = 0; i <= page_colors; i++) {
		if (PAGE_CACHELISTS(mnode, bin, mtype)) {
			pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST);
			mutex_enter(pcm);
			/* Re-read the list head now that we hold the lock. */
			pp = PAGE_CACHELISTS(mnode, bin, mtype);
			if (pp != NULL) {
				first_pp = pp;
				ASSERT(pp->p_vnode);
				ASSERT(PP_ISAGED(pp) == 0);
				ASSERT(pp->p_szc == 0);
				ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode);
				while (!page_trylock(pp, SE_EXCL)) {
					pp = pp->p_next;
					ASSERT(pp->p_szc == 0);
					if (pp == first_pp) {
						/*
						 * We have searched the
						 * complete list!
						 * And all of them (might
						 * only be one) are locked.
						 * This can happen since
						 * these pages can also be
						 * found via the hash list.
						 * When found via the hash
						 * list, they are locked
						 * first, then removed.
						 * We give up to let the
						 * other thread run.
						 */
						pp = NULL;
						break;
					}
					ASSERT(pp->p_vnode);
					ASSERT(PP_ISFREE(pp));
					ASSERT(PP_ISAGED(pp) == 0);
					ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) ==
					    mnode);
				}

				if (pp) {
					page_t	**ppp;
					/*
					 * Found and locked a page.
					 * Pull it off the list.
					 */
					ASSERT(mtype == PP_2_MTYPE(pp));
					ppp = &PAGE_CACHELISTS(mnode, bin,
					    mtype);
					page_sub(ppp, pp);
					/*
					 * Subtract counters before releasing
					 * pcm mutex to avoid a race with
					 * page_freelist_coalesce and
					 * page_freelist_fill.
					 */
					page_ctr_sub(pp, PG_CACHE_LIST);
					mutex_exit(pcm);
					ASSERT(pp->p_vnode);
					ASSERT(PP_ISAGED(pp) == 0);
#if defined(__sparc)
					ASSERT(!kcage_on ||
					    (flags & PG_NORELOC) == 0 ||
					    PP_ISNORELOC(pp));
					if (PP_ISNORELOC(pp)) {
						kcage_freemem_sub(1);
					}
#endif
					VM_STAT_ADD(vmm_vmstats.
					    pgmc_allocok);
					return (pp);
				}
			}
			mutex_exit(pcm);
		}

		/*
		 * Wow! The initial bin is empty or no page in the bin could
		 * be locked.
		 *
		 * If specific color is needed, check if page color may be in
		 * other bins.
		 */
		if ((flags & PG_MATCH_COLOR) && (cpucolors != 0)) {
			if (!nwaybins) {
				/* Lazily compute the equivalent-bin stride. */
				if (cpucolors < 0) {
					cpucolors = CPUSETSIZE() / MMU_PAGESIZE;
					ASSERT(cpucolors > 0);
					nwaybins = page_colors / cpucolors;
					if (nwaybins < 2)
						cpucolors = 0;
				} else {
					nwaybins = page_colors / cpucolors;
					ASSERT(nwaybins > 1);
				}
			}

			if (++nwaycnt >= nwaybins) {
				break;
			}
			bin = (bin + (page_colors / nwaybins)) &
			    page_colors_mask;
			continue;
		}

		if (i == 0) {
			/* Remember where the wider scan started. */
			bin = (bin + BIN_STEP) & page_colors_mask;
			bin_marker = bin;
		} else {
			bin = (bin + vac_colors) & page_colors_mask;
			if (bin == bin_marker) {
				bin = (bin + 1) & page_colors_mask;
				bin_marker = bin;
			}
		}
	}

#if defined(__sparc)
	if (!(flags & (PG_NORELOC | PGI_NOCAGE | PGI_RELOCONLY)) &&
	    (kcage_freemem >= kcage_lotsfree)) {
		/*
		 * The Cage is ON and with plenty of free mem, and
		 * we're willing to check for a NORELOC page if we
		 * couldn't find a RELOC page, so spin again.
		 */
		flags |= PG_NORELOC;
		mtype = MTYPE_NORELOC;
		goto big_try_again;
	}
#else
	if (flags & PGI_MT_RANGE) {
		/* Advance to the next memory type range and rescan. */
		MTYPE_NEXT(mnode, mtype, flags);
		if (mtype >= 0)
			goto big_try_again;
	}
#endif
	VM_STAT_ADD(vmm_vmstats.pgmc_allocfailed);
	return (NULL);
}

#ifdef DEBUG
#define	REPL_PAGE_STATS
#endif /* DEBUG */

#ifdef REPL_PAGE_STATS
/* Debug-only counters for the replacement-page allocator below. */
struct repl_page_stats {
	uint_t	ngets;
	uint_t	ngets_noreloc;
	uint_t	npgr_noreloc;
	uint_t	nnopage_first;
	uint_t	nnopage;
	uint_t	nhashout;
	uint_t	nnofree;
	uint_t	nnext_pp;
} repl_page_stats;

#define	REPL_STAT_INCR(v)	atomic_add_32(&repl_page_stats.v, 1)
#else /* REPL_PAGE_STATS */
#define	REPL_STAT_INCR(v)
#endif /* REPL_PAGE_STATS */

/* Tunable: when set, allow contig-page search in replacement path. */
int	pgrppgcp;

/*
 * The freemem accounting must be done by the caller.
 * First we try to get a replacement page of the same size as like_pp,
 * if that is not possible, then we just get a set of discontiguous
 * PAGESIZE pages.
34437c478bd9Sstevel@tonic-gate */ 34447c478bd9Sstevel@tonic-gate page_t * 34452dae3fb5Sjjc page_get_replacement_page(page_t *orig_like_pp, struct lgrp *lgrp_target, 34467c478bd9Sstevel@tonic-gate uint_t pgrflags) 34477c478bd9Sstevel@tonic-gate { 34487c478bd9Sstevel@tonic-gate page_t *like_pp; 34497c478bd9Sstevel@tonic-gate page_t *pp, *pplist; 34507c478bd9Sstevel@tonic-gate page_t *pl = NULL; 34517c478bd9Sstevel@tonic-gate ulong_t bin; 34527c478bd9Sstevel@tonic-gate int mnode, page_mnode; 34537c478bd9Sstevel@tonic-gate int szc; 34547c478bd9Sstevel@tonic-gate spgcnt_t npgs, pg_cnt; 34557c478bd9Sstevel@tonic-gate pfn_t pfnum; 34567c478bd9Sstevel@tonic-gate int mtype; 34577c478bd9Sstevel@tonic-gate int flags = 0; 34587c478bd9Sstevel@tonic-gate lgrp_mnode_cookie_t lgrp_cookie; 34592dae3fb5Sjjc lgrp_t *lgrp; 34607c478bd9Sstevel@tonic-gate 34617c478bd9Sstevel@tonic-gate REPL_STAT_INCR(ngets); 34627c478bd9Sstevel@tonic-gate like_pp = orig_like_pp; 34637c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(like_pp)); 34647c478bd9Sstevel@tonic-gate 34657c478bd9Sstevel@tonic-gate szc = like_pp->p_szc; 34667c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(szc); 34677c478bd9Sstevel@tonic-gate /* 34687c478bd9Sstevel@tonic-gate * Now we reset like_pp to the base page_t. 34697c478bd9Sstevel@tonic-gate * That way, we won't walk past the end of this 'szc' page. 
34707c478bd9Sstevel@tonic-gate */ 34717c478bd9Sstevel@tonic-gate pfnum = PFN_BASE(like_pp->p_pagenum, szc); 34727c478bd9Sstevel@tonic-gate like_pp = page_numtopp_nolock(pfnum); 34737c478bd9Sstevel@tonic-gate ASSERT(like_pp->p_szc == szc); 34747c478bd9Sstevel@tonic-gate 34757c478bd9Sstevel@tonic-gate if (PP_ISNORELOC(like_pp)) { 34767c478bd9Sstevel@tonic-gate ASSERT(kcage_on); 34777c478bd9Sstevel@tonic-gate REPL_STAT_INCR(ngets_noreloc); 34787c478bd9Sstevel@tonic-gate flags = PGI_RELOCONLY; 34797c478bd9Sstevel@tonic-gate } else if (pgrflags & PGR_NORELOC) { 34807c478bd9Sstevel@tonic-gate ASSERT(kcage_on); 34817c478bd9Sstevel@tonic-gate REPL_STAT_INCR(npgr_noreloc); 34827c478bd9Sstevel@tonic-gate flags = PG_NORELOC; 34837c478bd9Sstevel@tonic-gate } 34847c478bd9Sstevel@tonic-gate 34857c478bd9Sstevel@tonic-gate /* 34867c478bd9Sstevel@tonic-gate * Kernel pages must always be replaced with the same size 34877c478bd9Sstevel@tonic-gate * pages, since we cannot properly handle demotion of kernel 34887c478bd9Sstevel@tonic-gate * pages. 34897c478bd9Sstevel@tonic-gate */ 34907c478bd9Sstevel@tonic-gate if (like_pp->p_vnode == &kvp) 34917c478bd9Sstevel@tonic-gate pgrflags |= PGR_SAMESZC; 34927c478bd9Sstevel@tonic-gate 34937c478bd9Sstevel@tonic-gate /* LINTED */ 34947c478bd9Sstevel@tonic-gate MTYPE_PGR_INIT(mtype, flags, like_pp, page_mnode); 34957c478bd9Sstevel@tonic-gate 34967c478bd9Sstevel@tonic-gate while (npgs) { 34977c478bd9Sstevel@tonic-gate pplist = NULL; 34987c478bd9Sstevel@tonic-gate for (;;) { 34997c478bd9Sstevel@tonic-gate pg_cnt = page_get_pagecnt(szc); 35007c478bd9Sstevel@tonic-gate bin = PP_2_BIN(like_pp); 35017c478bd9Sstevel@tonic-gate ASSERT(like_pp->p_szc == orig_like_pp->p_szc); 35027c478bd9Sstevel@tonic-gate ASSERT(pg_cnt <= npgs); 35037c478bd9Sstevel@tonic-gate 35047c478bd9Sstevel@tonic-gate /* 35057c478bd9Sstevel@tonic-gate * If an lgroup was specified, try to get the 35067c478bd9Sstevel@tonic-gate * page from that lgroup. 
35072dae3fb5Sjjc * NOTE: Must be careful with code below because 35082dae3fb5Sjjc * lgroup may disappear and reappear since there 35092dae3fb5Sjjc * is no locking for lgroup here. 35107c478bd9Sstevel@tonic-gate */ 35112dae3fb5Sjjc if (LGRP_EXISTS(lgrp_target)) { 35122dae3fb5Sjjc /* 35132dae3fb5Sjjc * Keep local variable for lgroup separate 35142dae3fb5Sjjc * from lgroup argument since this code should 35152dae3fb5Sjjc * only be exercised when lgroup argument 35162dae3fb5Sjjc * exists.... 35172dae3fb5Sjjc */ 35182dae3fb5Sjjc lgrp = lgrp_target; 35192dae3fb5Sjjc 35207c478bd9Sstevel@tonic-gate /* Try the lgroup's freelists first */ 35217c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, 35227c478bd9Sstevel@tonic-gate LGRP_SRCH_LOCAL); 35237c478bd9Sstevel@tonic-gate while ((pplist == NULL) && 35247c478bd9Sstevel@tonic-gate (mnode = lgrp_memnode_choose(&lgrp_cookie)) 35257c478bd9Sstevel@tonic-gate != -1) { 35267c478bd9Sstevel@tonic-gate pplist = page_get_mnode_freelist( 35277c478bd9Sstevel@tonic-gate mnode, bin, mtype, szc, 35287c478bd9Sstevel@tonic-gate flags); 35297c478bd9Sstevel@tonic-gate } 35307c478bd9Sstevel@tonic-gate 35317c478bd9Sstevel@tonic-gate /* 35327c478bd9Sstevel@tonic-gate * Now try it's cachelists if this is a 35337c478bd9Sstevel@tonic-gate * small page. Don't need to do it for 35347c478bd9Sstevel@tonic-gate * larger ones since page_freelist_coalesce() 35357c478bd9Sstevel@tonic-gate * already failed. 
35367c478bd9Sstevel@tonic-gate */ 35377c478bd9Sstevel@tonic-gate if (pplist != NULL || szc != 0) 35387c478bd9Sstevel@tonic-gate break; 35397c478bd9Sstevel@tonic-gate 35407c478bd9Sstevel@tonic-gate /* Now try it's cachelists */ 35417c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, 35427c478bd9Sstevel@tonic-gate LGRP_SRCH_LOCAL); 35437c478bd9Sstevel@tonic-gate 35447c478bd9Sstevel@tonic-gate while ((pplist == NULL) && 35457c478bd9Sstevel@tonic-gate (mnode = lgrp_memnode_choose(&lgrp_cookie)) 35467c478bd9Sstevel@tonic-gate != -1) { 35477c478bd9Sstevel@tonic-gate pplist = page_get_mnode_cachelist( 35487c478bd9Sstevel@tonic-gate bin, flags, mnode, mtype); 35497c478bd9Sstevel@tonic-gate } 35507c478bd9Sstevel@tonic-gate if (pplist != NULL) { 35517c478bd9Sstevel@tonic-gate page_hashout(pplist, NULL); 35527c478bd9Sstevel@tonic-gate PP_SETAGED(pplist); 35537c478bd9Sstevel@tonic-gate REPL_STAT_INCR(nhashout); 35547c478bd9Sstevel@tonic-gate break; 35557c478bd9Sstevel@tonic-gate } 35567c478bd9Sstevel@tonic-gate /* Done looking in this lgroup. Bail out. */ 35577c478bd9Sstevel@tonic-gate break; 35587c478bd9Sstevel@tonic-gate } 35597c478bd9Sstevel@tonic-gate 35607c478bd9Sstevel@tonic-gate /* 35612dae3fb5Sjjc * No lgroup was specified (or lgroup was removed by 35622dae3fb5Sjjc * DR, so just try to get the page as close to 35632dae3fb5Sjjc * like_pp's mnode as possible. 35647c478bd9Sstevel@tonic-gate * First try the local freelist... 35657c478bd9Sstevel@tonic-gate */ 35667c478bd9Sstevel@tonic-gate mnode = PP_2_MEM_NODE(like_pp); 35677c478bd9Sstevel@tonic-gate pplist = page_get_mnode_freelist(mnode, bin, 35687c478bd9Sstevel@tonic-gate mtype, szc, flags); 35697c478bd9Sstevel@tonic-gate if (pplist != NULL) 35707c478bd9Sstevel@tonic-gate break; 35717c478bd9Sstevel@tonic-gate 35727c478bd9Sstevel@tonic-gate REPL_STAT_INCR(nnofree); 35737c478bd9Sstevel@tonic-gate 35747c478bd9Sstevel@tonic-gate /* 35757c478bd9Sstevel@tonic-gate * ...then the local cachelist. 
			 * Don't need to do it for larger pages because
			 * page_freelist_coalesce() already failed there
			 * anyway.
			 */
			if (szc == 0) {
				pplist = page_get_mnode_cachelist(bin, flags,
				    mnode, mtype);
				if (pplist != NULL) {
					/*
					 * A cachelist page is still hashed
					 * into a vnode; drop that identity
					 * and mark the page aged before
					 * using it as a replacement.
					 */
					page_hashout(pplist, NULL);
					PP_SETAGED(pplist);
					REPL_STAT_INCR(nhashout);
					break;
				}
			}

			/*
			 * Now try remote freelists: walk the lgroup memnode
			 * hierarchy outward from this page's home memnode.
			 */
			page_mnode = mnode;
			lgrp =
			    lgrp_hand_to_lgrp(MEM_NODE_2_LGRPHAND(page_mnode));
			LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp,
			    LGRP_SRCH_HIER);
			while (pplist == NULL &&
			    (mnode = lgrp_memnode_choose(&lgrp_cookie))
			    != -1) {
				/*
				 * Skip local mnode (already tried above) and
				 * memnodes with no configured memory.
				 */
				if ((mnode == page_mnode) ||
				    (mem_node_config[mnode].exists == 0))
					continue;

				pplist = page_get_mnode_freelist(mnode,
				    bin, mtype, szc, flags);
			}

			if (pplist != NULL)
				break;

			/*
			 * Now try remote cachelists; only meaningful for
			 * base-size (szc == 0) requests.
			 */
			LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp,
			    LGRP_SRCH_HIER);
			while (pplist == NULL && szc == 0) {
				mnode = lgrp_memnode_choose(&lgrp_cookie);
				if (mnode == -1)
					break;
				/*
				 * Skip local mnode.
				 */
				if ((mnode == page_mnode) ||
				    (mem_node_config[mnode].exists == 0))
					continue;

				pplist = page_get_mnode_cachelist(bin,
				    flags, mnode, mtype);

				if (pplist != NULL) {
					/* hash out, as for the local case */
					page_hashout(pplist, NULL);
					PP_SETAGED(pplist);
					REPL_STAT_INCR(nhashout);
					break;
				}
			}

			/*
			 * Break out of while loop under the following cases:
			 * - If we successfully got a page.
			 * - If pgrflags specified only returning a specific
			 *   page size and we could not find that page size.
			 * - If we could not satisfy the request with PAGESIZE
			 *   or larger pages.
			 */
			if (pplist != NULL || szc == 0)
				break;

			if ((pgrflags & PGR_SAMESZC) || pgrppgcp) {
				/*
				 * try to find contig page -- the caller asked
				 * for this exact page size (PGR_SAMESZC), or
				 * pgrppgcp is set (NOTE(review): presumably a
				 * tunable forcing the contig path; confirm).
				 */
				LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp,
				    LGRP_SRCH_HIER);

				while ((pplist == NULL) &&
				    (mnode =
				    lgrp_memnode_choose(&lgrp_cookie))
				    != -1) {
					pplist = page_get_contig_pages(
					    mnode, bin, mtype, szc,
					    flags | PGI_PGCPHIPRI);
				}
				break;
			}

			/*
			 * The correct thing to do here is try the next
			 * page size down using szc--. Due to a bug
			 * with the processing of HAT_RELOAD_SHARE
			 * where the sfmmu_ttecnt arrays of all
			 * hats sharing an ISM segment don't get updated,
			 * using intermediate size pages for relocation
			 * can lead to continuous page faults.
			 */
			szc = 0;
		}

		if (pplist != NULL) {
			DTRACE_PROBE4(page__get,
			    lgrp_t *, lgrp,
			    int, mnode,
			    ulong_t, bin,
			    uint_t, flags);

			/*
			 * Transfer pg_cnt constituent pages from pplist onto
			 * the result list pl, clearing their free/aged state
			 * as they go.  like_pp tracks the next template page
			 * so colors/bins line up with the pages being
			 * replaced.
			 */
			while (pplist != NULL && pg_cnt--) {
				ASSERT(pplist != NULL);
				pp = pplist;
				page_sub(&pplist, pp);
				PP_CLRFREE(pp);
				PP_CLRAGED(pp);
				page_list_concat(&pl, &pp);
				npgs--;
				like_pp = like_pp + 1;
				REPL_STAT_INCR(nnext_pp);
			}
			ASSERT(pg_cnt == 0);
		} else {
			break;
		}
	}

	if (npgs) {
		/*
		 * We were unable to allocate the necessary number
		 * of pages.
		 * We need to free up any pl.
		 */
		REPL_STAT_INCR(nnopage);
		page_free_replacement_page(pl);
		return (NULL);
	} else {
		return (pl);
	}
}

/*
 * Demote a free large page to its constituent PAGESIZE pages.
 *
 * The caller must hold the page lock on pp (per the PAGE_LOCKED ASSERT),
 * and pp must be a free large page (p_szc != 0).  On return the page has
 * been broken up on the freelists and pp->p_szc is 0.
 */
void
page_demote_free_pages(page_t *pp)
{

	int mnode;

	ASSERT(pp != NULL);
	ASSERT(PAGE_LOCKED(pp));
	ASSERT(PP_ISFREE(pp));
	ASSERT(pp->p_szc != 0 && pp->p_szc < mmu_page_sizes);

	mnode = PP_2_MEM_NODE(pp);
	page_freelist_lock(mnode);
	/*
	 * Re-check p_szc under the freelist lock before demoting:
	 * NOTE(review): presumably the page could have been demoted by
	 * another thread between the ASSERT above and acquiring the lock --
	 * confirm against the locking rules for p_szc of free pages.
	 */
	if (pp->p_szc != 0) {
		(void) page_demote(mnode, PFN_BASE(pp->p_pagenum,
		    pp->p_szc), pp->p_szc, 0, PC_NO_COLOR, PC_FREE);
	}
	page_freelist_unlock(mnode);
	ASSERT(pp->p_szc == 0);
}