17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate /* 237c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate /* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */ 287c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * University Copyright- Copyright (c) 1982, 1986, 1988 327c478bd9Sstevel@tonic-gate * The Regents of the University of California 337c478bd9Sstevel@tonic-gate * All Rights Reserved 347c478bd9Sstevel@tonic-gate * 357c478bd9Sstevel@tonic-gate * University Acknowledgment- Portions of this document are derived from 367c478bd9Sstevel@tonic-gate * software developed by the University of California, Berkeley, and its 377c478bd9Sstevel@tonic-gate * contributors. 387c478bd9Sstevel@tonic-gate */ 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 417c478bd9Sstevel@tonic-gate 427c478bd9Sstevel@tonic-gate /* 437c478bd9Sstevel@tonic-gate * VM - physical page management. 
447c478bd9Sstevel@tonic-gate */ 457c478bd9Sstevel@tonic-gate 467c478bd9Sstevel@tonic-gate #include <sys/types.h> 477c478bd9Sstevel@tonic-gate #include <sys/t_lock.h> 487c478bd9Sstevel@tonic-gate #include <sys/param.h> 497c478bd9Sstevel@tonic-gate #include <sys/systm.h> 507c478bd9Sstevel@tonic-gate #include <sys/errno.h> 517c478bd9Sstevel@tonic-gate #include <sys/time.h> 527c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 537c478bd9Sstevel@tonic-gate #include <sys/vm.h> 547c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 557c478bd9Sstevel@tonic-gate #include <sys/swap.h> 567c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 577c478bd9Sstevel@tonic-gate #include <sys/tuneable.h> 587c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 597c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 607c478bd9Sstevel@tonic-gate #include <sys/callb.h> 617c478bd9Sstevel@tonic-gate #include <sys/debug.h> 627c478bd9Sstevel@tonic-gate #include <sys/tnf_probe.h> 637c478bd9Sstevel@tonic-gate #include <sys/condvar_impl.h> 647c478bd9Sstevel@tonic-gate #include <sys/mem_config.h> 657c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h> 667c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 677c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 687c478bd9Sstevel@tonic-gate #include <sys/strlog.h> 697c478bd9Sstevel@tonic-gate #include <sys/mman.h> 707c478bd9Sstevel@tonic-gate #include <sys/ontrap.h> 717c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 727c478bd9Sstevel@tonic-gate #include <sys/vfs.h> 737c478bd9Sstevel@tonic-gate 747c478bd9Sstevel@tonic-gate #include <vm/hat.h> 757c478bd9Sstevel@tonic-gate #include <vm/anon.h> 767c478bd9Sstevel@tonic-gate #include <vm/page.h> 777c478bd9Sstevel@tonic-gate #include <vm/seg.h> 787c478bd9Sstevel@tonic-gate #include <vm/pvn.h> 797c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 807c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h> 817c478bd9Sstevel@tonic-gate 827c478bd9Sstevel@tonic-gate #include <fs/fs_subr.h> 837c478bd9Sstevel@tonic-gate 
/* When nonzero, page aging (p_age handling) is disabled. Tunable; off by default. */
static int nopageage = 0;

static pgcnt_t max_page_get;	/* max page_get request size in pages */
pgcnt_t total_pages = 0;	/* total number of pages (used by /proc) */

/*
 * vnode for all pages which are retired from the VM system;
 * such as pages with Uncorrectable Errors.
 */
struct vnode retired_ppages;

/* Forward declarations for the page-retirement machinery below. */
static void page_retired_init(void);
static void retired_dispose(vnode_t *vp, page_t *pp, int flag,
	int dn, cred_t *cr);
static void retired_inactive(vnode_t *vp, cred_t *cr);
static void page_retired(page_t *pp);
static void retired_page_removed(page_t *pp);
void page_unretire_pages(void);

/*
 * The maximum number of pages that will be unretired in one iteration.
 * This number is totally arbitrary.
 */
#define	UNRETIRE_PAGES		256

/*
 * We limit the number of pages that may be retired to
 * a percentage of the total physical memory. Note that
 * the percentage values are stored as 'basis points',
 * ie, 100 basis points is 1%.
 */
#define	MAX_PAGES_RETIRED_BPS_DEFAULT	10	/* .1% */

uint64_t max_pages_retired_bps = MAX_PAGES_RETIRED_BPS_DEFAULT;

static int pages_retired_limit_exceeded(void);

/*
 * operations vector for vnode with retired pages. Only VOP_DISPOSE
 * and VOP_INACTIVE are intercepted; every other operation fails with
 * fs_nosys (or the fs_* stub noted per entry).
 */
struct vnodeops retired_vnodeops = {
	"retired_vnodeops",
	fs_nosys,		/* open */
	fs_nosys,		/* close */
	fs_nosys,		/* read */
	fs_nosys,		/* write */
	fs_nosys,		/* ioctl */
	fs_nosys,		/* setfl */
	fs_nosys,		/* getattr */
	fs_nosys,		/* setattr */
	fs_nosys,		/* access */
	fs_nosys,		/* lookup */
	fs_nosys,		/* create */
	fs_nosys,		/* remove */
	fs_nosys,		/* link */
	fs_nosys,		/* rename */
	fs_nosys,		/* mkdir */
	fs_nosys,		/* rmdir */
	fs_nosys,		/* readdir */
	fs_nosys,		/* symlink */
	fs_nosys,		/* readlink */
	fs_nosys,		/* fsync */
	retired_inactive,	/* inactive - intercepted */
	fs_nosys,		/* fid */
	fs_rwlock,		/* rwlock */
	fs_rwunlock,		/* rwunlock */
	fs_nosys,		/* seek */
	fs_nosys,		/* cmp */
	fs_nosys,		/* frlock */
	fs_nosys,		/* space */
	fs_nosys,		/* realvp */
	fs_nosys,		/* getpage */
	fs_nosys,		/* putpage */
	fs_nosys_map,		/* map */
	fs_nosys_addmap,	/* addmap */
	fs_nosys,		/* delmap */
	fs_nosys_poll,		/* poll */
	fs_nosys,		/* dump */
	fs_nosys,		/* l_pathconf */
	fs_nosys,		/* pageio */
	fs_nosys,		/* dumpctl */
	retired_dispose,	/* dispose - intercepted */
	fs_nosys,		/* setsecattr */
	fs_nosys,		/* getsecattr */
	fs_nosys,		/* shrlock */
	fs_vnevent_nosupport	/* vnevent */
};

/*
 * freemem_lock protects all freemem variables:
 * availrmem. Also this lock protects the globals which track the
 * availrmem changes for accurate kernel footprint calculation.
 * See below for an explanation of these
 * globals.
 */
kmutex_t freemem_lock;
pgcnt_t availrmem;
pgcnt_t availrmem_initial;

/*
 * These globals track availrmem changes to get a more accurate
 * estimate of the kernel size. Historically pp_kernel is used for
 * kernel size and is based on availrmem. But availrmem is adjusted for
 * locked pages in the system not just for kernel locked pages.
 * These new counters will track the pages locked through segvn and
 * by explicit user locking.
 *
 * segvn_pages_locked : This keeps track on a global basis how many pages
 * are currently locked because of I/O.
 *
 * pages_locked : How many pages are locked because of user specified
 * locking through mlock or plock.
 *
 * pages_useclaim,pages_claimed : These two variables track the
 * claim adjustments because of the protection changes on a segvn segment.
 *
 * All these globals are protected by the same lock which protects availrmem.
 */
pgcnt_t segvn_pages_locked;
pgcnt_t pages_locked;
pgcnt_t pages_useclaim;
pgcnt_t pages_claimed;


/*
 * new_freemem_lock protects freemem, freemem_wait & freemem_cv.
 */
static kmutex_t	new_freemem_lock;
static uint_t	freemem_wait;	/* someone waiting for freemem */
static kcondvar_t freemem_cv;

/*
 * The logical page free list is maintained as two lists, the 'free'
 * and the 'cache' lists.
 * The free list contains those pages that should be reused first.
 *
 * The implementation of the lists is machine dependent.
 * page_get_freelist(), page_get_cachelist(),
 * page_list_sub(), and page_list_add()
 * form the interface to the machine dependent implementation.
 *
 * Pages with p_free set are on the cache list.
 * Pages with p_free and p_age set are on the free list.
 *
 * A page may be locked while on either list.
 */

/*
 * free list accounting stuff.
 *
 *
 * Spread out the value for the number of pages on the
 * page free and page cache lists. If there is just one
 * value, then it must be under just one lock.
 * The lock contention and cache traffic are a real bother.
 *
 * When we acquire and then drop a single pcf lock
 * we can start in the middle of the array of pcf structures.
 * If we acquire more than one pcf lock at a time, we need to
 * start at the front to avoid deadlocking.
 *
 * pcf_count holds the number of pages in each pool.
 *
 * pcf_block is set when page_create_get_something() has asked the
 * PSM page freelist and page cachelist routines without specifying
 * a color and nothing came back. This is used to block anything
 * else from moving pages from one list to the other while the
 * lists are searched again. If a page is freed while pcf_block is
 * set, then pcf_reserve is incremented. pcgs_unblock() takes care
 * of clearing pcf_block, doing the wakeups, etc.
 */

/* Fan-out and padding scale with CPU count to reduce lock/cache contention. */
#if NCPU <= 4
#define	PAD	1
#define	PCF_FANOUT	4
static	uint_t	pcf_mask = PCF_FANOUT - 1;
#else
#define	PAD	9
#ifdef	sun4v
#define	PCF_FANOUT	32
#else
#define	PCF_FANOUT	128
#endif
static	uint_t	pcf_mask = PCF_FANOUT - 1;
#endif

struct pcf {
	uint_t		pcf_touch;	/* just to help the cache */
	uint_t		pcf_count;	/* page count */
	kmutex_t	pcf_lock;	/* protects the structure */
	uint_t		pcf_wait;	/* number of waiters */
	uint_t		pcf_block;	/* pcgs flag to page_free() */
	uint_t		pcf_reserve;	/* pages freed after pcf_block set */
	uint_t		pcf_fill[PAD];	/* to line up on the caches */
};

static struct	pcf	pcf[PCF_FANOUT];
/* Hash the current CPU id to a pcf pool; relies on PCF_FANOUT being 2^n. */
#define	PCF_INDEX()	((CPU->cpu_id) & (pcf_mask))

kmutex_t	pcgs_lock;		/* serializes page_create_get_ */
kmutex_t	pcgs_cagelock;		/* serializes NOSLEEP cage allocs */
kmutex_t	pcgs_wait_lock;		/* used for delay in pcgs */
static kcondvar_t	pcgs_cv;	/* cv for delay in pcgs */

/* Largest value representable in a page_t's p_lckcnt field. */
#define	PAGE_LOCK_MAXIMUM \
	((1 << (sizeof (((page_t *)0)->p_lckcnt) * NBBY)) - 1)

/*
 * Control over the verbosity of page retirement. When set to zero, no messages
 * will be printed. A value of one will trigger messages for retirement
 * operations, and is intended for processors which don't yet support FMA
 * (spitfire). Two will cause verbose messages to be printed when retirements
 * complete, and is intended only for debugging purposes.
 */
int page_retire_messages = 0;

#ifdef VM_STATS

/*
 * No locks, but so what, they are only statistics.
 */

static struct page_tcnt {
	int pc_free_cache;	/* free's into cache list */
	int pc_free_dontneed;	/* free's with dontneed */
	int pc_free_pageout;	/* free's from pageout */
	int pc_free_free;	/* free's into free list */
	int pc_free_pages;	/* free's into large page free list */
	int pc_destroy_pages;	/* large page destroy's */
	int pc_get_cache;	/* get's from cache list */
	int pc_get_free;	/* get's from free list */
	int pc_reclaim;		/* reclaim's */
	int pc_abortfree;	/* abort's of free pages */
	int pc_find_hit;	/* find's that find page */
	int pc_find_miss;	/* find's that don't find page */
	int pc_destroy_free;	/* # of free pages destroyed */
#define	PC_HASH_CNT	(4*PAGE_HASHAVELEN)
	int pc_find_hashlen[PC_HASH_CNT+1];
	int pc_addclaim_pages;
	int pc_subclaim_pages;
	int pc_free_replacement_page[2];
	int pc_try_demote_pages[6];
	int pc_demote_pages[2];
} pagecnt;

uint_t	hashin_count;
uint_t	hashin_not_held;
uint_t	hashin_already;

uint_t	hashout_count;
uint_t	hashout_not_held;

uint_t	page_create_count;
uint_t	page_create_not_enough;
uint_t	page_create_not_enough_again;
uint_t	page_create_zero;
uint_t	page_create_hashout;
uint_t	page_create_page_lock_failed;
uint_t	page_create_trylock_failed;
uint_t	page_create_found_one;
uint_t	page_create_hashin_failed;
uint_t	page_create_dropped_phm;

uint_t	page_create_new;
uint_t	page_create_exists;
uint_t	page_create_putbacks;
uint_t	page_create_overshoot;

uint_t	page_reclaim_zero;
uint_t	page_reclaim_zero_locked;

uint_t	page_rename_exists;
uint_t	page_rename_count;

uint_t	page_lookup_cnt[20];
uint_t	page_lookup_nowait_cnt[10];
uint_t	page_find_cnt;
uint_t	page_exists_cnt;
uint_t	page_exists_forreal_cnt;
uint_t	page_lookup_dev_cnt;
uint_t	get_cachelist_cnt;
uint_t	page_create_cnt[10];
uint_t	alloc_pages[8];
uint_t	page_exphcontg[19];
uint_t	page_create_large_cnt[10];

/*
 * Walk the page_hash chain at (index) looking for the page that matches
 * (vp, off); leaves (pp) pointing at the match or NULL.
 * This variant also collects hit/miss and chain-length statistics.
 */
#define	PAGE_HASH_SEARCH(index, pp, vp, off) { \
	uint_t mylen = 0; \
			\
	for ((pp) = page_hash[(index)]; (pp); (pp) = (pp)->p_hash, mylen++) { \
		if ((pp)->p_vnode == (vp) && (pp)->p_offset == (off)) \
			break; \
	} \
	if ((pp) != NULL) \
		pagecnt.pc_find_hit++; \
	else \
		pagecnt.pc_find_miss++; \
	if (mylen > PC_HASH_CNT) \
		mylen = PC_HASH_CNT; \
	pagecnt.pc_find_hashlen[mylen]++; \
}

#else	/* VM_STATS */

/*
 * Don't collect statistics
 */
#define	PAGE_HASH_SEARCH(index, pp, vp, off) { \
	for ((pp) = page_hash[(index)]; (pp); (pp) = (pp)->p_hash) { \
		if ((pp)->p_vnode == (vp) && (pp)->p_offset == (off)) \
			break; \
	} \
}

#endif	/* VM_STATS */



#ifdef DEBUG
#define	MEMSEG_SEARCH_STATS
#endif

#ifdef MEMSEG_SEARCH_STATS
/* Counters for page_numtomemseg-style searches; DEBUG kernels only. */
struct memseg_stats {
	uint_t nsearch;
	uint_t nlastwon;
	uint_t nhashwon;
	uint_t nnotfound;
} memseg_stats;

#define	MEMSEG_STAT_INCR(v) \
	atomic_add_32(&memseg_stats.v, 1)
#else
#define	MEMSEG_STAT_INCR(x)
#endif

struct memseg *memsegs;		/* list of memory segments */


static void page_init_mem_config(void);
static int page_do_hashin(page_t *, vnode_t *, u_offset_t);
static void page_do_hashout(page_t *);

static void page_demote_vp_pages(page_t *);

/*
 * vm subsystem related initialization: registers the CPR (suspend/resume)
 * callback, hooks into the memory-config framework, and initializes the
 * retired-pages vnode.
 */
void
vm_init(void)
{
	boolean_t callb_vm_cpr(void *, int);

	(void) callb_add(callb_vm_cpr, 0, CB_CL_CPR_VM, "vm");
	page_init_mem_config();

	/*
	 * initialise the vnode for retired pages
	 */
	page_retired_init();
}

/*
 * This function is called at startup and when memory is added or deleted.
 */
void
init_pages_pp_maximum()
{
	static pgcnt_t p_min;
	static pgcnt_t pages_pp_maximum_startup;
	static pgcnt_t avrmem_delta;
	static int init_done;
	static int user_set;	/* true if set in /etc/system */

	if (init_done == 0) {

		/* If the user specified a value, save it */
		if (pages_pp_maximum != 0) {
			user_set = 1;
			pages_pp_maximum_startup = pages_pp_maximum;
		}

		/*
		 * Setting of pages_pp_maximum is based first time
		 * on the value of availrmem just after the start-up
		 * allocations. To preserve this relationship at run
		 * time, use a delta from availrmem_initial.
		 */
		ASSERT(availrmem_initial >= availrmem);
		avrmem_delta = availrmem_initial - availrmem;

		/* The allowable floor of pages_pp_maximum */
		p_min = tune.t_minarmem + 100;

		/* Make sure we don't come through here again. */
		init_done = 1;
	}
	/*
	 * Determine pages_pp_maximum, the number of currently available
	 * pages (availrmem) that can't be `locked'. If not set by
	 * the user, we set it to 4% of the currently available memory
	 * plus 4MB.
	 * But we also insist that it be greater than tune.t_minarmem;
	 * otherwise a process could lock down a lot of memory, get swapped
	 * out, and never have enough to get swapped back in.
	 */
	if (user_set)
		pages_pp_maximum = pages_pp_maximum_startup;
	else
		pages_pp_maximum = ((availrmem_initial - avrmem_delta) / 25)
		    + btop(4 * 1024 * 1024);

	if (pages_pp_maximum <= p_min) {
		pages_pp_maximum = p_min;
	}
}

/*
 * Arbitrarily cap the largest page_get request at half of the page pool.
 */
void
set_max_page_get(pgcnt_t target_total_pages)
{
	max_page_get = target_total_pages / 2;
}

/* Pages currently scheduled for deletion by the memory-config framework. */
static pgcnt_t pending_delete;

/*
 * Memory-config callback: memory was added; recompute the page_get cap
 * and pages_pp_maximum against the new total.
 */
/*ARGSUSED*/
static void
page_mem_config_post_add(
	void *arg,
	pgcnt_t delta_pages)
{
	set_max_page_get(total_pages - pending_delete);
	init_pages_pp_maximum();
}

/*
 * Memory-config callback: a deletion of delta_pages is about to start.
 * Track it in pending_delete and shrink the page_get cap accordingly.
 * Always approves the deletion (returns 0).
 */
/*ARGSUSED*/
static int
page_mem_config_pre_del(
	void *arg,
	pgcnt_t delta_pages)
{
	pgcnt_t nv;

	nv = atomic_add_long_nv(&pending_delete, (spgcnt_t)delta_pages);
	set_max_page_get(total_pages - nv);
	return (0);
}

/*
 * Memory-config callback: a deletion finished (or was cancelled).
 * Undo the pending_delete accounting; recompute pages_pp_maximum only
 * if the deletion actually happened.
 */
/*ARGSUSED*/
static void
page_mem_config_post_del(
	void *arg,
	pgcnt_t delta_pages,
	int cancelled)
{
	pgcnt_t nv;

	nv = atomic_add_long_nv(&pending_delete, -(spgcnt_t)delta_pages);
	set_max_page_get(total_pages - nv);
	if (!cancelled)
		init_pages_pp_maximum();
}

static kphysm_setup_vector_t page_mem_config_vec = {
	KPHYSM_SETUP_VECTOR_VERSION,
	page_mem_config_post_add,
	page_mem_config_pre_del,
	page_mem_config_post_del,
};

/* Register the add/delete callbacks above with the memory-config framework. */
static void
page_init_mem_config(void)
{
	int ret;

	ret = kphysm_setup_func_register(&page_mem_config_vec, (void *)NULL);
	ASSERT(ret == 0);
}

/*
 * Evenly spread out the PCF counters for large free pages.
 * Caller context: startup / freelist population; the ASSERTs document
 * that no pcf pool is blocked or has waiters at this point.
 */
static void
page_free_large_ctr(pgcnt_t npages)
{
	static struct pcf	*p = pcf;
	pgcnt_t			lump;

	freemem += npages;

	lump = roundup(npages, PCF_FANOUT) / PCF_FANOUT;

	while (npages > 0) {

		ASSERT(!p->pcf_block);

		if (lump < npages) {
			p->pcf_count += (uint_t)lump;
			npages -= lump;
		} else {
			p->pcf_count += (uint_t)npages;
			npages = 0;
		}

		ASSERT(!p->pcf_wait);

		/* wrap back to the first pool; p persists across calls */
		if (++p > &pcf[PCF_FANOUT - 1])
			p = pcf;
	}

	ASSERT(npages == 0);
}

/*
 * Add a physical chunk of memory to the system free lists during startup.
 * Platform specific startup() allocates the memory for the page structs.
 *
 * num	- number of page structures
 * base - page number (pfn) to be associated with the first page.
 *
 * Since we are doing this during startup (ie.
 * single threaded), we will
 * use shortcut routines to avoid any locking overhead while putting all
 * these pages on the freelists.
 *
 * NOTE: Any changes performed to page_free(), must also be performed to
 *	 add_physmem() since this is how we initialize all page_t's at
 *	 boot time.
 */
void
add_physmem(
	page_t	*pp,
	pgcnt_t	num,
	pfn_t	pnum)
{
	page_t	*root = NULL;
	/* Index of the largest page size the platform supports. */
	uint_t	szc = page_num_pagesizes() - 1;
	/* Number of constituent pages in one largest-size page. */
	pgcnt_t	large = page_get_pagecnt(szc);
	/* Count of constituent pages gathered onto `root' so far. */
	pgcnt_t	cnt = 0;

	TRACE_2(TR_FAC_VM, TR_PAGE_INIT,
		"add_physmem:pp %p num %lu", pp, num);

	/*
	 * Arbitrarily limit the max page_get request
	 * to 1/2 of the page structs we have.
	 */
	total_pages += num;
	set_max_page_get(total_pages);

	/*
	 * The physical space for the pages array
	 * representing ram pages has already been
	 * allocated.  Here we initialize each lock
	 * in the page structure, and put each on
	 * the free list
	 */
	for (; num; pp++, pnum++, num--) {

		/*
		 * this needs to fill in the page number
		 * and do any other arch specific initialization
		 */
		add_physmem_cb(pp, pnum);

		/*
		 * Initialize the page lock as unlocked, since nobody
		 * can see or access this page yet.
		 */
		pp->p_selock = 0;

		/*
		 * Initialize IO lock
		 */
		page_iolock_init(pp);

		/*
		 * initialize other fields in the page_t
		 */
		PP_SETFREE(pp);
		page_clr_all_props(pp);
		PP_SETAGED(pp);
		pp->p_offset = (u_offset_t)-1;
		/* Link the page to itself: a one-element circular list. */
		pp->p_next = pp;
		pp->p_prev = pp;

		/*
		 * Simple case: System doesn't support large pages.
		 */
		if (szc == 0) {
			pp->p_szc = 0;
			page_free_at_startup(pp);
			continue;
		}

		/*
		 * Handle unaligned pages, we collect them up onto
		 * the root page until we have a full large page.
		 */
		if (!IS_P2ALIGNED(pnum, large)) {

			/*
			 * If not in a large page,
			 * just free as small page.
			 */
			if (root == NULL) {
				pp->p_szc = 0;
				page_free_at_startup(pp);
				continue;
			}

			/*
			 * Link a constituent page into the large page.
			 */
			pp->p_szc = szc;
			page_list_concat(&root, &pp);

			/*
			 * When large page is fully formed, free it.
			 */
			if (++cnt == large) {
				page_free_large_ctr(cnt);
				page_list_add_pages(root, PG_LIST_ISINIT);
				root = NULL;
				cnt = 0;
			}
			continue;
		}

		/*
		 * At this point we have a page number which
		 * is aligned. We assert that we aren't already
		 * in a different large page.
		 */
		ASSERT(IS_P2ALIGNED(pnum, large));
		ASSERT(root == NULL && cnt == 0);

		/*
		 * If insufficient number of pages left to form
		 * a large page, just free the small page.
		 */
		if (num < large) {
			pp->p_szc = 0;
			page_free_at_startup(pp);
			continue;
		}

		/*
		 * Otherwise start a new large page.
		 */
		pp->p_szc = szc;
		cnt++;
		root = pp;
	}
	/* Every started large page must have been completed and freed. */
	ASSERT(root == NULL && cnt == 0);
}

/*
 * Find a page representing the specified [vp, offset].
7467c478bd9Sstevel@tonic-gate * If we find the page but it is intransit coming in, 7477c478bd9Sstevel@tonic-gate * it will have an "exclusive" lock and we wait for 7487c478bd9Sstevel@tonic-gate * the i/o to complete. A page found on the free list 7497c478bd9Sstevel@tonic-gate * is always reclaimed and then locked. On success, the page 7507c478bd9Sstevel@tonic-gate * is locked, its data is valid and it isn't on the free 7517c478bd9Sstevel@tonic-gate * list, while a NULL is returned if the page doesn't exist. 7527c478bd9Sstevel@tonic-gate */ 7537c478bd9Sstevel@tonic-gate page_t * 7547c478bd9Sstevel@tonic-gate page_lookup(vnode_t *vp, u_offset_t off, se_t se) 7557c478bd9Sstevel@tonic-gate { 7567c478bd9Sstevel@tonic-gate return (page_lookup_create(vp, off, se, NULL, NULL, 0)); 7577c478bd9Sstevel@tonic-gate } 7587c478bd9Sstevel@tonic-gate 7597c478bd9Sstevel@tonic-gate /* 7607c478bd9Sstevel@tonic-gate * Find a page representing the specified [vp, offset]. 7617c478bd9Sstevel@tonic-gate * We either return the one we found or, if passed in, 7627c478bd9Sstevel@tonic-gate * create one with identity of [vp, offset] of the 7637c478bd9Sstevel@tonic-gate * pre-allocated page. If we find exsisting page but it is 7647c478bd9Sstevel@tonic-gate * intransit coming in, it will have an "exclusive" lock 7657c478bd9Sstevel@tonic-gate * and we wait for the i/o to complete. A page found on 7667c478bd9Sstevel@tonic-gate * the free list is always reclaimed and then locked. 
 * On success, the page is locked, its data is valid and
 * it isn't on the free list, while a NULL is returned
 * if the page doesn't exist and newpp is NULL;
 */
page_t *
page_lookup_create(
	vnode_t *vp,
	u_offset_t off,
	se_t se,
	page_t *newpp,
	spgcnt_t *nrelocp,
	int flags)
{
	page_t		*pp;
	kmutex_t	*phm;		/* hash chain mutex, when held */
	ulong_t		index;		/* hash chain index for [vp, off] */
	uint_t		hash_locked;	/* nonzero iff we hold phm */
	uint_t		es;		/* extra flags for the lock attempt */

	ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp)));
	VM_STAT_ADD(page_lookup_cnt[0]);
	/* A preallocated replacement page must arrive exclusively locked. */
	ASSERT(newpp ? PAGE_EXCL(newpp) : 1);

	/*
	 * Acquire the appropriate page hash lock since
	 * we have to search the hash list.  Pages that
	 * hash to this list can't change identity while
	 * this lock is held.
	 */
	hash_locked = 0;
	index = PAGE_HASH_FUNC(vp, off);
	phm = NULL;
top:
	PAGE_HASH_SEARCH(index, pp, vp, off);
	if (pp != NULL) {
		VM_STAT_ADD(page_lookup_cnt[1]);
		/*
		 * NOTE(review): the `1' bit requests stronger lock semantics
		 * when a replacement page was passed in — presumably it maps
		 * to SE_EXCL_WANTED inside page_lock_es(); confirm against
		 * the lock implementation.  Caller flags are folded in.
		 */
		es = (newpp != NULL) ? 1 : 0;
		es |= flags;
		if (!hash_locked) {
			VM_STAT_ADD(page_lookup_cnt[2]);
			if (!page_try_reclaim_lock(pp, se, es)) {
				/*
				 * On a miss, acquire the phm.  Then
				 * next time, page_lock() will be called,
				 * causing a wait if the page is busy.
				 * just looping with page_trylock() would
				 * get pretty boring.
				 */
				VM_STAT_ADD(page_lookup_cnt[3]);
				phm = PAGE_HASH_MUTEX(index);
				mutex_enter(phm);
				hash_locked = 1;
				goto top;
			}
		} else {
			VM_STAT_ADD(page_lookup_cnt[4]);
			if (!page_lock_es(pp, se, phm, P_RECLAIM, es)) {
				/* Lock attempt dropped phm; start over. */
				VM_STAT_ADD(page_lookup_cnt[5]);
				goto top;
			}
		}

		/*
		 * Since `pp' is locked it can not change identity now.
		 * Reconfirm we locked the correct page.
		 *
		 * Both the p_vnode and p_offset *must* be cast volatile
		 * to force a reload of their values: The PAGE_HASH_SEARCH
		 * macro will have stuffed p_vnode and p_offset into
		 * registers before calling page_trylock(); another thread,
		 * actually holding the hash lock, could have changed the
		 * page's identity in memory, but our registers would not
		 * be changed, fooling the reconfirmation.  If the hash
		 * lock was held during the search, the casting would
		 * not be needed.
		 */
		VM_STAT_ADD(page_lookup_cnt[6]);
		if (((volatile struct vnode *)(pp->p_vnode) != vp) ||
		    ((volatile u_offset_t)(pp->p_offset) != off)) {
			VM_STAT_ADD(page_lookup_cnt[7]);
			if (hash_locked) {
				/*
				 * Identity changed while we held the hash
				 * lock: somebody broke the locking rules.
				 */
				panic("page_lookup_create: lost page %p",
				    (void *)pp);
				/*NOTREACHED*/
			}
			page_unlock(pp);
			phm = PAGE_HASH_MUTEX(index);
			mutex_enter(phm);
			hash_locked = 1;
			goto top;
		}

		/*
		 * If page_trylock() was called, then pp may still be on
		 * the cachelist (can't be on the free list, it would not
		 * have been found in the search).  If it is on the
		 * cachelist it must be pulled now.  To pull the page from
		 * the cachelist, it must be exclusively locked.
		 *
		 * The other big difference between page_trylock() and
		 * page_lock(), is that page_lock() will pull the
		 * page from whatever free list (the cache list in this
		 * case) the page is on.  If page_trylock() was used
		 * above, then we have to do the reclaim ourselves.
		 */
		if ((!hash_locked) && (PP_ISFREE(pp))) {
			ASSERT(PP_ISAGED(pp) == 0);
			VM_STAT_ADD(page_lookup_cnt[8]);

			/*
			 * page_reclaim will insure that we
			 * have this page exclusively
			 */

			if (!page_reclaim(pp, NULL)) {
				/*
				 * Page_reclaim dropped whatever lock
				 * we held.
				 */
				VM_STAT_ADD(page_lookup_cnt[9]);
				phm = PAGE_HASH_MUTEX(index);
				mutex_enter(phm);
				hash_locked = 1;
				goto top;
			} else if (se == SE_SHARED && newpp == NULL) {
				/* Caller only wanted a shared hold. */
				VM_STAT_ADD(page_lookup_cnt[10]);
				page_downgrade(pp);
			}
		}

		if (hash_locked) {
			mutex_exit(phm);
		}

		/*
		 * Found page is smaller than the preallocated one and we
		 * hold it exclusively: try to relocate the contents into
		 * the larger newpp.
		 */
		if (newpp != NULL && pp->p_szc < newpp->p_szc &&
		    PAGE_EXCL(pp) && nrelocp != NULL) {
			ASSERT(nrelocp != NULL);
			(void) page_relocate(&pp, &newpp, 1, 1, nrelocp,
			    NULL);
			if (*nrelocp > 0) {
				VM_STAT_COND_ADD(*nrelocp == 1,
				    page_lookup_cnt[11]);
				VM_STAT_COND_ADD(*nrelocp > 1,
				    page_lookup_cnt[12]);
				/* Relocation succeeded: hand back newpp. */
				pp = newpp;
				se = SE_EXCL;
			} else {
				if (se == SE_SHARED) {
					page_downgrade(pp);
				}
				VM_STAT_ADD(page_lookup_cnt[13]);
			}
		} else if (newpp != NULL && nrelocp != NULL) {
			if (PAGE_EXCL(pp) && se == SE_SHARED) {
				page_downgrade(pp);
			}
			VM_STAT_COND_ADD(pp->p_szc < newpp->p_szc,
			    page_lookup_cnt[14]);
			VM_STAT_COND_ADD(pp->p_szc == newpp->p_szc,
			    page_lookup_cnt[15]);
			VM_STAT_COND_ADD(pp->p_szc > newpp->p_szc,
			    page_lookup_cnt[16]);
		} else if (newpp != NULL && PAGE_EXCL(pp)) {
			se = SE_EXCL;
		}
	} else if (!hash_locked) {
		/*
		 * Miss on the unlocked search: retry under the hash
		 * mutex before concluding the page doesn't exist.
		 */
		VM_STAT_ADD(page_lookup_cnt[17]);
		phm = PAGE_HASH_MUTEX(index);
		mutex_enter(phm);
		hash_locked = 1;
		goto top;
	} else if (newpp != NULL) {
		/*
		 * If we have a preallocated page then
		 * insert it now and basically behave like
		 * page_create.
		 */
		VM_STAT_ADD(page_lookup_cnt[18]);
		/*
		 * Since we hold the page hash mutex and
		 * just searched for this page, page_hashin
		 * had better not fail.  If it does, that
		 * means some thread did not follow the
		 * page hash mutex rules.  Panic now and
		 * get it over with.  As usual, go down
		 * holding all the locks.
		 */
		ASSERT(MUTEX_HELD(phm));
		if (!page_hashin(newpp, vp, off, phm)) {
			ASSERT(MUTEX_HELD(phm));
			panic("page_lookup_create: hashin failed %p %p %llx %p",
			    (void *)newpp, (void *)vp, off, (void *)phm);
			/*NOTREACHED*/
		}
		ASSERT(MUTEX_HELD(phm));
		mutex_exit(phm);
		phm = NULL;
		page_set_props(newpp, P_REF);
		page_io_lock(newpp);
		pp = newpp;
		se = SE_EXCL;
	} else {
		/* Confirmed miss with the hash lock held: nothing to do. */
		VM_STAT_ADD(page_lookup_cnt[19]);
		mutex_exit(phm);
	}

	/* On success the page is held with the (possibly upgraded) se. */
	ASSERT(pp ? PAGE_LOCKED_SE(pp, se) : 1);

	/* A returned page is never on the free or cache lists. */
	ASSERT(pp ? ((PP_ISFREE(pp) == 0) && (PP_ISAGED(pp) == 0)) : 1);

	return (pp);
}

/*
 * Search the hash list for the page representing the
 * specified [vp, offset] and return it locked.  Skip
 * free pages and pages that cannot be locked as requested.
 * Used while attempting to kluster pages.
9857c478bd9Sstevel@tonic-gate */ 9867c478bd9Sstevel@tonic-gate page_t * 9877c478bd9Sstevel@tonic-gate page_lookup_nowait(vnode_t *vp, u_offset_t off, se_t se) 9887c478bd9Sstevel@tonic-gate { 9897c478bd9Sstevel@tonic-gate page_t *pp; 9907c478bd9Sstevel@tonic-gate kmutex_t *phm; 9917c478bd9Sstevel@tonic-gate ulong_t index; 9927c478bd9Sstevel@tonic-gate uint_t locked; 9937c478bd9Sstevel@tonic-gate 9947c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 9957c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[0]); 9967c478bd9Sstevel@tonic-gate 9977c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 9987c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 9997c478bd9Sstevel@tonic-gate locked = 0; 10007c478bd9Sstevel@tonic-gate if (pp == NULL) { 10017c478bd9Sstevel@tonic-gate top: 10027c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[1]); 10037c478bd9Sstevel@tonic-gate locked = 1; 10047c478bd9Sstevel@tonic-gate phm = PAGE_HASH_MUTEX(index); 10057c478bd9Sstevel@tonic-gate mutex_enter(phm); 10067c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 10077c478bd9Sstevel@tonic-gate } 10087c478bd9Sstevel@tonic-gate 10097c478bd9Sstevel@tonic-gate if (pp == NULL || PP_ISFREE(pp)) { 10107c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[2]); 10117c478bd9Sstevel@tonic-gate pp = NULL; 10127c478bd9Sstevel@tonic-gate } else { 10137c478bd9Sstevel@tonic-gate if (!page_trylock(pp, se)) { 10147c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[3]); 10157c478bd9Sstevel@tonic-gate pp = NULL; 10167c478bd9Sstevel@tonic-gate } else { 10177c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[4]); 10187c478bd9Sstevel@tonic-gate /* 10197c478bd9Sstevel@tonic-gate * See the comment in page_lookup() 10207c478bd9Sstevel@tonic-gate */ 10217c478bd9Sstevel@tonic-gate if (((volatile struct vnode *)(pp->p_vnode) != vp) || 10227c478bd9Sstevel@tonic-gate ((u_offset_t)(pp->p_offset) != off)) { 
10237c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[5]); 10247c478bd9Sstevel@tonic-gate if (locked) { 10257c478bd9Sstevel@tonic-gate panic("page_lookup_nowait %p", 10267c478bd9Sstevel@tonic-gate (void *)pp); 10277c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 10287c478bd9Sstevel@tonic-gate } 10297c478bd9Sstevel@tonic-gate page_unlock(pp); 10307c478bd9Sstevel@tonic-gate goto top; 10317c478bd9Sstevel@tonic-gate } 10327c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 10337c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[6]); 10347c478bd9Sstevel@tonic-gate page_unlock(pp); 10357c478bd9Sstevel@tonic-gate pp = NULL; 10367c478bd9Sstevel@tonic-gate } 10377c478bd9Sstevel@tonic-gate } 10387c478bd9Sstevel@tonic-gate } 10397c478bd9Sstevel@tonic-gate if (locked) { 10407c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_lookup_nowait_cnt[7]); 10417c478bd9Sstevel@tonic-gate mutex_exit(phm); 10427c478bd9Sstevel@tonic-gate } 10437c478bd9Sstevel@tonic-gate 10447c478bd9Sstevel@tonic-gate ASSERT(pp ? PAGE_LOCKED_SE(pp, se) : 1); 10457c478bd9Sstevel@tonic-gate 10467c478bd9Sstevel@tonic-gate return (pp); 10477c478bd9Sstevel@tonic-gate } 10487c478bd9Sstevel@tonic-gate 10497c478bd9Sstevel@tonic-gate /* 10507c478bd9Sstevel@tonic-gate * Search the hash list for a page with the specified [vp, off] 10517c478bd9Sstevel@tonic-gate * that is known to exist and is already locked. This routine 10527c478bd9Sstevel@tonic-gate * is typically used by segment SOFTUNLOCK routines. 
10537c478bd9Sstevel@tonic-gate */ 10547c478bd9Sstevel@tonic-gate page_t * 10557c478bd9Sstevel@tonic-gate page_find(vnode_t *vp, u_offset_t off) 10567c478bd9Sstevel@tonic-gate { 10577c478bd9Sstevel@tonic-gate page_t *pp; 10587c478bd9Sstevel@tonic-gate kmutex_t *phm; 10597c478bd9Sstevel@tonic-gate ulong_t index; 10607c478bd9Sstevel@tonic-gate 10617c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 10627c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_find_cnt); 10637c478bd9Sstevel@tonic-gate 10647c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 10657c478bd9Sstevel@tonic-gate phm = PAGE_HASH_MUTEX(index); 10667c478bd9Sstevel@tonic-gate 10677c478bd9Sstevel@tonic-gate mutex_enter(phm); 10687c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 10697c478bd9Sstevel@tonic-gate mutex_exit(phm); 10707c478bd9Sstevel@tonic-gate 10717c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 10727c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp) || panicstr); 10737c478bd9Sstevel@tonic-gate return (pp); 10747c478bd9Sstevel@tonic-gate } 10757c478bd9Sstevel@tonic-gate 10767c478bd9Sstevel@tonic-gate /* 10777c478bd9Sstevel@tonic-gate * Determine whether a page with the specified [vp, off] 10787c478bd9Sstevel@tonic-gate * currently exists in the system. Obviously this should 10797c478bd9Sstevel@tonic-gate * only be considered as a hint since nothing prevents the 10807c478bd9Sstevel@tonic-gate * page from disappearing or appearing immediately after 10817c478bd9Sstevel@tonic-gate * the return from this routine. Subsequently, we don't 10827c478bd9Sstevel@tonic-gate * even bother to lock the list. 
10837c478bd9Sstevel@tonic-gate */ 10847c478bd9Sstevel@tonic-gate page_t * 10857c478bd9Sstevel@tonic-gate page_exists(vnode_t *vp, u_offset_t off) 10867c478bd9Sstevel@tonic-gate { 10877c478bd9Sstevel@tonic-gate page_t *pp; 10887c478bd9Sstevel@tonic-gate ulong_t index; 10897c478bd9Sstevel@tonic-gate 10907c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 10917c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exists_cnt); 10927c478bd9Sstevel@tonic-gate 10937c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 10947c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 10957c478bd9Sstevel@tonic-gate 10967c478bd9Sstevel@tonic-gate return (pp); 10977c478bd9Sstevel@tonic-gate } 10987c478bd9Sstevel@tonic-gate 10997c478bd9Sstevel@tonic-gate /* 11007c478bd9Sstevel@tonic-gate * Determine if physically contiguous pages exist for [vp, off] - [vp, off + 11017c478bd9Sstevel@tonic-gate * page_size(szc)) range. if they exist and ppa is not NULL fill ppa array 11027c478bd9Sstevel@tonic-gate * with these pages locked SHARED. If necessary reclaim pages from 11037c478bd9Sstevel@tonic-gate * freelist. Return 1 if contiguous pages exist and 0 otherwise. 11047c478bd9Sstevel@tonic-gate * 11057c478bd9Sstevel@tonic-gate * If we fail to lock pages still return 1 if pages exist and contiguous. 11067c478bd9Sstevel@tonic-gate * But in this case return value is just a hint. ppa array won't be filled. 11077c478bd9Sstevel@tonic-gate * Caller should initialize ppa[0] as NULL to distinguish return value. 11087c478bd9Sstevel@tonic-gate * 11097c478bd9Sstevel@tonic-gate * Returns 0 if pages don't exist or not physically contiguous. 11107c478bd9Sstevel@tonic-gate * 11117c478bd9Sstevel@tonic-gate * This routine doesn't work for anonymous(swapfs) pages. 
11127c478bd9Sstevel@tonic-gate */ 11137c478bd9Sstevel@tonic-gate int 11147c478bd9Sstevel@tonic-gate page_exists_physcontig(vnode_t *vp, u_offset_t off, uint_t szc, page_t *ppa[]) 11157c478bd9Sstevel@tonic-gate { 11167c478bd9Sstevel@tonic-gate pgcnt_t pages; 11177c478bd9Sstevel@tonic-gate pfn_t pfn; 11187c478bd9Sstevel@tonic-gate page_t *rootpp; 11197c478bd9Sstevel@tonic-gate pgcnt_t i; 11207c478bd9Sstevel@tonic-gate pgcnt_t j; 11217c478bd9Sstevel@tonic-gate u_offset_t save_off = off; 11227c478bd9Sstevel@tonic-gate ulong_t index; 11237c478bd9Sstevel@tonic-gate kmutex_t *phm; 11247c478bd9Sstevel@tonic-gate page_t *pp; 11257c478bd9Sstevel@tonic-gate uint_t pszc; 11267c478bd9Sstevel@tonic-gate int loopcnt = 0; 11277c478bd9Sstevel@tonic-gate 11287c478bd9Sstevel@tonic-gate ASSERT(szc != 0); 11297c478bd9Sstevel@tonic-gate ASSERT(vp != NULL); 11307c478bd9Sstevel@tonic-gate ASSERT(!IS_SWAPFSVP(vp)); 11317c478bd9Sstevel@tonic-gate ASSERT(vp != &kvp); 11327c478bd9Sstevel@tonic-gate 11337c478bd9Sstevel@tonic-gate again: 11347c478bd9Sstevel@tonic-gate if (++loopcnt > 3) { 11357c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[0]); 11367c478bd9Sstevel@tonic-gate return (0); 11377c478bd9Sstevel@tonic-gate } 11387c478bd9Sstevel@tonic-gate 11397c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 11407c478bd9Sstevel@tonic-gate phm = PAGE_HASH_MUTEX(index); 11417c478bd9Sstevel@tonic-gate 11427c478bd9Sstevel@tonic-gate mutex_enter(phm); 11437c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 11447c478bd9Sstevel@tonic-gate mutex_exit(phm); 11457c478bd9Sstevel@tonic-gate 11467c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[1]); 11477c478bd9Sstevel@tonic-gate 11487c478bd9Sstevel@tonic-gate if (pp == NULL) { 11497c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[2]); 11507c478bd9Sstevel@tonic-gate return (0); 11517c478bd9Sstevel@tonic-gate } 11527c478bd9Sstevel@tonic-gate 11537c478bd9Sstevel@tonic-gate pages = page_get_pagecnt(szc); 
11547c478bd9Sstevel@tonic-gate rootpp = pp; 11557c478bd9Sstevel@tonic-gate pfn = rootpp->p_pagenum; 11567c478bd9Sstevel@tonic-gate 11577c478bd9Sstevel@tonic-gate if ((pszc = pp->p_szc) >= szc && ppa != NULL) { 11587c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[3]); 11597c478bd9Sstevel@tonic-gate if (!page_trylock(pp, SE_SHARED)) { 11607c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[4]); 11617c478bd9Sstevel@tonic-gate return (1); 11627c478bd9Sstevel@tonic-gate } 11637c478bd9Sstevel@tonic-gate if (pp->p_szc != pszc || pp->p_vnode != vp || 11647c478bd9Sstevel@tonic-gate pp->p_offset != off) { 11657c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[5]); 11667c478bd9Sstevel@tonic-gate page_unlock(pp); 11677c478bd9Sstevel@tonic-gate off = save_off; 11687c478bd9Sstevel@tonic-gate goto again; 11697c478bd9Sstevel@tonic-gate } 11707c478bd9Sstevel@tonic-gate /* 11717c478bd9Sstevel@tonic-gate * szc was non zero and vnode and offset matched after we 11727c478bd9Sstevel@tonic-gate * locked the page it means it can't become free on us. 
11737c478bd9Sstevel@tonic-gate */ 11747c478bd9Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp)); 11757c478bd9Sstevel@tonic-gate if (!IS_P2ALIGNED(pfn, pages)) { 11767c478bd9Sstevel@tonic-gate page_unlock(pp); 11777c478bd9Sstevel@tonic-gate return (0); 11787c478bd9Sstevel@tonic-gate } 11797c478bd9Sstevel@tonic-gate ppa[0] = pp; 11807c478bd9Sstevel@tonic-gate pp++; 11817c478bd9Sstevel@tonic-gate off += PAGESIZE; 11827c478bd9Sstevel@tonic-gate pfn++; 11837c478bd9Sstevel@tonic-gate for (i = 1; i < pages; i++, pp++, off += PAGESIZE, pfn++) { 11847c478bd9Sstevel@tonic-gate if (!page_trylock(pp, SE_SHARED)) { 11857c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[6]); 11867c478bd9Sstevel@tonic-gate pp--; 11877c478bd9Sstevel@tonic-gate while (i-- > 0) { 11887c478bd9Sstevel@tonic-gate page_unlock(pp); 11897c478bd9Sstevel@tonic-gate pp--; 11907c478bd9Sstevel@tonic-gate } 11917c478bd9Sstevel@tonic-gate ppa[0] = NULL; 11927c478bd9Sstevel@tonic-gate return (1); 11937c478bd9Sstevel@tonic-gate } 11947c478bd9Sstevel@tonic-gate if (pp->p_szc != pszc) { 11957c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[7]); 11967c478bd9Sstevel@tonic-gate page_unlock(pp); 11977c478bd9Sstevel@tonic-gate pp--; 11987c478bd9Sstevel@tonic-gate while (i-- > 0) { 11997c478bd9Sstevel@tonic-gate page_unlock(pp); 12007c478bd9Sstevel@tonic-gate pp--; 12017c478bd9Sstevel@tonic-gate } 12027c478bd9Sstevel@tonic-gate ppa[0] = NULL; 12037c478bd9Sstevel@tonic-gate off = save_off; 12047c478bd9Sstevel@tonic-gate goto again; 12057c478bd9Sstevel@tonic-gate } 12067c478bd9Sstevel@tonic-gate /* 12077c478bd9Sstevel@tonic-gate * szc the same as for previous already locked pages 12087c478bd9Sstevel@tonic-gate * with right identity. Since this page had correct 12097c478bd9Sstevel@tonic-gate * szc after we locked it can't get freed or destroyed 12107c478bd9Sstevel@tonic-gate * and therefore must have the expected identity. 
12117c478bd9Sstevel@tonic-gate */ 12127c478bd9Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp)); 12137c478bd9Sstevel@tonic-gate if (pp->p_vnode != vp || 12147c478bd9Sstevel@tonic-gate pp->p_offset != off) { 12157c478bd9Sstevel@tonic-gate panic("page_exists_physcontig: " 12167c478bd9Sstevel@tonic-gate "large page identity doesn't match"); 12177c478bd9Sstevel@tonic-gate } 12187c478bd9Sstevel@tonic-gate ppa[i] = pp; 12197c478bd9Sstevel@tonic-gate ASSERT(pp->p_pagenum == pfn); 12207c478bd9Sstevel@tonic-gate } 12217c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[8]); 12227c478bd9Sstevel@tonic-gate ppa[pages] = NULL; 12237c478bd9Sstevel@tonic-gate return (1); 12247c478bd9Sstevel@tonic-gate } else if (pszc >= szc) { 12257c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[9]); 12267c478bd9Sstevel@tonic-gate if (!IS_P2ALIGNED(pfn, pages)) { 12277c478bd9Sstevel@tonic-gate return (0); 12287c478bd9Sstevel@tonic-gate } 12297c478bd9Sstevel@tonic-gate return (1); 12307c478bd9Sstevel@tonic-gate } 12317c478bd9Sstevel@tonic-gate 12327c478bd9Sstevel@tonic-gate if (!IS_P2ALIGNED(pfn, pages)) { 12337c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[10]); 12347c478bd9Sstevel@tonic-gate return (0); 12357c478bd9Sstevel@tonic-gate } 12367c478bd9Sstevel@tonic-gate 12377c478bd9Sstevel@tonic-gate if (page_numtomemseg_nolock(pfn) != 12387c478bd9Sstevel@tonic-gate page_numtomemseg_nolock(pfn + pages - 1)) { 12397c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[11]); 12407c478bd9Sstevel@tonic-gate return (0); 12417c478bd9Sstevel@tonic-gate } 12427c478bd9Sstevel@tonic-gate 12437c478bd9Sstevel@tonic-gate /* 12447c478bd9Sstevel@tonic-gate * We loop up 4 times across pages to promote page size. 12457c478bd9Sstevel@tonic-gate * We're extra cautious to promote page size atomically with respect 12467c478bd9Sstevel@tonic-gate * to everybody else. But we can probably optimize into 1 loop if 12477c478bd9Sstevel@tonic-gate * this becomes an issue. 
12487c478bd9Sstevel@tonic-gate */ 12497c478bd9Sstevel@tonic-gate 12507c478bd9Sstevel@tonic-gate for (i = 0; i < pages; i++, pp++, off += PAGESIZE, pfn++) { 12517c478bd9Sstevel@tonic-gate ASSERT(pp->p_pagenum == pfn); 12527c478bd9Sstevel@tonic-gate if (!page_trylock(pp, SE_EXCL)) { 12537c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[12]); 12547c478bd9Sstevel@tonic-gate break; 12557c478bd9Sstevel@tonic-gate } 12567c478bd9Sstevel@tonic-gate if (pp->p_vnode != vp || 12577c478bd9Sstevel@tonic-gate pp->p_offset != off) { 12587c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[13]); 12597c478bd9Sstevel@tonic-gate page_unlock(pp); 12607c478bd9Sstevel@tonic-gate break; 12617c478bd9Sstevel@tonic-gate } 12627c478bd9Sstevel@tonic-gate if (pp->p_szc >= szc) { 12637c478bd9Sstevel@tonic-gate ASSERT(i == 0); 12647c478bd9Sstevel@tonic-gate page_unlock(pp); 12657c478bd9Sstevel@tonic-gate off = save_off; 12667c478bd9Sstevel@tonic-gate goto again; 12677c478bd9Sstevel@tonic-gate } 12687c478bd9Sstevel@tonic-gate } 12697c478bd9Sstevel@tonic-gate 12707c478bd9Sstevel@tonic-gate if (i != pages) { 12717c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[14]); 12727c478bd9Sstevel@tonic-gate --pp; 12737c478bd9Sstevel@tonic-gate while (i-- > 0) { 12747c478bd9Sstevel@tonic-gate page_unlock(pp); 12757c478bd9Sstevel@tonic-gate --pp; 12767c478bd9Sstevel@tonic-gate } 12777c478bd9Sstevel@tonic-gate return (0); 12787c478bd9Sstevel@tonic-gate } 12797c478bd9Sstevel@tonic-gate 12807c478bd9Sstevel@tonic-gate pp = rootpp; 12817c478bd9Sstevel@tonic-gate for (i = 0; i < pages; i++, pp++) { 12827c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 12837c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[15]); 12847c478bd9Sstevel@tonic-gate ASSERT(!PP_ISAGED(pp)); 12857c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 12867c478bd9Sstevel@tonic-gate if (!page_reclaim(pp, NULL)) { 12877c478bd9Sstevel@tonic-gate break; 12887c478bd9Sstevel@tonic-gate } 12897c478bd9Sstevel@tonic-gate } else { 
12907c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc < szc); 12917c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[16]); 12927c478bd9Sstevel@tonic-gate (void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD); 12937c478bd9Sstevel@tonic-gate } 12947c478bd9Sstevel@tonic-gate } 12957c478bd9Sstevel@tonic-gate if (i < pages) { 12967c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[17]); 12977c478bd9Sstevel@tonic-gate /* 12987c478bd9Sstevel@tonic-gate * page_reclaim failed because we were out of memory. 12997c478bd9Sstevel@tonic-gate * drop the rest of the locks and return because this page 13007c478bd9Sstevel@tonic-gate * must be already reallocated anyway. 13017c478bd9Sstevel@tonic-gate */ 13027c478bd9Sstevel@tonic-gate pp = rootpp; 13037c478bd9Sstevel@tonic-gate for (j = 0; j < pages; j++, pp++) { 13047c478bd9Sstevel@tonic-gate if (j != i) { 13057c478bd9Sstevel@tonic-gate page_unlock(pp); 13067c478bd9Sstevel@tonic-gate } 13077c478bd9Sstevel@tonic-gate } 13087c478bd9Sstevel@tonic-gate return (0); 13097c478bd9Sstevel@tonic-gate } 13107c478bd9Sstevel@tonic-gate 13117c478bd9Sstevel@tonic-gate off = save_off; 13127c478bd9Sstevel@tonic-gate pp = rootpp; 13137c478bd9Sstevel@tonic-gate for (i = 0; i < pages; i++, pp++, off += PAGESIZE) { 13147c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp)); 13157c478bd9Sstevel@tonic-gate ASSERT(!PP_ISFREE(pp)); 13167c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(pp)); 13177c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == vp); 13187c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset == off); 13197c478bd9Sstevel@tonic-gate pp->p_szc = szc; 13207c478bd9Sstevel@tonic-gate } 13217c478bd9Sstevel@tonic-gate pp = rootpp; 13227c478bd9Sstevel@tonic-gate for (i = 0; i < pages; i++, pp++) { 13237c478bd9Sstevel@tonic-gate if (ppa == NULL) { 13247c478bd9Sstevel@tonic-gate page_unlock(pp); 13257c478bd9Sstevel@tonic-gate } else { 13267c478bd9Sstevel@tonic-gate ppa[i] = pp; 13277c478bd9Sstevel@tonic-gate page_downgrade(ppa[i]); 13287c478bd9Sstevel@tonic-gate } 
13297c478bd9Sstevel@tonic-gate } 13307c478bd9Sstevel@tonic-gate if (ppa != NULL) { 13317c478bd9Sstevel@tonic-gate ppa[pages] = NULL; 13327c478bd9Sstevel@tonic-gate } 13337c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exphcontg[18]); 13347c478bd9Sstevel@tonic-gate ASSERT(vp->v_pages != NULL); 13357c478bd9Sstevel@tonic-gate return (1); 13367c478bd9Sstevel@tonic-gate } 13377c478bd9Sstevel@tonic-gate 13387c478bd9Sstevel@tonic-gate /* 13397c478bd9Sstevel@tonic-gate * Determine whether a page with the specified [vp, off] 13407c478bd9Sstevel@tonic-gate * currently exists in the system and if so return its 13417c478bd9Sstevel@tonic-gate * size code. Obviously this should only be considered as 13427c478bd9Sstevel@tonic-gate * a hint since nothing prevents the page from disappearing 13437c478bd9Sstevel@tonic-gate * or appearing immediately after the return from this routine. 13447c478bd9Sstevel@tonic-gate */ 13457c478bd9Sstevel@tonic-gate int 13467c478bd9Sstevel@tonic-gate page_exists_forreal(vnode_t *vp, u_offset_t off, uint_t *szc) 13477c478bd9Sstevel@tonic-gate { 13487c478bd9Sstevel@tonic-gate page_t *pp; 13497c478bd9Sstevel@tonic-gate kmutex_t *phm; 13507c478bd9Sstevel@tonic-gate ulong_t index; 13517c478bd9Sstevel@tonic-gate int rc = 0; 13527c478bd9Sstevel@tonic-gate 13537c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 13547c478bd9Sstevel@tonic-gate ASSERT(szc != NULL); 13557c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_exists_forreal_cnt); 13567c478bd9Sstevel@tonic-gate 13577c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 13587c478bd9Sstevel@tonic-gate phm = PAGE_HASH_MUTEX(index); 13597c478bd9Sstevel@tonic-gate 13607c478bd9Sstevel@tonic-gate mutex_enter(phm); 13617c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 13627c478bd9Sstevel@tonic-gate if (pp != NULL) { 13637c478bd9Sstevel@tonic-gate *szc = pp->p_szc; 13647c478bd9Sstevel@tonic-gate rc = 1; 13657c478bd9Sstevel@tonic-gate } 13667c478bd9Sstevel@tonic-gate mutex_exit(phm); 
13677c478bd9Sstevel@tonic-gate return (rc); 13687c478bd9Sstevel@tonic-gate } 13697c478bd9Sstevel@tonic-gate 13707c478bd9Sstevel@tonic-gate /* wakeup threads waiting for pages in page_create_get_something() */ 13717c478bd9Sstevel@tonic-gate void 13727c478bd9Sstevel@tonic-gate wakeup_pcgs(void) 13737c478bd9Sstevel@tonic-gate { 13747c478bd9Sstevel@tonic-gate if (!CV_HAS_WAITERS(&pcgs_cv)) 13757c478bd9Sstevel@tonic-gate return; 13767c478bd9Sstevel@tonic-gate cv_broadcast(&pcgs_cv); 13777c478bd9Sstevel@tonic-gate } 13787c478bd9Sstevel@tonic-gate 13797c478bd9Sstevel@tonic-gate /* 13807c478bd9Sstevel@tonic-gate * 'freemem' is used all over the kernel as an indication of how many 13817c478bd9Sstevel@tonic-gate * pages are free (either on the cache list or on the free page list) 13827c478bd9Sstevel@tonic-gate * in the system. In very few places is a really accurate 'freemem' 13837c478bd9Sstevel@tonic-gate * needed. To avoid contention of the lock protecting a the 13847c478bd9Sstevel@tonic-gate * single freemem, it was spread out into NCPU buckets. Set_freemem 13857c478bd9Sstevel@tonic-gate * sets freemem to the total of all NCPU buckets. It is called from 13867c478bd9Sstevel@tonic-gate * clock() on each TICK. 13877c478bd9Sstevel@tonic-gate */ 13887c478bd9Sstevel@tonic-gate void 13897c478bd9Sstevel@tonic-gate set_freemem() 13907c478bd9Sstevel@tonic-gate { 13917c478bd9Sstevel@tonic-gate struct pcf *p; 13927c478bd9Sstevel@tonic-gate ulong_t t; 13937c478bd9Sstevel@tonic-gate uint_t i; 13947c478bd9Sstevel@tonic-gate 13957c478bd9Sstevel@tonic-gate t = 0; 13967c478bd9Sstevel@tonic-gate p = pcf; 13977c478bd9Sstevel@tonic-gate for (i = 0; i < PCF_FANOUT; i++) { 13987c478bd9Sstevel@tonic-gate t += p->pcf_count; 13997c478bd9Sstevel@tonic-gate p++; 14007c478bd9Sstevel@tonic-gate } 14017c478bd9Sstevel@tonic-gate freemem = t; 14027c478bd9Sstevel@tonic-gate 14037c478bd9Sstevel@tonic-gate /* 14047c478bd9Sstevel@tonic-gate * Don't worry about grabbing mutex. 
It's not that 14057c478bd9Sstevel@tonic-gate * critical if we miss a tick or two. This is 14067c478bd9Sstevel@tonic-gate * where we wakeup possible delayers in 14077c478bd9Sstevel@tonic-gate * page_create_get_something(). 14087c478bd9Sstevel@tonic-gate */ 14097c478bd9Sstevel@tonic-gate wakeup_pcgs(); 14107c478bd9Sstevel@tonic-gate } 14117c478bd9Sstevel@tonic-gate 14127c478bd9Sstevel@tonic-gate ulong_t 14137c478bd9Sstevel@tonic-gate get_freemem() 14147c478bd9Sstevel@tonic-gate { 14157c478bd9Sstevel@tonic-gate struct pcf *p; 14167c478bd9Sstevel@tonic-gate ulong_t t; 14177c478bd9Sstevel@tonic-gate uint_t i; 14187c478bd9Sstevel@tonic-gate 14197c478bd9Sstevel@tonic-gate t = 0; 14207c478bd9Sstevel@tonic-gate p = pcf; 14217c478bd9Sstevel@tonic-gate for (i = 0; i < PCF_FANOUT; i++) { 14227c478bd9Sstevel@tonic-gate t += p->pcf_count; 14237c478bd9Sstevel@tonic-gate p++; 14247c478bd9Sstevel@tonic-gate } 14257c478bd9Sstevel@tonic-gate /* 14267c478bd9Sstevel@tonic-gate * We just calculated it, might as well set it. 14277c478bd9Sstevel@tonic-gate */ 14287c478bd9Sstevel@tonic-gate freemem = t; 14297c478bd9Sstevel@tonic-gate return (t); 14307c478bd9Sstevel@tonic-gate } 14317c478bd9Sstevel@tonic-gate 14327c478bd9Sstevel@tonic-gate /* 14337c478bd9Sstevel@tonic-gate * Acquire all of the page cache & free (pcf) locks. 
14347c478bd9Sstevel@tonic-gate */ 14357c478bd9Sstevel@tonic-gate void 14367c478bd9Sstevel@tonic-gate pcf_acquire_all() 14377c478bd9Sstevel@tonic-gate { 14387c478bd9Sstevel@tonic-gate struct pcf *p; 14397c478bd9Sstevel@tonic-gate uint_t i; 14407c478bd9Sstevel@tonic-gate 14417c478bd9Sstevel@tonic-gate p = pcf; 14427c478bd9Sstevel@tonic-gate for (i = 0; i < PCF_FANOUT; i++) { 14437c478bd9Sstevel@tonic-gate p->pcf_touch = 1; 14447c478bd9Sstevel@tonic-gate mutex_enter(&p->pcf_lock); 14457c478bd9Sstevel@tonic-gate p++; 14467c478bd9Sstevel@tonic-gate } 14477c478bd9Sstevel@tonic-gate } 14487c478bd9Sstevel@tonic-gate 14497c478bd9Sstevel@tonic-gate /* 14507c478bd9Sstevel@tonic-gate * Release all the pcf_locks. 14517c478bd9Sstevel@tonic-gate */ 14527c478bd9Sstevel@tonic-gate void 14537c478bd9Sstevel@tonic-gate pcf_release_all() 14547c478bd9Sstevel@tonic-gate { 14557c478bd9Sstevel@tonic-gate struct pcf *p; 14567c478bd9Sstevel@tonic-gate uint_t i; 14577c478bd9Sstevel@tonic-gate 14587c478bd9Sstevel@tonic-gate p = pcf; 14597c478bd9Sstevel@tonic-gate for (i = 0; i < PCF_FANOUT; i++) { 14607c478bd9Sstevel@tonic-gate mutex_exit(&p->pcf_lock); 14617c478bd9Sstevel@tonic-gate p++; 14627c478bd9Sstevel@tonic-gate } 14637c478bd9Sstevel@tonic-gate } 14647c478bd9Sstevel@tonic-gate 14657c478bd9Sstevel@tonic-gate /* 14667c478bd9Sstevel@tonic-gate * Inform the VM system that we need some pages freed up. 
14677c478bd9Sstevel@tonic-gate * Calls must be symmetric, e.g.: 14687c478bd9Sstevel@tonic-gate * 14697c478bd9Sstevel@tonic-gate * page_needfree(100); 14707c478bd9Sstevel@tonic-gate * wait a bit; 14717c478bd9Sstevel@tonic-gate * page_needfree(-100); 14727c478bd9Sstevel@tonic-gate */ 14737c478bd9Sstevel@tonic-gate void 14747c478bd9Sstevel@tonic-gate page_needfree(spgcnt_t npages) 14757c478bd9Sstevel@tonic-gate { 14767c478bd9Sstevel@tonic-gate mutex_enter(&new_freemem_lock); 14777c478bd9Sstevel@tonic-gate needfree += npages; 14787c478bd9Sstevel@tonic-gate mutex_exit(&new_freemem_lock); 14797c478bd9Sstevel@tonic-gate } 14807c478bd9Sstevel@tonic-gate 14817c478bd9Sstevel@tonic-gate /* 14827c478bd9Sstevel@tonic-gate * Throttle for page_create(): try to prevent freemem from dropping 14837c478bd9Sstevel@tonic-gate * below throttlefree. We can't provide a 100% guarantee because 14847c478bd9Sstevel@tonic-gate * KM_NOSLEEP allocations, page_reclaim(), and various other things 14857c478bd9Sstevel@tonic-gate * nibble away at the freelist. However, we can block all PG_WAIT 14867c478bd9Sstevel@tonic-gate * allocations until memory becomes available. The motivation is 14877c478bd9Sstevel@tonic-gate * that several things can fall apart when there's no free memory: 14887c478bd9Sstevel@tonic-gate * 14897c478bd9Sstevel@tonic-gate * (1) If pageout() needs memory to push a page, the system deadlocks. 14907c478bd9Sstevel@tonic-gate * 14917c478bd9Sstevel@tonic-gate * (2) By (broken) specification, timeout(9F) can neither fail nor 14927c478bd9Sstevel@tonic-gate * block, so it has no choice but to panic the system if it 14937c478bd9Sstevel@tonic-gate * cannot allocate a callout structure. 14947c478bd9Sstevel@tonic-gate * 14957c478bd9Sstevel@tonic-gate * (3) Like timeout(), ddi_set_callback() cannot fail and cannot block; 14967c478bd9Sstevel@tonic-gate * it panics if it cannot allocate a callback structure. 
 *
 * (4) Untold numbers of third-party drivers have not yet been hardened
 *     against KM_NOSLEEP and/or allocb() failures; they simply assume
 *     success and panic the system with a data fault on failure.
 *     (The long-term solution to this particular problem is to ship
 *     hostile fault-injecting DEBUG kernels with the DDK.)
 *
 * It is theoretically impossible to guarantee success of non-blocking
 * allocations, but in practice, this throttle is very hard to break.
 *
 * Returns 1 if the request may proceed, 0 if it should be denied.
 */
static int
page_create_throttle(pgcnt_t npages, int flags)
{
	ulong_t	fm;
	uint_t	i;
	pgcnt_t tf;	/* effective value of throttlefree */

	/*
	 * Never deny pages when:
	 * - it's a thread that cannot block [NOMEMWAIT()]
	 * - the allocation cannot block and must not fail
	 * - the allocation cannot block and is pageout dispensated
	 */
	if (NOMEMWAIT() ||
	    ((flags & (PG_WAIT | PG_PANIC)) == PG_PANIC) ||
	    ((flags & (PG_WAIT | PG_PUSHPAGE)) == PG_PUSHPAGE))
		return (1);

	/*
	 * If the allocation can't block, we look favorably upon it
	 * unless we're below pageout_reserve.  In that case we fail
	 * the allocation because we want to make sure there are a few
	 * pages available for pageout.
	 */
	if ((flags & PG_WAIT) == 0)
		return (freemem >= npages + pageout_reserve);

	/*
	 * Calculate the effective throttlefree value: PG_PUSHPAGE
	 * allocations are allowed to dig into the pageout reserve.
	 */
	tf = throttlefree -
	    ((flags & PG_PUSHPAGE) ? pageout_reserve : 0);

	/* Prod the pageout daemon so it starts freeing memory. */
	cv_signal(&proc_pageout->p_cv);

	while (freemem < npages + tf) {
		/*
		 * Recompute freemem from the pcf buckets while holding
		 * all of their locks, then sleep on freemem_cv until a
		 * wakeup arrives.  pcf_wait is bumped on every bucket so
		 * a free into any bucket will wake us; new_freemem_lock
		 * is taken before the pcf locks are dropped (the required
		 * lock order) so the wakeup cannot be lost.
		 */
		pcf_acquire_all();
		mutex_enter(&new_freemem_lock);
		fm = 0;
		for (i = 0; i < PCF_FANOUT; i++) {
			fm += pcf[i].pcf_count;
			pcf[i].pcf_wait++;
			mutex_exit(&pcf[i].pcf_lock);
		}
		freemem = fm;
		needfree += npages;
		freemem_wait++;
		cv_wait(&freemem_cv, &new_freemem_lock);
		freemem_wait--;
		needfree -= npages;
		mutex_exit(&new_freemem_lock);
	}
	return (1);
}

/*
 * page_create_wait() is called to either coalesce pages from the
 * different pcf buckets or to wait because there simply are not
 * enough pages to satisfy the
caller's request.
 *
 * Returns 1 once the pcf accounting for npages has been charged,
 * 0 if the request was denied (throttled, or !PG_WAIT with too
 * little memory available).
 *
 * Sadly, this is called from platform/vm/vm_machdep.c
 */
int
page_create_wait(size_t npages, uint_t flags)
{
	pgcnt_t		total;
	uint_t		i;
	struct pcf	*p;

	/*
	 * Wait until there are enough free pages to satisfy our
	 * entire request.
	 * We set needfree += npages before prodding pageout, to make sure
	 * it does real work when npages > lotsfree > freemem.
	 */
	VM_STAT_ADD(page_create_not_enough);

	ASSERT(!kcage_on ? !(flags & PG_NORELOC) : 1);
checkagain:
	if ((flags & PG_NORELOC) &&
	    kcage_freemem < kcage_throttlefree + npages)
		(void) kcage_create_throttle(npages, flags);

	if (freemem < npages + throttlefree)
		if (!page_create_throttle(npages, flags))
			return (0);

	/*
	 * Since page_create_va() looked at every
	 * bucket, assume we are going to have to wait.
	 * Get all of the pcf locks.
	 */
	total = 0;
	p = pcf;
	for (i = 0; i < PCF_FANOUT; i++) {
		p->pcf_touch = 1;
		mutex_enter(&p->pcf_lock);
		total += p->pcf_count;
		if (total >= npages) {
			/*
			 * Wow!  There are enough pages lying around
			 * to satisfy the request.  Do the accounting,
			 * drop the locks we acquired, and go back.
			 *
			 * freemem is not protected by any lock. So,
			 * we cannot have any assertion containing
			 * freemem.
			 */
			freemem -= npages;

			/*
			 * Drain the buckets we locked, last to first,
			 * until the entire request is paid for.
			 */
			while (p >= pcf) {
				if (p->pcf_count <= npages) {
					npages -= p->pcf_count;
					p->pcf_count = 0;
				} else {
					p->pcf_count -= (uint_t)npages;
					npages = 0;
				}
				mutex_exit(&p->pcf_lock);
				p--;
			}
			ASSERT(npages == 0);
			return (1);
		}
		p++;
	}

	/*
	 * All of the pcf locks are held, there are not enough pages
	 * to satisfy the request (total < npages).
	 * Be sure to acquire the new_freemem_lock before dropping
	 * the pcf locks.  This prevents dropping wakeups in page_free().
	 * The order is always pcf_lock then new_freemem_lock.
	 *
	 * Since we hold all the pcf locks, it is a good time to set freemem.
	 *
	 * If the caller does not want to wait, return now.
	 * Else turn the pageout daemon loose to find something
	 * and wait till it does.
	 *
	 */
	freemem = total;

	if ((flags & PG_WAIT) == 0) {
		pcf_release_all();

		TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_NOMEM,
		"page_create_nomem:npages %ld freemem %ld", npages, freemem);
		return (0);
	}

	ASSERT(proc_pageout != NULL);
	cv_signal(&proc_pageout->p_cv);

	TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SLEEP_START,
	    "page_create_sleep_start: freemem %ld needfree %ld",
	    freemem, needfree);

	/*
	 * We are going to wait.
	 * We currently hold all of the pcf_locks,
	 * get the new_freemem_lock (it protects freemem_wait),
	 * before dropping the pcf_locks.
	 */
	mutex_enter(&new_freemem_lock);

	/* Mark every bucket so that a free into any of them wakes us. */
	p = pcf;
	for (i = 0; i < PCF_FANOUT; i++) {
		p->pcf_wait++;
		mutex_exit(&p->pcf_lock);
		p++;
	}

	needfree += npages;
	freemem_wait++;

	cv_wait(&freemem_cv, &new_freemem_lock);

	freemem_wait--;
	needfree -= npages;

	mutex_exit(&new_freemem_lock);

	TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SLEEP_END,
	    "page_create_sleep_end: freemem %ld needfree %ld",
	    freemem, needfree);

	VM_STAT_ADD(page_create_not_enough_again);
	goto checkagain;
}

/*
 * A routine to do the opposite of page_create_wait().
 */
void
page_create_putback(spgcnt_t npages)
{
	struct pcf	*p;
	pgcnt_t		lump;
	uint_t		*which;

	/*
	 * When a contiguous lump is broken up, we have to
	 * deal with lots of pages (min 64) so let's spread
	 * the wealth around.
	 */
	lump = roundup(npages, PCF_FANOUT) / PCF_FANOUT;
	freemem += npages;

	for (p = pcf; (npages > 0) && (p < &pcf[PCF_FANOUT]); p++) {
		which = &p->pcf_count;

		mutex_enter(&p->pcf_lock);

		/*
		 * If this bucket is blocked (see pcgs_unblock()),
		 * credit the pages to pcf_reserve instead; they are
		 * folded back into pcf_count when the bucket is
		 * unblocked.
		 */
		if (p->pcf_block) {
			which = &p->pcf_reserve;
		}

		if (lump < npages) {
			*which += (uint_t)lump;
			npages -= lump;
		} else {
			*which += (uint_t)npages;
			npages = 0;
		}

		if (p->pcf_wait) {
			mutex_enter(&new_freemem_lock);
			/*
			 * Check to see if some other thread
			 * is actually waiting.  Another bucket
			 * may have woken it up by now.  If there
			 * are no waiters, then set our pcf_wait
			 * count to zero to avoid coming in here
			 * next time.
			 */
			if (freemem_wait) {
				if (npages > 1) {
					cv_broadcast(&freemem_cv);
				} else {
					cv_signal(&freemem_cv);
				}
				p->pcf_wait--;
			} else {
				p->pcf_wait = 0;
			}
			mutex_exit(&new_freemem_lock);
		}
		mutex_exit(&p->pcf_lock);
	}
	ASSERT(npages == 0);
}

/*
 * A helper routine for page_create_get_something.
 * The indenting got too deep down there.
 * Unblock the pcf counters.  Any pages freed after
 * pcf_block got set are moved to pcf_count and
 * wakeups (cv_broadcast() or cv_signal()) are done as needed.
 */
static void
pcgs_unblock(void)
{
	int		i;
	struct pcf	*p;

	/* Update freemem while we're here. */
	freemem = 0;
	p = pcf;
	for (i = 0; i < PCF_FANOUT; i++) {
		mutex_enter(&p->pcf_lock);
		/* The bucket was emptied when pcf_block was set. */
		ASSERT(p->pcf_count == 0);
		p->pcf_count = p->pcf_reserve;
		p->pcf_block = 0;
		freemem += p->pcf_count;
		if (p->pcf_wait) {
			mutex_enter(&new_freemem_lock);
			if (freemem_wait) {
				/*
				 * More than one page returned can
				 * satisfy more than one waiter.
				 */
				if (p->pcf_reserve > 1) {
					cv_broadcast(&freemem_cv);
					p->pcf_wait = 0;
				} else {
					cv_signal(&freemem_cv);
					p->pcf_wait--;
				}
			} else {
				p->pcf_wait = 0;
			}
			mutex_exit(&new_freemem_lock);
		}
		p->pcf_reserve = 0;
		mutex_exit(&p->pcf_lock);
		p++;
	}
}

/*
 * Called from page_create_va() when both the cache and free lists
 * have been checked once.
 *
 * Either returns a page or panics since the accounting was done
 * way before we got here.
 *
 * We don't come here often, so leave the accounting on permanently.
 */

/* Upper bound on retries for relocatable requests (see loop below). */
#define	MAX_PCGS	100

#ifdef	DEBUG
#define	PCGS_TRIES	100
#else	/* DEBUG */
#define	PCGS_TRIES	10
#endif	/* DEBUG */

#ifdef	VM_STATS
/* Histogram/counters for page_create_get_something() behavior. */
uint_t	pcgs_counts[PCGS_TRIES];
uint_t	pcgs_too_many;
uint_t	pcgs_entered;
uint_t	pcgs_entered_noreloc;
uint_t	pcgs_locked;
uint_t	pcgs_cagelocked;
#endif	/* VM_STATS */

/*
 * Last-resort allocator for a single page: called after the normal
 * free-list/cache-list lookups have failed, even though the page has
 * already been accounted for.  Escalates until a page is found or the
 * system panics (the caller's accounting guarantees one should exist
 * for relocatable requests).
 */
static page_t *
page_create_get_something(vnode_t *vp, u_offset_t off, struct seg *seg,
    caddr_t vaddr, uint_t flags)
{
	uint_t		count;
	page_t		*pp;
	uint_t		locked, i;
	struct	pcf	*p;
	lgrp_t		*lgrp;
	int		cagelocked = 0;	/* nonzero iff we hold pcgs_cagelock */

	VM_STAT_ADD(pcgs_entered);

	/*
	 * Tap any reserve freelists: if we fail now, we'll die
	 * since the page(s) we're looking for have already been
	 * accounted for.
	 */
	flags |= PG_PANIC;

	if ((flags & PG_NORELOC) != 0) {
		VM_STAT_ADD(pcgs_entered_noreloc);
		/*
		 * Requests for free pages from critical threads
		 * such as pageout still won't throttle here, but
		 * we must try again, to give the cageout thread
		 * another chance to catch up. Since we already
		 * accounted for the pages, we had better get them
		 * this time.
		 *
		 * N.B. All non-critical threads acquire the pcgs_cagelock
		 * to serialize access to the freelists. This implements a
		 * turnstile-type synchronization to avoid starvation of
		 * critical requests for PG_NORELOC memory by non-critical
		 * threads: all non-critical threads must acquire a 'ticket'
		 * before passing through, which entails making sure
		 * kcage_freemem won't fall below minfree prior to grabbing
		 * pages from the freelists.
		 */
		if (kcage_create_throttle(1, flags) == KCT_NONCRIT) {
			mutex_enter(&pcgs_cagelock);
			cagelocked = 1;
			VM_STAT_ADD(pcgs_cagelocked);
		}
	}

	/*
	 * Time to get serious.
	 * We failed to get a `correctly colored' page from both the
	 * free and cache lists.
	 * We escalate in stage.
	 *
	 * First try both lists without worrying about color.
	 *
	 * Then, grab all page accounting locks (ie. pcf[]) and
	 * steal any pages that they have and set the pcf_block flag to
	 * stop deletions from the lists.  This will help because
	 * a page can get added to the free list while we are looking
	 * at the cache list, then another page could be added to the cache
	 * list allowing the page on the free list to be removed as we
	 * move from looking at the cache list to the free list. This
	 * could happen over and over. We would never find the page
	 * we have accounted for.
	 *
	 * Noreloc pages are a subset of the global (relocatable) page pool.
	 * They are not tracked separately in the pcf bins, so it is
	 * impossible to know when doing pcf accounting if the available
	 * page(s) are noreloc pages or not. When looking for a noreloc page
	 * it is quite easy to end up here even if the global (relocatable)
	 * page pool has plenty of free pages but the noreloc pool is empty.
	 *
	 * When the noreloc pool is empty (or low), additional noreloc pages
	 * are created by converting pages from the global page pool. This
	 * process will stall during pcf accounting if the pcf bins are
	 * already locked. Such is the case when a noreloc allocation is
	 * looping here in page_create_get_something waiting for more noreloc
	 * pages to appear.
	 *
	 * Short of adding a new field to the pcf bins to accurately track
	 * the number of free noreloc pages, we instead do not grab the
	 * pcgs_lock, do not set the pcf blocks and do not timeout when
	 * allocating a noreloc page. This allows noreloc allocations to
	 * loop without blocking global page pool allocations.
	 *
	 * NOTE: the behaviour of page_create_get_something has not changed
	 * for the case of global page pool allocations.
	 */

	flags &= ~PG_MATCH_COLOR;
	locked = 0;
#ifndef __sparc
	/*
	 * page_create_get_something may be called because 4g memory may be
	 * depleted. Set flags to allow for relocation of base page below
	 * 4g if necessary.
	 */
	if (physmax4g)
		flags |= (PGI_PGCPSZC0 | PGI_PGCPHIPRI);
#endif

	lgrp = lgrp_mem_choose(seg, vaddr, PAGESIZE);

	/*
	 * NORELOC requests loop forever (kcage_on); relocatable requests
	 * give up after MAX_PCGS iterations and panic below.
	 */
	for (count = 0; kcage_on || count < MAX_PCGS; count++) {
		pp = page_get_freelist(vp, off, seg, vaddr, PAGESIZE,
		    flags, lgrp);
		if (pp == NULL) {
			pp = page_get_cachelist(vp, off, seg, vaddr,
			    flags, lgrp);
		}
		if (pp == NULL) {
			/*
			 * Serialize.  Don't fight with other pcgs().
			 */
			if (!locked && (!kcage_on || !(flags & PG_NORELOC))) {
				mutex_enter(&pcgs_lock);
				VM_STAT_ADD(pcgs_locked);
				locked = 1;
				/*
				 * Block all pcf bins: move their counts into
				 * pcf_reserve so freeing threads can't race
				 * pages away from us.  Undone by
				 * pcgs_unblock() on the success path.
				 */
				p = pcf;
				for (i = 0; i < PCF_FANOUT; i++) {
					mutex_enter(&p->pcf_lock);
					ASSERT(p->pcf_block == 0);
					p->pcf_block = 1;
					p->pcf_reserve = p->pcf_count;
					p->pcf_count = 0;
					mutex_exit(&p->pcf_lock);
					p++;
				}
				freemem = 0;
			}

			if (count) {
				/*
				 * Since page_free() puts pages on
				 * a list then accounts for it, we
				 * just have to wait for page_free()
				 * to unlock any page it was working
				 * with. The page_lock()-page_reclaim()
				 * path falls in the same boat.
				 *
				 * We don't need to check on the
				 * PG_WAIT flag, we have already
				 * accounted for the page we are
				 * looking for in page_create_va().
				 *
				 * We just wait a moment to let any
				 * locked pages on the lists free up,
				 * then continue around and try again.
				 *
				 * Will be awakened by set_freemem().
				 */
				mutex_enter(&pcgs_wait_lock);
				cv_wait(&pcgs_cv, &pcgs_wait_lock);
				mutex_exit(&pcgs_wait_lock);
			}
		} else {
#ifdef VM_STATS
			if (count >= PCGS_TRIES) {
				VM_STAT_ADD(pcgs_too_many);
			} else {
				VM_STAT_ADD(pcgs_counts[count]);
			}
#endif
			if (locked) {
				pcgs_unblock();
				mutex_exit(&pcgs_lock);
			}
			if (cagelocked)
				mutex_exit(&pcgs_cagelock);
			return (pp);
		}
	}
	/*
	 * we go down holding the pcf locks.
	 */
	panic("no %spage found %d",
	    ((flags & PG_NORELOC) ? "non-reloc " : ""), count);
	/*NOTREACHED*/
}

/*
 * Create enough pages for "bytes" worth of data starting at
 * "off" in "vp".
 *
 *	Where flag must be one of:
 *
 *		PG_EXCL:	Exclusive create (fail if any page already
 *				exists in the page cache) which does not
 *				wait for memory to become available.
 *
 *		PG_WAIT:	Non-exclusive create which can wait for
 *				memory to become available.
 *
 *		PG_PHYSCONTIG:	Allocate physically contiguous pages.
 *				(Not Supported)
 *
 * A doubly linked list of pages is returned to the caller.  Each page
 * on the list has the "exclusive" (p_selock) lock and "iolock" (p_iolock)
 * lock.
 *
 * Unable to change the parameters to page_create() in a minor release,
 * we renamed page_create() to page_create_va(), changed all known calls
 * from page_create() to page_create_va(), and created this wrapper.
 *
 * Upon a major release, we should break compatibility by deleting this
 * wrapper, and replacing all the strings "page_create_va", with "page_create".
 *
 * NOTE: There is a copy of this interface as page_create_io() in
 *	 i86/vm/vm_machdep.c. Any bugs fixed here should be applied
 *	 there.
 */
page_t *
page_create(vnode_t *vp, u_offset_t off, size_t bytes, uint_t flags)
{
	caddr_t random_vaddr;	/* synthetic vaddr for color/lgrp choice */
	struct seg kseg;

#ifdef DEBUG
	/* Deprecated entry point: make noise so callers get converted. */
	cmn_err(CE_WARN, "Using deprecated interface page_create: caller %p",
	    (void *)caller());
#endif

	/*
	 * page_create_va() wants a virtual address; fabricate one that is
	 * stable for a given (vp, off) pair by mixing the vnode pointer
	 * with the page-aligned offset.
	 */
	random_vaddr = (caddr_t)(((uintptr_t)vp >> 7) ^
	    (uintptr_t)(off >> PAGESHIFT));
	kseg.s_as = &kas;

	return (page_create_va(vp, off, bytes, flags, &kseg, random_vaddr));
}

#ifdef DEBUG
/*
 * Fault injection for page_alloc_pages(): when nonzero, roughly one call
 * in pg_alloc_pgs_mtbf fails with ENOMEM (mean time between failures).
 */
uint32_t pg_alloc_pgs_mtbf = 0;
#endif

/*
 * Used for large page support. It will attempt to allocate
 * a large page(s) off the freelist.
 *
 * Returns non zero on failure.
 */
int
page_alloc_pages(struct seg *seg, caddr_t addr, page_t **basepp,
    page_t *ppa[], uint_t szc, int anypgsz)
{
	pgcnt_t		npgs, curnpgs, totpgs;
	size_t		pgsz;
	page_t		*pplist = NULL, *pp;
	int		err = 0;
	lgrp_t		*lgrp;

	ASSERT(szc != 0 && szc <= (page_num_pagesizes() - 1));

	VM_STAT_ADD(alloc_pages[0]);

#ifdef DEBUG
	/* Optional fault injection; see pg_alloc_pgs_mtbf above. */
	if (pg_alloc_pgs_mtbf && !(gethrtime() % pg_alloc_pgs_mtbf)) {
		return (ENOMEM);
	}
#endif

	pgsz = page_get_pagesize(szc);
	totpgs = curnpgs = npgs = pgsz >> PAGESHIFT;

	ASSERT(((uintptr_t)addr & (pgsz - 1)) == 0);
	/*
	 * One must be NULL but not both.
	 * And one must be non NULL but not both.
	 */
	ASSERT(basepp != NULL || ppa != NULL);
	ASSERT(basepp == NULL || ppa == NULL);

	/* Reserve the full request up front; put back below on failure. */
	(void) page_create_wait(npgs, PG_WAIT);

	/*
	 * Gather large pages until the request is satisfied.  If the
	 * current size class is exhausted and anypgsz is set, fall back
	 * to the next smaller size class; otherwise fail.
	 */
	while (npgs && szc) {
		lgrp = lgrp_mem_choose(seg, addr, pgsz);
		pp = page_get_freelist(NULL, 0, seg, addr, pgsz, 0, lgrp);
		if (pp != NULL) {
			VM_STAT_ADD(alloc_pages[1]);
			page_list_concat(&pplist, &pp);
			ASSERT(npgs >= curnpgs);
			npgs -= curnpgs;
		} else if (anypgsz) {
			VM_STAT_ADD(alloc_pages[2]);
			szc--;
			pgsz = page_get_pagesize(szc);
			curnpgs = pgsz >> PAGESHIFT;
		} else {
			VM_STAT_ADD(alloc_pages[3]);
			ASSERT(npgs == totpgs);
			page_create_putback(npgs);
			return (ENOMEM);
		}
	}
	if (szc == 0) {
		/* Fell all the way to base pagesize without satisfying. */
		VM_STAT_ADD(alloc_pages[4]);
		ASSERT(npgs != 0);
		page_create_putback(npgs);
		err = ENOMEM;
	} else if (basepp != NULL) {
		ASSERT(npgs == 0);
		ASSERT(ppa == NULL);
		*basepp = pplist;
	}

	/* npgs now counts the pages we actually obtained. */
	npgs = totpgs - npgs;
	pp = pplist;

	/*
	 * Clear the free and age bits. Also if we were passed in a ppa then
	 * fill it in with all the constituent pages from the large page. But
	 * if we failed to allocate all the pages just free what we got.
	 */
	while (npgs != 0) {
		ASSERT(PP_ISFREE(pp));
		ASSERT(PP_ISAGED(pp));
		if (ppa != NULL || err != 0) {
			if (err == 0) {
				VM_STAT_ADD(alloc_pages[5]);
				PP_CLRFREE(pp);
				PP_CLRAGED(pp);
				page_sub(&pplist, pp);
				*ppa++ = pp;
				npgs--;
			} else {
				/*
				 * Partial failure: return each large page we
				 * collected to the freelist and undo its
				 * reservation.
				 */
				VM_STAT_ADD(alloc_pages[6]);
				ASSERT(pp->p_szc != 0);
				curnpgs = page_get_pagecnt(pp->p_szc);
				page_list_break(&pp, &pplist, curnpgs);
				page_list_add_pages(pp, 0);
				page_create_putback(curnpgs);
				ASSERT(npgs >= curnpgs);
				npgs -= curnpgs;
			}
			pp = pplist;
		} else {
			VM_STAT_ADD(alloc_pages[7]);
			PP_CLRFREE(pp);
			PP_CLRAGED(pp);
			pp = pp->p_next;
			npgs--;
		}
	}
	return (err);
}

/*
 * Get a single large page off of the freelists, and set it up for use.
 * Number of bytes requested must be a supported page size.
 *
 * Note that this call may fail even if there is sufficient
 * memory available or PG_WAIT is set, so the caller must
 * be willing to fallback on page_create_va(), block and retry,
 * or fail the requester.
 */
page_t *
page_create_va_large(vnode_t *vp, u_offset_t off, size_t bytes, uint_t flags,
    struct seg *seg, caddr_t vaddr, void *arg)
{
	pgcnt_t		npages, pcftotal;
	page_t		*pp;
	page_t		*rootpp;
	lgrp_t		*lgrp;
	uint_t		enough;
	uint_t		pcf_index;
	uint_t		i;
	struct pcf	*p;
	struct pcf	*q;
	lgrp_id_t	*lgrpid = (lgrp_id_t *)arg;	/* optional lgrp hint */

	ASSERT(vp != NULL);

	ASSERT((flags & ~(PG_EXCL | PG_WAIT |
	    PG_NORELOC | PG_PANIC | PG_PUSHPAGE)) == 0);
	/* but no others */

	/* Large pages are always an exclusive create. */
	ASSERT((flags & PG_EXCL) == PG_EXCL);

	npages = btop(bytes);

	if (!kcage_on || panicstr) {
		/*
		 * Cage is OFF, or we are single threaded in
		 * panic, so make everything a RELOC request.
		 */
		flags &= ~PG_NORELOC;
	}

	/*
	 * Make sure there's adequate physical memory available.
	 * Note: PG_WAIT is ignored here.
	 */
	if (freemem <= throttlefree + npages) {
		VM_STAT_ADD(page_create_large_cnt[1]);
		return (NULL);
	}

	/*
	 * If cage is on, dampen draw from cage when available
	 * cage space is low.
	 */
	if ((flags & (PG_NORELOC | PG_WAIT)) ==  (PG_NORELOC | PG_WAIT) &&
	    kcage_freemem < kcage_throttlefree + npages) {

		/*
		 * The cage is on, the caller wants PG_NORELOC
		 * pages and available cage memory is very low.
		 * Call kcage_create_throttle() to attempt to
		 * control demand on the cage.
		 */
		if (kcage_create_throttle(npages, flags) == KCT_FAILURE) {
			VM_STAT_ADD(page_create_large_cnt[2]);
			return (NULL);
		}
	}

	/*
	 * Fast path: try to take the whole reservation from a single
	 * pcf bin, starting at this CPU's bin and walking round-robin.
	 */
	enough = 0;
	pcf_index = PCF_INDEX();
	p = &pcf[pcf_index];
	p->pcf_touch = 1;
	q = &pcf[PCF_FANOUT];
	for (pcftotal = 0, i = 0; i < PCF_FANOUT; i++) {
		if (p->pcf_count > npages) {
			/*
			 * a good one to try.
			 */
			mutex_enter(&p->pcf_lock);
			if (p->pcf_count > npages) {
				p->pcf_count -= (uint_t)npages;
				/*
				 * freemem is not protected by any lock.
				 * Thus, we cannot have any assertion
				 * containing freemem here.
				 */
				freemem -= npages;
				enough = 1;
				mutex_exit(&p->pcf_lock);
				break;
			}
			mutex_exit(&p->pcf_lock);
		}
		pcftotal += p->pcf_count;
		p++;
		if (p >= q) {
			p = pcf;
		}
		p->pcf_touch = 1;
	}

	if (!enough) {
		/* If there isn't enough memory available, give up. */
		if (pcftotal < npages) {
			VM_STAT_ADD(page_create_large_cnt[3]);
			return (NULL);
		}

		/* try to collect pages from several pcf bins */
		for (p = pcf, pcftotal = 0, i = 0; i < PCF_FANOUT; i++) {
			p->pcf_touch = 1;
			mutex_enter(&p->pcf_lock);
			pcftotal += p->pcf_count;
			if (pcftotal >= npages) {
				/*
				 * Wow!  There are enough pages laying around
				 * to satisfy the request.  Do the accounting,
				 * drop the locks we acquired, and go back.
				 *
				 * freemem is not protected by any lock. So,
				 * we cannot have any assertion containing
				 * freemem.
				 */
				pgcnt_t tpages = npages;
				freemem -= npages;
				/*
				 * Walk backwards over the bins we locked,
				 * draining each and releasing its lock.
				 */
				while (p >= pcf) {
					if (p->pcf_count <= tpages) {
						tpages -= p->pcf_count;
						p->pcf_count = 0;
					} else {
						p->pcf_count -= (uint_t)tpages;
						tpages = 0;
					}
					mutex_exit(&p->pcf_lock);
					p--;
				}
				ASSERT(tpages == 0);
				break;
			}
			p++;
		}
		if (i == PCF_FANOUT) {
			/* failed to collect pages - release the locks */
			while (--p >= pcf) {
				mutex_exit(&p->pcf_lock);
			}
			VM_STAT_ADD(page_create_large_cnt[4]);
			return (NULL);
		}
	}

	/*
	 * This is where this function behaves fundamentally differently
	 * than page_create_va(); since we're intending to map the page
	 * with a single TTE, we have to get it as a physically contiguous
	 * hardware pagesize chunk.  If we can't, we fail.
	 */
	if (lgrpid != NULL && *lgrpid >= 0 && *lgrpid <= lgrp_alloc_max &&
	    LGRP_EXISTS(lgrp_table[*lgrpid]))
		lgrp = lgrp_table[*lgrpid];
	else
		lgrp = lgrp_mem_choose(seg, vaddr, bytes);

	if ((rootpp = page_get_freelist(&kvp, off, seg, vaddr,
	    bytes, flags & ~PG_MATCH_COLOR, lgrp)) == NULL) {
		page_create_putback(npages);
		VM_STAT_ADD(page_create_large_cnt[5]);
		return (NULL);
	}

	/*
	 * if we got the page with the wrong mtype give it back this is a
	 * workaround for CR 6249718. When CR 6249718 is fixed we never get
	 * inside "if" and the workaround becomes just a nop
	 */
	if (kcage_on && (flags & PG_NORELOC) && !PP_ISNORELOC(rootpp)) {
		page_list_add_pages(rootpp, 0);
		page_create_putback(npages);
		VM_STAT_ADD(page_create_large_cnt[6]);
		return (NULL);
	}

	/*
	 * If satisfying this request has left us with too little
	 * memory, start the wheels turning to get some back.  The
	 * first clause of the test prevents waking up the pageout
	 * daemon in situations where it would decide that there's
	 * nothing to do.
	 */
	if (nscan < desscan && freemem < minfree) {
		TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL,
		    "pageout_cv_signal:freemem %ld", freemem);
		cv_signal(&proc_pageout->p_cv);
	}

	/*
	 * Set up each constituent page: clear free/aged state, hash it
	 * into the (vp, off) page cache, and take its io lock.  Pages
	 * come back from the freelist exclusively locked already.
	 */
	pp = rootpp;
	while (npages--) {
		ASSERT(PAGE_EXCL(pp));
		ASSERT(pp->p_vnode == NULL);
		ASSERT(!hat_page_is_mapped(pp));
		PP_CLRFREE(pp);
		PP_CLRAGED(pp);
		if (!page_hashin(pp, vp, off, NULL))
			panic("page_create_large: hashin failed: page %p",
			    (void *)pp);
		page_io_lock(pp);
		off += PAGESIZE;
		pp = pp->p_next;
	}

	VM_STAT_ADD(page_create_large_cnt[0]);
	return (rootpp);
}

page_t *
page_create_va(vnode_t *vp, u_offset_t off, size_t bytes, uint_t flags,
    struct seg *seg, caddr_t vaddr)
{
	page_t		*plist = NULL;
	pgcnt_t		npages;
	pgcnt_t		found_on_free = 0;
	pgcnt_t		pages_req;
	page_t		*npp = NULL;
	uint_t		enough;
	uint_t		i;
	uint_t		pcf_index;

struct pcf *p; 24007c478bd9Sstevel@tonic-gate struct pcf *q; 24017c478bd9Sstevel@tonic-gate lgrp_t *lgrp; 24027c478bd9Sstevel@tonic-gate 24037c478bd9Sstevel@tonic-gate TRACE_4(TR_FAC_VM, TR_PAGE_CREATE_START, 24047c478bd9Sstevel@tonic-gate "page_create_start:vp %p off %llx bytes %lu flags %x", 24057c478bd9Sstevel@tonic-gate vp, off, bytes, flags); 24067c478bd9Sstevel@tonic-gate 24077c478bd9Sstevel@tonic-gate ASSERT(bytes != 0 && vp != NULL); 24087c478bd9Sstevel@tonic-gate 24097c478bd9Sstevel@tonic-gate if ((flags & PG_EXCL) == 0 && (flags & PG_WAIT) == 0) { 24107c478bd9Sstevel@tonic-gate panic("page_create: invalid flags"); 24117c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 24127c478bd9Sstevel@tonic-gate } 24137c478bd9Sstevel@tonic-gate ASSERT((flags & ~(PG_EXCL | PG_WAIT | 24147c478bd9Sstevel@tonic-gate PG_NORELOC | PG_PANIC | PG_PUSHPAGE)) == 0); 24157c478bd9Sstevel@tonic-gate /* but no others */ 24167c478bd9Sstevel@tonic-gate 24177c478bd9Sstevel@tonic-gate pages_req = npages = btopr(bytes); 24187c478bd9Sstevel@tonic-gate /* 24197c478bd9Sstevel@tonic-gate * Try to see whether request is too large to *ever* be 24207c478bd9Sstevel@tonic-gate * satisfied, in order to prevent deadlock. We arbitrarily 24217c478bd9Sstevel@tonic-gate * decide to limit maximum size requests to max_page_get. 
24227c478bd9Sstevel@tonic-gate */ 24237c478bd9Sstevel@tonic-gate if (npages >= max_page_get) { 24247c478bd9Sstevel@tonic-gate if ((flags & PG_WAIT) == 0) { 24257c478bd9Sstevel@tonic-gate TRACE_4(TR_FAC_VM, TR_PAGE_CREATE_TOOBIG, 24267c478bd9Sstevel@tonic-gate "page_create_toobig:vp %p off %llx npages " 24277c478bd9Sstevel@tonic-gate "%lu max_page_get %lu", 24287c478bd9Sstevel@tonic-gate vp, off, npages, max_page_get); 24297c478bd9Sstevel@tonic-gate return (NULL); 24307c478bd9Sstevel@tonic-gate } else { 24317c478bd9Sstevel@tonic-gate cmn_err(CE_WARN, 24327c478bd9Sstevel@tonic-gate "Request for too much kernel memory " 24337c478bd9Sstevel@tonic-gate "(%lu bytes), will hang forever", bytes); 24347c478bd9Sstevel@tonic-gate for (;;) 24357c478bd9Sstevel@tonic-gate delay(1000000000); 24367c478bd9Sstevel@tonic-gate } 24377c478bd9Sstevel@tonic-gate } 24387c478bd9Sstevel@tonic-gate 24397c478bd9Sstevel@tonic-gate if (!kcage_on || panicstr) { 24407c478bd9Sstevel@tonic-gate /* 24417c478bd9Sstevel@tonic-gate * Cage is OFF, or we are single threaded in 24427c478bd9Sstevel@tonic-gate * panic, so make everything a RELOC request. 24437c478bd9Sstevel@tonic-gate */ 24447c478bd9Sstevel@tonic-gate flags &= ~PG_NORELOC; 24457c478bd9Sstevel@tonic-gate } 24467c478bd9Sstevel@tonic-gate 24477c478bd9Sstevel@tonic-gate if (freemem <= throttlefree + npages) 24487c478bd9Sstevel@tonic-gate if (!page_create_throttle(npages, flags)) 24497c478bd9Sstevel@tonic-gate return (NULL); 24507c478bd9Sstevel@tonic-gate 24517c478bd9Sstevel@tonic-gate /* 24527c478bd9Sstevel@tonic-gate * If cage is on, dampen draw from cage when available 24537c478bd9Sstevel@tonic-gate * cage space is low. 
24547c478bd9Sstevel@tonic-gate */ 24557c478bd9Sstevel@tonic-gate if ((flags & PG_NORELOC) && 24567c478bd9Sstevel@tonic-gate kcage_freemem < kcage_throttlefree + npages) { 24577c478bd9Sstevel@tonic-gate 24587c478bd9Sstevel@tonic-gate /* 24597c478bd9Sstevel@tonic-gate * The cage is on, the caller wants PG_NORELOC 24607c478bd9Sstevel@tonic-gate * pages and available cage memory is very low. 24617c478bd9Sstevel@tonic-gate * Call kcage_create_throttle() to attempt to 24627c478bd9Sstevel@tonic-gate * control demand on the cage. 24637c478bd9Sstevel@tonic-gate */ 24647c478bd9Sstevel@tonic-gate if (kcage_create_throttle(npages, flags) == KCT_FAILURE) 24657c478bd9Sstevel@tonic-gate return (NULL); 24667c478bd9Sstevel@tonic-gate } 24677c478bd9Sstevel@tonic-gate 24687c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_cnt[0]); 24697c478bd9Sstevel@tonic-gate 24707c478bd9Sstevel@tonic-gate enough = 0; 24717c478bd9Sstevel@tonic-gate pcf_index = PCF_INDEX(); 24727c478bd9Sstevel@tonic-gate 24737c478bd9Sstevel@tonic-gate p = &pcf[pcf_index]; 24747c478bd9Sstevel@tonic-gate p->pcf_touch = 1; 24757c478bd9Sstevel@tonic-gate q = &pcf[PCF_FANOUT]; 24767c478bd9Sstevel@tonic-gate for (i = 0; i < PCF_FANOUT; i++) { 24777c478bd9Sstevel@tonic-gate if (p->pcf_count > npages) { 24787c478bd9Sstevel@tonic-gate /* 24797c478bd9Sstevel@tonic-gate * a good one to try. 24807c478bd9Sstevel@tonic-gate */ 24817c478bd9Sstevel@tonic-gate mutex_enter(&p->pcf_lock); 24827c478bd9Sstevel@tonic-gate if (p->pcf_count > npages) { 24837c478bd9Sstevel@tonic-gate p->pcf_count -= (uint_t)npages; 24847c478bd9Sstevel@tonic-gate /* 24857c478bd9Sstevel@tonic-gate * freemem is not protected by any lock. 24867c478bd9Sstevel@tonic-gate * Thus, we cannot have any assertion 24877c478bd9Sstevel@tonic-gate * containing freemem here. 
24887c478bd9Sstevel@tonic-gate */ 24897c478bd9Sstevel@tonic-gate freemem -= npages; 24907c478bd9Sstevel@tonic-gate enough = 1; 24917c478bd9Sstevel@tonic-gate mutex_exit(&p->pcf_lock); 24927c478bd9Sstevel@tonic-gate break; 24937c478bd9Sstevel@tonic-gate } 24947c478bd9Sstevel@tonic-gate mutex_exit(&p->pcf_lock); 24957c478bd9Sstevel@tonic-gate } 24967c478bd9Sstevel@tonic-gate p++; 24977c478bd9Sstevel@tonic-gate if (p >= q) { 24987c478bd9Sstevel@tonic-gate p = pcf; 24997c478bd9Sstevel@tonic-gate } 25007c478bd9Sstevel@tonic-gate p->pcf_touch = 1; 25017c478bd9Sstevel@tonic-gate } 25027c478bd9Sstevel@tonic-gate 25037c478bd9Sstevel@tonic-gate if (!enough) { 25047c478bd9Sstevel@tonic-gate /* 25057c478bd9Sstevel@tonic-gate * Have to look harder. If npages is greater than 25067c478bd9Sstevel@tonic-gate * one, then we might have to coalecse the counters. 25077c478bd9Sstevel@tonic-gate * 25087c478bd9Sstevel@tonic-gate * Go wait. We come back having accounted 25097c478bd9Sstevel@tonic-gate * for the memory. 25107c478bd9Sstevel@tonic-gate */ 25117c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_cnt[1]); 25127c478bd9Sstevel@tonic-gate if (!page_create_wait(npages, flags)) { 25137c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_cnt[2]); 25147c478bd9Sstevel@tonic-gate return (NULL); 25157c478bd9Sstevel@tonic-gate } 25167c478bd9Sstevel@tonic-gate } 25177c478bd9Sstevel@tonic-gate 25187c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SUCCESS, 25197c478bd9Sstevel@tonic-gate "page_create_success:vp %p off %llx", vp, off); 25207c478bd9Sstevel@tonic-gate 25217c478bd9Sstevel@tonic-gate /* 25227c478bd9Sstevel@tonic-gate * If satisfying this request has left us with too little 25237c478bd9Sstevel@tonic-gate * memory, start the wheels turning to get some back. 
The 25247c478bd9Sstevel@tonic-gate * first clause of the test prevents waking up the pageout 25257c478bd9Sstevel@tonic-gate * daemon in situations where it would decide that there's 25267c478bd9Sstevel@tonic-gate * nothing to do. 25277c478bd9Sstevel@tonic-gate */ 25287c478bd9Sstevel@tonic-gate if (nscan < desscan && freemem < minfree) { 25297c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL, 25307c478bd9Sstevel@tonic-gate "pageout_cv_signal:freemem %ld", freemem); 25317c478bd9Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 25327c478bd9Sstevel@tonic-gate } 25337c478bd9Sstevel@tonic-gate 25347c478bd9Sstevel@tonic-gate /* 25357c478bd9Sstevel@tonic-gate * Loop around collecting the requested number of pages. 25367c478bd9Sstevel@tonic-gate * Most of the time, we have to `create' a new page. With 25377c478bd9Sstevel@tonic-gate * this in mind, pull the page off the free list before 25387c478bd9Sstevel@tonic-gate * getting the hash lock. This will minimize the hash 25397c478bd9Sstevel@tonic-gate * lock hold time, nesting, and the like. If it turns 25407c478bd9Sstevel@tonic-gate * out we don't need the page, we put it back at the end. 25417c478bd9Sstevel@tonic-gate */ 25427c478bd9Sstevel@tonic-gate while (npages--) { 25437c478bd9Sstevel@tonic-gate page_t *pp; 25447c478bd9Sstevel@tonic-gate kmutex_t *phm = NULL; 25457c478bd9Sstevel@tonic-gate ulong_t index; 25467c478bd9Sstevel@tonic-gate 25477c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 25487c478bd9Sstevel@tonic-gate top: 25497c478bd9Sstevel@tonic-gate ASSERT(phm == NULL); 25507c478bd9Sstevel@tonic-gate ASSERT(index == PAGE_HASH_FUNC(vp, off)); 25517c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 25527c478bd9Sstevel@tonic-gate 25537c478bd9Sstevel@tonic-gate if (npp == NULL) { 25547c478bd9Sstevel@tonic-gate /* 25557c478bd9Sstevel@tonic-gate * Try to get a page from the freelist (ie, 25567c478bd9Sstevel@tonic-gate * a page with no [vp, off] tag). 
If that 25577c478bd9Sstevel@tonic-gate * fails, use the cachelist. 25587c478bd9Sstevel@tonic-gate * 25597c478bd9Sstevel@tonic-gate * During the first attempt at both the free 25607c478bd9Sstevel@tonic-gate * and cache lists we try for the correct color. 25617c478bd9Sstevel@tonic-gate */ 25627c478bd9Sstevel@tonic-gate /* 25637c478bd9Sstevel@tonic-gate * XXXX-how do we deal with virtual indexed 25647c478bd9Sstevel@tonic-gate * caches and and colors? 25657c478bd9Sstevel@tonic-gate */ 25667c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_cnt[4]); 25677c478bd9Sstevel@tonic-gate /* 25687c478bd9Sstevel@tonic-gate * Get lgroup to allocate next page of shared memory 25697c478bd9Sstevel@tonic-gate * from and use it to specify where to allocate 25707c478bd9Sstevel@tonic-gate * the physical memory 25717c478bd9Sstevel@tonic-gate */ 25727c478bd9Sstevel@tonic-gate lgrp = lgrp_mem_choose(seg, vaddr, PAGESIZE); 25737c478bd9Sstevel@tonic-gate npp = page_get_freelist(vp, off, seg, vaddr, PAGESIZE, 25747c478bd9Sstevel@tonic-gate flags | PG_MATCH_COLOR, lgrp); 25757c478bd9Sstevel@tonic-gate if (npp == NULL) { 25767c478bd9Sstevel@tonic-gate npp = page_get_cachelist(vp, off, seg, 25777c478bd9Sstevel@tonic-gate vaddr, flags | PG_MATCH_COLOR, lgrp); 25787c478bd9Sstevel@tonic-gate if (npp == NULL) { 25797c478bd9Sstevel@tonic-gate npp = page_create_get_something(vp, 25807c478bd9Sstevel@tonic-gate off, seg, vaddr, 25817c478bd9Sstevel@tonic-gate flags & ~PG_MATCH_COLOR); 25827c478bd9Sstevel@tonic-gate } 25837c478bd9Sstevel@tonic-gate 25847c478bd9Sstevel@tonic-gate if (PP_ISAGED(npp) == 0) { 25857c478bd9Sstevel@tonic-gate /* 25867c478bd9Sstevel@tonic-gate * Since this page came from the 25877c478bd9Sstevel@tonic-gate * cachelist, we must destroy the 25887c478bd9Sstevel@tonic-gate * old vnode association. 
25897c478bd9Sstevel@tonic-gate */ 25907c478bd9Sstevel@tonic-gate page_hashout(npp, NULL); 25917c478bd9Sstevel@tonic-gate } 25927c478bd9Sstevel@tonic-gate } 25937c478bd9Sstevel@tonic-gate } 25947c478bd9Sstevel@tonic-gate 25957c478bd9Sstevel@tonic-gate /* 25967c478bd9Sstevel@tonic-gate * We own this page! 25977c478bd9Sstevel@tonic-gate */ 25987c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(npp)); 25997c478bd9Sstevel@tonic-gate ASSERT(npp->p_vnode == NULL); 26007c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(npp)); 26017c478bd9Sstevel@tonic-gate PP_CLRFREE(npp); 26027c478bd9Sstevel@tonic-gate PP_CLRAGED(npp); 26037c478bd9Sstevel@tonic-gate 26047c478bd9Sstevel@tonic-gate /* 26057c478bd9Sstevel@tonic-gate * Here we have a page in our hot little mits and are 26067c478bd9Sstevel@tonic-gate * just waiting to stuff it on the appropriate lists. 26077c478bd9Sstevel@tonic-gate * Get the mutex and check to see if it really does 26087c478bd9Sstevel@tonic-gate * not exist. 26097c478bd9Sstevel@tonic-gate */ 26107c478bd9Sstevel@tonic-gate phm = PAGE_HASH_MUTEX(index); 26117c478bd9Sstevel@tonic-gate mutex_enter(phm); 26127c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 26137c478bd9Sstevel@tonic-gate if (pp == NULL) { 26147c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_new); 26157c478bd9Sstevel@tonic-gate pp = npp; 26167c478bd9Sstevel@tonic-gate npp = NULL; 26177c478bd9Sstevel@tonic-gate if (!page_hashin(pp, vp, off, phm)) { 26187c478bd9Sstevel@tonic-gate /* 26197c478bd9Sstevel@tonic-gate * Since we hold the page hash mutex and 26207c478bd9Sstevel@tonic-gate * just searched for this page, page_hashin 26217c478bd9Sstevel@tonic-gate * had better not fail. If it does, that 26227c478bd9Sstevel@tonic-gate * means somethread did not follow the 26237c478bd9Sstevel@tonic-gate * page hash mutex rules. Panic now and 26247c478bd9Sstevel@tonic-gate * get it over with. As usual, go down 26257c478bd9Sstevel@tonic-gate * holding all the locks. 
26267c478bd9Sstevel@tonic-gate */ 26277c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 26287c478bd9Sstevel@tonic-gate panic("page_create: " 26297c478bd9Sstevel@tonic-gate "hashin failed %p %p %llx %p", 26307c478bd9Sstevel@tonic-gate (void *)pp, (void *)vp, off, (void *)phm); 26317c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 26327c478bd9Sstevel@tonic-gate } 26337c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 26347c478bd9Sstevel@tonic-gate mutex_exit(phm); 26357c478bd9Sstevel@tonic-gate phm = NULL; 26367c478bd9Sstevel@tonic-gate 26377c478bd9Sstevel@tonic-gate /* 26387c478bd9Sstevel@tonic-gate * Hat layer locking need not be done to set 26397c478bd9Sstevel@tonic-gate * the following bits since the page is not hashed 26407c478bd9Sstevel@tonic-gate * and was on the free list (i.e., had no mappings). 26417c478bd9Sstevel@tonic-gate * 26427c478bd9Sstevel@tonic-gate * Set the reference bit to protect 26437c478bd9Sstevel@tonic-gate * against immediate pageout 26447c478bd9Sstevel@tonic-gate * 26457c478bd9Sstevel@tonic-gate * XXXmh modify freelist code to set reference 26467c478bd9Sstevel@tonic-gate * bit so we don't have to do it here. 26477c478bd9Sstevel@tonic-gate */ 26487c478bd9Sstevel@tonic-gate page_set_props(pp, P_REF); 26497c478bd9Sstevel@tonic-gate found_on_free++; 26507c478bd9Sstevel@tonic-gate } else { 26517c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_exists); 26527c478bd9Sstevel@tonic-gate if (flags & PG_EXCL) { 26537c478bd9Sstevel@tonic-gate /* 26547c478bd9Sstevel@tonic-gate * Found an existing page, and the caller 26557c478bd9Sstevel@tonic-gate * wanted all new pages. Undo all of the work 26567c478bd9Sstevel@tonic-gate * we have done. 
26577c478bd9Sstevel@tonic-gate */ 26587c478bd9Sstevel@tonic-gate mutex_exit(phm); 26597c478bd9Sstevel@tonic-gate phm = NULL; 26607c478bd9Sstevel@tonic-gate while (plist != NULL) { 26617c478bd9Sstevel@tonic-gate pp = plist; 26627c478bd9Sstevel@tonic-gate page_sub(&plist, pp); 26637c478bd9Sstevel@tonic-gate page_io_unlock(pp); 26647c478bd9Sstevel@tonic-gate /* large pages should not end up here */ 26657c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 26667c478bd9Sstevel@tonic-gate /*LINTED: constant in conditional ctx*/ 26677c478bd9Sstevel@tonic-gate VN_DISPOSE(pp, B_INVAL, 0, kcred); 26687c478bd9Sstevel@tonic-gate } 26697c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_found_one); 26707c478bd9Sstevel@tonic-gate goto fail; 26717c478bd9Sstevel@tonic-gate } 26727c478bd9Sstevel@tonic-gate ASSERT(flags & PG_WAIT); 26737c478bd9Sstevel@tonic-gate if (!page_lock(pp, SE_EXCL, phm, P_NO_RECLAIM)) { 26747c478bd9Sstevel@tonic-gate /* 26757c478bd9Sstevel@tonic-gate * Start all over again if we blocked trying 26767c478bd9Sstevel@tonic-gate * to lock the page. 26777c478bd9Sstevel@tonic-gate */ 26787c478bd9Sstevel@tonic-gate mutex_exit(phm); 26797c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_page_lock_failed); 26807c478bd9Sstevel@tonic-gate phm = NULL; 26817c478bd9Sstevel@tonic-gate goto top; 26827c478bd9Sstevel@tonic-gate } 26837c478bd9Sstevel@tonic-gate mutex_exit(phm); 26847c478bd9Sstevel@tonic-gate phm = NULL; 26857c478bd9Sstevel@tonic-gate 26867c478bd9Sstevel@tonic-gate if (PP_ISFREE(pp)) { 26877c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 26887c478bd9Sstevel@tonic-gate VM_STAT_ADD(pagecnt.pc_get_cache); 26897c478bd9Sstevel@tonic-gate page_list_sub(pp, PG_CACHE_LIST); 26907c478bd9Sstevel@tonic-gate PP_CLRFREE(pp); 26917c478bd9Sstevel@tonic-gate found_on_free++; 26927c478bd9Sstevel@tonic-gate } 26937c478bd9Sstevel@tonic-gate } 26947c478bd9Sstevel@tonic-gate 26957c478bd9Sstevel@tonic-gate /* 26967c478bd9Sstevel@tonic-gate * Got a page! It is locked. 
Acquire the i/o 26977c478bd9Sstevel@tonic-gate * lock since we are going to use the p_next and 26987c478bd9Sstevel@tonic-gate * p_prev fields to link the requested pages together. 26997c478bd9Sstevel@tonic-gate */ 27007c478bd9Sstevel@tonic-gate page_io_lock(pp); 27017c478bd9Sstevel@tonic-gate page_add(&plist, pp); 27027c478bd9Sstevel@tonic-gate plist = plist->p_next; 27037c478bd9Sstevel@tonic-gate off += PAGESIZE; 27047c478bd9Sstevel@tonic-gate vaddr += PAGESIZE; 27057c478bd9Sstevel@tonic-gate } 27067c478bd9Sstevel@tonic-gate 27077c478bd9Sstevel@tonic-gate ASSERT((flags & PG_EXCL) ? (found_on_free == pages_req) : 1); 27087c478bd9Sstevel@tonic-gate fail: 27097c478bd9Sstevel@tonic-gate if (npp != NULL) { 27107c478bd9Sstevel@tonic-gate /* 27117c478bd9Sstevel@tonic-gate * Did not need this page after all. 27127c478bd9Sstevel@tonic-gate * Put it back on the free list. 27137c478bd9Sstevel@tonic-gate */ 27147c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_putbacks); 27157c478bd9Sstevel@tonic-gate PP_SETFREE(npp); 27167c478bd9Sstevel@tonic-gate PP_SETAGED(npp); 27177c478bd9Sstevel@tonic-gate npp->p_offset = (u_offset_t)-1; 27187c478bd9Sstevel@tonic-gate page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL); 27197c478bd9Sstevel@tonic-gate page_unlock(npp); 27207c478bd9Sstevel@tonic-gate 27217c478bd9Sstevel@tonic-gate } 27227c478bd9Sstevel@tonic-gate 27237c478bd9Sstevel@tonic-gate ASSERT(pages_req >= found_on_free); 27247c478bd9Sstevel@tonic-gate 27257c478bd9Sstevel@tonic-gate { 27267c478bd9Sstevel@tonic-gate uint_t overshoot = (uint_t)(pages_req - found_on_free); 27277c478bd9Sstevel@tonic-gate 27287c478bd9Sstevel@tonic-gate if (overshoot) { 27297c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_overshoot); 27307c478bd9Sstevel@tonic-gate p = &pcf[pcf_index]; 27317c478bd9Sstevel@tonic-gate p->pcf_touch = 1; 27327c478bd9Sstevel@tonic-gate mutex_enter(&p->pcf_lock); 27337c478bd9Sstevel@tonic-gate if (p->pcf_block) { 27347c478bd9Sstevel@tonic-gate p->pcf_reserve += overshoot; 
27357c478bd9Sstevel@tonic-gate } else { 27367c478bd9Sstevel@tonic-gate p->pcf_count += overshoot; 27377c478bd9Sstevel@tonic-gate if (p->pcf_wait) { 27387c478bd9Sstevel@tonic-gate mutex_enter(&new_freemem_lock); 27397c478bd9Sstevel@tonic-gate if (freemem_wait) { 27407c478bd9Sstevel@tonic-gate cv_signal(&freemem_cv); 27417c478bd9Sstevel@tonic-gate p->pcf_wait--; 27427c478bd9Sstevel@tonic-gate } else { 27437c478bd9Sstevel@tonic-gate p->pcf_wait = 0; 27447c478bd9Sstevel@tonic-gate } 27457c478bd9Sstevel@tonic-gate mutex_exit(&new_freemem_lock); 27467c478bd9Sstevel@tonic-gate } 27477c478bd9Sstevel@tonic-gate } 27487c478bd9Sstevel@tonic-gate mutex_exit(&p->pcf_lock); 27497c478bd9Sstevel@tonic-gate /* freemem is approximate, so this test OK */ 27507c478bd9Sstevel@tonic-gate if (!p->pcf_block) 27517c478bd9Sstevel@tonic-gate freemem += overshoot; 27527c478bd9Sstevel@tonic-gate } 27537c478bd9Sstevel@tonic-gate } 27547c478bd9Sstevel@tonic-gate 27557c478bd9Sstevel@tonic-gate return (plist); 27567c478bd9Sstevel@tonic-gate } 27577c478bd9Sstevel@tonic-gate 27587c478bd9Sstevel@tonic-gate /* 27597c478bd9Sstevel@tonic-gate * One or more constituent pages of this large page has been marked 27607c478bd9Sstevel@tonic-gate * toxic. Simply demote the large page to PAGESIZE pages and let 27617c478bd9Sstevel@tonic-gate * page_free() handle it. This routine should only be called by 27627c478bd9Sstevel@tonic-gate * large page free routines (page_free_pages() and page_destroy_pages(). 27637c478bd9Sstevel@tonic-gate * All pages are locked SE_EXCL and have already been marked free. 
 */
static void
page_free_toxic_pages(page_t *rootpp)
{
	page_t	*tpp;
	pgcnt_t	i, pgcnt = page_get_pagecnt(rootpp->p_szc);
	uint_t	szc = rootpp->p_szc;

	/*
	 * First pass: demote the large page by clearing p_szc on every
	 * constituent (linked via p_next).  Each constituent must be
	 * SE_EXCL locked and not i/o locked, unless we are panicking.
	 */
	for (i = 0, tpp = rootpp; i < pgcnt; i++, tpp = tpp->p_next) {
		ASSERT(tpp->p_szc == szc);
		ASSERT((PAGE_EXCL(tpp) &&
		    !page_iolock_assert(tpp)) || panicstr);
		tpp->p_szc = 0;
	}

	/*
	 * Second pass: unlink each now-PAGESIZE page and hand it to
	 * page_free() individually.  Clear P_FREE first, since
	 * page_free() panics on a page already marked free.
	 */
	while (rootpp != NULL) {
		tpp = rootpp;
		page_sub(&rootpp, tpp);
		ASSERT(PP_ISFREE(tpp));
		PP_CLRFREE(tpp);
		page_free(tpp, 1);
	}
}

/*
 * Put page on the "free" list.
 * The free list is really two lists maintained by
 * the PSM of whatever machine we happen to be on.
 *
 * The page must be held SE_EXCL and must not be i/o locked.
 * `dontneed' non-zero places a vnode-backed page at the head of the
 * cachelist instead of the tail (unless nopageage overrides this);
 * pages with no vnode always go to the tail of the freelist.
 */
void
page_free(page_t *pp, int dontneed)
{
	struct pcf	*p;
	uint_t		pcf_index;

	ASSERT((PAGE_EXCL(pp) &&
	    !page_iolock_assert(pp)) || panicstr);

	/*
	 * A deteriorating page gets special treatment before it may be
	 * freed: scrub it, re-test it with every byte pattern, and
	 * either retire it (remove it from service) or clear its toxic
	 * state if the error appears to have been transient.
	 */
	if (page_deteriorating(pp)) {
		volatile int i = 0;
		char *kaddr;
		volatile int rb, wb;	/* read-back and write byte patterns */
		uint64_t pa;
		volatile int ue = 0;	/* uncorrectable error hit during test */
		on_trap_data_t otd;

		if (pp->p_vnode != NULL) {
			/*
			 * Let page_destroy() do its bean counting and
			 * hash out the page; it will then call back
			 * into page_free() with pp->p_vnode == NULL.
			 */
			page_destroy(pp, 0);
			return;
		}

		if (page_isfailing(pp)) {
			/*
			 * If we have already exceeded the limit for
			 * pages retired, we will treat this page as
			 * 'toxic' rather than failing. That will ensure
			 * that the page is at least cleaned, and if
			 * a UE is detected, the page will be retired
			 * anyway.
			 */
			if (pages_retired_limit_exceeded()) {
				/*
				 * clear the flag and reset to toxic
				 */
				page_clrtoxic(pp);
				page_settoxic(pp, PAGE_IS_TOXIC);
			} else {
				pa = ptob((uint64_t)page_pptonum(pp));
				if (page_retire_messages) {
					cmn_err(CE_NOTE, "Page 0x%08x.%08x "
					    "removed from service",
					    (uint32_t)(pa >> 32), (uint32_t)pa);
				}
				goto page_failed;
			}
		}

		pagescrub(pp, 0, PAGESIZE);

		/*
		 * We want to determine whether the error that occurred on
		 * this page is transient or persistent, so we get a mapping
		 * to the page and try every possible bit pattern to compare
		 * what we write with what we read back. A smaller number
		 * of bit patterns might suffice, but there's no point in
		 * getting fancy. If this is the hot path on your system,
		 * you've got bigger problems.
		 */
		kaddr = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
		for (wb = 0xff; wb >= 0; wb--) {
			/*
			 * on_trap() returns non-zero when a data error
			 * trap fires during the protected region below;
			 * in that case mark the page failing and stop.
			 */
			if (on_trap(&otd, OT_DATA_EC)) {
				pa = ptob((uint64_t)page_pptonum(pp)) + i;
				page_settoxic(pp, PAGE_IS_FAILING);

				if (page_retire_messages) {
					cmn_err(CE_WARN, "Uncorrectable Error "
					    "occurred at PA 0x%08x.%08x while "
					    "attempting to clear previously "
					    "reported error; page removed from "
					    "service", (uint32_t)(pa >> 32),
					    (uint32_t)pa);
				}

				ue++;
				break;
			}

			/*
			 * Write out the bit pattern, flush it to memory, and
			 * read it back while under on_trap() protection.
			 */
			for (i = 0; i < PAGESIZE; i++)
				kaddr[i] = wb;

			sync_data_memory(kaddr, PAGESIZE);

			for (i = 0; i < PAGESIZE; i++) {
				if ((rb = (uchar_t)kaddr[i]) != wb) {
					page_settoxic(pp, PAGE_IS_FAILING);
					goto out;
				}
			}
		}
out:
		no_trap();
		ppmapout(kaddr);

		/*
		 * wb >= 0 means the pattern loop did not run to
		 * completion; if no UE fired, the exit was a silent
		 * data mismatch.
		 */
		if (wb >= 0 && !ue) {
			pa = ptob((uint64_t)page_pptonum(pp)) + i;
			if (page_retire_messages) {
				cmn_err(CE_WARN, "Data Mismatch occurred at PA "
				    "0x%08x.%08x [ 0x%x != 0x%x ] while "
				    "attempting to clear previously reported "
				    "error; page removed from service",
				    (uint32_t)(pa >> 32), (uint32_t)pa, rb, wb);
			}
		}
page_failed:
		/*
		 * DR operations change the association between a page_t
		 * and the physical page it represents. Check if the
		 * page is still bad. If it is, then retire it.
		 */
		if (page_isfaulty(pp) && page_isfailing(pp)) {
			/*
			 * In the future, it might be useful to have a platform
			 * callback here to tell the hardware to fence off this
			 * page during the next reboot.
			 *
			 * We move the page to the retired_vnode here
			 */
			(void) page_hashin(pp, &retired_ppages,
			    (u_offset_t)ptob((uint64_t)page_pptonum(pp)), NULL);
			mutex_enter(&freemem_lock);
			availrmem--;
			mutex_exit(&freemem_lock);
			page_retired(pp);
			page_downgrade(pp);

			/*
			 * If DR raced with the above page retirement code,
			 * we might have retired a good page. If so, unretire
			 * the page.
			 */
			if (!page_isfaulty(pp))
				page_unretire_pages();
			return;
		}

		pa = ptob((uint64_t)page_pptonum(pp));

		if (page_retire_messages) {
			cmn_err(CE_NOTE, "Previously reported error on page "
			    "0x%08x.%08x cleared", (uint32_t)(pa >> 32),
			    (uint32_t)pa);
		}

		page_clrtoxic(pp);
	}

	if (PP_ISFREE(pp)) {
		panic("page_free: page %p is free", (void *)pp);
	}

	/*
	 * Large page: demote to PAGESIZE constituents first.  Only
	 * vnode-backed large pages (not anonymous/swapfs, not kernel)
	 * are legal here.
	 */
	if (pp->p_szc != 0) {
		if (pp->p_vnode == NULL || IS_SWAPFSVP(pp->p_vnode) ||
		    pp->p_vnode == &kvp) {
			panic("page_free: anon or kernel "
			    "or no vnode large page %p", (void *)pp);
		}
		page_demote_vp_pages(pp);
		ASSERT(pp->p_szc == 0);
	}

	/*
	 * The page_struct_lock need not be acquired to examine these
	 * fields since the page has an "exclusive" lock.
	 */
	if (hat_page_is_mapped(pp) || pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
		panic("page_free pp=%p, pfn=%lx, lckcnt=%d, cowcnt=%d",
		    pp, page_pptonum(pp), pp->p_lckcnt, pp->p_cowcnt);
		/*NOTREACHED*/
	}

	ASSERT(!hat_page_getshare(pp));

	PP_SETFREE(pp);
	ASSERT(pp->p_vnode == NULL || !IS_VMODSORT(pp->p_vnode) ||
	    !hat_ismod(pp));
	page_clr_all_props(pp);
	ASSERT(!hat_page_getshare(pp));

	/*
	 * Now we add the page to the head of the free list.
	 * But if this page is associated with a paged vnode
	 * then we adjust the head forward so that the page is
	 * effectively at the end of the list.
	 */
	if (pp->p_vnode == NULL) {
		/*
		 * Page has no identity, put it on the free list.
		 */
		PP_SETAGED(pp);
		pp->p_offset = (u_offset_t)-1;
		page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
		VM_STAT_ADD(pagecnt.pc_free_free);
		TRACE_1(TR_FAC_VM, TR_PAGE_FREE_FREE,
		    "page_free_free:pp %p", pp);
	} else {
		PP_CLRAGED(pp);

		if (!dontneed || nopageage) {
			/* move it to the tail of the list */
			page_list_add(pp, PG_CACHE_LIST | PG_LIST_TAIL);

			VM_STAT_ADD(pagecnt.pc_free_cache);
			TRACE_1(TR_FAC_VM, TR_PAGE_FREE_CACHE_TAIL,
			    "page_free_cache_tail:pp %p", pp);
		} else {
			page_list_add(pp, PG_CACHE_LIST | PG_LIST_HEAD);

			VM_STAT_ADD(pagecnt.pc_free_dontneed);
			TRACE_1(TR_FAC_VM, TR_PAGE_FREE_CACHE_HEAD,
			    "page_free_cache_head:pp %p", pp);
		}
	}
	page_unlock(pp);

	/*
	 * Now do the `freemem' accounting.
	 */
	pcf_index = PCF_INDEX();
	p = &pcf[pcf_index];
	p->pcf_touch = 1;

	mutex_enter(&p->pcf_lock);
	if (p->pcf_block) {
		p->pcf_reserve += 1;
	} else {
		p->pcf_count += 1;
		if (p->pcf_wait) {
			mutex_enter(&new_freemem_lock);
			/*
			 * Check to see if some other thread
			 * is actually waiting. Another bucket
			 * may have woken it up by now. If there
			 * are no waiters, then set our pcf_wait
			 * count to zero to avoid coming in here
			 * next time. Also, since only one page
			 * was put on the free list, just wake
			 * up one waiter.
30407c478bd9Sstevel@tonic-gate */ 30417c478bd9Sstevel@tonic-gate if (freemem_wait) { 30427c478bd9Sstevel@tonic-gate cv_signal(&freemem_cv); 30437c478bd9Sstevel@tonic-gate p->pcf_wait--; 30447c478bd9Sstevel@tonic-gate } else { 30457c478bd9Sstevel@tonic-gate p->pcf_wait = 0; 30467c478bd9Sstevel@tonic-gate } 30477c478bd9Sstevel@tonic-gate mutex_exit(&new_freemem_lock); 30487c478bd9Sstevel@tonic-gate } 30497c478bd9Sstevel@tonic-gate } 30507c478bd9Sstevel@tonic-gate mutex_exit(&p->pcf_lock); 30517c478bd9Sstevel@tonic-gate 30527c478bd9Sstevel@tonic-gate /* freemem is approximate, so this test OK */ 30537c478bd9Sstevel@tonic-gate if (!p->pcf_block) 30547c478bd9Sstevel@tonic-gate freemem += 1; 30557c478bd9Sstevel@tonic-gate } 30567c478bd9Sstevel@tonic-gate 30577c478bd9Sstevel@tonic-gate /* 30587c478bd9Sstevel@tonic-gate * Put page on the "free" list during intial startup. 30597c478bd9Sstevel@tonic-gate * This happens during initial single threaded execution. 30607c478bd9Sstevel@tonic-gate */ 30617c478bd9Sstevel@tonic-gate void 30627c478bd9Sstevel@tonic-gate page_free_at_startup(page_t *pp) 30637c478bd9Sstevel@tonic-gate { 30647c478bd9Sstevel@tonic-gate struct pcf *p; 30657c478bd9Sstevel@tonic-gate uint_t pcf_index; 30667c478bd9Sstevel@tonic-gate 30677c478bd9Sstevel@tonic-gate page_list_add(pp, PG_FREE_LIST | PG_LIST_HEAD | PG_LIST_ISINIT); 30687c478bd9Sstevel@tonic-gate VM_STAT_ADD(pagecnt.pc_free_free); 30697c478bd9Sstevel@tonic-gate 30707c478bd9Sstevel@tonic-gate /* 30717c478bd9Sstevel@tonic-gate * Now do the `freemem' accounting. 
/*
 * Free a large page: tear down and free every constituent page of the
 * large page rooted at `pp' and return the whole set to the free list
 * as one unit.  `pp' must be the root (first) page of the large page,
 * and the caller must hold the exclusive lock on each constituent.
 */
void
page_free_pages(page_t *pp)
{
	page_t	*tpp, *rootpp = NULL;
	pgcnt_t	pgcnt = page_get_pagecnt(pp->p_szc);
	pgcnt_t	i;
	uint_t	szc = pp->p_szc;
	int	toxic = 0;

	VM_STAT_ADD(pagecnt.pc_free_pages);
	TRACE_1(TR_FAC_VM, TR_PAGE_FREE_FREE,
	    "page_free_free:pp %p", pp);

	ASSERT(pp->p_szc != 0 && pp->p_szc < page_num_pagesizes());
	/*
	 * The root of a large page is aligned to the page count, so a
	 * nonzero remainder means the caller passed a non-root page.
	 */
	if ((page_pptonum(pp) & (pgcnt - 1)) != 0) {
		panic("page_free_pages: not root page %p", (void *)pp);
		/*NOTREACHED*/
	}

	for (i = 0, tpp = pp; i < pgcnt; i++, tpp++) {
		ASSERT((PAGE_EXCL(tpp) &&
		    !page_iolock_assert(tpp)) || panicstr);
		/* Freeing an already-free page would corrupt the lists. */
		if (PP_ISFREE(tpp)) {
			panic("page_free_pages: page %p is free", (void *)tpp);
			/*NOTREACHED*/
		}
		/* Mapped, locked, or COW-held pages must never be freed. */
		if (hat_page_is_mapped(tpp) || tpp->p_lckcnt != 0 ||
		    tpp->p_cowcnt != 0) {
			panic("page_free_pages %p", (void *)tpp);
			/*NOTREACHED*/
		}

		ASSERT(!hat_page_getshare(tpp));
		ASSERT(tpp->p_vnode == NULL);
		ASSERT(tpp->p_szc == szc);

		/* Remember if any constituent looks bad (failing memory). */
		if (page_deteriorating(tpp))
			toxic = 1;

		/* Mark the page free/aged and strip its identity. */
		PP_SETFREE(tpp);
		page_clr_all_props(tpp);
		PP_SETAGED(tpp);
		tpp->p_offset = (u_offset_t)-1;
		ASSERT(tpp->p_next == tpp);
		ASSERT(tpp->p_prev == tpp);
		/* Link the constituent onto the list rooted at rootpp. */
		page_list_concat(&rootpp, &tpp);
	}
	ASSERT(rootpp == pp);

	/*
	 * Deteriorating pages are diverted to the toxic-page handler
	 * instead of being returned to general circulation.
	 */
	if (toxic) {
		page_free_toxic_pages(rootpp);
		return;
	}
	page_list_add_pages(rootpp, 0);
	/* Wake anyone throttled in page_create() waiting for pages. */
	page_create_putback(pgcnt);
}
It does need to be fast and not introduce 31487c478bd9Sstevel@tonic-gate * too much overhead. 31497c478bd9Sstevel@tonic-gate * 31507c478bd9Sstevel@tonic-gate * If a page isn't found on the unlocked sweep of the page_hash bucket, we 31517c478bd9Sstevel@tonic-gate * don't lock and retry. This is ok, since the page scanner will eventually 31527c478bd9Sstevel@tonic-gate * find any page we miss in free_vp_pages(). 31537c478bd9Sstevel@tonic-gate */ 31547c478bd9Sstevel@tonic-gate void 31557c478bd9Sstevel@tonic-gate free_vp_pages(vnode_t *vp, u_offset_t off, size_t len) 31567c478bd9Sstevel@tonic-gate { 31577c478bd9Sstevel@tonic-gate page_t *pp; 31587c478bd9Sstevel@tonic-gate u_offset_t eoff; 31597c478bd9Sstevel@tonic-gate extern int swap_in_range(vnode_t *, u_offset_t, size_t); 31607c478bd9Sstevel@tonic-gate 31617c478bd9Sstevel@tonic-gate eoff = off + len; 31627c478bd9Sstevel@tonic-gate 31637c478bd9Sstevel@tonic-gate if (free_pages == 0) 31647c478bd9Sstevel@tonic-gate return; 31657c478bd9Sstevel@tonic-gate if (swap_in_range(vp, off, len)) 31667c478bd9Sstevel@tonic-gate return; 31677c478bd9Sstevel@tonic-gate 31687c478bd9Sstevel@tonic-gate for (; off < eoff; off += PAGESIZE) { 31697c478bd9Sstevel@tonic-gate 31707c478bd9Sstevel@tonic-gate /* 31717c478bd9Sstevel@tonic-gate * find the page using a fast, but inexact search. It'll be OK 31727c478bd9Sstevel@tonic-gate * if a few pages slip through the cracks here. 31737c478bd9Sstevel@tonic-gate */ 31747c478bd9Sstevel@tonic-gate pp = page_exists(vp, off); 31757c478bd9Sstevel@tonic-gate 31767c478bd9Sstevel@tonic-gate /* 31777c478bd9Sstevel@tonic-gate * If we didn't find the page (it may not exist), the page 31787c478bd9Sstevel@tonic-gate * is free, looks still in use (shared), or we can't lock it, 31797c478bd9Sstevel@tonic-gate * just give up. 
31807c478bd9Sstevel@tonic-gate */ 31817c478bd9Sstevel@tonic-gate if (pp == NULL || 31827c478bd9Sstevel@tonic-gate PP_ISFREE(pp) || 31837c478bd9Sstevel@tonic-gate page_share_cnt(pp) > 0 || 31847c478bd9Sstevel@tonic-gate !page_trylock(pp, SE_EXCL)) 31857c478bd9Sstevel@tonic-gate continue; 31867c478bd9Sstevel@tonic-gate 31877c478bd9Sstevel@tonic-gate /* 31887c478bd9Sstevel@tonic-gate * Once we have locked pp, verify that it's still the 31897c478bd9Sstevel@tonic-gate * correct page and not already free 31907c478bd9Sstevel@tonic-gate */ 31917c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED_SE(pp, SE_EXCL)); 31927c478bd9Sstevel@tonic-gate if (pp->p_vnode != vp || pp->p_offset != off || PP_ISFREE(pp)) { 31937c478bd9Sstevel@tonic-gate page_unlock(pp); 31947c478bd9Sstevel@tonic-gate continue; 31957c478bd9Sstevel@tonic-gate } 31967c478bd9Sstevel@tonic-gate 31977c478bd9Sstevel@tonic-gate /* 31987c478bd9Sstevel@tonic-gate * try to release the page... 31997c478bd9Sstevel@tonic-gate */ 32007c478bd9Sstevel@tonic-gate (void) page_release(pp, 1); 32017c478bd9Sstevel@tonic-gate } 32027c478bd9Sstevel@tonic-gate } 32037c478bd9Sstevel@tonic-gate 32047c478bd9Sstevel@tonic-gate /* 32057c478bd9Sstevel@tonic-gate * Reclaim the given page from the free list. 32067c478bd9Sstevel@tonic-gate * Returns 1 on success or 0 on failure. 32077c478bd9Sstevel@tonic-gate * 32087c478bd9Sstevel@tonic-gate * The page is unlocked if it can't be reclaimed (when freemem == 0). 32097c478bd9Sstevel@tonic-gate * If `lock' is non-null, it will be dropped and re-acquired if 32107c478bd9Sstevel@tonic-gate * the routine must wait while freemem is 0. 32117c478bd9Sstevel@tonic-gate * 32127c478bd9Sstevel@tonic-gate * As it turns out, boot_getpages() does this. It picks a page, 32137c478bd9Sstevel@tonic-gate * based on where OBP mapped in some address, gets its pfn, searches 32147c478bd9Sstevel@tonic-gate * the memsegs, locks the page, then pulls it off the free list! 
/*
 * Reclaim the given page from the free list.
 * Returns 1 on success or 0 on failure.
 *
 * The page is unlocked if it can't be reclaimed (when freemem == 0).
 * If `lock' is non-null, it will be dropped and re-acquired if
 * the routine must wait while freemem is 0.
 *
 * As it turns out, boot_getpages() does this.  It picks a page,
 * based on where OBP mapped in some address, gets its pfn, searches
 * the memsegs, locks the page, then pulls it off the free list!
 */
int
page_reclaim(page_t *pp, kmutex_t *lock)
{
	struct pcf	*p;
	uint_t		pcf_index;
	struct cpu	*cpup;
	int		enough;
	uint_t		i;

	ASSERT(lock != NULL ? MUTEX_HELD(lock) : 1);
	ASSERT(PAGE_EXCL(pp) && PP_ISFREE(pp));
	ASSERT(pp->p_szc == 0);

	/*
	 * If `freemem' is 0, we cannot reclaim this page from the
	 * freelist, so release every lock we might hold: the page,
	 * and the `lock' before blocking.
	 *
	 * The only way `freemem' can become 0 while there are pages
	 * marked free (have their p->p_free bit set) is when the
	 * system is low on memory and doing a page_create().  In
	 * order to guarantee that once page_create() starts acquiring
	 * pages it will be able to get all that it needs since `freemem'
	 * was decreased by the requested amount.  So, we need to release
	 * this page, and let page_create() have it.
	 *
	 * Since `freemem' being zero is not supposed to happen, just
	 * use the usual hash stuff as a starting point.  If that bucket
	 * is empty, then assume the worst, and start at the beginning
	 * of the pcf array.  If we always start at the beginning
	 * when acquiring more than one pcf lock, there won't be any
	 * deadlock problems.
	 */

	/* TODO: Do we need to test kcage_freemem if PG_NORELOC(pp)? */

	/*
	 * Below the throttle point, give the page up to page_create()
	 * unless the throttle routine says we may proceed anyway.
	 */
	if (freemem <= throttlefree && !page_create_throttle(1l, 0)) {
		pcf_acquire_all();
		goto page_reclaim_nomem;
	}

	/* First, try to debit the pcf bucket this CPU hashes to. */
	enough = 0;
	pcf_index = PCF_INDEX();
	p = &pcf[pcf_index];
	p->pcf_touch = 1;
	mutex_enter(&p->pcf_lock);
	if (p->pcf_count >= 1) {
		enough = 1;
		p->pcf_count--;
	}
	mutex_exit(&p->pcf_lock);

	if (!enough) {
		VM_STAT_ADD(page_reclaim_zero);
		/*
		 * Check again.  Its possible that some other thread
		 * could have been right behind us, and added one
		 * to a list somewhere.  Acquire each of the pcf locks
		 * until we find a page.
		 * (Locks are taken in array order, from the beginning,
		 * which is what makes this deadlock-free.)
		 */
		p = pcf;
		for (i = 0; i < PCF_FANOUT; i++) {
			p->pcf_touch = 1;
			mutex_enter(&p->pcf_lock);
			if (p->pcf_count >= 1) {
				p->pcf_count -= 1;
				enough = 1;
				break;
			}
			p++;
		}

		if (!enough) {
page_reclaim_nomem:
			/*
			 * We really can't have page `pp'.
			 * Time for the no-memory dance with
			 * page_free().  This is just like
			 * page_create_wait().  Plus the added
			 * attraction of releasing whatever mutex
			 * we held when we were called with in `lock'.
			 * Page_unlock() will wakeup any thread
			 * waiting around for this page.
			 */
			if (lock) {
				VM_STAT_ADD(page_reclaim_zero_locked);
				mutex_exit(lock);
			}
			page_unlock(pp);

			/*
			 * get this before we drop all the pcf locks.
			 */
			mutex_enter(&new_freemem_lock);

			/*
			 * At this point every pcf lock is held (either
			 * from the scan above or pcf_acquire_all()).
			 * Mark each bucket as having a waiter, then
			 * drop its lock.
			 */
			p = pcf;
			for (i = 0; i < PCF_FANOUT; i++) {
				p->pcf_wait++;
				mutex_exit(&p->pcf_lock);
				p++;
			}

			/* Block until page_free()/page_destroy_free() signals. */
			freemem_wait++;
			cv_wait(&freemem_cv, &new_freemem_lock);
			freemem_wait--;

			mutex_exit(&new_freemem_lock);

			/* Re-acquire the caller's lock before returning. */
			if (lock) {
				mutex_enter(lock);
			}
			return (0);
		}

		/*
		 * There was a page to be found.
		 * The pcf accounting has been done,
		 * though none of the pcf_wait flags have been set,
		 * drop the locks and continue on.
		 * (`p' still points at the bucket we broke out on;
		 * every bucket at or before it is still locked.)
		 */
		while (p >= pcf) {
			mutex_exit(&p->pcf_lock);
			p--;
		}
	}

	/*
	 * freemem is not protected by any lock.  Thus, we cannot
	 * have any assertion containing freemem here.
	 */
	freemem -= 1;

	VM_STAT_ADD(pagecnt.pc_reclaim);
	/* Aged pages live on the free list, others on the cache list. */
	if (PP_ISAGED(pp)) {
		page_list_sub(pp, PG_FREE_LIST);
		TRACE_1(TR_FAC_VM, TR_PAGE_UNFREE_FREE,
		    "page_reclaim_free:pp %p", pp);
	} else {
		page_list_sub(pp, PG_CACHE_LIST);
		TRACE_1(TR_FAC_VM, TR_PAGE_UNFREE_CACHE,
		    "page_reclaim_cache:pp %p", pp);
	}

	/*
	 * clear the p_free & p_age bits since this page is no longer
	 * on the free list.  Notice that there was a brief time where
	 * a page is marked as free, but is not on the list.
	 *
	 * Set the reference bit to protect against immediate pageout.
	 */
	PP_CLRFREE(pp);
	PP_CLRAGED(pp);
	page_set_props(pp, P_REF);

	CPU_STATS_ENTER_K();
	cpup = CPU;	/* get cpup now that CPU cannot change */
	CPU_STATS_ADDQ(cpup, vm, pgrec, 1);
	CPU_STATS_ADDQ(cpup, vm, pgfrec, 1);
	CPU_STATS_EXIT_K();

	return (1);
}
/*
 * Destroy identity of the page and put it back on
 * the page free list.  Assumes that the caller has
 * acquired the "exclusive" lock on the page.
 *
 * If `dontfree' is nonzero the page identity is destroyed but the
 * page itself is not returned to the free list (the caller keeps
 * the exclusive lock and disposes of the page itself).
 */
void
page_destroy(page_t *pp, int dontfree)
{
	ASSERT((PAGE_EXCL(pp) &&
	    !page_iolock_assert(pp)) || panicstr);

	/*
	 * Large pages must first be demoted to constituent (szc 0)
	 * pages.  Only vnode-backed, non-swapfs, non-kernel large
	 * pages can legitimately reach here.
	 */
	if (pp->p_szc != 0) {
		if (pp->p_vnode == NULL || IS_SWAPFSVP(pp->p_vnode) ||
		    pp->p_vnode == &kvp) {
			panic("page_destroy: anon or kernel or no vnode "
			    "large page %p", (void *)pp);
		}
		page_demote_vp_pages(pp);
		ASSERT(pp->p_szc == 0);
	}

	TRACE_1(TR_FAC_VM, TR_PAGE_DESTROY, "page_destroy:pp %p", pp);

	/*
	 * Unload translations, if any, then hash out the
	 * page to erase its identity.
	 */
	(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
	page_hashout(pp, NULL);

	if (!dontfree) {
		/*
		 * Acquire the "freemem_lock" for availrmem.
		 * The page_struct_lock need not be acquired for lckcnt
		 * and cowcnt since the page has an "exclusive" lock.
		 * Locked/COW pages had been charged against availrmem;
		 * give that memory back before freeing the page.
		 */
		if ((pp->p_lckcnt != 0) || (pp->p_cowcnt != 0)) {
			mutex_enter(&freemem_lock);
			if (pp->p_lckcnt != 0) {
				availrmem++;
				pp->p_lckcnt = 0;
			}
			if (pp->p_cowcnt != 0) {
				availrmem += pp->p_cowcnt;
				pp->p_cowcnt = 0;
			}
			mutex_exit(&freemem_lock);
		}
		/*
		 * Put the page on the "free" list.
		 */
		page_free(pp, 0);
	}
}
/*
 * Destroy a large page: unload translations, strip the identity of
 * every constituent page, give back any availrmem charged for locked
 * or COW pages, and return the whole set to the free list.  `pp' must
 * be the root page of the large page and each constituent must be
 * held with the exclusive lock.
 */
void
page_destroy_pages(page_t *pp)
{

	page_t	*tpp, *rootpp = NULL;
	pgcnt_t	pgcnt = page_get_pagecnt(pp->p_szc);
	pgcnt_t	i, pglcks = 0;
	uint_t	szc = pp->p_szc;
	int	toxic = 0;

	ASSERT(pp->p_szc != 0 && pp->p_szc < page_num_pagesizes());

	VM_STAT_ADD(pagecnt.pc_destroy_pages);

	TRACE_1(TR_FAC_VM, TR_PAGE_DESTROY, "page_destroy_pages:pp %p", pp);

	/* The root page must be aligned to the large-page page count. */
	if ((page_pptonum(pp) & (pgcnt - 1)) != 0) {
		panic("page_destroy_pages: not root page %p", (void *)pp);
		/*NOTREACHED*/
	}

	for (i = 0, tpp = pp; i < pgcnt; i++, tpp++) {
		ASSERT((PAGE_EXCL(tpp) &&
		    !page_iolock_assert(tpp)) || panicstr);
		/* Erase the page's identity: unmap, then hash out. */
		(void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
		page_hashout(tpp, NULL);
		ASSERT(tpp->p_offset == (u_offset_t)-1);
		/*
		 * Tally availrmem owed back for locked/COW pages so a
		 * single credit can be made under freemem_lock below.
		 */
		if (tpp->p_lckcnt != 0) {
			pglcks++;
			tpp->p_lckcnt = 0;
		} else if (tpp->p_cowcnt != 0) {
			pglcks += tpp->p_cowcnt;
			tpp->p_cowcnt = 0;
		}
		ASSERT(!hat_page_getshare(tpp));
		ASSERT(tpp->p_vnode == NULL);
		ASSERT(tpp->p_szc == szc);

		/* Remember if any constituent looks bad (failing memory). */
		if (page_deteriorating(tpp))
			toxic = 1;

		/* Mark free/aged and chain onto the list rooted at rootpp. */
		PP_SETFREE(tpp);
		page_clr_all_props(tpp);
		PP_SETAGED(tpp);
		ASSERT(tpp->p_next == tpp);
		ASSERT(tpp->p_prev == tpp);
		page_list_concat(&rootpp, &tpp);
	}

	ASSERT(rootpp == pp);
	/* Return the accumulated locked/COW charge to availrmem. */
	if (pglcks != 0) {
		mutex_enter(&freemem_lock);
		availrmem += pglcks;
		mutex_exit(&freemem_lock);
	}

	/*
	 * Deteriorating pages go to the toxic-page handler instead of
	 * back into general circulation.
	 */
	if (toxic) {
		page_free_toxic_pages(rootpp);
		return;
	}
	page_list_add_pages(rootpp, 0);
	/* Wake anyone throttled in page_create() waiting for pages. */
	page_create_putback(pgcnt);
}
/*
 * Similar to page_destroy(), but destroys pages which are
 * locked and known to be on the page free list.  Since
 * the page is known to be free and locked, no one can access
 * it.
 *
 * Also, the number of free pages does not change: the page simply
 * moves from the cachelist to the freelist, so no pcf accounting
 * is needed.
 */
void
page_destroy_free(page_t *pp)
{
	ASSERT(PAGE_EXCL(pp));
	ASSERT(PP_ISFREE(pp));
	/* A cachelist page still has a vnode identity ... */
	ASSERT(pp->p_vnode);
	ASSERT(hat_page_getattr(pp, P_MOD | P_REF | P_RO) == 0);
	ASSERT(!hat_page_is_mapped(pp));
	/* ... and is not aged (aged pages live on the freelist). */
	ASSERT(PP_ISAGED(pp) == 0);
	ASSERT(pp->p_szc == 0);

	VM_STAT_ADD(pagecnt.pc_destroy_free);
	page_list_sub(pp, PG_CACHE_LIST);

	/* Strip the vnode/offset identity before re-listing the page. */
	page_hashout(pp, NULL);
	ASSERT(pp->p_vnode == NULL);
	ASSERT(pp->p_offset == (u_offset_t)-1);
	ASSERT(pp->p_hash == NULL);

	/* Now identity-free: mark aged and put it on the freelist. */
	PP_SETAGED(pp);
	page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
	page_unlock(pp);

	/*
	 * Wake one waiter blocked in page_reclaim()'s no-memory path,
	 * if any, since a page just became available on the freelist.
	 */
	mutex_enter(&new_freemem_lock);
	if (freemem_wait) {
		cv_signal(&freemem_cv);
	}
	mutex_exit(&new_freemem_lock);
}
35507c478bd9Sstevel@tonic-gate */ 35517c478bd9Sstevel@tonic-gate void 35527c478bd9Sstevel@tonic-gate page_rename(page_t *opp, vnode_t *vp, u_offset_t off) 35537c478bd9Sstevel@tonic-gate { 35547c478bd9Sstevel@tonic-gate page_t *pp; 35557c478bd9Sstevel@tonic-gate int olckcnt = 0; 35567c478bd9Sstevel@tonic-gate int ocowcnt = 0; 35577c478bd9Sstevel@tonic-gate kmutex_t *phm; 35587c478bd9Sstevel@tonic-gate ulong_t index; 35597c478bd9Sstevel@tonic-gate 35607c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(opp) && !page_iolock_assert(opp)); 35617c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 35627c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(opp) == 0); 35637c478bd9Sstevel@tonic-gate 35647c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_rename_count); 35657c478bd9Sstevel@tonic-gate 35667c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_PAGE_RENAME, 35677c478bd9Sstevel@tonic-gate "page rename:pp %p vp %p off %llx", opp, vp, off); 35687c478bd9Sstevel@tonic-gate 356937fbc076Saguzovsk /* 357037fbc076Saguzovsk * CacheFS may call page_rename for a large NFS page 357137fbc076Saguzovsk * when both CacheFS and NFS mount points are used 357237fbc076Saguzovsk * by applications. Demote this large page before 357337fbc076Saguzovsk * renaming it, to ensure that there are no "partial" 357437fbc076Saguzovsk * large pages left lying around. 
 */
	if (opp->p_szc != 0) {
		vnode_t *ovp = opp->p_vnode;
		ASSERT(ovp != NULL);
		ASSERT(!IS_SWAPFSVP(ovp));
		ASSERT(ovp != &kvp);
		page_demote_vp_pages(opp);
		ASSERT(opp->p_szc == 0);
	}

	/* Strip the old identity from opp before assigning the new one. */
	page_hashout(opp, NULL);
	PP_CLRAGED(opp);

	/*
	 * Acquire the appropriate page hash lock, since
	 * we're going to rename the page.
	 */
	index = PAGE_HASH_FUNC(vp, off);
	phm = PAGE_HASH_MUTEX(index);
	mutex_enter(phm);
top:
	/*
	 * Look for an existing page with this name and destroy it if found.
	 * By holding the page hash lock all the way to the page_hashin()
	 * call, we are assured that no page can be created with this
	 * identity.  In the case when the phm lock is dropped to undo any
	 * hat layer mappings, the existing page is held with an "exclusive"
	 * lock, again preventing another page from being created with
	 * this identity.
	 */
	PAGE_HASH_SEARCH(index, pp, vp, off);
	if (pp != NULL) {
		VM_STAT_ADD(page_rename_exists);

		/*
		 * As it turns out, this is one of only two places where
		 * page_lock() needs to hold the passed in lock in the
		 * successful case.  In all of the others, the lock could
		 * be dropped as soon as the attempt is made to lock
		 * the page.  It is tempting to add yet another argument,
		 * PL_KEEP or PL_DROP, to let page_lock know what to do.
		 */
		if (!page_lock(pp, SE_EXCL, phm, P_RECLAIM)) {
			/*
			 * Went to sleep because the page could not
			 * be locked.  We were woken up when the page
			 * was unlocked, or when the page was destroyed.
			 * In either case, `phm' was dropped while we
			 * slept.  Hence we should not just roar through
			 * this loop.
			 */
			goto top;
		}

		/*
		 * If an existing page is a large page, then demote
		 * it to ensure that no "partial" large pages are
		 * "created" after page_rename. An existing page
		 * can be a CacheFS page, and can't belong to swapfs.
		 */
		if (hat_page_is_mapped(pp)) {
			/*
			 * Unload translations.  Since we hold the
			 * exclusive lock on this page, the page
			 * can not be changed while we drop phm.
			 * This is also not a lock protocol violation,
			 * but rather the proper way to do things.
			 */
			mutex_exit(phm);
			(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
			if (pp->p_szc != 0) {
				ASSERT(!IS_SWAPFSVP(vp));
				ASSERT(vp != &kvp);
				page_demote_vp_pages(pp);
				ASSERT(pp->p_szc == 0);
			}
			mutex_enter(phm);
		} else if (pp->p_szc != 0) {
			ASSERT(!IS_SWAPFSVP(vp));
			ASSERT(vp != &kvp);
			mutex_exit(phm);
			page_demote_vp_pages(pp);
			ASSERT(pp->p_szc == 0);
			mutex_enter(phm);
		}
		page_hashout(pp, phm);
	}
	/*
	 * Hash in the page with the new identity.
	 */
	if (!page_hashin(opp, vp, off, phm)) {
		/*
		 * We were holding phm while we searched for [vp, off]
		 * and only dropped phm if we found and locked a page.
		 * If we can't create this page now, then something
		 * is really broken.
		 */
		panic("page_rename: Can't hash in page: %p", (void *)pp);
		/*NOTREACHED*/
	}

	ASSERT(MUTEX_HELD(phm));
	mutex_exit(phm);

	/*
	 * Now that we have dropped phm, lets get around to finishing up
	 * with pp.
	 */
	if (pp != NULL) {
		ASSERT(!hat_page_is_mapped(pp));
		/* for now large pages should not end up here */
		ASSERT(pp->p_szc == 0);
		/*
		 * Save the locks for transfer to the new page and then
		 * clear them so page_free doesn't think they're important.
		 * The page_struct_lock need not be acquired for lckcnt and
		 * cowcnt since the page has an "exclusive" lock.
		 */
		olckcnt = pp->p_lckcnt;
		ocowcnt = pp->p_cowcnt;
		pp->p_lckcnt = pp->p_cowcnt = 0;

		/*
		 * Put the page on the "free" list after we drop
		 * the lock.  The less work under the lock the better.
		 */
		/*LINTED: constant in conditional context*/
		VN_DISPOSE(pp, B_FREE, 0, kcred);
	}

	/*
	 * Transfer the lock count from the old page (if any).
	 * The page_struct_lock need not be acquired for lckcnt and
	 * cowcnt since the page has an "exclusive" lock.
	 */
	opp->p_lckcnt += olckcnt;
	opp->p_cowcnt += ocowcnt;
}

/*
 * low level routine to add page `pp' to the hash and vp chains for [vp, offset]
 *
 * Pages are normally inserted at the start of a vnode's v_pages list.
 * If the vnode is VMODSORT and the page is modified, it goes at the end.
 * This can happen when a modified page is relocated for DR.
 *
 * Returns 1 on success and 0 on failure.
 */
static int
page_do_hashin(page_t *pp, vnode_t *vp, u_offset_t offset)
{
	page_t		**listp;
	page_t		*tp;
	ulong_t		index;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(vp != NULL);
	ASSERT(MUTEX_HELD(page_vnode_mutex(vp)));

	/*
	 * Be sure to set these up before the page is inserted on the hash
	 * list.  As soon as the page is placed on the list some other
	 * thread might get confused and wonder how this page could
	 * possibly hash to this list.
	 */
	pp->p_vnode = vp;
	pp->p_offset = offset;

	/*
	 * record if this page is on a swap vnode
	 */
	if ((vp->v_flag & VISSWAP) != 0)
		PP_SETSWAP(pp);

	index = PAGE_HASH_FUNC(vp, offset);
	ASSERT(MUTEX_HELD(PAGE_HASH_MUTEX(index)));
	listp = &page_hash[index];

	/*
	 * If this page is already hashed in, fail this attempt to add it.
	 */
	for (tp = *listp; tp != NULL; tp = tp->p_hash) {
		if (tp->p_vnode == vp && tp->p_offset == offset) {
			/* Identity taken: undo the settings made above. */
			pp->p_vnode = NULL;
			pp->p_offset = (u_offset_t)(-1);
			return (0);
		}
	}
	/* Insert at the head of the hash chain. */
	pp->p_hash = *listp;
	*listp = pp;

	/*
	 * Add the page to the vnode's list of pages: normally at the
	 * head, but a modified page on a VMODSORT vnode goes at the tail.
	 */
	if (vp->v_pages != NULL && IS_VMODSORT(vp) && hat_ismod(pp))
		listp = &vp->v_pages->p_vpprev->p_vpnext;
	else
		listp = &vp->v_pages;

	page_vpadd(listp, pp);

	return (1);
}
37797c478bd9Sstevel@tonic-gate /* 37807c478bd9Sstevel@tonic-gate * Add page `pp' to both the hash and vp chains for [vp, offset]. 37817c478bd9Sstevel@tonic-gate * 37827c478bd9Sstevel@tonic-gate * Returns 1 on success and 0 on failure. 37837c478bd9Sstevel@tonic-gate * If hold is passed in, it is not dropped. 37847c478bd9Sstevel@tonic-gate */ 37857c478bd9Sstevel@tonic-gate int 37867c478bd9Sstevel@tonic-gate page_hashin(page_t *pp, vnode_t *vp, u_offset_t offset, kmutex_t *hold) 37877c478bd9Sstevel@tonic-gate { 37887c478bd9Sstevel@tonic-gate kmutex_t *phm = NULL; 37897c478bd9Sstevel@tonic-gate kmutex_t *vphm; 37907c478bd9Sstevel@tonic-gate int rc; 37917c478bd9Sstevel@tonic-gate 37927c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 37937c478bd9Sstevel@tonic-gate 37947c478bd9Sstevel@tonic-gate TRACE_3(TR_FAC_VM, TR_PAGE_HASHIN, 37957c478bd9Sstevel@tonic-gate "page_hashin:pp %p vp %p offset %llx", 37967c478bd9Sstevel@tonic-gate pp, vp, offset); 37977c478bd9Sstevel@tonic-gate 37987c478bd9Sstevel@tonic-gate VM_STAT_ADD(hashin_count); 37997c478bd9Sstevel@tonic-gate 38007c478bd9Sstevel@tonic-gate if (hold != NULL) 38017c478bd9Sstevel@tonic-gate phm = hold; 38027c478bd9Sstevel@tonic-gate else { 38037c478bd9Sstevel@tonic-gate VM_STAT_ADD(hashin_not_held); 38047c478bd9Sstevel@tonic-gate phm = PAGE_HASH_MUTEX(PAGE_HASH_FUNC(vp, offset)); 38057c478bd9Sstevel@tonic-gate mutex_enter(phm); 38067c478bd9Sstevel@tonic-gate } 38077c478bd9Sstevel@tonic-gate 38087c478bd9Sstevel@tonic-gate vphm = page_vnode_mutex(vp); 38097c478bd9Sstevel@tonic-gate mutex_enter(vphm); 38107c478bd9Sstevel@tonic-gate rc = page_do_hashin(pp, vp, offset); 38117c478bd9Sstevel@tonic-gate mutex_exit(vphm); 38127c478bd9Sstevel@tonic-gate if (hold == NULL) 38137c478bd9Sstevel@tonic-gate mutex_exit(phm); 38147c478bd9Sstevel@tonic-gate if (rc == 0) 38157c478bd9Sstevel@tonic-gate VM_STAT_ADD(hashin_already); 38167c478bd9Sstevel@tonic-gate return (rc); 38177c478bd9Sstevel@tonic-gate } 
/*
 * Remove page ``pp'' from the hash and vp chains and remove vp association.
 * All mutexes must be held
 */
static void
page_do_hashout(page_t *pp)
{
	page_t	**hpp;
	page_t	*hp;
	vnode_t	*vp = pp->p_vnode;

	ASSERT(vp != NULL);
	ASSERT(MUTEX_HELD(page_vnode_mutex(vp)));

	/*
	 * First, take pp off of its hash chain.
	 */
	hpp = &page_hash[PAGE_HASH_FUNC(vp, pp->p_offset)];

	for (;;) {
		hp = *hpp;
		if (hp == pp)
			break;
		if (hp == NULL) {
			/* pp was not on the chain its identity hashes to. */
			panic("page_do_hashout");
			/*NOTREACHED*/
		}
		hpp = &hp->p_hash;
	}
	*hpp = pp->p_hash;

	/*
	 * Now remove it from its associated vnode.
	 */
	if (vp->v_pages)
		page_vpsub(&vp->v_pages, pp);

	/* Clear the old identity and per-page properties. */
	pp->p_hash = NULL;
	page_clr_all_props(pp);
	PP_CLRSWAP(pp);
	pp->p_vnode = NULL;
	pp->p_offset = (u_offset_t)-1;
}

/*
 * Remove page ``pp'' from the hash and vp chains and remove vp association.
 *
 * When `phm' is non-NULL it contains the address of the mutex protecting the
 * hash list pp is on.  It is not dropped.
 */
void
page_hashout(page_t *pp, kmutex_t *phm)
{
	vnode_t		*vp;
	ulong_t		index;
	kmutex_t	*nphm;
	kmutex_t	*vphm;
	kmutex_t	*sep;

	ASSERT(phm != NULL ? MUTEX_HELD(phm) : 1);
	ASSERT(pp->p_vnode != NULL);
	ASSERT((PAGE_EXCL(pp) && !page_iolock_assert(pp)) || panicstr);
	ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(pp->p_vnode)));

	vp = pp->p_vnode;

	TRACE_2(TR_FAC_VM, TR_PAGE_HASHOUT,
		"page_hashout:pp %p vp %p", pp, vp);

	/* Kernel probe */
	TNF_PROBE_2(page_unmap, "vm pagefault", /* CSTYLED */,
		tnf_opaque,	vnode,	vp,
		tnf_offset,	offset,	pp->p_offset);

	/*
	 * Take the hash mutex for pp's identity unless the caller passed
	 * it in already held.
	 */
	VM_STAT_ADD(hashout_count);
	index = PAGE_HASH_FUNC(vp, pp->p_offset);
	if (phm == NULL) {
		VM_STAT_ADD(hashout_not_held);
		nphm = PAGE_HASH_MUTEX(index);
		mutex_enter(nphm);
	}
	ASSERT(phm ? phm == PAGE_HASH_MUTEX(index) : 1);


	/*
	 * grab page vnode mutex and remove it...
	 */
	vphm = page_vnode_mutex(vp);
	mutex_enter(vphm);

	page_do_hashout(pp);

	mutex_exit(vphm);
	if (phm == NULL)
		mutex_exit(nphm);

	/*
	 * If the page was retired, update the pages_retired
	 * total and clear the page flag
	 */
	if (page_isretired(pp)) {
		retired_page_removed(pp);
	}

	/*
	 * Wake up processes waiting for this page.  The page's
	 * identity has been changed, and is probably not the
	 * desired page any longer.
	 */
	sep = page_se_mutex(pp);
	mutex_enter(sep);
	if (CV_HAS_WAITERS(&pp->p_cv))
		cv_broadcast(&pp->p_cv);
	mutex_exit(sep);
}

/*
 * Add the page to the front of a linked list of pages
 * using the p_next & p_prev pointers for the list.
 * The caller is responsible for protecting the list pointers.
39427c478bd9Sstevel@tonic-gate */ 39437c478bd9Sstevel@tonic-gate void 39447c478bd9Sstevel@tonic-gate page_add(page_t **ppp, page_t *pp) 39457c478bd9Sstevel@tonic-gate { 39467c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(pp) || (PAGE_SHARED(pp) && page_iolock_assert(pp))); 39477c478bd9Sstevel@tonic-gate 39487c478bd9Sstevel@tonic-gate page_add_common(ppp, pp); 39497c478bd9Sstevel@tonic-gate } 39507c478bd9Sstevel@tonic-gate 39517c478bd9Sstevel@tonic-gate 39527c478bd9Sstevel@tonic-gate 39537c478bd9Sstevel@tonic-gate /* 39547c478bd9Sstevel@tonic-gate * Common code for page_add() and mach_page_add() 39557c478bd9Sstevel@tonic-gate */ 39567c478bd9Sstevel@tonic-gate void 39577c478bd9Sstevel@tonic-gate page_add_common(page_t **ppp, page_t *pp) 39587c478bd9Sstevel@tonic-gate { 39597c478bd9Sstevel@tonic-gate if (*ppp == NULL) { 39607c478bd9Sstevel@tonic-gate pp->p_next = pp->p_prev = pp; 39617c478bd9Sstevel@tonic-gate } else { 39627c478bd9Sstevel@tonic-gate pp->p_next = *ppp; 39637c478bd9Sstevel@tonic-gate pp->p_prev = (*ppp)->p_prev; 39647c478bd9Sstevel@tonic-gate (*ppp)->p_prev = pp; 39657c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp; 39667c478bd9Sstevel@tonic-gate } 39677c478bd9Sstevel@tonic-gate *ppp = pp; 39687c478bd9Sstevel@tonic-gate } 39697c478bd9Sstevel@tonic-gate 39707c478bd9Sstevel@tonic-gate 39717c478bd9Sstevel@tonic-gate /* 39727c478bd9Sstevel@tonic-gate * Remove this page from a linked list of pages 39737c478bd9Sstevel@tonic-gate * using the p_next & p_prev pointers for the list. 39747c478bd9Sstevel@tonic-gate * 39757c478bd9Sstevel@tonic-gate * The caller is responsible for protecting the list pointers. 39767c478bd9Sstevel@tonic-gate */ 39777c478bd9Sstevel@tonic-gate void 39787c478bd9Sstevel@tonic-gate page_sub(page_t **ppp, page_t *pp) 39797c478bd9Sstevel@tonic-gate { 39807c478bd9Sstevel@tonic-gate ASSERT((PP_ISFREE(pp)) ? 
1 : 39817c478bd9Sstevel@tonic-gate (PAGE_EXCL(pp)) || (PAGE_SHARED(pp) && page_iolock_assert(pp))); 39827c478bd9Sstevel@tonic-gate 39837c478bd9Sstevel@tonic-gate if (*ppp == NULL || pp == NULL) { 39847c478bd9Sstevel@tonic-gate panic("page_sub: bad arg(s): pp %p, *ppp %p", 39857c478bd9Sstevel@tonic-gate (void *)pp, (void *)(*ppp)); 39867c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 39877c478bd9Sstevel@tonic-gate } 39887c478bd9Sstevel@tonic-gate 39897c478bd9Sstevel@tonic-gate page_sub_common(ppp, pp); 39907c478bd9Sstevel@tonic-gate } 39917c478bd9Sstevel@tonic-gate 39927c478bd9Sstevel@tonic-gate 39937c478bd9Sstevel@tonic-gate /* 39947c478bd9Sstevel@tonic-gate * Common code for page_sub() and mach_page_sub() 39957c478bd9Sstevel@tonic-gate */ 39967c478bd9Sstevel@tonic-gate void 39977c478bd9Sstevel@tonic-gate page_sub_common(page_t **ppp, page_t *pp) 39987c478bd9Sstevel@tonic-gate { 39997c478bd9Sstevel@tonic-gate if (*ppp == pp) 40007c478bd9Sstevel@tonic-gate *ppp = pp->p_next; /* go to next page */ 40017c478bd9Sstevel@tonic-gate 40027c478bd9Sstevel@tonic-gate if (*ppp == pp) 40037c478bd9Sstevel@tonic-gate *ppp = NULL; /* page list is gone */ 40047c478bd9Sstevel@tonic-gate else { 40057c478bd9Sstevel@tonic-gate pp->p_prev->p_next = pp->p_next; 40067c478bd9Sstevel@tonic-gate pp->p_next->p_prev = pp->p_prev; 40077c478bd9Sstevel@tonic-gate } 40087c478bd9Sstevel@tonic-gate pp->p_prev = pp->p_next = pp; /* make pp a list of one */ 40097c478bd9Sstevel@tonic-gate } 40107c478bd9Sstevel@tonic-gate 40117c478bd9Sstevel@tonic-gate 40127c478bd9Sstevel@tonic-gate /* 40137c478bd9Sstevel@tonic-gate * Break page list cppp into two lists with npages in the first list. 40147c478bd9Sstevel@tonic-gate * The tail is returned in nppp. 
40157c478bd9Sstevel@tonic-gate */ 40167c478bd9Sstevel@tonic-gate void 40177c478bd9Sstevel@tonic-gate page_list_break(page_t **oppp, page_t **nppp, pgcnt_t npages) 40187c478bd9Sstevel@tonic-gate { 40197c478bd9Sstevel@tonic-gate page_t *s1pp = *oppp; 40207c478bd9Sstevel@tonic-gate page_t *s2pp; 40217c478bd9Sstevel@tonic-gate page_t *e1pp, *e2pp; 40227c478bd9Sstevel@tonic-gate long n = 0; 40237c478bd9Sstevel@tonic-gate 40247c478bd9Sstevel@tonic-gate if (s1pp == NULL) { 40257c478bd9Sstevel@tonic-gate *nppp = NULL; 40267c478bd9Sstevel@tonic-gate return; 40277c478bd9Sstevel@tonic-gate } 40287c478bd9Sstevel@tonic-gate if (npages == 0) { 40297c478bd9Sstevel@tonic-gate *nppp = s1pp; 40307c478bd9Sstevel@tonic-gate *oppp = NULL; 40317c478bd9Sstevel@tonic-gate return; 40327c478bd9Sstevel@tonic-gate } 40337c478bd9Sstevel@tonic-gate for (n = 0, s2pp = *oppp; n < npages; n++) { 40347c478bd9Sstevel@tonic-gate s2pp = s2pp->p_next; 40357c478bd9Sstevel@tonic-gate } 40367c478bd9Sstevel@tonic-gate /* Fix head and tail of new lists */ 40377c478bd9Sstevel@tonic-gate e1pp = s2pp->p_prev; 40387c478bd9Sstevel@tonic-gate e2pp = s1pp->p_prev; 40397c478bd9Sstevel@tonic-gate s1pp->p_prev = e1pp; 40407c478bd9Sstevel@tonic-gate e1pp->p_next = s1pp; 40417c478bd9Sstevel@tonic-gate s2pp->p_prev = e2pp; 40427c478bd9Sstevel@tonic-gate e2pp->p_next = s2pp; 40437c478bd9Sstevel@tonic-gate 40447c478bd9Sstevel@tonic-gate /* second list empty */ 40457c478bd9Sstevel@tonic-gate if (s2pp == s1pp) { 40467c478bd9Sstevel@tonic-gate *oppp = s1pp; 40477c478bd9Sstevel@tonic-gate *nppp = NULL; 40487c478bd9Sstevel@tonic-gate } else { 40497c478bd9Sstevel@tonic-gate *oppp = s1pp; 40507c478bd9Sstevel@tonic-gate *nppp = s2pp; 40517c478bd9Sstevel@tonic-gate } 40527c478bd9Sstevel@tonic-gate } 40537c478bd9Sstevel@tonic-gate 40547c478bd9Sstevel@tonic-gate /* 40557c478bd9Sstevel@tonic-gate * Concatenate page list nppp onto the end of list ppp. 
40567c478bd9Sstevel@tonic-gate */ 40577c478bd9Sstevel@tonic-gate void 40587c478bd9Sstevel@tonic-gate page_list_concat(page_t **ppp, page_t **nppp) 40597c478bd9Sstevel@tonic-gate { 40607c478bd9Sstevel@tonic-gate page_t *s1pp, *s2pp, *e1pp, *e2pp; 40617c478bd9Sstevel@tonic-gate 40627c478bd9Sstevel@tonic-gate if (*nppp == NULL) { 40637c478bd9Sstevel@tonic-gate return; 40647c478bd9Sstevel@tonic-gate } 40657c478bd9Sstevel@tonic-gate if (*ppp == NULL) { 40667c478bd9Sstevel@tonic-gate *ppp = *nppp; 40677c478bd9Sstevel@tonic-gate return; 40687c478bd9Sstevel@tonic-gate } 40697c478bd9Sstevel@tonic-gate s1pp = *ppp; 40707c478bd9Sstevel@tonic-gate e1pp = s1pp->p_prev; 40717c478bd9Sstevel@tonic-gate s2pp = *nppp; 40727c478bd9Sstevel@tonic-gate e2pp = s2pp->p_prev; 40737c478bd9Sstevel@tonic-gate s1pp->p_prev = e2pp; 40747c478bd9Sstevel@tonic-gate e2pp->p_next = s1pp; 40757c478bd9Sstevel@tonic-gate e1pp->p_next = s2pp; 40767c478bd9Sstevel@tonic-gate s2pp->p_prev = e1pp; 40777c478bd9Sstevel@tonic-gate } 40787c478bd9Sstevel@tonic-gate 40797c478bd9Sstevel@tonic-gate /* 40807c478bd9Sstevel@tonic-gate * return the next page in the page list 40817c478bd9Sstevel@tonic-gate */ 40827c478bd9Sstevel@tonic-gate page_t * 40837c478bd9Sstevel@tonic-gate page_list_next(page_t *pp) 40847c478bd9Sstevel@tonic-gate { 40857c478bd9Sstevel@tonic-gate return (pp->p_next); 40867c478bd9Sstevel@tonic-gate } 40877c478bd9Sstevel@tonic-gate 40887c478bd9Sstevel@tonic-gate 40897c478bd9Sstevel@tonic-gate /* 40907c478bd9Sstevel@tonic-gate * Add the page to the front of the linked list of pages 40917c478bd9Sstevel@tonic-gate * using p_vpnext/p_vpprev pointers for the list. 40927c478bd9Sstevel@tonic-gate * 40937c478bd9Sstevel@tonic-gate * The caller is responsible for protecting the lists. 
40947c478bd9Sstevel@tonic-gate */ 40957c478bd9Sstevel@tonic-gate void 40967c478bd9Sstevel@tonic-gate page_vpadd(page_t **ppp, page_t *pp) 40977c478bd9Sstevel@tonic-gate { 40987c478bd9Sstevel@tonic-gate if (*ppp == NULL) { 40997c478bd9Sstevel@tonic-gate pp->p_vpnext = pp->p_vpprev = pp; 41007c478bd9Sstevel@tonic-gate } else { 41017c478bd9Sstevel@tonic-gate pp->p_vpnext = *ppp; 41027c478bd9Sstevel@tonic-gate pp->p_vpprev = (*ppp)->p_vpprev; 41037c478bd9Sstevel@tonic-gate (*ppp)->p_vpprev = pp; 41047c478bd9Sstevel@tonic-gate pp->p_vpprev->p_vpnext = pp; 41057c478bd9Sstevel@tonic-gate } 41067c478bd9Sstevel@tonic-gate *ppp = pp; 41077c478bd9Sstevel@tonic-gate } 41087c478bd9Sstevel@tonic-gate 41097c478bd9Sstevel@tonic-gate /* 41107c478bd9Sstevel@tonic-gate * Remove this page from the linked list of pages 41117c478bd9Sstevel@tonic-gate * using p_vpnext/p_vpprev pointers for the list. 41127c478bd9Sstevel@tonic-gate * 41137c478bd9Sstevel@tonic-gate * The caller is responsible for protecting the lists. 
41147c478bd9Sstevel@tonic-gate */ 41157c478bd9Sstevel@tonic-gate void 41167c478bd9Sstevel@tonic-gate page_vpsub(page_t **ppp, page_t *pp) 41177c478bd9Sstevel@tonic-gate { 41187c478bd9Sstevel@tonic-gate if (*ppp == NULL || pp == NULL) { 41197c478bd9Sstevel@tonic-gate panic("page_vpsub: bad arg(s): pp %p, *ppp %p", 41207c478bd9Sstevel@tonic-gate (void *)pp, (void *)(*ppp)); 41217c478bd9Sstevel@tonic-gate /*NOTREACHED*/ 41227c478bd9Sstevel@tonic-gate } 41237c478bd9Sstevel@tonic-gate 41247c478bd9Sstevel@tonic-gate if (*ppp == pp) 41257c478bd9Sstevel@tonic-gate *ppp = pp->p_vpnext; /* go to next page */ 41267c478bd9Sstevel@tonic-gate 41277c478bd9Sstevel@tonic-gate if (*ppp == pp) 41287c478bd9Sstevel@tonic-gate *ppp = NULL; /* page list is gone */ 41297c478bd9Sstevel@tonic-gate else { 41307c478bd9Sstevel@tonic-gate pp->p_vpprev->p_vpnext = pp->p_vpnext; 41317c478bd9Sstevel@tonic-gate pp->p_vpnext->p_vpprev = pp->p_vpprev; 41327c478bd9Sstevel@tonic-gate } 41337c478bd9Sstevel@tonic-gate pp->p_vpprev = pp->p_vpnext = pp; /* make pp a list of one */ 41347c478bd9Sstevel@tonic-gate } 41357c478bd9Sstevel@tonic-gate 41367c478bd9Sstevel@tonic-gate /* 41377c478bd9Sstevel@tonic-gate * Lock a physical page into memory "long term". Used to support "lock 41387c478bd9Sstevel@tonic-gate * in memory" functions. Accepts the page to be locked, and a cow variable 41397c478bd9Sstevel@tonic-gate * to indicate whether a the lock will travel to the new page during 41407c478bd9Sstevel@tonic-gate * a potential copy-on-write. 
 */
int
page_pp_lock(
	page_t *pp,			/* page to be locked */
	int cow,			/* cow lock */
	int kernel)			/* must succeed -- ignore checking */
{
	int r = 0;			/* result -- assume failure */

	ASSERT(PAGE_LOCKED(pp));

	page_struct_lock(pp);
	/*
	 * Acquire the "freemem_lock" for availrmem.
	 */
	if (cow) {
		/*
		 * COW lock: only granted when availrmem stays above
		 * pages_pp_maximum and p_cowcnt has room to grow.
		 */
		mutex_enter(&freemem_lock);
		if ((availrmem > pages_pp_maximum) &&
		    (pp->p_cowcnt < (ushort_t)PAGE_LOCK_MAXIMUM)) {
			availrmem--;
			pages_locked++;
			mutex_exit(&freemem_lock);
			r = 1;
			if (++pp->p_cowcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
				cmn_err(CE_WARN,
				    "COW lock limit reached on pfn 0x%lx",
				    page_pptonum(pp));
			}
		} else
			mutex_exit(&freemem_lock);
	} else {
		if (pp->p_lckcnt) {
			/* Page already locked: just bump the count. */
			if (pp->p_lckcnt < (ushort_t)PAGE_LOCK_MAXIMUM) {
				r = 1;
				if (++pp->p_lckcnt ==
				    (ushort_t)PAGE_LOCK_MAXIMUM) {
					cmn_err(CE_WARN, "Page lock limit "
					    "reached on pfn 0x%lx",
					    page_pptonum(pp));
				}
			}
		} else {
			if (kernel) {
				/* availrmem accounting done by caller */
				++pp->p_lckcnt;
				r = 1;
			} else {
				/* First user lock: charge availrmem. */
				mutex_enter(&freemem_lock);
				if (availrmem > pages_pp_maximum) {
					availrmem--;
					pages_locked++;
					++pp->p_lckcnt;
					r = 1;
				}
				mutex_exit(&freemem_lock);
			}
		}
	}
	page_struct_unlock(pp);
	/* r == 1 if the lock was granted, 0 otherwise. */
	return (r);
}

/*
 * Decommit a lock on a physical page frame.  Account for cow locks if
 * appropriate.
 */
void
page_pp_unlock(
	page_t *pp,			/* page to be unlocked */
	int cow,			/* expect cow lock */
	int kernel)			/* this was a kernel lock */
{
	ASSERT(PAGE_LOCKED(pp));

	page_struct_lock(pp);
	/*
	 * Acquire the "freemem_lock" for availrmem.
	 * If cowcnt or lckcnt is already 0 do nothing; i.e., we
	 * could be called to unlock even if nothing is locked. This could
	 * happen if locked file pages were truncated (removing the lock)
	 * and the file was grown again and new pages faulted in; the new
	 * pages are unlocked but the segment still thinks they're locked.
	 */
	if (cow) {
		if (pp->p_cowcnt) {
			mutex_enter(&freemem_lock);
			pp->p_cowcnt--;
			availrmem++;
			pages_locked--;
			mutex_exit(&freemem_lock);
		}
	} else {
		if (pp->p_lckcnt && --pp->p_lckcnt == 0) {
			/*
			 * Last lock dropped: repay availrmem unless this
			 * was a kernel lock (caller did that accounting).
			 */
			if (!kernel) {
				mutex_enter(&freemem_lock);
				availrmem++;
				pages_locked--;
				mutex_exit(&freemem_lock);
			}
		}
	}
	page_struct_unlock(pp);
}

/*
 * This routine reserves availrmem for npages;
 * 	flags: KM_NOSLEEP or KM_SLEEP
 * 	returns 1 on success or 0 on failure
 */
int
page_resv(pgcnt_t npages, uint_t flags)
{
	mutex_enter(&freemem_lock);
	while (availrmem < tune.t_minarmem + npages) {
		if (flags & KM_NOSLEEP) {
			mutex_exit(&freemem_lock);
			return (0);
		}
		/*
		 * KM_SLEEP: drop the lock, advertise the need, reap the
		 * kmem caches, and retry after a short delay.
		 */
		mutex_exit(&freemem_lock);
		page_needfree(npages);
		kmem_reap();
		delay(hz >> 2);
		page_needfree(-(spgcnt_t)npages);
		mutex_enter(&freemem_lock);
	}
	availrmem -= npages;
	mutex_exit(&freemem_lock);
	return (1);
}

/*
 * This routine unreserves availrmem for npages;
 */
void
page_unresv(pgcnt_t npages)
{
	mutex_enter(&freemem_lock);
	availrmem += npages;
	mutex_exit(&freemem_lock);
}

/*
 * See Statement at the beginning of segvn_lockop() regarding
 * the way we handle cowcnts and lckcnts.
 *
 * Transfer cowcnt on 'opp' to cowcnt on 'npp' if the vpage
 * that breaks COW has PROT_WRITE.
 *
 * Note that, we may also break COW in case we are softlocking
 * on read access during physio;
 * in this softlock case, the vpage may not have PROT_WRITE.
 * So, we need to transfer lckcnt on 'opp' to lckcnt on 'npp'
 * if the vpage doesn't have PROT_WRITE.
 *
 * This routine is never called if we are stealing a page
 * in anon_private.
 *
 * The caller subtracted from availrmem for read only mapping.
 * if lckcnt is 1 increment availrmem.
 */
void
page_pp_useclaim(
	page_t *opp,		/* original page frame losing lock */
	page_t *npp,		/* new page frame gaining lock */
	uint_t write_perm)	/* set if vpage has PROT_WRITE */
{
	int payback = 0;	/* set if availrmem must be credited back */

	ASSERT(PAGE_LOCKED(opp));
	ASSERT(PAGE_LOCKED(npp));

	/*
	 * Only opp's struct lock is taken; npp is freshly allocated
	 * and its counts are asserted to be zero below.
	 */
	page_struct_lock(opp);

	ASSERT(npp->p_cowcnt == 0);
	ASSERT(npp->p_lckcnt == 0);

	/* Don't use claim if nothing is locked (see page_pp_unlock above) */
	if ((write_perm && opp->p_cowcnt != 0) ||
	    (!write_perm && opp->p_lckcnt != 0)) {

		if (write_perm) {
			/* Move one COW claim from the old page to the new. */
			npp->p_cowcnt++;
			ASSERT(opp->p_cowcnt != 0);
			opp->p_cowcnt--;
		} else {

			ASSERT(opp->p_lckcnt != 0);

			/*
			 * We didn't need availrmem decremented if p_lckcnt on
			 * original page is 1. Here, we are unlocking
			 * read-only copy belonging to original page and
			 * are locking a copy belonging to new page.
			 */
			if (opp->p_lckcnt == 1)
				payback = 1;

			npp->p_lckcnt++;
			opp->p_lckcnt--;
		}
	}
	if (payback) {
		mutex_enter(&freemem_lock);
		availrmem++;
		pages_useclaim--;
		mutex_exit(&freemem_lock);
	}
	page_struct_unlock(opp);
}

/*
 * Simple claim adjust functions -- used to support changes in
 * claims due to changes in access permissions. Used by segvn_setprot().
 */

/*
 * Convert one page lock (p_lckcnt) on 'pp' into a COW claim
 * (p_cowcnt).  If this is not the last lock, a page of availrmem
 * must be claimed (subject to the pages_pp_maximum floor).
 * Returns 1 on success, 0 if the claim could not be made.
 */
int
page_addclaim(page_t *pp)
{
	int r = 0;			/* result */

	ASSERT(PAGE_LOCKED(pp));

	page_struct_lock(pp);
	ASSERT(pp->p_lckcnt != 0);

	if (pp->p_lckcnt == 1) {
		/*
		 * Last lock: availrmem was already charged for it,
		 * so the claim just changes form; no new charge.
		 */
		if (pp->p_cowcnt < (ushort_t)PAGE_LOCK_MAXIMUM) {
			--pp->p_lckcnt;
			r = 1;
			if (++pp->p_cowcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
				cmn_err(CE_WARN,
				    "COW lock limit reached on pfn 0x%lx",
				    page_pptonum(pp));
			}
		}
	} else {
		mutex_enter(&freemem_lock);
		if ((availrmem > pages_pp_maximum) &&
		    (pp->p_cowcnt < (ushort_t)PAGE_LOCK_MAXIMUM)) {
			--availrmem;
			++pages_claimed;
			mutex_exit(&freemem_lock);
			--pp->p_lckcnt;
			r = 1;
			if (++pp->p_cowcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
				cmn_err(CE_WARN,
				    "COW lock limit reached on pfn 0x%lx",
				    page_pptonum(pp));
			}
		} else
			mutex_exit(&freemem_lock);
	}
	page_struct_unlock(pp);
	return (r);
}

/*
 * Inverse of page_addclaim(): convert one COW claim (p_cowcnt)
 * on 'pp' back into a page lock (p_lckcnt), returning the claimed
 * availrmem when the page was already locked.
 * Returns 1 on success, 0 if p_lckcnt is at PAGE_LOCK_MAXIMUM.
 */
int
page_subclaim(page_t *pp)
{
	int r = 0;

	ASSERT(PAGE_LOCKED(pp));

	page_struct_lock(pp);
	ASSERT(pp->p_cowcnt != 0);

	if (pp->p_lckcnt) {
		if (pp->p_lckcnt < (ushort_t)PAGE_LOCK_MAXIMUM) {
			r = 1;
			/*
			 * for availrmem
			 */
			mutex_enter(&freemem_lock);
			availrmem++;
			pages_claimed--;
			mutex_exit(&freemem_lock);

			pp->p_cowcnt--;

			if (++pp->p_lckcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
				cmn_err(CE_WARN,
				    "Page lock limit reached on pfn 0x%lx",
				    page_pptonum(pp));
			}
		}
	} else {
		/* No existing lock: the claim simply changes form. */
		r = 1;
		pp->p_cowcnt--;
		pp->p_lckcnt++;
	}
	page_struct_unlock(pp);
	return (r);
}

/*
 * Array form of page_addclaim(): convert one lock to a COW claim on
 * every page in the NULL-terminated array 'ppa', all-or-nothing,
 * under page_llock.  Returns 1 on success, 0 if any page is at the
 * COW limit or availrmem cannot cover the pages still locked.
 */
int
page_addclaim_pages(page_t **ppa)
{
	pgcnt_t lckpgs = 0, pg_idx;

	VM_STAT_ADD(pagecnt.pc_addclaim_pages);

	/*
	 * First pass: verify every page can take a claim, and count
	 * the pages that will still be locked afterwards (those need
	 * an availrmem charge).
	 */
	mutex_enter(&page_llock);
	for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) {

		ASSERT(PAGE_LOCKED(ppa[pg_idx]));
		ASSERT(ppa[pg_idx]->p_lckcnt != 0);
		if (ppa[pg_idx]->p_cowcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
			mutex_exit(&page_llock);
			return (0);
		}
		if (ppa[pg_idx]->p_lckcnt > 1)
			lckpgs++;
	}

	if (lckpgs != 0) {
		mutex_enter(&freemem_lock);
		if (availrmem >= pages_pp_maximum + lckpgs) {
			availrmem -= lckpgs;
			pages_claimed += lckpgs;
		} else {
			mutex_exit(&freemem_lock);
			mutex_exit(&page_llock);
			return (0);
		}
		mutex_exit(&freemem_lock);
	}

	/* Second pass: commit the lock-to-claim conversion. */
	for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) {
		ppa[pg_idx]->p_lckcnt--;
		ppa[pg_idx]->p_cowcnt++;
	}
	mutex_exit(&page_llock);
	return (1);
}

/*
 * Array form of page_subclaim(): convert one COW claim back to a
 * lock on every page in the NULL-terminated array 'ppa',
 * all-or-nothing, under page_llock.  Returns 1 on success, 0 if any
 * page is already at the lock limit.
 */
int
page_subclaim_pages(page_t **ppa)
{
	pgcnt_t ulckpgs = 0, pg_idx;

	VM_STAT_ADD(pagecnt.pc_subclaim_pages);

	/* First pass: verify, counting pages whose charge is returned. */
	mutex_enter(&page_llock);
	for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) {

		ASSERT(PAGE_LOCKED(ppa[pg_idx]));
		ASSERT(ppa[pg_idx]->p_cowcnt != 0);
		if (ppa[pg_idx]->p_lckcnt == (ushort_t)PAGE_LOCK_MAXIMUM) {
			mutex_exit(&page_llock);
			return (0);
		}
		if (ppa[pg_idx]->p_lckcnt != 0)
			ulckpgs++;
	}

	if (ulckpgs != 0) {
		mutex_enter(&freemem_lock);
		availrmem += ulckpgs;
		pages_claimed -= ulckpgs;
		mutex_exit(&freemem_lock);
	}

	/* Second pass: commit the claim-to-lock conversion. */
	for (pg_idx = 0; ppa[pg_idx] != NULL; pg_idx++) {
		ppa[pg_idx]->p_cowcnt--;
		ppa[pg_idx]->p_lckcnt++;

	}
	mutex_exit(&page_llock);
	return (1);
}

/*
 * Look up the page_t for physical page 'pfnum' and acquire an
 * 'se' lock on it (reclaiming from the free list if needed).
 * Returns NULL if no page exists; retries if the page's identity
 * changes while waiting for the lock.
 */
page_t *
page_numtopp(pfn_t pfnum, se_t se)
{
	page_t *pp;

retry:
	pp = page_numtopp_nolock(pfnum);
	if (pp == NULL) {
		return ((page_t *)NULL);
	}

	/*
	 * Acquire the appropriate lock on the page.
	 */
	while (!page_lock(pp, se, (kmutex_t *)NULL, P_RECLAIM)) {
		if (page_pptonum(pp) != pfnum)
			goto retry;
		continue;
	}

	/* Re-verify identity now that the lock is held. */
	if (page_pptonum(pp) != pfnum) {
		page_unlock(pp);
		goto retry;
	}

	return (pp);
}

/*
 * Same as page_numtopp() but passes P_NO_RECLAIM, so a free page
 * is not reclaimed from the free list while acquiring the lock.
 */
page_t *
page_numtopp_noreclaim(pfn_t pfnum, se_t se)
{
	page_t *pp;

retry:
	pp = page_numtopp_nolock(pfnum);
	if (pp == NULL) {
		return ((page_t *)NULL);
	}

	/*
	 * Acquire the appropriate lock on the page.
	 */
	while (!page_lock(pp, se, (kmutex_t *)NULL, P_NO_RECLAIM)) {
		if (page_pptonum(pp) != pfnum)
			goto retry;
		continue;
	}

	/* Re-verify identity now that the lock is held. */
	if (page_pptonum(pp) != pfnum) {
		page_unlock(pp);
		goto retry;
	}

	return (pp);
}

/*
 * This routine is like page_numtopp, but will only return page structs
 * for pages which are ok for loading into hardware using the page struct.
 * It never blocks: free pages and pages whose lock cannot be acquired
 * immediately yield NULL.
 */
page_t *
page_numtopp_nowait(pfn_t pfnum, se_t se)
{
	page_t *pp;

retry:
	pp = page_numtopp_nolock(pfnum);
	if (pp == NULL) {
		return ((page_t *)NULL);
	}

	/*
	 * Try to acquire the appropriate lock on the page.
	 */
	if (PP_ISFREE(pp))
		pp = NULL;
	else {
		if (!page_trylock(pp, se))
			pp = NULL;
		else {
			/* Re-verify identity and free state under the lock. */
			if (page_pptonum(pp) != pfnum) {
				page_unlock(pp);
				goto retry;
			}
			if (PP_ISFREE(pp)) {
				page_unlock(pp);
				pp = NULL;
			}
		}
	}
	return (pp);
}

/*
 * Returns a count of dirty pages that are in the process
 * of being written out. If 'cleanit' is set, try to push the page.
 */
pgcnt_t
page_busy(int cleanit)
{
	page_t *page0 = page_first();	/* loop sentinel */
	page_t *pp = page0;
	pgcnt_t nppbusy = 0;		/* count of dirty candidates seen */
	u_offset_t off;

	/* Walk every page in the system exactly once. */
	do {
		vnode_t *vp = pp->p_vnode;

		/*
		 * A page is a candidate for syncing if it is:
		 *
		 * (a)	On neither the freelist nor the cachelist
		 * (b)	Hashed onto a vnode
		 * (c)	Not a kernel page
		 * (d)	Dirty
		 * (e)	Not part of a swapfile
		 * (f)	a page which belongs to a real vnode; eg has a non-null
		 *	v_vfsp pointer.
		 * (g)	Backed by a filesystem which doesn't have a
		 *	stubbed-out sync operation
		 */
		if (!PP_ISFREE(pp) && vp != NULL && vp != &kvp &&
		    hat_ismod(pp) && !IS_SWAPVP(vp) && vp->v_vfsp != NULL &&
		    vfs_can_sync(vp->v_vfsp)) {
			nppbusy++;
			vfs_syncprogress();

			if (!cleanit)
				continue;
			if (!page_trylock(pp, SE_EXCL))
				continue;

			/*
			 * Re-verify the candidate conditions under the
			 * exclusive lock, and skip pages locked down or
			 * whose hardware mod bit has since cleared.
			 */
			if (PP_ISFREE(pp) || vp == NULL || IS_SWAPVP(vp) ||
			    pp->p_lckcnt != 0 || pp->p_cowcnt != 0 ||
			    !(hat_pagesync(pp,
			    HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD) & P_MOD)) {
				page_unlock(pp);
				continue;
			}
			/*
			 * Hold the vnode across the putpage so it stays
			 * valid after the page lock is dropped.
			 */
			off = pp->p_offset;
			VN_HOLD(vp);
			page_unlock(pp);
			(void) VOP_PUTPAGE(vp, off, PAGESIZE,
			    B_ASYNC | B_FREE, kcred);
			VN_RELE(vp);
		}
	} while ((pp = page_next(pp)) != page0);

	return (nppbusy);
}

void page_invalidate_pages(void);

/*
 * callback handler to vm sub-system
 *
 * callers make
sure no recursive entries to this func.
 */
/*ARGSUSED*/
boolean_t
callb_vm_cpr(void *arg, int code)
{
	/* On a CPR checkpoint, flush/invalidate all pages. */
	if (code == CB_CODE_CPR_CHKPT)
		page_invalidate_pages();
	return (B_TRUE);
}

/*
 * Invalidate all pages of the system.
 * It shouldn't be called until all user page activities are all stopped.
 */
void
page_invalidate_pages()
{
	page_t *pp;
	page_t *page0;			/* loop sentinel */
	pgcnt_t nbusypages;		/* pages we failed to trylock */
	int retry = 0;
	const int MAXRETRIES = 4;	/* bounded retries for busy pages */
#if defined(__sparc)
	extern struct vnode prom_ppages;
#endif /* __sparc */

top:
	/*
	 * Flush dirty pages and destroy the clean ones.
	 */
	nbusypages = 0;

	pp = page0 = page_first();
	do {
		struct vnode	*vp;
		u_offset_t	offset;
		int		mod;

		/*
		 * skip the page if it has no vnode or the page associated
		 * with the kernel vnode or prom allocated kernel mem.
		 */
#if defined(__sparc)
		if ((vp = pp->p_vnode) == NULL || vp == &kvp ||
		    vp == &prom_ppages)
#else /* x86 doesn't have prom or prom_ppage */
		if ((vp = pp->p_vnode) == NULL || vp == &kvp)
#endif /* __sparc */
			continue;

		/*
		 * skip the page which is already free invalidated.
		 */
		if (PP_ISFREE(pp) && PP_ISAGED(pp))
			continue;

		/*
		 * skip pages that are already locked or can't be "exclusively"
		 * locked or are already free.  After we lock the page, check
		 * the free and age bits again to be sure it's not destroyed
		 * yet.
		 * To achieve max. parallelization, we use page_trylock instead
		 * of page_lock so that we don't get block on individual pages
		 * while we have thousands of other pages to process.
		 */
		if (!page_trylock(pp, SE_EXCL)) {
			nbusypages++;
			continue;
		} else if (PP_ISFREE(pp)) {
			if (!PP_ISAGED(pp)) {
				page_destroy_free(pp);
			} else {
				page_unlock(pp);
			}
			continue;
		}
		/*
		 * Is this page involved in some I/O? shared?
		 *
		 * The page_struct_lock need not be acquired to
		 * examine these fields since the page has an
		 * "exclusive" lock.
		 */
		if (pp->p_lckcnt != 0 || pp->p_cowcnt != 0) {
			page_unlock(pp);
			continue;
		}

		if (vp->v_type == VCHR) {
			panic("vp->v_type == VCHR");
			/*NOTREACHED*/
		}

		/* Break up any large page before invalidating. */
		if (!page_try_demote_pages(pp)) {
			page_unlock(pp);
			continue;
		}

		/*
		 * Check the modified bit. Leave the bits alone in hardware
		 * (they will be modified if we do the putpage).
		 */
		mod = (hat_pagesync(pp, HAT_SYNC_DONTZERO | HAT_SYNC_STOPON_MOD)
		    & P_MOD);
		if (mod) {
			offset = pp->p_offset;
			/*
			 * Hold the vnode before releasing the page lock
			 * to prevent it from being freed and re-used by
			 * some other thread.
			 */
			VN_HOLD(vp);
			page_unlock(pp);
			/*
			 * No error return is checked here. Callers such as
			 * cpr deals with the dirty pages at the dump time
			 * if this putpage fails.
			 */
			(void) VOP_PUTPAGE(vp, offset, PAGESIZE, B_INVAL,
			    kcred);
			VN_RELE(vp);
		} else {
			page_destroy(pp, 0);
		}
	} while ((pp = page_next(pp)) != page0);
	/* Busy pages may become lockable shortly; retry a few times. */
	if (nbusypages && retry++ < MAXRETRIES) {
		delay(1);
		goto top;
	}
}

/*
 * Replace the page "old" with the page "new" on the page hash and vnode lists
 *
 * the replacement must be done in place, ie the equivalent sequence:
 *
 *	vp = old->p_vnode;
 *	off = old->p_offset;
 *	page_do_hashout(old)
 *	page_do_hashin(new, vp, off)
 *
 * doesn't work, since
 *  1) if old is the only page on the vnode, the v_pages list has a window
 *     where it looks empty. This will break file system assumptions.
 * and
 *  2) pvn_vplist_dirty() can't deal with pages moving on the v_pages list.
 */
static void
page_do_relocate_hash(page_t *new, page_t *old)
{
	page_t	**hash_list;
	vnode_t	*vp = old->p_vnode;
	kmutex_t	*sep;

	ASSERT(PAGE_EXCL(old));
	ASSERT(PAGE_EXCL(new));
	ASSERT(vp != NULL);
	ASSERT(MUTEX_HELD(page_vnode_mutex(vp)));
	ASSERT(MUTEX_HELD(PAGE_HASH_MUTEX(PAGE_HASH_FUNC(vp, old->p_offset))));

	/*
	 * First find old page on the page hash list
	 */
	hash_list = &page_hash[PAGE_HASH_FUNC(vp, old->p_offset)];

	for (;;) {
		if (*hash_list == old)
			break;
		if (*hash_list == NULL) {
			panic("page_do_hashout");
			/*NOTREACHED*/
		}
		hash_list = &(*hash_list)->p_hash;
	}

	/*
	 * update new and replace old with new on the page hash list
	 */
	new->p_vnode = old->p_vnode;
	new->p_offset = old->p_offset;
	new->p_hash = old->p_hash;
	*hash_list = new;

	if ((new->p_vnode->v_flag & VISSWAP) != 0)
		PP_SETSWAP(new);

	/*
	 * replace old with new on the vnode's page list
	 */
	if (old->p_vpnext == old) {
		/* old was the only page on the list */
		new->p_vpnext = new;
		new->p_vpprev = new;
	} else {
		new->p_vpnext = old->p_vpnext;
		new->p_vpprev = old->p_vpprev;
		new->p_vpnext->p_vpprev = new;
		new->p_vpprev->p_vpnext = new;
	}
	if (vp->v_pages == old)
		vp->v_pages = new;

	/*
	 * clear out the old page
	 */
	old->p_hash = NULL;
	old->p_vpnext = NULL;
	old->p_vpprev = NULL;
	old->p_vnode = NULL;
	PP_CLRSWAP(old);
	old->p_offset = (u_offset_t)-1;
	page_clr_all_props(old);

	/*
	 * Wake up processes waiting for this page.  The page's
	 * identity has been changed, and is probably not the
	 * desired page any longer.
	 */
	sep = page_se_mutex(old);
	mutex_enter(sep);
	if (CV_HAS_WAITERS(&old->p_cv))
		cv_broadcast(&old->p_cv);
	mutex_exit(sep);
}

/*
 * This function moves the identity of page "pp_old" to page "pp_new".
 * Both pages must be locked on entry.  "pp_new" is free, has no identity,
 * and need not be hashed out from anywhere.
 */
void
page_relocate_hash(page_t *pp_new, page_t *pp_old)
{
	vnode_t *vp = pp_old->p_vnode;
	u_offset_t off = pp_old->p_offset;
	kmutex_t *phm, *vphm;

	/*
	 * Rehash two pages
	 */
	ASSERT(PAGE_EXCL(pp_old));
	ASSERT(PAGE_EXCL(pp_new));
	ASSERT(vp != NULL);
	ASSERT(pp_new->p_vnode == NULL);

	/*
	 * hashout then hashin while holding the mutexes
	 */
	phm = PAGE_HASH_MUTEX(PAGE_HASH_FUNC(vp, off));
	mutex_enter(phm);
	vphm = page_vnode_mutex(vp);
	mutex_enter(vphm);

	page_do_relocate_hash(pp_new, pp_old);

	mutex_exit(vphm);
	mutex_exit(phm);

	/*
	 * The page_struct_lock need not be acquired for lckcnt and
	 * cowcnt since the page has an "exclusive" lock.
	 */
	ASSERT(pp_new->p_lckcnt == 0);
	ASSERT(pp_new->p_cowcnt == 0);
	pp_new->p_lckcnt = pp_old->p_lckcnt;
	pp_new->p_cowcnt = pp_old->p_cowcnt;
	pp_old->p_lckcnt = pp_old->p_cowcnt = 0;

	/* The following comment preserved from page_flip(). */
	/* XXX - Do we need to protect fsdata? */
	pp_new->p_fsdata = pp_old->p_fsdata;
}

/*
 * Helper routine used to lock all remaining members of a
 * large page. The caller is responsible for passing in a locked
 * pp. If pp is a large page, then it succeeds in locking all the
 * remaining constituent pages or it returns with only the
 * original page locked.
 *
 * Returns 1 on success, 0 on failure.
 *
 * If success is returned this routine guarantees p_szc for all constituent
 * pages of a large page pp belongs to can't change.
 To achieve this we
 * recheck szc of pp after locking all constituent pages and retry if szc
 * changed (it could only decrease). Since hat_page_demote() needs an EXCL
 * lock on one of constituent pages it can't be running after all constituent
 * pages are locked. hat_page_demote() with a lock on a constituent page
 * outside of this large page (i.e. pp belonged to a larger large page) is
 * already done with all constituent pages of pp since the root's p_szc is
 * changed last. Therefore no need to synchronize with hat_page_demote() that
 * locked a constituent page outside of pp's current large page.
 */
#ifdef DEBUG
/* DEBUG-only fault injection: nonzero makes trylock fail pseudo-randomly */
uint32_t gpg_trylock_mtbf = 0;
#endif

int
group_page_trylock(page_t *pp, se_t se)
{
	page_t *tpp;
	pgcnt_t npgs, i, j;
	uint_t pszc = pp->p_szc;	/* snapshot of szc; rechecked below */

#ifdef DEBUG
	if (gpg_trylock_mtbf && !(gethrtime() % gpg_trylock_mtbf)) {
		return (0);
	}
#endif

	/* pp must be the group leader (base constituent page). */
	if (pp != PP_GROUPLEADER(pp, pszc)) {
		return (0);
	}

retry:
	ASSERT(PAGE_LOCKED_SE(pp, se));
	ASSERT(!PP_ISFREE(pp));
	if (pszc == 0) {
		/* Not a large page: nothing else to lock. */
		return (1);
	}
	npgs = page_get_pagecnt(pszc);
	tpp = pp + 1;
	for (i = 1; i < npgs; i++, tpp++) {
		if (!page_trylock(tpp, se)) {
			/*
			 * Could not get one constituent lock; back out all
			 * locks taken so far (but not pp itself).
			 */
			tpp = pp + 1;
			for (j = 1; j < i; j++, tpp++) {
				page_unlock(tpp);
			}
			return (0);
		}
	}
	if (pp->p_szc != pszc) {
		/*
		 * szc decreased while we were acquiring the constituent
		 * locks; drop them all and retry at the new size.
		 */
		ASSERT(pp->p_szc < pszc);
		ASSERT(pp->p_vnode != NULL && pp->p_vnode != &kvp &&
		    !IS_SWAPFSVP(pp->p_vnode));
		tpp = pp + 1;
		for (i = 1; i < npgs; i++, tpp++) {
			page_unlock(tpp);
		}
		pszc = pp->p_szc;
		goto retry;
	}
	return (1);
}

/*
 * Unlock all constituent pages of the large page except the root page
 * pp itself, which stays locked for the caller.
 */
void
group_page_unlock(page_t *pp)
{
	page_t *tpp;
	pgcnt_t npgs, i;

	ASSERT(PAGE_LOCKED(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp == PP_PAGEROOT(pp));
	npgs = page_get_pagecnt(pp->p_szc);
	for (i = 1, tpp = pp + 1; i < npgs; i++, tpp++) {
		page_unlock(tpp);
	}
}

/*
 * returns
 * 0		: on success and *nrelocp is number of relocated PAGESIZE pages
 * ERANGE	: this is not a base page
 * EBUSY	: failure to get locks on the page/pages
 * ENOMEM	: failure to obtain replacement pages
 * EAGAIN	: OBP has not yet completed its boot-time handoff to the kernel
 *
 * Return with all constituent members of target and replacement
 * SE_EXCL locked. It is the callers responsibility to drop the
 * locks.
 */
int
do_page_relocate(
	page_t **target,
	page_t **replacement,
	int grouplock,
	spgcnt_t *nrelocp,
	lgrp_t *lgrp)
{
#ifdef DEBUG
	page_t *first_repl;
#endif /* DEBUG */
	page_t *repl;
	page_t *targ;
	page_t *pl = NULL;
	uint_t ppattr;
	pfn_t pfn, repl_pfn;
	uint_t szc;
	spgcnt_t npgs, i;
	int repl_contig = 0;
	uint_t flags = 0;
	spgcnt_t dofree = 0;	/* nonzero iff we allocated the replacement */

	*nrelocp = 0;

#if defined(__sparc)
	/*
	 * We need to wait till OBP has completed
	 * its boot-time handoff of its resources to the kernel
	 * before we allow page relocation
	 */
	if (page_relocate_ready == 0) {
		return (EAGAIN);
	}
#endif

	/*
	 * If this is not a base page,
	 * just return with 0x0 pages relocated.
	 */
	targ = *target;
	ASSERT(PAGE_EXCL(targ));
	ASSERT(!PP_ISFREE(targ));
	szc = targ->p_szc;
	ASSERT(szc < mmu_page_sizes);
	VM_STAT_ADD(vmm_vmstats.ppr_reloc[szc]);
	pfn = targ->p_pagenum;
	if (pfn != PFN_BASE(pfn, szc)) {
		VM_STAT_ADD(vmm_vmstats.ppr_relocnoroot[szc]);
		return (ERANGE);
	}

	/*
	 * A caller-supplied replacement of at least the target's size must
	 * itself be a base page; if so we can walk it by simple increment
	 * (repl_contig) rather than following p_next.
	 */
	if ((repl = *replacement) != NULL && repl->p_szc >= szc) {
		repl_pfn = repl->p_pagenum;
		if (repl_pfn != PFN_BASE(repl_pfn, szc)) {
			VM_STAT_ADD(vmm_vmstats.ppr_reloc_replnoroot[szc]);
			return (ERANGE);
		}
		repl_contig = 1;
	}

	/*
	 * We must lock all members of this large page or we cannot
	 * relocate any part of it.
	 */
	if (grouplock != 0 && !group_page_trylock(targ, SE_EXCL)) {
		VM_STAT_ADD(vmm_vmstats.ppr_relocnolock[targ->p_szc]);
		return (EBUSY);
	}

	/*
	 * reread szc it could have been decreased before
	 * group_page_trylock() was done.
	 */
	szc = targ->p_szc;
	ASSERT(szc < mmu_page_sizes);
	VM_STAT_ADD(vmm_vmstats.ppr_reloc[szc]);
	ASSERT(pfn == PFN_BASE(pfn, szc));

	npgs = page_get_pagecnt(targ->p_szc);

	if (repl == NULL) {
		dofree = npgs;		/* Size of target page in MMU pages */
		if (!page_create_wait(dofree, 0)) {
			if (grouplock != 0) {
				group_page_unlock(targ);
			}
			VM_STAT_ADD(vmm_vmstats.ppr_relocnomem[szc]);
			return (ENOMEM);
		}

		/*
		 * seg kmem pages require that the target and replacement
		 * page be the same pagesize.
		 */
		flags = (targ->p_vnode == &kvp) ? PGR_SAMESZC : 0;
		repl = page_get_replacement_page(targ, lgrp, flags);
		if (repl == NULL) {
			if (grouplock != 0) {
				group_page_unlock(targ);
			}
			/* give back the reservation taken above */
			page_create_putback(dofree);
			VM_STAT_ADD(vmm_vmstats.ppr_relocnomem[szc]);
			return (ENOMEM);
		}
	}
#ifdef DEBUG
	else {
		ASSERT(PAGE_LOCKED(repl));
	}
#endif /* DEBUG */

#if defined(__sparc)
	/*
	 * Let hat_page_relocate() complete the relocation if it's kernel page
	 */
	if (targ->p_vnode == &kvp) {
		*replacement = repl;
		if (hat_page_relocate(target, replacement, nrelocp) != 0) {
			if (grouplock != 0) {
				group_page_unlock(targ);
			}
			if (dofree) {
				*replacement = NULL;
				page_free_replacement_page(repl);
				page_create_putback(dofree);
			}
			VM_STAT_ADD(vmm_vmstats.ppr_krelocfail[szc]);
			return (EAGAIN);
		}
		VM_STAT_ADD(vmm_vmstats.ppr_relocok[szc]);
		return (0);
	}
#else
#if defined(lint)
	dofree = dofree;
#endif
#endif

#ifdef DEBUG
	first_repl = repl;
#endif /* DEBUG */

	for (i = 0; i < npgs; i++) {
		ASSERT(PAGE_EXCL(targ));

		(void) hat_pageunload(targ, HAT_FORCE_PGUNLOAD);

		ASSERT(hat_page_getshare(targ) == 0);
		ASSERT(!PP_ISFREE(targ));
		ASSERT(targ->p_pagenum == (pfn + i));
		ASSERT(repl_contig == 0 ||
		    repl->p_pagenum == (repl_pfn + i));

		/*
		 * Copy the page contents and attributes then
		 * relocate the page in the page hash.
		 */
		ppcopy(targ, repl);
		ppattr = hat_page_getattr(targ, (P_MOD | P_REF | P_RO));
		page_clr_all_props(repl);
		page_set_props(repl, ppattr);
		page_relocate_hash(repl, targ);

		ASSERT(hat_page_getshare(targ) == 0);
		ASSERT(hat_page_getshare(repl) == 0);
		/*
		 * Now clear the props on targ, after the
		 * page_relocate_hash(), they no longer
		 * have any meaning.
		 */
		page_clr_all_props(targ);
		ASSERT(targ->p_next == targ);
		ASSERT(targ->p_prev == targ);
		page_list_concat(&pl, &targ);

		targ++;
		if (repl_contig != 0) {
			repl++;
		} else {
			repl = repl->p_next;
		}
	}
	/* assert that we have come full circle with repl */
	ASSERT(repl_contig == 1 || first_repl == repl);

	*target = pl;
	if (*replacement == NULL) {
		ASSERT(first_repl == repl);
		*replacement = repl;
	}
	VM_STAT_ADD(vmm_vmstats.ppr_relocok[szc]);
	*nrelocp = npgs;
	return (0);
}
/*
 * On success returns 0 and *nrelocp the number of PAGESIZE pages relocated.
52367c478bd9Sstevel@tonic-gate */ 52377c478bd9Sstevel@tonic-gate int 52387c478bd9Sstevel@tonic-gate page_relocate( 52397c478bd9Sstevel@tonic-gate page_t **target, 52407c478bd9Sstevel@tonic-gate page_t **replacement, 52417c478bd9Sstevel@tonic-gate int grouplock, 52427c478bd9Sstevel@tonic-gate int freetarget, 52437c478bd9Sstevel@tonic-gate spgcnt_t *nrelocp, 52447c478bd9Sstevel@tonic-gate lgrp_t *lgrp) 52457c478bd9Sstevel@tonic-gate { 52467c478bd9Sstevel@tonic-gate spgcnt_t ret; 52477c478bd9Sstevel@tonic-gate 52487c478bd9Sstevel@tonic-gate /* do_page_relocate returns 0 on success or errno value */ 52497c478bd9Sstevel@tonic-gate ret = do_page_relocate(target, replacement, grouplock, nrelocp, lgrp); 52507c478bd9Sstevel@tonic-gate 52517c478bd9Sstevel@tonic-gate if (ret != 0 || freetarget == 0) { 52527c478bd9Sstevel@tonic-gate return (ret); 52537c478bd9Sstevel@tonic-gate } 52547c478bd9Sstevel@tonic-gate if (*nrelocp == 1) { 52557c478bd9Sstevel@tonic-gate ASSERT(*target != NULL); 52567c478bd9Sstevel@tonic-gate page_free(*target, 1); 52577c478bd9Sstevel@tonic-gate } else { 52587c478bd9Sstevel@tonic-gate page_t *tpp = *target; 52597c478bd9Sstevel@tonic-gate uint_t szc = tpp->p_szc; 52607c478bd9Sstevel@tonic-gate pgcnt_t npgs = page_get_pagecnt(szc); 52617c478bd9Sstevel@tonic-gate ASSERT(npgs > 1); 52627c478bd9Sstevel@tonic-gate ASSERT(szc != 0); 52637c478bd9Sstevel@tonic-gate do { 52647c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp)); 52657c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp)); 52667c478bd9Sstevel@tonic-gate ASSERT(tpp->p_szc == szc); 52677c478bd9Sstevel@tonic-gate PP_SETFREE(tpp); 52687c478bd9Sstevel@tonic-gate PP_SETAGED(tpp); 52697c478bd9Sstevel@tonic-gate npgs--; 52707c478bd9Sstevel@tonic-gate } while ((tpp = tpp->p_next) != *target); 52717c478bd9Sstevel@tonic-gate ASSERT(npgs == 0); 52727c478bd9Sstevel@tonic-gate page_list_add_pages(*target, 0); 52737c478bd9Sstevel@tonic-gate npgs = page_get_pagecnt(szc); 52747c478bd9Sstevel@tonic-gate 
page_create_putback(npgs); 52757c478bd9Sstevel@tonic-gate } 52767c478bd9Sstevel@tonic-gate return (ret); 52777c478bd9Sstevel@tonic-gate } 52787c478bd9Sstevel@tonic-gate 52797c478bd9Sstevel@tonic-gate /* 52807c478bd9Sstevel@tonic-gate * it is up to the caller to deal with pcf accounting. 52817c478bd9Sstevel@tonic-gate */ 52827c478bd9Sstevel@tonic-gate void 52837c478bd9Sstevel@tonic-gate page_free_replacement_page(page_t *pplist) 52847c478bd9Sstevel@tonic-gate { 52857c478bd9Sstevel@tonic-gate page_t *pp; 52867c478bd9Sstevel@tonic-gate 52877c478bd9Sstevel@tonic-gate while (pplist != NULL) { 52887c478bd9Sstevel@tonic-gate /* 52897c478bd9Sstevel@tonic-gate * pp_targ is a linked list. 52907c478bd9Sstevel@tonic-gate */ 52917c478bd9Sstevel@tonic-gate pp = pplist; 52927c478bd9Sstevel@tonic-gate if (pp->p_szc == 0) { 52937c478bd9Sstevel@tonic-gate page_sub(&pplist, pp); 52947c478bd9Sstevel@tonic-gate page_clr_all_props(pp); 52957c478bd9Sstevel@tonic-gate PP_SETFREE(pp); 52967c478bd9Sstevel@tonic-gate PP_SETAGED(pp); 52977c478bd9Sstevel@tonic-gate page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL); 52987c478bd9Sstevel@tonic-gate page_unlock(pp); 52997c478bd9Sstevel@tonic-gate VM_STAT_ADD(pagecnt.pc_free_replacement_page[0]); 53007c478bd9Sstevel@tonic-gate } else { 53017c478bd9Sstevel@tonic-gate spgcnt_t curnpgs = page_get_pagecnt(pp->p_szc); 53027c478bd9Sstevel@tonic-gate page_t *tpp; 53037c478bd9Sstevel@tonic-gate page_list_break(&pp, &pplist, curnpgs); 53047c478bd9Sstevel@tonic-gate tpp = pp; 53057c478bd9Sstevel@tonic-gate do { 53067c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp)); 53077c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(tpp)); 53087c478bd9Sstevel@tonic-gate page_clr_all_props(pp); 53097c478bd9Sstevel@tonic-gate PP_SETFREE(tpp); 53107c478bd9Sstevel@tonic-gate PP_SETAGED(tpp); 53117c478bd9Sstevel@tonic-gate } while ((tpp = tpp->p_next) != pp); 53127c478bd9Sstevel@tonic-gate page_list_add_pages(pp, 0); 53137c478bd9Sstevel@tonic-gate 
VM_STAT_ADD(pagecnt.pc_free_replacement_page[1]); 53147c478bd9Sstevel@tonic-gate } 53157c478bd9Sstevel@tonic-gate } 53167c478bd9Sstevel@tonic-gate } 53177c478bd9Sstevel@tonic-gate 53187c478bd9Sstevel@tonic-gate /* 53197c478bd9Sstevel@tonic-gate * Relocate target to non-relocatable replacement page. 53207c478bd9Sstevel@tonic-gate */ 53217c478bd9Sstevel@tonic-gate int 53227c478bd9Sstevel@tonic-gate page_relocate_cage(page_t **target, page_t **replacement) 53237c478bd9Sstevel@tonic-gate { 53247c478bd9Sstevel@tonic-gate page_t *tpp, *rpp; 53257c478bd9Sstevel@tonic-gate spgcnt_t pgcnt, npgs; 53267c478bd9Sstevel@tonic-gate int result; 53277c478bd9Sstevel@tonic-gate 53287c478bd9Sstevel@tonic-gate tpp = *target; 53297c478bd9Sstevel@tonic-gate 53307c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(tpp)); 53317c478bd9Sstevel@tonic-gate ASSERT(tpp->p_szc == 0); 53327c478bd9Sstevel@tonic-gate 53337c478bd9Sstevel@tonic-gate pgcnt = btop(page_get_pagesize(tpp->p_szc)); 53347c478bd9Sstevel@tonic-gate 53357c478bd9Sstevel@tonic-gate do { 53367c478bd9Sstevel@tonic-gate (void) page_create_wait(pgcnt, PG_WAIT | PG_NORELOC); 53377c478bd9Sstevel@tonic-gate rpp = page_get_replacement_page(tpp, NULL, PGR_NORELOC); 53387c478bd9Sstevel@tonic-gate if (rpp == NULL) { 53397c478bd9Sstevel@tonic-gate page_create_putback(pgcnt); 53407c478bd9Sstevel@tonic-gate kcage_cageout_wakeup(); 53417c478bd9Sstevel@tonic-gate } 53427c478bd9Sstevel@tonic-gate } while (rpp == NULL); 53437c478bd9Sstevel@tonic-gate 53447c478bd9Sstevel@tonic-gate ASSERT(PP_ISNORELOC(rpp)); 53457c478bd9Sstevel@tonic-gate 53467c478bd9Sstevel@tonic-gate result = page_relocate(&tpp, &rpp, 0, 1, &npgs, NULL); 53477c478bd9Sstevel@tonic-gate 53487c478bd9Sstevel@tonic-gate if (result == 0) { 53497c478bd9Sstevel@tonic-gate *replacement = rpp; 53507c478bd9Sstevel@tonic-gate if (pgcnt != npgs) 53517c478bd9Sstevel@tonic-gate panic("page_relocate_cage: partial relocation"); 53527c478bd9Sstevel@tonic-gate } 53537c478bd9Sstevel@tonic-gate 
	return (result);
}

/*
 * Release the page lock on a page, place on cachelist
 * tail if no longer mapped. Caller can let us know if
 * the page is known to be clean.
 */
int
page_release(page_t *pp, int checkmod)
{
	int status;

	ASSERT(PAGE_LOCKED(pp) && !PP_ISFREE(pp) &&
	    (pp->p_vnode != NULL));

	if (!hat_page_is_mapped(pp) && !IS_SWAPVP(pp->p_vnode) &&
	    ((PAGE_SHARED(pp) && page_tryupgrade(pp)) || PAGE_EXCL(pp)) &&
	    pp->p_lckcnt == 0 && pp->p_cowcnt == 0 &&
	    !hat_page_is_mapped(pp)) {

		/*
		 * If page is modified, unlock it
		 *
		 * (p_nrm & P_MOD) bit has the latest stuff because:
		 * (1) We found that this page doesn't have any mappings
		 *	_after_ holding SE_EXCL and
		 * (2) We didn't drop SE_EXCL lock after the check in (1)
		 */
		if (checkmod && hat_ismod(pp)) {
			page_unlock(pp);
			status = PGREL_MOD;
		} else {
			/*LINTED: constant in conditional context*/
			VN_DISPOSE(pp, B_FREE, 0, kcred);
			status = PGREL_CLEAN;
		}
	} else {
		page_unlock(pp);
		status = PGREL_NOTREL;
	}
	return (status);
}

/*
 * Attempt to demote the large page that pp is a constituent of down to
 * PAGESIZE pages.  pp must be locked SE_EXCL on entry.
 * Returns 1 on success (p_szc is now 0), 0 on failure.
 */
int
page_try_demote_pages(page_t *pp)
{
	page_t *tpp, *rootpp = pp;
	pfn_t	pfn = page_pptonum(pp);
	spgcnt_t i, npgs;
	uint_t	szc = pp->p_szc;
	vnode_t *vp = pp->p_vnode;

	ASSERT(PAGE_EXCL(rootpp));

	VM_STAT_ADD(pagecnt.pc_try_demote_pages[0]);

	if (rootpp->p_szc == 0) {
		/* Already a PAGESIZE page; nothing to do. */
		VM_STAT_ADD(pagecnt.pc_try_demote_pages[1]);
		return (1);
	}

	if (vp != NULL && !IS_SWAPFSVP(vp) && vp != &kvp) {
		/*
		 * Regular file system page: demote via hat_page_demote()
		 * (only an EXCL lock on pp is needed in that path).
		 */
		VM_STAT_ADD(pagecnt.pc_try_demote_pages[2]);
		page_demote_vp_pages(rootpp);
		ASSERT(pp->p_szc == 0);
		return (1);
	}

	/*
	 * Adjust rootpp if passed in is not the base
	 * constituent page.
	 */
	npgs = page_get_pagecnt(rootpp->p_szc);
	ASSERT(npgs > 1);
	if (!IS_P2ALIGNED(pfn, npgs)) {
		pfn = P2ALIGN(pfn, npgs);
		rootpp = page_numtopp_nolock(pfn);
		VM_STAT_ADD(pagecnt.pc_try_demote_pages[3]);
		ASSERT(rootpp->p_vnode != NULL);
		ASSERT(rootpp->p_szc == szc);
	}

	/*
	 * We can't demote kernel pages since we can't hat_unload()
	 * the mappings.
	 */
	if (rootpp->p_vnode == &kvp)
		return (0);

	/*
	 * Attempt to lock all constituent pages except the page passed
	 * in since it's already locked.
	 */
	for (tpp = rootpp, i = 0; i < npgs; i++, tpp++) {
		ASSERT(!PP_ISFREE(tpp));
		ASSERT(tpp->p_vnode != NULL);

		if (tpp != pp && !page_trylock(tpp, SE_EXCL))
			break;
		ASSERT(tpp->p_szc == rootpp->p_szc);
		ASSERT(page_pptonum(tpp) == page_pptonum(rootpp) + i);
		(void) hat_pageunload(tpp, HAT_FORCE_PGUNLOAD);
	}

	/*
	 * If we failed to lock them all then unlock what we have locked
	 * so far and bail.
	 */
	if (i < npgs) {
		tpp = rootpp;
		while (i-- > 0) {
			if (tpp != pp)
				page_unlock(tpp);
			tpp++;
		}
		VM_STAT_ADD(pagecnt.pc_try_demote_pages[4]);
		return (0);
	}

	/*
	 * XXX probably p_szc clearing and page unlocking can be done within
	 * one loop but since this is rare code we can play very safe.
	 */
	for (tpp = rootpp, i = 0; i < npgs; i++, tpp++) {
		ASSERT(PAGE_EXCL(tpp));
		tpp->p_szc = 0;
	}

	/*
	 * Unlock all pages except the page passed in.
	 */
	for (tpp = rootpp, i = 0; i < npgs; i++, tpp++) {
		ASSERT(!hat_page_is_mapped(tpp));
		if (tpp != pp)
			page_unlock(tpp);
	}
	VM_STAT_ADD(pagecnt.pc_try_demote_pages[5]);
	return (1);
}

/*
 * Called by page_free() and page_destroy() to demote the page size code
 * (p_szc) to 0 (since we can't just put a single PAGESIZE page with non zero
 * p_szc on free list, neither can we just clear p_szc of a single page_t
 * within a large page since it will break other code that relies on p_szc
 * being the same for all page_t's of a large page). Anonymous pages should
 * never end up here because anon_map_getpages() cannot deal with p_szc
 * changes after a single constituent page is locked. While anonymous or
 * kernel large pages are demoted or freed the entire large page at a time
 * with all constituent pages locked EXCL for the file system pages we
 * have to be able to demote a large page (i.e. decrease all constituent pages
 * p_szc) with only just an EXCL lock on one of constituent pages.
 The reason
 * we can easily deal with anonymous page demotion the entire large page at a
 * time is that those operation originate at address space level and concern
 * the entire large page region with actual demotion only done when pages are
 * not shared with any other processes (therefore we can always get EXCL lock
 * on all anonymous constituent pages after clearing segment page
 * cache). However file system pages can be truncated or invalidated at a
 * PAGESIZE level from the file system side and end up in page_free() or
 * page_destroy() (we also allow only part of the large page to be SOFTLOCKed
 * and therefore pageout should be able to demote a large page by EXCL locking
 * any constituent page that is not under SOFTLOCK). In those cases we cannot
 * rely on being able to lock EXCL all constituent pages.
 *
 * To prevent szc changes on file system pages one has to lock all constituent
 * pages at least SHARED (or call page_szc_lock()). The only subsystem that
 * doesn't rely on locking all constituent pages (or using page_szc_lock()) to
 * prevent szc changes is hat layer that uses its own page level mlist
 * locks. hat assumes that szc doesn't change after mlist lock for a page is
 * taken. Therefore we need to change szc under hat level locks if we only
 * have an EXCL lock on a single constituent page and hat still references any
 * of constituent pages. (Note we can't "ignore" hat layer by simply
 * hat_pageunload() all constituent pages without having EXCL locks on all of
 * constituent pages). We use hat_page_demote() call to safely demote szc of
 * all constituent pages under hat locks when we only have an EXCL lock on one
 * of constituent pages.
 *
 * This routine calls page_szc_lock() before calling hat_page_demote() to
 * allow segvn in one special case not to lock all constituent pages SHARED
 * before calling hat_memload_array() that relies on p_szc not changing even
 * before hat level mlist lock is taken. In that case segvn uses
 * page_szc_lock() to prevent hat_page_demote() changing p_szc values.
 *
 * Anonymous or kernel page demotion still has to lock all pages exclusively
 * and do hat_pageunload() on all constituent pages before demoting the page
 * therefore there's no need for anonymous or kernel page demotion to use
 * hat_page_demote() mechanism.
 *
 * hat_page_demote() removes all large mappings that map pp and then decreases
 * p_szc starting from the last constituent page of the large page. By working
 * from the tail of a large page in pfn decreasing order allows one looking at
 * the root page to know that hat_page_demote() is done for root's szc area.
 * e.g. if a root page has szc 1 one knows it only has to lock all constituent
 * pages within szc 1 area to prevent szc changes because hat_page_demote()
 * that started on this page when it had szc > 1 is done for this szc 1 area.
 *
 * We are guaranteed that all constituent pages of pp's large page belong to
 * the same vnode with the consecutive offsets increasing in the direction of
 * the pfn i.e. the identity of constituent pages can't change until their
 * p_szc is decreased. Therefore it's safe for hat_page_demote() to remove
 * large mappings to pp even though we don't lock any constituent page except
 * pp (i.e. we won't unload e.g. kernel locked page).
 */
static void
page_demote_vp_pages(page_t *pp)
{
	kmutex_t *mtx;

	ASSERT(PAGE_EXCL(pp));
	ASSERT(!PP_ISFREE(pp));
	ASSERT(pp->p_vnode != NULL);
	ASSERT(!IS_SWAPFSVP(pp->p_vnode));
	ASSERT(pp->p_vnode != &kvp);

	VM_STAT_ADD(pagecnt.pc_demote_pages[0]);

	/*
	 * NOTE(review): when page_szc_lock() returns NULL the demotion is
	 * skipped yet p_szc == 0 is still asserted below — presumably a
	 * NULL return means pp is already a PAGESIZE page; confirm against
	 * page_szc_lock()'s contract.
	 */
	mtx = page_szc_lock(pp);
	if (mtx != NULL) {
		hat_page_demote(pp);
		mutex_exit(mtx);
	}
	ASSERT(pp->p_szc == 0);
}
/*
 * Page retire operation.
 *
 * page_retire()
 * Attempt to retire (throw away) page pp. We cannot do this if
 * the page is dirty; if the page is clean, we can try. We return 0 on
 * success, -1 on failure. This routine should be invoked by the platform's
 * memory error detection code.
 *
 * pages_retired_limit_exceeded()
 * We set a limit on the number of pages which may be retired. This
 * is set to a percentage of total physical memory. This limit is
 * enforced here.
 */

/* Running count of pages currently retired; updated atomically. */
static pgcnt_t	retired_pgcnt = 0;

/*
 * routines to update the count of retired pages
 */

/* Mark pp retired and bump the retired-page count. */
static void
page_retired(page_t *pp)
{
	ASSERT(pp);

	page_settoxic(pp, PAGE_IS_RETIRED);
	atomic_add_long(&retired_pgcnt, 1);
}

/* Clear pp's toxic state and drop the retired-page count. */
static void
retired_page_removed(page_t *pp)
{
	ASSERT(pp);
	ASSERT(page_isretired(pp));
	ASSERT(retired_pgcnt > 0);

	page_clrtoxic(pp);
	atomic_add_long(&retired_pgcnt, -1);
}


/*
 * Return non-zero (TRUE) when no more pages may be retired, either
 * because max_pages_retired_bps is out of range or because the count
 * of retired pages has reached the configured fraction of physmem.
 */
static int
pages_retired_limit_exceeded()
{
	pgcnt_t	retired_max;

	/*
	 * If the percentage is zero or is not set correctly,
	 * return TRUE so that pages are not retired.
	 */
	if (max_pages_retired_bps <= 0 ||
	    max_pages_retired_bps >= 10000)
		return (1);

	/*
	 * Calculate the maximum number of pages allowed to
	 * be retired as a percentage of total physical memory
	 * (Remember that we are using basis points, hence the 10000.)
	 */
	retired_max = (physmem * max_pages_retired_bps) / 10000;

	/*
	 * return 'TRUE' if we have already retired more
	 * than the legal limit
	 */
	return (retired_pgcnt >= retired_max);
}

/* Outcome codes for page_retire(); index into page_retire_ops[]. */
#define	PAGE_RETIRE_SELOCK	0
#define	PAGE_RETIRE_NORECLAIM	1
#define	PAGE_RETIRE_LOCKED	2
#define	PAGE_RETIRE_COW		3
#define	PAGE_RETIRE_DIRTY	4
#define	PAGE_RETIRE_LPAGE	5
#define	PAGE_RETIRE_SUCCESS	6
#define	PAGE_RETIRE_LIMIT	7
#define	PAGE_RETIRE_NCODES	8

/* Per-outcome bookkeeping: count, whether to unlock pp, retval, message. */
typedef struct page_retire_op {
	int	pr_count;
	short	pr_unlock;
	short	pr_retval;
	char	*pr_message;
} page_retire_op_t;

page_retire_op_t page_retire_ops[PAGE_RETIRE_NCODES] = {
	{ 0,	0,	-1,	"cannot lock page" },
	{ 0,	0,	-1,	"cannot reclaim cached page" },
	{ 0,	1,	-1,	"page is locked" },
	{ 0,	1,	-1,	"copy-on-write page" },
	{ 0,	1,	-1,	"page is dirty" },
	{ 0,	1,	-1,	"cannot demote large page" },
	{ 0,	0,	0,	"page successfully retired" },
	{ 0,	0,	-1,	"excess pages retired already" },
};

/*
 * Common exit path for page_retire(): record the outcome, drop pp's
 * lock when the outcome table says to, optionally log, and return the
 * outcome's return value (0 success, -1 failure).
 */
static int
page_retire_done(page_t *pp, int code)
{
	page_retire_op_t *prop = &page_retire_ops[code];

	prop->pr_count++;

	if (prop->pr_unlock)
		page_unlock(pp);

	if (page_retire_messages > 1) {
		printf("page_retire(%p) pfn 0x%lx %s: %s\n",
		    (void *)pp, page_pptonum(pp),
		    prop->pr_retval == -1 ? "failed" : "succeeded",
		    prop->pr_message);
	}

	return (prop->pr_retval);
}

/*
 * Attempt to retire page pp. flag is PAGE_IS_TOXIC (schedule clearing)
 * or PAGE_IS_FAILING (retire now, subject to the retired-page limit).
 * Returns 0 on success, -1 on failure (see page_retire_ops[] messages).
 */
int
page_retire(page_t *pp, uchar_t flag)
{
	uint64_t pa = ptob((uint64_t)page_pptonum(pp));

	ASSERT(flag == PAGE_IS_FAILING || flag == PAGE_IS_TOXIC);

	/*
	 * DR operations change the association between a page_t
	 * and the physical page it represents. Check if the
	 * page is still bad.
	 */
	if (!page_isfaulty(pp)) {
		page_clrtoxic(pp);
		return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
	}

	/*
	 * We set the flag here so that even if we fail due
	 * to exceeding the limit for retired pages, the
	 * page will still be checked and either cleared
	 * or retired in page_free().
	 */
	page_settoxic(pp, flag);

	if (flag == PAGE_IS_TOXIC) {
		if (page_retire_messages) {
			cmn_err(CE_NOTE, "Scheduling clearing of error on"
			    " page 0x%08x.%08x",
			    (uint32_t)(pa >> 32), (uint32_t)pa);
		}

	} else { /* PAGE_IS_FAILING */
		if (pages_retired_limit_exceeded()) {
			/*
			 * Return as we have already exceeded the
			 * maximum number of pages allowed to be
			 * retired
			 */
			return (page_retire_done(pp, PAGE_RETIRE_LIMIT));
		}

		if (page_retire_messages) {
			cmn_err(CE_NOTE, "Scheduling removal of "
			    "page 0x%08x.%08x",
			    (uint32_t)(pa >> 32), (uint32_t)pa);
		}
	}

	if (PAGE_LOCKED(pp) || !page_trylock(pp, SE_EXCL))
		return (page_retire_done(pp, PAGE_RETIRE_SELOCK));

	/*
	 * If this is a large page we first try and demote it
	 * to PAGESIZE pages and then dispose of the toxic page.
	 * On failure we will let the page free/destroy
	 * code handle it later since this is a mapped page.
	 * Note that free large pages can always be demoted.
	 *
	 */
	if (pp->p_szc != 0) {
		if (PP_ISFREE(pp))
			(void) page_demote_free_pages(pp);
		else
			(void) page_try_demote_pages(pp);

		if (pp->p_szc != 0)
			return (page_retire_done(pp, PAGE_RETIRE_LPAGE));
	}

	if (PP_ISFREE(pp)) {
		if (!page_reclaim(pp, NULL))
			return (page_retire_done(pp, PAGE_RETIRE_NORECLAIM));
		/*LINTED: constant in conditional context*/
		VN_DISPOSE(pp, pp->p_vnode ? B_INVAL : B_FREE, 0, kcred)
		return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
	}

	if (pp->p_lckcnt != 0)
		return (page_retire_done(pp, PAGE_RETIRE_LOCKED));

	if (pp->p_cowcnt != 0)
		return (page_retire_done(pp, PAGE_RETIRE_COW));

	/*
	 * Unload all translations to this page. No new translations
	 * can be created while we hold the exclusive lock on the page.
	 */
	(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);

	if (hat_ismod(pp))
		return (page_retire_done(pp, PAGE_RETIRE_DIRTY));

	/*LINTED: constant in conditional context*/
	VN_DISPOSE(pp, B_INVAL, 0, kcred);

	return (page_retire_done(pp, PAGE_RETIRE_SUCCESS));
}

/*
 * Mark any existing pages for migration in the given range
 */
void
page_mark_migrate(struct seg *seg, caddr_t addr, size_t len,
    struct anon_map *amp, ulong_t anon_index, vnode_t *vp,
    u_offset_t vnoff, int rflag)
{
	struct anon	*ap;
	vnode_t		*curvp;
	lgrp_t		*from;
	pgcnt_t		i;
	pgcnt_t		nlocked;
	u_offset_t	off;
	pfn_t		pfn;
	size_t		pgsz;
	size_t		segpgsz;
	pgcnt_t		pages;
	uint_t		pszc;
	page_t		**ppa;
	pgcnt_t		ppa_nentries;
	page_t		*pp;
	caddr_t		va;
	ulong_t		an_idx;
	anon_sync_obj_t	cookie;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	/*
	 * Don't do anything if don't need to do lgroup optimizations
	 * on this system
	 */
	if (!lgrp_optimizations())
		return;

	/*
	 * Align address and length to (potentially large) page boundary
	 */
	segpgsz = page_get_pagesize(seg->s_szc);
	addr = (caddr_t)P2ALIGN((uintptr_t)addr, segpgsz);
	if (rflag)
		len = P2ROUNDUP(len, segpgsz);

	/*
	 * Allocate page array to accommodate largest page size
	 */
	pgsz = page_get_pagesize(page_num_pagesizes() - 1);
	ppa_nentries = btop(pgsz);
	ppa = kmem_zalloc(ppa_nentries * sizeof (page_t *), KM_SLEEP);

	/*
	 * Do one (large) page at a time
	 */
	va = addr;
	while (va < addr + len) {
		/*
		 * Lookup (root) page for vnode and offset corresponding to
		 * this virtual address
		 * Try anonmap first since there may be copy-on-write
		 * pages, but initialize vnode pointer and offset using
		 * vnode arguments just in case there isn't an amp.
		 */
		curvp = vp;
		off = vnoff + va - seg->s_base;
		if (amp) {
			/*
			 * NOTE(review): the extracted text read
			 * "&->a_rwlock"; reconstructed as "&amp->a_rwlock"
			 * (HTML-entity garbling) -- confirm against gate.
			 */
			ANON_LOCK_ENTER(&amp->a_rwlock, RW_READER);
			an_idx = anon_index + seg_page(seg, va);
			anon_array_enter(amp, an_idx, &cookie);
			ap = anon_get_ptr(amp->ahp, an_idx);
			if (ap)
				swap_xlate(ap, &curvp, &off);
			anon_array_exit(&cookie);
			ANON_LOCK_EXIT(&amp->a_rwlock);
		}

		pp = NULL;
		if (curvp)
			pp = page_lookup(curvp, off, SE_SHARED);

		/*
		 * If there isn't a page at this virtual address,
		 * skip to next page
		 */
		if (pp == NULL) {
			va += PAGESIZE;
			continue;
		}

		/*
		 * Figure out which lgroup this page is in for kstats
		 */
		pfn = page_pptonum(pp);
		from = lgrp_pfn_to_lgrp(pfn);

		/*
		 * Get page size, and round up and skip to next page boundary
		 * if unaligned address
		 */
		pszc = pp->p_szc;
		pgsz = page_get_pagesize(pszc);
		pages = btop(pgsz);
		if (!IS_P2ALIGNED(va, pgsz) ||
		    !IS_P2ALIGNED(pfn, pages) ||
		    pgsz > segpgsz) {
			pgsz = MIN(pgsz, segpgsz);
			page_unlock(pp);
			i = btop(P2END((uintptr_t)va, pgsz) -
			    (uintptr_t)va);
			va = (caddr_t)P2END((uintptr_t)va, pgsz);
			lgrp_stat_add(from->lgrp_id, LGRP_PMM_FAIL_PGS, i);
			continue;
		}

		/*
		 * Upgrade to exclusive lock on page
		 */
		if (!page_tryupgrade(pp)) {
			page_unlock(pp);
			va += pgsz;
			lgrp_stat_add(from->lgrp_id, LGRP_PMM_FAIL_PGS,
			    btop(pgsz));
			continue;
		}

		/*
		 * Remember pages locked exclusively and how many
		 */
		ppa[0] = pp;
		nlocked = 1;

		/*
		 * Lock constituent pages if this is large page
		 */
		if (pages > 1) {
			/*
			 * Lock all constituents except root page, since it
			 * should be locked already.
			 */
			for (i = 1; i < pages; i++) {
				pp++;
				if (!page_trylock(pp, SE_EXCL)) {
					break;
				}
				if (PP_ISFREE(pp) ||
				    pp->p_szc != pszc) {
					/*
					 * hat_page_demote() raced in with us.
					 */
					ASSERT(!IS_SWAPFSVP(curvp));
					page_unlock(pp);
					break;
				}
				ppa[nlocked] = pp;
				nlocked++;
			}
		}

		/*
		 * If all constituent pages couldn't be locked,
		 * unlock pages locked so far and skip to next page.
		 */
		if (nlocked != pages) {
			for (i = 0; i < nlocked; i++)
				page_unlock(ppa[i]);
			va += pgsz;
			lgrp_stat_add(from->lgrp_id, LGRP_PMM_FAIL_PGS,
			    btop(pgsz));
			continue;
		}

		/*
		 * hat_page_demote() can no longer happen
		 * since last cons page had the right p_szc after
		 * all cons pages were locked. all cons pages
		 * should now have the same p_szc.
		 */

		/*
		 * All constituent pages locked successfully, so mark
		 * large page for migration and unload the mappings of
		 * constituent pages, so a fault will occur on any part of the
		 * large page
		 */
		PP_SETMIGRATE(ppa[0]);
		for (i = 0; i < nlocked; i++) {
			pp = ppa[i];
			(void) hat_pageunload(pp, HAT_FORCE_PGUNLOAD);
			ASSERT(hat_page_getshare(pp) == 0);
			page_unlock(pp);
		}
		lgrp_stat_add(from->lgrp_id, LGRP_PMM_PGS, nlocked);

		va += pgsz;
	}
	kmem_free(ppa, ppa_nentries * sizeof (page_t *));
}

/*
 * Migrate any pages that have been marked for migration in the given range
 */
void
page_migrate(
	struct seg	*seg,
	caddr_t		addr,
	page_t		**ppa,
	pgcnt_t		npages)
{
	lgrp_t		*from;
	lgrp_t		*to;
	page_t		*newpp;
	page_t		*pp;
	pfn_t		pfn;
	size_t		pgsz;
	spgcnt_t	page_cnt;
	spgcnt_t	i;
	uint_t		pszc;

	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as, &seg->s_as->a_lock));

	while (npages > 0) {
		pp = *ppa;
		pszc = pp->p_szc;
		pgsz = page_get_pagesize(pszc);
		page_cnt = btop(pgsz);

		/*
		 * Check to see whether this page is marked for migration
		 *
		 * Assume that root page of large page is marked for
		 * migration and none of the other constituent pages
		 * are marked. This really simplifies clearing the
		 * migrate bit by not having to clear it from each
		 * constituent page.
		 *
		 * note we don't want to relocate an entire large page if
		 * someone is only using one subpage.
		 */
		if (npages < page_cnt)
			break;

		/*
		 * Is it marked for migration?
		 */
		if (!PP_ISMIGRATE(pp))
			goto next;

		/*
		 * Determine lgroups that page is being migrated between
		 */
		pfn = page_pptonum(pp);
		if (!IS_P2ALIGNED(pfn, page_cnt)) {
			break;
		}
		from = lgrp_pfn_to_lgrp(pfn);
		to = lgrp_mem_choose(seg, addr, pgsz);

		/*
		 * Check to see whether we are trying to migrate page to lgroup
		 * where it is allocated already
		 */
		if (to == from) {
			PP_CLRMIGRATE(pp);
			goto next;
		}

		/*
		 * Need to get exclusive lock's to migrate
		 */
		for (i = 0; i < page_cnt; i++) {
			ASSERT(PAGE_LOCKED(ppa[i]));
			if (page_pptonum(ppa[i]) != pfn + i ||
			    ppa[i]->p_szc != pszc) {
				break;
			}
			if (!page_tryupgrade(ppa[i])) {
				lgrp_stat_add(from->lgrp_id,
				    LGRP_PM_FAIL_LOCK_PGS,
				    page_cnt);
				break;
			}
		}
		/* Couldn't upgrade all constituents: downgrade and move on. */
		if (i != page_cnt) {
			while (--i != -1) {
				page_downgrade(ppa[i]);
			}
			goto next;
		}

		(void) page_create_wait(page_cnt, PG_WAIT);
		newpp = page_get_replacement_page(pp, to, PGR_SAMESZC);
		if (newpp == NULL) {
			page_create_putback(page_cnt);
			for (i = 0; i < page_cnt; i++) {
				page_downgrade(ppa[i]);
			}
			lgrp_stat_add(to->lgrp_id, LGRP_PM_FAIL_ALLOC_PGS,
			    page_cnt);
			goto next;
		}
		ASSERT(newpp->p_szc == pszc);
		/*
		 * Clear migrate bit and relocate page
		 */
		PP_CLRMIGRATE(pp);
		if (page_relocate(&pp, &newpp, 0, 1, &page_cnt, to)) {
			panic("page_migrate: page_relocate failed");
		}
		ASSERT(page_cnt * PAGESIZE == pgsz);

		/*
		 * Keep stats for number of pages migrated from and to
		 * each lgroup
		 */
		lgrp_stat_add(from->lgrp_id, LGRP_PM_SRC_PGS, page_cnt);
		lgrp_stat_add(to->lgrp_id, LGRP_PM_DEST_PGS, page_cnt);
		/*
		 * update the page_t array we were passed in and
		 * unlink constituent pages of a large page.
		 */
		for (i = 0; i < page_cnt; ++i, ++pp) {
			ASSERT(PAGE_EXCL(newpp));
			ASSERT(newpp->p_szc == pszc);
			ppa[i] = newpp;
			pp = newpp;
			page_sub(&newpp, pp);
			page_downgrade(pp);
		}
		ASSERT(newpp == NULL);
next:
		addr += pgsz;
		ppa += page_cnt;
		npages -= page_cnt;
	}
}

/*
 * initialize the vnode for retired pages
 */
static void
page_retired_init(void)
{
	vn_setops(&retired_ppages, &retired_vnodeops);
}

/*
 * VOP dispose for the retired-pages vnode; must never be called, since
 * retired pages are not supposed to be disposed of through the vnode.
 */
/* ARGSUSED */
static void
retired_dispose(vnode_t *vp, page_t *pp, int flag, int dn, cred_t *cr)
{
	panic("retired_dispose invoked");
}

/* ARGSUSED */
61467c478bd9Sstevel@tonic-gate static void 61477c478bd9Sstevel@tonic-gate retired_inactive(vnode_t *vp, cred_t *cr) 61487c478bd9Sstevel@tonic-gate {} 61497c478bd9Sstevel@tonic-gate 61507c478bd9Sstevel@tonic-gate void 61517c478bd9Sstevel@tonic-gate page_unretire_pages(void) 61527c478bd9Sstevel@tonic-gate { 61537c478bd9Sstevel@tonic-gate page_t *pp; 61547c478bd9Sstevel@tonic-gate kmutex_t *vphm; 61557c478bd9Sstevel@tonic-gate vnode_t *vp; 61567c478bd9Sstevel@tonic-gate page_t *rpages[UNRETIRE_PAGES]; 61577c478bd9Sstevel@tonic-gate pgcnt_t i, npages, rmem; 61587c478bd9Sstevel@tonic-gate uint64_t pa; 61597c478bd9Sstevel@tonic-gate 61607c478bd9Sstevel@tonic-gate rmem = 0; 61617c478bd9Sstevel@tonic-gate 61627c478bd9Sstevel@tonic-gate for (;;) { 61637c478bd9Sstevel@tonic-gate /* 61647c478bd9Sstevel@tonic-gate * We do this in 2 steps: 61657c478bd9Sstevel@tonic-gate * 61667c478bd9Sstevel@tonic-gate * 1. We walk the retired pages list and collect a list of 61677c478bd9Sstevel@tonic-gate * pages that have the toxic field cleared. 61687c478bd9Sstevel@tonic-gate * 61697c478bd9Sstevel@tonic-gate * 2. We iterate through the page list and unretire each one. 61707c478bd9Sstevel@tonic-gate * 61717c478bd9Sstevel@tonic-gate * We have to do it in two steps on account of the mutexes that 61727c478bd9Sstevel@tonic-gate * we need to acquire. 
61737c478bd9Sstevel@tonic-gate */ 61747c478bd9Sstevel@tonic-gate 61757c478bd9Sstevel@tonic-gate vp = &retired_ppages; 61767c478bd9Sstevel@tonic-gate vphm = page_vnode_mutex(vp); 61777c478bd9Sstevel@tonic-gate mutex_enter(vphm); 61787c478bd9Sstevel@tonic-gate 61797c478bd9Sstevel@tonic-gate if ((pp = vp->v_pages) == NULL) { 61807c478bd9Sstevel@tonic-gate mutex_exit(vphm); 61817c478bd9Sstevel@tonic-gate break; 61827c478bd9Sstevel@tonic-gate } 61837c478bd9Sstevel@tonic-gate 61847c478bd9Sstevel@tonic-gate i = 0; 61857c478bd9Sstevel@tonic-gate do { 61867c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 61877c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == vp); 61887c478bd9Sstevel@tonic-gate 61897c478bd9Sstevel@tonic-gate /* 61907c478bd9Sstevel@tonic-gate * DR operations change the association between a page_t 61917c478bd9Sstevel@tonic-gate * and the physical page it represents. Check if the 61927c478bd9Sstevel@tonic-gate * page is still bad. If not, unretire it. 61937c478bd9Sstevel@tonic-gate */ 61947c478bd9Sstevel@tonic-gate if (!page_isfaulty(pp)) 61957c478bd9Sstevel@tonic-gate rpages[i++] = pp; 61967c478bd9Sstevel@tonic-gate 61977c478bd9Sstevel@tonic-gate pp = pp->p_vpnext; 61987c478bd9Sstevel@tonic-gate } while ((pp != vp->v_pages) && (i < UNRETIRE_PAGES)); 61997c478bd9Sstevel@tonic-gate 62007c478bd9Sstevel@tonic-gate mutex_exit(vphm); 62017c478bd9Sstevel@tonic-gate 62027c478bd9Sstevel@tonic-gate npages = i; 62037c478bd9Sstevel@tonic-gate for (i = 0; i < npages; i++) { 62047c478bd9Sstevel@tonic-gate pp = rpages[i]; 62057c478bd9Sstevel@tonic-gate pa = ptob((uint64_t)page_pptonum(pp)); 62067c478bd9Sstevel@tonic-gate 62077c478bd9Sstevel@tonic-gate /* 62087c478bd9Sstevel@tonic-gate * Need to upgrade the shared lock to an exclusive 62097c478bd9Sstevel@tonic-gate * lock in order to hash out the page. 
62107c478bd9Sstevel@tonic-gate * 62117c478bd9Sstevel@tonic-gate * The page could have been retired but the page lock 62127c478bd9Sstevel@tonic-gate * may not have been downgraded yet. If so, skip this 62137c478bd9Sstevel@tonic-gate * page. page_free() will call this function after the 62147c478bd9Sstevel@tonic-gate * lock is downgraded. 62157c478bd9Sstevel@tonic-gate */ 62167c478bd9Sstevel@tonic-gate 62177c478bd9Sstevel@tonic-gate if (!PAGE_SHARED(pp) || !page_tryupgrade(pp)) 62187c478bd9Sstevel@tonic-gate continue; 62197c478bd9Sstevel@tonic-gate 62207c478bd9Sstevel@tonic-gate /* 62217c478bd9Sstevel@tonic-gate * Both page_free() and DR call this function. They 62227c478bd9Sstevel@tonic-gate * can potentially call this function at the same 62237c478bd9Sstevel@tonic-gate * time and race with each other. 62247c478bd9Sstevel@tonic-gate */ 62257c478bd9Sstevel@tonic-gate if (!page_isretired(pp) || page_isfaulty(pp)) { 62267c478bd9Sstevel@tonic-gate page_downgrade(pp); 62277c478bd9Sstevel@tonic-gate continue; 62287c478bd9Sstevel@tonic-gate } 62297c478bd9Sstevel@tonic-gate 62307c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 62317c478bd9Sstevel@tonic-gate "unretiring retired page 0x%08x.%08x", 62327c478bd9Sstevel@tonic-gate (uint32_t)(pa >> 32), (uint32_t)pa); 62337c478bd9Sstevel@tonic-gate 62347c478bd9Sstevel@tonic-gate /* 62357c478bd9Sstevel@tonic-gate * When a page is removed from the retired pages vnode, 62367c478bd9Sstevel@tonic-gate * its toxic field is also cleared. So, we do not have 62377c478bd9Sstevel@tonic-gate * to do that seperately here. 62387c478bd9Sstevel@tonic-gate */ 62397c478bd9Sstevel@tonic-gate page_hashout(pp, (kmutex_t *)NULL); 62407c478bd9Sstevel@tonic-gate 62417c478bd9Sstevel@tonic-gate /* 62427c478bd9Sstevel@tonic-gate * This is a good page. So, free it. 
62437c478bd9Sstevel@tonic-gate */ 62447c478bd9Sstevel@tonic-gate pp->p_vnode = NULL; 62457c478bd9Sstevel@tonic-gate page_free(pp, 1); 62467c478bd9Sstevel@tonic-gate rmem++; 62477c478bd9Sstevel@tonic-gate } 62487c478bd9Sstevel@tonic-gate 62497c478bd9Sstevel@tonic-gate /* 62507c478bd9Sstevel@tonic-gate * If the rpages array was filled up, then there could be more 62517c478bd9Sstevel@tonic-gate * retired pages that are not faulty. We need to iterate 62527c478bd9Sstevel@tonic-gate * again and unretire them. Otherwise, we are done. 62537c478bd9Sstevel@tonic-gate */ 62547c478bd9Sstevel@tonic-gate if (npages < UNRETIRE_PAGES) 62557c478bd9Sstevel@tonic-gate break; 62567c478bd9Sstevel@tonic-gate } 62577c478bd9Sstevel@tonic-gate 62587c478bd9Sstevel@tonic-gate mutex_enter(&freemem_lock); 62597c478bd9Sstevel@tonic-gate availrmem += rmem; 62607c478bd9Sstevel@tonic-gate mutex_exit(&freemem_lock); 62617c478bd9Sstevel@tonic-gate } 62627c478bd9Sstevel@tonic-gate 62637c478bd9Sstevel@tonic-gate ulong_t mem_waiters = 0; 62647c478bd9Sstevel@tonic-gate ulong_t max_count = 20; 62657c478bd9Sstevel@tonic-gate #define MAX_DELAY 0x1ff 62667c478bd9Sstevel@tonic-gate 62677c478bd9Sstevel@tonic-gate /* 62687c478bd9Sstevel@tonic-gate * Check if enough memory is available to proceed. 62697c478bd9Sstevel@tonic-gate * Depending on system configuration and how much memory is 62707c478bd9Sstevel@tonic-gate * reserved for swap we need to check against two variables. 62717c478bd9Sstevel@tonic-gate * e.g. on systems with little physical swap availrmem can be 62727c478bd9Sstevel@tonic-gate * more reliable indicator of how much memory is available. 62737c478bd9Sstevel@tonic-gate * On systems with large phys swap freemem can be better indicator. 62747c478bd9Sstevel@tonic-gate * If freemem drops below threshold level don't return an error 62757c478bd9Sstevel@tonic-gate * immediately but wake up pageout to free memory and block. 62767c478bd9Sstevel@tonic-gate * This is done number of times. 
If pageout is not able to free 62777c478bd9Sstevel@tonic-gate * memory within certain time return an error. 62787c478bd9Sstevel@tonic-gate * The same applies for availrmem but kmem_reap is used to 62797c478bd9Sstevel@tonic-gate * free memory. 62807c478bd9Sstevel@tonic-gate */ 62817c478bd9Sstevel@tonic-gate int 62827c478bd9Sstevel@tonic-gate page_mem_avail(pgcnt_t npages) 62837c478bd9Sstevel@tonic-gate { 62847c478bd9Sstevel@tonic-gate ulong_t count; 62857c478bd9Sstevel@tonic-gate 62867c478bd9Sstevel@tonic-gate #if defined(__i386) 62877c478bd9Sstevel@tonic-gate if (freemem > desfree + npages && 62887c478bd9Sstevel@tonic-gate availrmem > swapfs_reserve + npages && 62897c478bd9Sstevel@tonic-gate btop(vmem_size(heap_arena, VMEM_FREE)) > tune.t_minarmem + 62907c478bd9Sstevel@tonic-gate npages) 62917c478bd9Sstevel@tonic-gate return (1); 62927c478bd9Sstevel@tonic-gate #else 62937c478bd9Sstevel@tonic-gate if (freemem > desfree + npages && 62947c478bd9Sstevel@tonic-gate availrmem > swapfs_reserve + npages) 62957c478bd9Sstevel@tonic-gate return (1); 62967c478bd9Sstevel@tonic-gate #endif 62977c478bd9Sstevel@tonic-gate 62987c478bd9Sstevel@tonic-gate count = max_count; 62997c478bd9Sstevel@tonic-gate atomic_add_long(&mem_waiters, 1); 63007c478bd9Sstevel@tonic-gate 63017c478bd9Sstevel@tonic-gate while (freemem < desfree + npages && --count) { 63027c478bd9Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 63037c478bd9Sstevel@tonic-gate if (delay_sig(hz + (mem_waiters & MAX_DELAY))) { 63047c478bd9Sstevel@tonic-gate atomic_add_long(&mem_waiters, -1); 63057c478bd9Sstevel@tonic-gate return (0); 63067c478bd9Sstevel@tonic-gate } 63077c478bd9Sstevel@tonic-gate } 63087c478bd9Sstevel@tonic-gate if (count == 0) { 63097c478bd9Sstevel@tonic-gate atomic_add_long(&mem_waiters, -1); 63107c478bd9Sstevel@tonic-gate return (0); 63117c478bd9Sstevel@tonic-gate } 63127c478bd9Sstevel@tonic-gate 63137c478bd9Sstevel@tonic-gate count = max_count; 63147c478bd9Sstevel@tonic-gate while (availrmem < 
swapfs_reserve + npages && --count) { 63157c478bd9Sstevel@tonic-gate kmem_reap(); 63167c478bd9Sstevel@tonic-gate if (delay_sig(hz + (mem_waiters & MAX_DELAY))) { 63177c478bd9Sstevel@tonic-gate atomic_add_long(&mem_waiters, -1); 63187c478bd9Sstevel@tonic-gate return (0); 63197c478bd9Sstevel@tonic-gate } 63207c478bd9Sstevel@tonic-gate } 63217c478bd9Sstevel@tonic-gate atomic_add_long(&mem_waiters, -1); 63227c478bd9Sstevel@tonic-gate if (count == 0) 63237c478bd9Sstevel@tonic-gate return (0); 63247c478bd9Sstevel@tonic-gate 63257c478bd9Sstevel@tonic-gate #if defined(__i386) 63267c478bd9Sstevel@tonic-gate if (btop(vmem_size(heap_arena, VMEM_FREE)) < 63277c478bd9Sstevel@tonic-gate tune.t_minarmem + npages) 63287c478bd9Sstevel@tonic-gate return (0); 63297c478bd9Sstevel@tonic-gate #endif 63307c478bd9Sstevel@tonic-gate return (1); 63317c478bd9Sstevel@tonic-gate } 63327c478bd9Sstevel@tonic-gate 63337c478bd9Sstevel@tonic-gate 63347c478bd9Sstevel@tonic-gate /* 63357c478bd9Sstevel@tonic-gate * Search the memory segments to locate the desired page. Within a 63367c478bd9Sstevel@tonic-gate * segment, pages increase linearly with one page structure per 63377c478bd9Sstevel@tonic-gate * physical page frame (size PAGESIZE). The search begins 63387c478bd9Sstevel@tonic-gate * with the segment that was accessed last, to take advantage of locality. 63397c478bd9Sstevel@tonic-gate * If the hint misses, we start from the beginning of the sorted memseg list 63407c478bd9Sstevel@tonic-gate */ 63417c478bd9Sstevel@tonic-gate 63427c478bd9Sstevel@tonic-gate 63437c478bd9Sstevel@tonic-gate /* 63447c478bd9Sstevel@tonic-gate * Some data structures for pfn to pp lookup. 
63457c478bd9Sstevel@tonic-gate */ 63467c478bd9Sstevel@tonic-gate ulong_t mhash_per_slot; 63477c478bd9Sstevel@tonic-gate struct memseg *memseg_hash[N_MEM_SLOTS]; 63487c478bd9Sstevel@tonic-gate 63497c478bd9Sstevel@tonic-gate page_t * 63507c478bd9Sstevel@tonic-gate page_numtopp_nolock(pfn_t pfnum) 63517c478bd9Sstevel@tonic-gate { 63527c478bd9Sstevel@tonic-gate struct memseg *seg; 63537c478bd9Sstevel@tonic-gate page_t *pp; 6354*affbd3ccSkchow vm_cpu_data_t *vc = CPU->cpu_vm_data; 63557c478bd9Sstevel@tonic-gate 6356*affbd3ccSkchow ASSERT(vc != NULL); 63577c478bd9Sstevel@tonic-gate 63587c478bd9Sstevel@tonic-gate MEMSEG_STAT_INCR(nsearch); 63597c478bd9Sstevel@tonic-gate 63607c478bd9Sstevel@tonic-gate /* Try last winner first */ 6361*affbd3ccSkchow if (((seg = vc->vc_pnum_memseg) != NULL) && 63627c478bd9Sstevel@tonic-gate (pfnum >= seg->pages_base) && (pfnum < seg->pages_end)) { 63637c478bd9Sstevel@tonic-gate MEMSEG_STAT_INCR(nlastwon); 63647c478bd9Sstevel@tonic-gate pp = seg->pages + (pfnum - seg->pages_base); 63657c478bd9Sstevel@tonic-gate if (pp->p_pagenum == pfnum) 63667c478bd9Sstevel@tonic-gate return ((page_t *)pp); 63677c478bd9Sstevel@tonic-gate } 63687c478bd9Sstevel@tonic-gate 63697c478bd9Sstevel@tonic-gate /* Else Try hash */ 63707c478bd9Sstevel@tonic-gate if (((seg = memseg_hash[MEMSEG_PFN_HASH(pfnum)]) != NULL) && 63717c478bd9Sstevel@tonic-gate (pfnum >= seg->pages_base) && (pfnum < seg->pages_end)) { 63727c478bd9Sstevel@tonic-gate MEMSEG_STAT_INCR(nhashwon); 6373*affbd3ccSkchow vc->vc_pnum_memseg = seg; 63747c478bd9Sstevel@tonic-gate pp = seg->pages + (pfnum - seg->pages_base); 63757c478bd9Sstevel@tonic-gate if (pp->p_pagenum == pfnum) 63767c478bd9Sstevel@tonic-gate return ((page_t *)pp); 63777c478bd9Sstevel@tonic-gate } 63787c478bd9Sstevel@tonic-gate 63797c478bd9Sstevel@tonic-gate /* Else Brute force */ 63807c478bd9Sstevel@tonic-gate for (seg = memsegs; seg != NULL; seg = seg->next) { 63817c478bd9Sstevel@tonic-gate if (pfnum >= seg->pages_base && pfnum < 
seg->pages_end) { 6382*affbd3ccSkchow vc->vc_pnum_memseg = seg; 63837c478bd9Sstevel@tonic-gate pp = seg->pages + (pfnum - seg->pages_base); 63847c478bd9Sstevel@tonic-gate return ((page_t *)pp); 63857c478bd9Sstevel@tonic-gate } 63867c478bd9Sstevel@tonic-gate } 6387*affbd3ccSkchow vc->vc_pnum_memseg = NULL; 63887c478bd9Sstevel@tonic-gate MEMSEG_STAT_INCR(nnotfound); 63897c478bd9Sstevel@tonic-gate return ((page_t *)NULL); 63907c478bd9Sstevel@tonic-gate 63917c478bd9Sstevel@tonic-gate } 63927c478bd9Sstevel@tonic-gate 63937c478bd9Sstevel@tonic-gate struct memseg * 63947c478bd9Sstevel@tonic-gate page_numtomemseg_nolock(pfn_t pfnum) 63957c478bd9Sstevel@tonic-gate { 63967c478bd9Sstevel@tonic-gate struct memseg *seg; 63977c478bd9Sstevel@tonic-gate page_t *pp; 63987c478bd9Sstevel@tonic-gate 63997c478bd9Sstevel@tonic-gate /* Try hash */ 64007c478bd9Sstevel@tonic-gate if (((seg = memseg_hash[MEMSEG_PFN_HASH(pfnum)]) != NULL) && 64017c478bd9Sstevel@tonic-gate (pfnum >= seg->pages_base) && (pfnum < seg->pages_end)) { 64027c478bd9Sstevel@tonic-gate pp = seg->pages + (pfnum - seg->pages_base); 64037c478bd9Sstevel@tonic-gate if (pp->p_pagenum == pfnum) 64047c478bd9Sstevel@tonic-gate return (seg); 64057c478bd9Sstevel@tonic-gate } 64067c478bd9Sstevel@tonic-gate 64077c478bd9Sstevel@tonic-gate /* Else Brute force */ 64087c478bd9Sstevel@tonic-gate for (seg = memsegs; seg != NULL; seg = seg->next) { 64097c478bd9Sstevel@tonic-gate if (pfnum >= seg->pages_base && pfnum < seg->pages_end) { 64107c478bd9Sstevel@tonic-gate return (seg); 64117c478bd9Sstevel@tonic-gate } 64127c478bd9Sstevel@tonic-gate } 64137c478bd9Sstevel@tonic-gate return ((struct memseg *)NULL); 64147c478bd9Sstevel@tonic-gate } 64157c478bd9Sstevel@tonic-gate 64167c478bd9Sstevel@tonic-gate /* 64177c478bd9Sstevel@tonic-gate * Given a page and a count return the page struct that is 64187c478bd9Sstevel@tonic-gate * n structs away from the current one in the global page 64197c478bd9Sstevel@tonic-gate * list. 
64207c478bd9Sstevel@tonic-gate * 64217c478bd9Sstevel@tonic-gate * This function wraps to the first page upon 64227c478bd9Sstevel@tonic-gate * reaching the end of the memseg list. 64237c478bd9Sstevel@tonic-gate */ 64247c478bd9Sstevel@tonic-gate page_t * 64257c478bd9Sstevel@tonic-gate page_nextn(page_t *pp, ulong_t n) 64267c478bd9Sstevel@tonic-gate { 64277c478bd9Sstevel@tonic-gate struct memseg *seg; 64287c478bd9Sstevel@tonic-gate page_t *ppn; 6429*affbd3ccSkchow vm_cpu_data_t *vc = (vm_cpu_data_t *)CPU->cpu_vm_data; 64307c478bd9Sstevel@tonic-gate 6431*affbd3ccSkchow ASSERT(vc != NULL); 6432*affbd3ccSkchow 6433*affbd3ccSkchow if (((seg = vc->vc_pnext_memseg) == NULL) || 64347c478bd9Sstevel@tonic-gate (seg->pages_base == seg->pages_end) || 64357c478bd9Sstevel@tonic-gate !(pp >= seg->pages && pp < seg->epages)) { 64367c478bd9Sstevel@tonic-gate 64377c478bd9Sstevel@tonic-gate for (seg = memsegs; seg; seg = seg->next) { 64387c478bd9Sstevel@tonic-gate if (pp >= seg->pages && pp < seg->epages) 64397c478bd9Sstevel@tonic-gate break; 64407c478bd9Sstevel@tonic-gate } 64417c478bd9Sstevel@tonic-gate 64427c478bd9Sstevel@tonic-gate if (seg == NULL) { 64437c478bd9Sstevel@tonic-gate /* Memory delete got in, return something valid. */ 64447c478bd9Sstevel@tonic-gate /* TODO: fix me. 
*/ 64457c478bd9Sstevel@tonic-gate seg = memsegs; 64467c478bd9Sstevel@tonic-gate pp = seg->pages; 64477c478bd9Sstevel@tonic-gate } 64487c478bd9Sstevel@tonic-gate } 64497c478bd9Sstevel@tonic-gate 64507c478bd9Sstevel@tonic-gate /* check for wraparound - possible if n is large */ 64517c478bd9Sstevel@tonic-gate while ((ppn = (pp + n)) >= seg->epages || ppn < pp) { 64527c478bd9Sstevel@tonic-gate n -= seg->epages - pp; 64537c478bd9Sstevel@tonic-gate seg = seg->next; 64547c478bd9Sstevel@tonic-gate if (seg == NULL) 64557c478bd9Sstevel@tonic-gate seg = memsegs; 64567c478bd9Sstevel@tonic-gate pp = seg->pages; 64577c478bd9Sstevel@tonic-gate } 6458*affbd3ccSkchow vc->vc_pnext_memseg = seg; 64597c478bd9Sstevel@tonic-gate return (ppn); 64607c478bd9Sstevel@tonic-gate } 64617c478bd9Sstevel@tonic-gate 64627c478bd9Sstevel@tonic-gate /* 64637c478bd9Sstevel@tonic-gate * Initialize for a loop using page_next_scan_large(). 64647c478bd9Sstevel@tonic-gate */ 64657c478bd9Sstevel@tonic-gate page_t * 64667c478bd9Sstevel@tonic-gate page_next_scan_init(void **cookie) 64677c478bd9Sstevel@tonic-gate { 64687c478bd9Sstevel@tonic-gate ASSERT(cookie != NULL); 64697c478bd9Sstevel@tonic-gate *cookie = (void *)memsegs; 64707c478bd9Sstevel@tonic-gate return ((page_t *)memsegs->pages); 64717c478bd9Sstevel@tonic-gate } 64727c478bd9Sstevel@tonic-gate 64737c478bd9Sstevel@tonic-gate /* 64747c478bd9Sstevel@tonic-gate * Return the next page in a scan of page_t's, assuming we want 64757c478bd9Sstevel@tonic-gate * to skip over sub-pages within larger page sizes. 64767c478bd9Sstevel@tonic-gate * 64777c478bd9Sstevel@tonic-gate * The cookie is used to keep track of the current memseg. 
64787c478bd9Sstevel@tonic-gate */ 64797c478bd9Sstevel@tonic-gate page_t * 64807c478bd9Sstevel@tonic-gate page_next_scan_large( 64817c478bd9Sstevel@tonic-gate page_t *pp, 64827c478bd9Sstevel@tonic-gate ulong_t *n, 64837c478bd9Sstevel@tonic-gate void **cookie) 64847c478bd9Sstevel@tonic-gate { 64857c478bd9Sstevel@tonic-gate struct memseg *seg = (struct memseg *)*cookie; 64867c478bd9Sstevel@tonic-gate page_t *new_pp; 64877c478bd9Sstevel@tonic-gate ulong_t cnt; 64887c478bd9Sstevel@tonic-gate pfn_t pfn; 64897c478bd9Sstevel@tonic-gate 64907c478bd9Sstevel@tonic-gate 64917c478bd9Sstevel@tonic-gate /* 64927c478bd9Sstevel@tonic-gate * get the count of page_t's to skip based on the page size 64937c478bd9Sstevel@tonic-gate */ 64947c478bd9Sstevel@tonic-gate ASSERT(pp != NULL); 64957c478bd9Sstevel@tonic-gate if (pp->p_szc == 0) { 64967c478bd9Sstevel@tonic-gate cnt = 1; 64977c478bd9Sstevel@tonic-gate } else { 64987c478bd9Sstevel@tonic-gate pfn = page_pptonum(pp); 64997c478bd9Sstevel@tonic-gate cnt = page_get_pagecnt(pp->p_szc); 65007c478bd9Sstevel@tonic-gate cnt -= pfn & (cnt - 1); 65017c478bd9Sstevel@tonic-gate } 65027c478bd9Sstevel@tonic-gate *n += cnt; 65037c478bd9Sstevel@tonic-gate new_pp = pp + cnt; 65047c478bd9Sstevel@tonic-gate 65057c478bd9Sstevel@tonic-gate /* 65067c478bd9Sstevel@tonic-gate * Catch if we went past the end of the current memory segment. If so, 65077c478bd9Sstevel@tonic-gate * just move to the next segment with pages. 
65087c478bd9Sstevel@tonic-gate */ 65097c478bd9Sstevel@tonic-gate if (new_pp >= seg->epages) { 65107c478bd9Sstevel@tonic-gate do { 65117c478bd9Sstevel@tonic-gate seg = seg->next; 65127c478bd9Sstevel@tonic-gate if (seg == NULL) 65137c478bd9Sstevel@tonic-gate seg = memsegs; 65147c478bd9Sstevel@tonic-gate } while (seg->pages == seg->epages); 65157c478bd9Sstevel@tonic-gate new_pp = seg->pages; 65167c478bd9Sstevel@tonic-gate *cookie = (void *)seg; 65177c478bd9Sstevel@tonic-gate } 65187c478bd9Sstevel@tonic-gate 65197c478bd9Sstevel@tonic-gate return (new_pp); 65207c478bd9Sstevel@tonic-gate } 65217c478bd9Sstevel@tonic-gate 65227c478bd9Sstevel@tonic-gate 65237c478bd9Sstevel@tonic-gate /* 65247c478bd9Sstevel@tonic-gate * Returns next page in list. Note: this function wraps 65257c478bd9Sstevel@tonic-gate * to the first page in the list upon reaching the end 65267c478bd9Sstevel@tonic-gate * of the list. Callers should be aware of this fact. 65277c478bd9Sstevel@tonic-gate */ 65287c478bd9Sstevel@tonic-gate 65297c478bd9Sstevel@tonic-gate /* We should change this be a #define */ 65307c478bd9Sstevel@tonic-gate 65317c478bd9Sstevel@tonic-gate page_t * 65327c478bd9Sstevel@tonic-gate page_next(page_t *pp) 65337c478bd9Sstevel@tonic-gate { 65347c478bd9Sstevel@tonic-gate return (page_nextn(pp, 1)); 65357c478bd9Sstevel@tonic-gate } 65367c478bd9Sstevel@tonic-gate 65377c478bd9Sstevel@tonic-gate page_t * 65387c478bd9Sstevel@tonic-gate page_first() 65397c478bd9Sstevel@tonic-gate { 65407c478bd9Sstevel@tonic-gate return ((page_t *)memsegs->pages); 65417c478bd9Sstevel@tonic-gate } 65427c478bd9Sstevel@tonic-gate 65437c478bd9Sstevel@tonic-gate 65447c478bd9Sstevel@tonic-gate /* 65457c478bd9Sstevel@tonic-gate * This routine is called at boot with the initial memory configuration 65467c478bd9Sstevel@tonic-gate * and when memory is added or removed. 
65477c478bd9Sstevel@tonic-gate */ 65487c478bd9Sstevel@tonic-gate void 65497c478bd9Sstevel@tonic-gate build_pfn_hash() 65507c478bd9Sstevel@tonic-gate { 65517c478bd9Sstevel@tonic-gate pfn_t cur; 65527c478bd9Sstevel@tonic-gate pgcnt_t index; 65537c478bd9Sstevel@tonic-gate struct memseg *pseg; 65547c478bd9Sstevel@tonic-gate int i; 65557c478bd9Sstevel@tonic-gate 65567c478bd9Sstevel@tonic-gate /* 65577c478bd9Sstevel@tonic-gate * Clear memseg_hash array. 65587c478bd9Sstevel@tonic-gate * Since memory add/delete is designed to operate concurrently 65597c478bd9Sstevel@tonic-gate * with normal operation, the hash rebuild must be able to run 65607c478bd9Sstevel@tonic-gate * concurrently with page_numtopp_nolock(). To support this 65617c478bd9Sstevel@tonic-gate * functionality, assignments to memseg_hash array members must 65627c478bd9Sstevel@tonic-gate * be done atomically. 65637c478bd9Sstevel@tonic-gate * 65647c478bd9Sstevel@tonic-gate * NOTE: bzero() does not currently guarantee this for kernel 65657c478bd9Sstevel@tonic-gate * threads, and cannot be used here. 65667c478bd9Sstevel@tonic-gate */ 65677c478bd9Sstevel@tonic-gate for (i = 0; i < N_MEM_SLOTS; i++) 65687c478bd9Sstevel@tonic-gate memseg_hash[i] = NULL; 65697c478bd9Sstevel@tonic-gate 65707c478bd9Sstevel@tonic-gate hat_kpm_mseghash_clear(N_MEM_SLOTS); 65717c478bd9Sstevel@tonic-gate 65727c478bd9Sstevel@tonic-gate /* 65737c478bd9Sstevel@tonic-gate * Physmax is the last valid pfn. 
65747c478bd9Sstevel@tonic-gate */ 65757c478bd9Sstevel@tonic-gate mhash_per_slot = (physmax + 1) >> MEM_HASH_SHIFT; 65767c478bd9Sstevel@tonic-gate for (pseg = memsegs; pseg != NULL; pseg = pseg->next) { 65777c478bd9Sstevel@tonic-gate index = MEMSEG_PFN_HASH(pseg->pages_base); 65787c478bd9Sstevel@tonic-gate cur = pseg->pages_base; 65797c478bd9Sstevel@tonic-gate do { 65807c478bd9Sstevel@tonic-gate if (index >= N_MEM_SLOTS) 65817c478bd9Sstevel@tonic-gate index = MEMSEG_PFN_HASH(cur); 65827c478bd9Sstevel@tonic-gate 65837c478bd9Sstevel@tonic-gate if (memseg_hash[index] == NULL || 65847c478bd9Sstevel@tonic-gate memseg_hash[index]->pages_base > pseg->pages_base) { 65857c478bd9Sstevel@tonic-gate memseg_hash[index] = pseg; 65867c478bd9Sstevel@tonic-gate hat_kpm_mseghash_update(index, pseg); 65877c478bd9Sstevel@tonic-gate } 65887c478bd9Sstevel@tonic-gate cur += mhash_per_slot; 65897c478bd9Sstevel@tonic-gate index++; 65907c478bd9Sstevel@tonic-gate } while (cur < pseg->pages_end); 65917c478bd9Sstevel@tonic-gate } 65927c478bd9Sstevel@tonic-gate } 65937c478bd9Sstevel@tonic-gate 65947c478bd9Sstevel@tonic-gate /* 65957c478bd9Sstevel@tonic-gate * Return the pagenum for the pp 65967c478bd9Sstevel@tonic-gate */ 65977c478bd9Sstevel@tonic-gate pfn_t 65987c478bd9Sstevel@tonic-gate page_pptonum(page_t *pp) 65997c478bd9Sstevel@tonic-gate { 66007c478bd9Sstevel@tonic-gate return (pp->p_pagenum); 66017c478bd9Sstevel@tonic-gate } 66027c478bd9Sstevel@tonic-gate 66037c478bd9Sstevel@tonic-gate /* 66047c478bd9Sstevel@tonic-gate * interface to the referenced and modified etc bits 66057c478bd9Sstevel@tonic-gate * in the PSM part of the page struct 66067c478bd9Sstevel@tonic-gate * when no locking is desired. 
66077c478bd9Sstevel@tonic-gate */ 66087c478bd9Sstevel@tonic-gate void 66097c478bd9Sstevel@tonic-gate page_set_props(page_t *pp, uint_t flags) 66107c478bd9Sstevel@tonic-gate { 66117c478bd9Sstevel@tonic-gate ASSERT((flags & ~(P_MOD | P_REF | P_RO)) == 0); 66127c478bd9Sstevel@tonic-gate pp->p_nrm |= (uchar_t)flags; 66137c478bd9Sstevel@tonic-gate } 66147c478bd9Sstevel@tonic-gate 66157c478bd9Sstevel@tonic-gate void 66167c478bd9Sstevel@tonic-gate page_clr_all_props(page_t *pp) 66177c478bd9Sstevel@tonic-gate { 66187c478bd9Sstevel@tonic-gate pp->p_nrm = 0; 66197c478bd9Sstevel@tonic-gate } 66207c478bd9Sstevel@tonic-gate 66217c478bd9Sstevel@tonic-gate /* 66227c478bd9Sstevel@tonic-gate * The following functions is called from free_vp_pages() 66237c478bd9Sstevel@tonic-gate * for an inexact estimate of a newly free'd page... 66247c478bd9Sstevel@tonic-gate */ 66257c478bd9Sstevel@tonic-gate ulong_t 66267c478bd9Sstevel@tonic-gate page_share_cnt(page_t *pp) 66277c478bd9Sstevel@tonic-gate { 66287c478bd9Sstevel@tonic-gate return (hat_page_getshare(pp)); 66297c478bd9Sstevel@tonic-gate } 66307c478bd9Sstevel@tonic-gate 66317c478bd9Sstevel@tonic-gate /* 66327c478bd9Sstevel@tonic-gate * The following functions are used in handling memory 66337c478bd9Sstevel@tonic-gate * errors. 
66347c478bd9Sstevel@tonic-gate */ 66357c478bd9Sstevel@tonic-gate 66367c478bd9Sstevel@tonic-gate int 66377c478bd9Sstevel@tonic-gate page_istoxic(page_t *pp) 66387c478bd9Sstevel@tonic-gate { 66397c478bd9Sstevel@tonic-gate return ((pp->p_toxic & PAGE_IS_TOXIC) == PAGE_IS_TOXIC); 66407c478bd9Sstevel@tonic-gate } 66417c478bd9Sstevel@tonic-gate 66427c478bd9Sstevel@tonic-gate int 66437c478bd9Sstevel@tonic-gate page_isfailing(page_t *pp) 66447c478bd9Sstevel@tonic-gate { 66457c478bd9Sstevel@tonic-gate return ((pp->p_toxic & PAGE_IS_FAILING) == PAGE_IS_FAILING); 66467c478bd9Sstevel@tonic-gate } 66477c478bd9Sstevel@tonic-gate 66487c478bd9Sstevel@tonic-gate int 66497c478bd9Sstevel@tonic-gate page_isretired(page_t *pp) 66507c478bd9Sstevel@tonic-gate { 66517c478bd9Sstevel@tonic-gate return ((pp->p_toxic & PAGE_IS_RETIRED) == PAGE_IS_RETIRED); 66527c478bd9Sstevel@tonic-gate } 66537c478bd9Sstevel@tonic-gate 66547c478bd9Sstevel@tonic-gate int 66557c478bd9Sstevel@tonic-gate page_deteriorating(page_t *pp) 66567c478bd9Sstevel@tonic-gate { 66577c478bd9Sstevel@tonic-gate return ((pp->p_toxic & (PAGE_IS_TOXIC | PAGE_IS_FAILING)) != 0); 66587c478bd9Sstevel@tonic-gate } 66597c478bd9Sstevel@tonic-gate 66607c478bd9Sstevel@tonic-gate void 66617c478bd9Sstevel@tonic-gate page_settoxic(page_t *pp, uchar_t flag) 66627c478bd9Sstevel@tonic-gate { 66637c478bd9Sstevel@tonic-gate uchar_t new_flag = 0; 66647c478bd9Sstevel@tonic-gate while ((new_flag & flag) != flag) { 66657c478bd9Sstevel@tonic-gate uchar_t old_flag = pp->p_toxic; 66667c478bd9Sstevel@tonic-gate new_flag = old_flag | flag; 66677c478bd9Sstevel@tonic-gate (void) cas8(&pp->p_toxic, old_flag, new_flag); 66687c478bd9Sstevel@tonic-gate new_flag = ((volatile page_t *)pp)->p_toxic; 66697c478bd9Sstevel@tonic-gate } 66707c478bd9Sstevel@tonic-gate } 66717c478bd9Sstevel@tonic-gate 66727c478bd9Sstevel@tonic-gate void 66737c478bd9Sstevel@tonic-gate page_clrtoxic(page_t *pp) 66747c478bd9Sstevel@tonic-gate { 66757c478bd9Sstevel@tonic-gate /* 
66767c478bd9Sstevel@tonic-gate * We don't need to worry about atomicity on the 66777c478bd9Sstevel@tonic-gate * p_toxic flag here as this is only called from 66787c478bd9Sstevel@tonic-gate * page_free() while holding an exclusive lock on 66797c478bd9Sstevel@tonic-gate * the page 66807c478bd9Sstevel@tonic-gate */ 66817c478bd9Sstevel@tonic-gate pp->p_toxic = PAGE_IS_OK; 66827c478bd9Sstevel@tonic-gate } 66837c478bd9Sstevel@tonic-gate 66847c478bd9Sstevel@tonic-gate void 66857c478bd9Sstevel@tonic-gate page_clrtoxic_flag(page_t *pp, uchar_t flag) 66867c478bd9Sstevel@tonic-gate { 66877c478bd9Sstevel@tonic-gate uchar_t new_flag = ((volatile page_t *)pp)->p_toxic; 66887c478bd9Sstevel@tonic-gate while ((new_flag & flag) == flag) { 66897c478bd9Sstevel@tonic-gate uchar_t old_flag = new_flag; 66907c478bd9Sstevel@tonic-gate new_flag = old_flag & ~flag; 66917c478bd9Sstevel@tonic-gate (void) cas8(&pp->p_toxic, old_flag, new_flag); 66927c478bd9Sstevel@tonic-gate new_flag = ((volatile page_t *)pp)->p_toxic; 66937c478bd9Sstevel@tonic-gate } 66947c478bd9Sstevel@tonic-gate } 66957c478bd9Sstevel@tonic-gate 66967c478bd9Sstevel@tonic-gate int 66977c478bd9Sstevel@tonic-gate page_isfaulty(page_t *pp) 66987c478bd9Sstevel@tonic-gate { 66997c478bd9Sstevel@tonic-gate return ((pp->p_toxic & PAGE_IS_FAULTY) == PAGE_IS_FAULTY); 67007c478bd9Sstevel@tonic-gate } 67017c478bd9Sstevel@tonic-gate 67027c478bd9Sstevel@tonic-gate /* 67037c478bd9Sstevel@tonic-gate * The following four functions are called from /proc code 67047c478bd9Sstevel@tonic-gate * for the /proc/<pid>/xmap interface. 
67057c478bd9Sstevel@tonic-gate */ 67067c478bd9Sstevel@tonic-gate int 67077c478bd9Sstevel@tonic-gate page_isshared(page_t *pp) 67087c478bd9Sstevel@tonic-gate { 67097c478bd9Sstevel@tonic-gate return (hat_page_getshare(pp) > 1); 67107c478bd9Sstevel@tonic-gate } 67117c478bd9Sstevel@tonic-gate 67127c478bd9Sstevel@tonic-gate int 67137c478bd9Sstevel@tonic-gate page_isfree(page_t *pp) 67147c478bd9Sstevel@tonic-gate { 67157c478bd9Sstevel@tonic-gate return (PP_ISFREE(pp)); 67167c478bd9Sstevel@tonic-gate } 67177c478bd9Sstevel@tonic-gate 67187c478bd9Sstevel@tonic-gate int 67197c478bd9Sstevel@tonic-gate page_isref(page_t *pp) 67207c478bd9Sstevel@tonic-gate { 67217c478bd9Sstevel@tonic-gate return (hat_page_getattr(pp, P_REF)); 67227c478bd9Sstevel@tonic-gate } 67237c478bd9Sstevel@tonic-gate 67247c478bd9Sstevel@tonic-gate int 67257c478bd9Sstevel@tonic-gate page_ismod(page_t *pp) 67267c478bd9Sstevel@tonic-gate { 67277c478bd9Sstevel@tonic-gate return (hat_page_getattr(pp, P_MOD)); 67287c478bd9Sstevel@tonic-gate } 6729