/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989  AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * UNIX machine dependent virtual memory support.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/buf.h>
#include <sys/cpuvar.h>
#include <sys/lgrp.h>
#include <sys/disp.h>
#include <sys/vm.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/cred.h>
#include <sys/exec.h>
#include <sys/exechdr.h>
#include <sys/debug.h>
#include <sys/vmsystm.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kp.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/vm_dep.h>

#include <sys/cpu.h>
#include <sys/vm_machparam.h>
#include <sys/memlist.h>
#include <sys/bootconf.h> /* XXX the memlist stuff belongs in memlist_plat.h */
#include <vm/hat_i86.h>
#include <sys/x86_archext.h>
#include <sys/elf_386.h>
#include <sys/cmn_err.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>

#include <sys/vtrace.h>
#include <sys/ddidmareq.h>
#include <sys/promif.h>
#include <sys/memnode.h>
#include <sys/stack.h>

uint_t vac_colors = 1;

int largepagesupport = 0;
extern uint_t page_create_new;
extern uint_t page_create_exists;
extern uint_t page_create_putbacks;
/*
 * Allow users to disable the kernel's use of SSE.
 */
extern int use_sse_pagecopy, use_sse_pagezero;

/* 4g memory management */
pgcnt_t		maxmem4g;
pgcnt_t		freemem4g;
int		physmax4g;
int		desfree4gshift = 4;	/* maxmem4g shift to derive DESFREE4G */
int		lotsfree4gshift = 3;

/*
 * 16m memory management: desired number of free pages below 16m.
 */
pgcnt_t		desfree16m = 0x380;

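/*
 * Editor's note (illustrative, not from the original source): the shifts
 * above derive the 4g watermarks from maxmem4g elsewhere in the 4g
 * management code, roughly:
 *
 *	DESFREE4G  == maxmem4g >> desfree4gshift	(1/16 of maxmem4g)
 *	LOTSFREE4G == maxmem4g >> lotsfree4gshift	(1/8 of maxmem4g)
 *
 * freemem4g is compared against these watermarks to decide when
 * allocations should avoid the below-4g pfn ranges.
 */
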
#ifdef VM_STATS
struct {
	ulong_t	pga_alloc;
	ulong_t	pga_notfullrange;
	ulong_t	pga_nulldmaattr;
	ulong_t	pga_allocok;
	ulong_t	pga_allocfailed;
	ulong_t	pgma_alloc;
	ulong_t	pgma_allocok;
	ulong_t	pgma_allocfailed;
	ulong_t	pgma_allocempty;
} pga_vmstats;
#endif

uint_t mmu_page_sizes;

/* How many page sizes the users can see */
uint_t mmu_exported_page_sizes;

/*
 * Number of pages in 1 GB.  Don't enable automatic large pages if we have
 * fewer than this many pages.
 */
pgcnt_t shm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);
pgcnt_t privm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT);

/*
 * Maximum and default segment size tunables for user private
 * and shared anon memory, and user text and initialized data.
 * These can be patched via /etc/system to allow large pages
 * to be used for mapping application private and shared anon memory.
 */
size_t mcntl0_lpsize = MMU_PAGESIZE;
size_t max_uheap_lpsize = MMU_PAGESIZE;
size_t default_uheap_lpsize = MMU_PAGESIZE;
size_t max_ustack_lpsize = MMU_PAGESIZE;
size_t default_ustack_lpsize = MMU_PAGESIZE;
size_t max_privmap_lpsize = MMU_PAGESIZE;
size_t max_uidata_lpsize = MMU_PAGESIZE;
size_t max_utext_lpsize = MMU_PAGESIZE;
size_t max_shm_lpsize = MMU_PAGESIZE;

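/*
 * Illustrative example (editor's note, not in the original source): to let
 * applications back their heaps with 2M pages on hardware that supports
 * them, an administrator could patch the tunables above via /etc/system
 * and reboot:
 *
 *	set max_uheap_lpsize = 0x200000
 *	set default_uheap_lpsize = 0x200000
 *
 * The values are byte sizes and must correspond to a page size the MMU
 * actually supports (see mmu.max_page_level below).
 */
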
/*
 * Return the optimum page size for a given mapping
 */
/*ARGSUSED*/
size_t
map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
{
	level_t l = 0;
	size_t pgsz = MMU_PAGESIZE;
	size_t max_lpsize;
	uint_t mszc;

	ASSERT(maptype != MAPPGSZ_VA);

	if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
		return (MMU_PAGESIZE);
	}

	switch (maptype) {
	case MAPPGSZ_HEAP:
	case MAPPGSZ_STK:
		max_lpsize = memcntl ? mcntl0_lpsize : (maptype ==
		    MAPPGSZ_HEAP ? max_uheap_lpsize : max_ustack_lpsize);
		if (max_lpsize == MMU_PAGESIZE) {
			return (MMU_PAGESIZE);
		}
		if (len == 0) {
			len = (maptype == MAPPGSZ_HEAP) ? p->p_brkbase +
			    p->p_brksize - p->p_bssbase : p->p_stksize;
		}
		len = (maptype == MAPPGSZ_HEAP) ? MAX(len,
		    default_uheap_lpsize) : MAX(len, default_ustack_lpsize);

		/*
		 * use the page size that best fits len
		 */
		for (l = mmu.max_page_level; l > 0; --l) {
			if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) {
				continue;
			} else {
				pgsz = LEVEL_SIZE(l);
			}
			break;
		}

		mszc = (maptype == MAPPGSZ_HEAP ? p->p_brkpageszc :
		    p->p_stkpageszc);
		if (addr == 0 && (pgsz < hw_page_array[mszc].hp_size)) {
			pgsz = hw_page_array[mszc].hp_size;
		}
		return (pgsz);

	/*
	 * for ISM use the 1st large page size.
	 */
	case MAPPGSZ_ISM:
		if (mmu.max_page_level == 0)
			return (MMU_PAGESIZE);
		return (LEVEL_SIZE(1));
	}
	return (pgsz);
}

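/*
 * Worked example (editor's illustration): for a MAPPGSZ_HEAP request on
 * hardware with mmu.max_page_level == 1 (4K and 2M pages), a heap of
 * len == 5M with max_uheap_lpsize == 2M selects l == 1, since
 * LEVEL_SIZE(1) == 2M is neither above max_lpsize nor larger than len,
 * so pgsz == 2M.  A 1M heap would leave pgsz at MMU_PAGESIZE (4K) instead.
 */
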
static uint_t
map_szcvec(caddr_t addr, size_t size, uintptr_t off, size_t max_lpsize,
    size_t min_physmem)
{
	caddr_t eaddr = addr + size;
	uint_t szcvec = 0;
	caddr_t raddr;
	caddr_t readdr;
	size_t	pgsz;
	int i;

	if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
		return (0);
	}

	for (i = mmu_page_sizes - 1; i > 0; i--) {
		pgsz = page_get_pagesize(i);
		if (pgsz > max_lpsize) {
			continue;
		}
		raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		if (raddr < addr || raddr >= readdr) {
			continue;
		}
		if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
			continue;
		}
		/*
		 * Set szcvec to the remaining page sizes.
		 */
		szcvec = ((1 << (i + 1)) - 1) & ~1;
		break;
	}
	return (szcvec);
}

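/*
 * Worked example (editor's illustration): with mmu_page_sizes == 3
 * (4K, 2M, 1G) and a suitably aligned region large enough to hold at
 * least one 1G page, the loop above exits at i == 2 and
 *
 *	szcvec = ((1 << 3) - 1) & ~1 == 0x6
 *
 * i.e. the bits for size codes 1 (2M) and 2 (1G) are set; bit 0 (4K) is
 * masked off because the base page size is always usable and need not be
 * advertised.
 */
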
/*
 * Return a bit vector of large page size codes that
 * can be used to map [addr, addr + len) region.
 */
/*ARGSUSED*/
uint_t
map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
    int memcntl)
{
	size_t max_lpsize = mcntl0_lpsize;

	if (mmu.max_page_level == 0)
		return (0);

	if (flags & MAP_TEXT) {
		if (!memcntl)
			max_lpsize = max_utext_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    shm_lpg_min_physmem));

	} else if (flags & MAP_INITDATA) {
		if (!memcntl)
			max_lpsize = max_uidata_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_SHM) {
		if (!memcntl)
			max_lpsize = max_shm_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    shm_lpg_min_physmem));

	} else if (type == MAPPGSZC_HEAP) {
		if (!memcntl)
			max_lpsize = max_uheap_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_STACK) {
		if (!memcntl)
			max_lpsize = max_ustack_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));

	} else {
		if (!memcntl)
			max_lpsize = max_privmap_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));
	}
}

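/*
 * Summary (editor's note, derived from the code above): each request type
 * is bounded by its own size tunable and physmem threshold:
 *
 *	MAP_TEXT	max_utext_lpsize	shm_lpg_min_physmem
 *	MAP_INITDATA	max_uidata_lpsize	privm_lpg_min_physmem
 *	MAPPGSZC_SHM	max_shm_lpsize		shm_lpg_min_physmem
 *	MAPPGSZC_HEAP	max_uheap_lpsize	privm_lpg_min_physmem
 *	MAPPGSZC_STACK	max_ustack_lpsize	privm_lpg_min_physmem
 *	other		max_privmap_lpsize	privm_lpg_min_physmem
 *
 * When called on behalf of memcntl(2), mcntl0_lpsize bounds them all.
 */
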
/*
 * Handle a pagefault.
 */
faultcode_t
pagefault(
	caddr_t addr,
	enum fault_type type,
	enum seg_rw rw,
	int iskernel)
{
	struct as *as;
	struct hat *hat;
	struct proc *p;
	kthread_t *t;
	faultcode_t res;
	caddr_t base;
	size_t len;
	int err;
	int mapped_red;
	uintptr_t ea;

	ASSERT_STACK_ALIGNED();

	if (INVALID_VADDR(addr))
		return (FC_NOMAP);

	mapped_red = segkp_map_red();

	if (iskernel) {
		as = &kas;
		hat = as->a_hat;
	} else {
		t = curthread;
		p = ttoproc(t);
		as = p->p_as;
		hat = as->a_hat;
	}

	/*
	 * Dispatch pagefault.
	 */
	res = as_fault(hat, as, addr, 1, type, rw);

	/*
	 * If this isn't a potential unmapped hole in the user's
	 * UNIX data or stack segments, just return status info.
	 */
	if (res != FC_NOMAP || iskernel)
		goto out;

	/*
	 * Check to see if we happened to fault on a currently unmapped
	 * part of the UNIX data or stack segments.  If so, create a zfod
	 * mapping there and then try calling the fault routine again.
	 */
	base = p->p_brkbase;
	len = p->p_brksize;

	if (addr < base || addr >= base + len) {	/* data seg? */
		base = (caddr_t)p->p_usrstack - p->p_stksize;
		len = p->p_stksize;
		if (addr < base || addr >= p->p_usrstack) {	/* stack seg? */
			/* not in either UNIX data or stack segments */
			res = FC_NOMAP;
			goto out;
		}
	}

	/*
	 * The rest of this function implements 3.X/4.X/5.X compatibility.
	 * This code is probably not needed anymore.
	 */
	if (p->p_model == DATAMODEL_ILP32) {

		/* expand the gap to the page boundaries on each side */
		ea = P2ROUNDUP((uintptr_t)base + len, MMU_PAGESIZE);
		base = (caddr_t)P2ALIGN((uintptr_t)base, MMU_PAGESIZE);
		len = ea - (uintptr_t)base;

		as_rangelock(as);
		if (as_gap(as, MMU_PAGESIZE, &base, &len, AH_CONTAIN, addr) ==
		    0) {
			err = as_map(as, base, len, segvn_create, zfod_argsp);
			as_rangeunlock(as);
			if (err) {
				res = FC_MAKE_ERR(err);
				goto out;
			}
		} else {
			/*
			 * This page is already mapped by another thread after
			 * we returned from as_fault() above.  We just fall
			 * through as_fault() below.
			 */
			as_rangeunlock(as);
		}

		res = as_fault(hat, as, addr, 1, F_INVAL, rw);
	}

out:
	if (mapped_red)
		segkp_unmap_red();

	return (res);
}

void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
	struct proc *p = curproc;
	caddr_t userlimit = (flags & _MAP_LOW32) ?
	    (caddr_t)_userlimit32 : p->p_as->a_userlimit;

	map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags);
}

/*ARGSUSED*/
int
map_addr_vacalign_check(caddr_t addr, u_offset_t off)
{
	return (0);
}

/*
 * map_addr_proc() is the routine called when the system is to
 * choose an address for the user.  We will pick an address
 * range which is the highest available below userlimit.
 *
 * addrp is a value/result parameter.
 *	On input it is a hint from the user to be used in a completely
 *	machine dependent fashion.  We decide to completely ignore this hint.
 *
 *	On output it is NULL if no address can be found in the current
 *	process's address space or else an address that is currently
 *	not mapped for len bytes with a page of red zone on either side.
 *
 * vacalign is not needed on x86 (it's for virtually addressed caches).
 */
/*ARGSUSED*/
void
map_addr_proc(
	caddr_t *addrp,
	size_t len,
	offset_t off,
	int vacalign,
	caddr_t userlimit,
	struct proc *p,
	uint_t flags)
{
	struct as *as = p->p_as;
	caddr_t addr;
	caddr_t base;
	size_t slen;
	size_t align_amount;

	ASSERT32(userlimit == as->a_userlimit);

	base = p->p_brkbase;
#if defined(__amd64)
	/*
	 * XX64 Yes, this needs more work.
	 */
	if (p->p_model == DATAMODEL_NATIVE) {
		if (userlimit < as->a_userlimit) {
			/*
			 * This happens when a program wants to map
			 * something in a range that's accessible to a
			 * program in a smaller address space.  For example,
			 * a 64-bit program calling mmap32(2) to guarantee
			 * that the returned address is below 4Gbytes.
			 */
			ASSERT((uintptr_t)userlimit < ADDRESS_C(0xffffffff));

			if (userlimit > base)
				slen = userlimit - base;
			else {
				*addrp = NULL;
				return;
			}
		} else {
			/*
			 * XX64 This layout is probably wrong .. but in
			 * the event we make the amd64 address space look
			 * like sparcv9 i.e. with the stack -above- the
			 * heap, this bit of code might even be correct.
			 */
			slen = p->p_usrstack - base -
			    (((size_t)rctl_enforced_value(
			    rctlproc_legacy[RLIMIT_STACK],
			    p->p_rctls, p) + PAGEOFFSET) & PAGEMASK);
		}
	} else
#endif
		slen = userlimit - base;

	len = (len + PAGEOFFSET) & PAGEMASK;

	/*
	 * Redzone for each side of the request. This is done to leave
	 * one page unmapped between segments. This is not required, but
	 * it's useful for the user because if their program strays across
	 * a segment boundary, it will catch a fault immediately making
	 * debugging a little easier.
	 */
	len += 2 * MMU_PAGESIZE;

	/*
	 * figure out what the alignment should be
	 *
	 * XX64 -- is there an ELF_AMD64_MAXPGSZ or is it the same????
	 */
	if (len <= ELF_386_MAXPGSZ) {
		/*
		 * Align virtual addresses to ensure that ELF shared libraries
		 * are mapped with the appropriate alignment constraints by
		 * the run-time linker.
		 */
		align_amount = ELF_386_MAXPGSZ;
	} else {
		int l = mmu.max_page_level;

		while (l && len < LEVEL_SIZE(l))
			--l;

		align_amount = LEVEL_SIZE(l);
	}

	if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount))
		align_amount = (uintptr_t)*addrp;

	len += align_amount;

	/*
	 * Look for a large enough hole starting below userlimit.
	 * After finding it, use the upper part.  Addition of PAGESIZE
	 * is for the redzone as described above.
	 */
	if (as_gap(as, len, &base, &slen, AH_HI, NULL) == 0) {
		caddr_t as_addr;

		addr = base + slen - len + MMU_PAGESIZE;
		as_addr = addr;
		/*
		 * Round address DOWN to the alignment amount,
		 * add the offset, and if this address is less
		 * than the original address, add alignment amount.
		 */
		addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1)));
		addr += (uintptr_t)(off & (align_amount - 1));
		if (addr < as_addr)
			addr += align_amount;

		ASSERT(addr <= (as_addr + align_amount));
		ASSERT(((uintptr_t)addr & (align_amount - 1)) ==
		    ((uintptr_t)(off & (align_amount - 1))));
		*addrp = addr;
	} else {
		*addrp = NULL;		/* no more virtual space */
	}
}

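/*
 * Worked example (editor's illustration): suppose as_gap() succeeds and
 * the redzone-adjusted candidate is as_addr == 0xfec01000, with
 * align_amount == 0x200000 (e.g. LEVEL_SIZE(1) with 2M pages) and
 * off == 0.  Rounding down gives 0xfec00000, which is below as_addr, so
 * align_amount is added back: addr == 0xfee00000.  The result always lies
 * in (as_addr, as_addr + align_amount] and shares off's phase within the
 * alignment, which is exactly what the two ASSERTs above check.
 */
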
/*
 * Determine whether [base, base+len] contains a valid range of
 * addresses at least minlen long. base and len are adjusted if
 * required to provide a valid range.
 */
/*ARGSUSED3*/
int
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
{
	uintptr_t hi, lo;

	lo = (uintptr_t)*basep;
	hi = lo + *lenp;

	/*
	 * If hi rolled over the top, try cutting back.
	 */
	if (hi < lo) {
		if (0 - lo + hi < minlen)
			return (0);
		if (0 - lo < minlen)
			return (0);
		*lenp = 0 - lo;
	} else if (hi - lo < minlen) {
		return (0);
	}
#if defined(__amd64)
	/*
	 * Deal with a possible hole in the address range between
	 * hole_start and hole_end that should never be mapped.
	 */
	if (lo < hole_start) {
		if (hi > hole_start) {
			if (hi < hole_end) {
				hi = hole_start;
			} else {
				/* lo < hole_start && hi >= hole_end */
				if (dir == AH_LO) {
					/*
					 * prefer lowest range
					 */
					if (hole_start - lo >= minlen)
						hi = hole_start;
					else if (hi - hole_end >= minlen)
						lo = hole_end;
					else
						return (0);
				} else {
					/*
					 * prefer highest range
					 */
					if (hi - hole_end >= minlen)
						lo = hole_end;
					else if (hole_start - lo >= minlen)
						hi = hole_start;
					else
						return (0);
				}
			}
		}
	} else {
		/* lo >= hole_start */
		if (hi < hole_end)
			return (0);
		if (lo < hole_end)
			lo = hole_end;
	}

	if (hi - lo < minlen)
		return (0);

	*basep = (caddr_t)lo;
	*lenp = hi - lo;
#endif
	return (1);
}

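/*
 * Editor's note (illustrative; the exact values are an assumption): on
 * amd64, hole_start and hole_end bracket the processor's non-canonical
 * virtual address gap; with 48-bit virtual addresses that is roughly
 * 0x0000800000000000 (hole_start) through 0xffff800000000000 (hole_end).
 * For example, a request whose lo is below the hole and whose hi is above
 * it, with dir == AH_LO, is trimmed to [lo, hole_start) if that still
 * holds minlen bytes, otherwise to [hole_end, hi).
 */
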
/*
 * Determine whether [addr, addr+len] are valid user addresses.
 */
/*ARGSUSED*/
int
valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
    caddr_t userlimit)
{
	caddr_t eaddr = addr + len;

	if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
		return (RANGE_BADADDR);

#if defined(__amd64)
	/*
	 * Check for the VA hole
	 */
	if (eaddr > (caddr_t)hole_start && addr < (caddr_t)hole_end)
		return (RANGE_BADADDR);
#endif

	return (RANGE_OKAY);
}

/*
 * Return 1 if the page frame is onboard memory, else 0.
 */
int
pf_is_memory(pfn_t pf)
{
	if (pfn_is_foreign(pf))
		return (0);
	return (address_in_memlist(phys_install, pfn_to_pa(pf), 1));
}


/*
 * initialized by page_coloring_init().
 */
uint_t	page_colors;
uint_t	page_colors_mask;
uint_t	page_coloring_shift;
int	cpu_page_colors;
static uint_t	l2_colors;

/*
 * Page freelists and cachelists are dynamically allocated once mnoderangecnt
 * and page_colors are calculated from the l2 cache n-way set size.  Within a
 * mnode range, the page freelist and cachelist are hashed into bins based on
 * color. This makes it easier to search for a page within a specific memory
 * range.
 */
#define	PAGE_COLORS_MIN	16

page_t ****page_freelists;
page_t ***page_cachelists;

/*
 * As the PC architecture evolved, memory was clumped into several
 * ranges for various historical I/O devices to do DMA.
 * < 16Meg - ISA bus
 * < 2Gig - ???
 * < 4Gig - PCI bus or drivers that don't understand PAE mode
 */
static pfn_t arch_memranges[NUM_MEM_RANGES] = {
	0x100000,	/* pfn range for 4G and above */
	0x80000,	/* pfn range for 2G-4G */
	0x01000,	/* pfn range for 16M-2G */
	0x00000,	/* pfn range for 0-16M */
};

/*
 * These are changed during startup if the machine has limited memory.
 */
pfn_t *memranges = &arch_memranges[0];
int nranges = NUM_MEM_RANGES;

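/*
 * Worked example (editor's illustration): arch_memranges[] is sorted from
 * highest base pfn down, so memrange_num() below scans until it finds
 * pfn >= memranges[n].  A pfn of 0x12345 (about 291MB) fails the 0x100000
 * (4G) and 0x80000 (2G) tests but satisfies 0x01000, so it lands in range
 * index 2 (16M-2G).  A pfn of 0x500 (5MB) falls through to index 3 (0-16M).
 */
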
/*
 * Used by page layer to know about page sizes
 */
hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1];

/*
 * This can be patched via /etc/system to allow old non-PAE aware device
 * drivers to use kmem_alloc'd memory on 32 bit systems with > 4Gig RAM.
 */
#if defined(__i386)
int restricted_kmemalloc = 0;
#elif defined(__amd64)
int restricted_kmemalloc = 0;
#endif

kmutex_t	*fpc_mutex[NPC_MUTEX];
kmutex_t	*cpc_mutex[NPC_MUTEX];


/*
 * return the memrange containing pfn
 */
int
memrange_num(pfn_t pfn)
{
	int n;

	for (n = 0; n < nranges - 1; ++n) {
		if (pfn >= memranges[n])
			break;
	}
	return (n);
}

/*
 * return the mnoderange containing pfn
 */
int
pfn_2_mtype(pfn_t pfn)
{
	int	n;

	for (n = mnoderangecnt - 1; n >= 0; n--) {
		if (pfn >= mnoderanges[n].mnr_pfnlo) {
			break;
		}
	}
	return (n);
}

/*
 * is_contigpage_free:
 *	returns a page list of contiguous pages. It minimally has to return
 *	minctg pages. Caller determines minctg based on the scatter-gather
 *	list length.
 *
 *	pfnp is set to the next page frame to search on return.
 */
static page_t *
is_contigpage_free(
	pfn_t *pfnp,
	pgcnt_t *pgcnt,
	pgcnt_t minctg,
	uint64_t pfnseg,
	int iolock)
{
	int	i = 0;
	pfn_t	pfn = *pfnp;
	page_t	*pp;
	page_t	*plist = NULL;

	/*
	 * fail if pfn + minctg crosses a segment boundary.
	 * Adjust for next starting pfn to begin at segment boundary.
	 */

	if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg)) {
		*pfnp = roundup(*pfnp, pfnseg + 1);
		return (NULL);
	}

	do {
retry:
		pp = page_numtopp_nolock(pfn + i);
		if ((pp == NULL) ||
		    (page_trylock(pp, SE_EXCL) == 0)) {
			(*pfnp)++;
			break;
		}
		if (page_pptonum(pp) != pfn + i) {
			page_unlock(pp);
			goto retry;
		}

		if (!(PP_ISFREE(pp))) {
			page_unlock(pp);
			(*pfnp)++;
			break;
		}

		if (!PP_ISAGED(pp)) {
			page_list_sub(pp, PG_CACHE_LIST);
			page_hashout(pp, (kmutex_t *)NULL);
		} else {
			page_list_sub(pp, PG_FREE_LIST);
		}

		if (iolock)
			page_io_lock(pp);
		page_list_concat(&plist, &pp);

		/*
		 * exit loop when pgcnt satisfied or segment boundary reached.
		 */

	} while ((++i < *pgcnt) && ((pfn + i) & pfnseg));

	*pfnp += i;		/* set to next pfn to search */

	if (i >= minctg) {
		*pgcnt -= i;
		return (plist);
	}

	/*
	 * failure: minctg not satisfied.
	 *
	 * if next request crosses segment boundary, set next pfn
	 * to search from the segment boundary.
	 */
	if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg))
		*pfnp = roundup(*pfnp, pfnseg + 1);

	/* clean up any pages already allocated */

	while (plist) {
		pp = plist;
		page_sub(&plist, pp);
		page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
		if (iolock)
			page_io_unlock(pp);
		page_unlock(pp);
	}

	return (NULL);
}

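/*
 * Worked example (editor's illustration): pfnseg is a pfn mask derived
 * from the DMA engine's segment size, e.g. dma_attr_seg == 0xffffff
 * (16M - 1) gives pfnseg == 0xfff.  With minctg == 8 and *pfnp == 0xffa,
 * the last needed pfn is 0x1001, and (0x1001 & 0xfff) == 0x001 is less
 * than (0xffa & 0xfff) == 0xffa: the run would cross a 16M DMA segment
 * boundary, so the search restarts at the boundary,
 * roundup(0xffa, 0x1000) == 0x1000.
 */
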
/*
 * verify that pages being returned from allocator have correct DMA attribute
 */
#ifndef DEBUG
#define	check_dma(a, b, c) (0)
#else
static void
check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt)
{
	if (dma_attr == NULL)
		return;

	while (cnt-- > 0) {
		if (pa_to_ma(pfn_to_pa(pp->p_pagenum)) <
		    dma_attr->dma_attr_addr_lo)
			panic("PFN (pp=%p) below dma_attr_addr_lo", pp);
		if (pa_to_ma(pfn_to_pa(pp->p_pagenum)) >=
		    dma_attr->dma_attr_addr_hi)
			panic("PFN (pp=%p) above dma_attr_addr_hi", pp);
		pp = pp->p_next;
	}
}
#endif

static kmutex_t	contig_lock;

#define	CONTIG_LOCK()	mutex_enter(&contig_lock);
#define	CONTIG_UNLOCK()	mutex_exit(&contig_lock);

#define	PFN_16M		(mmu_btop((uint64_t)0x1000000))

static page_t *
page_get_contigpage(pgcnt_t *pgcnt, ddi_dma_attr_t *mattr, int iolock)
{
	pfn_t		pfn;
	int		sgllen;
	uint64_t	pfnseg;
	pgcnt_t		minctg;
	page_t		*pplist = NULL, *plist;
	uint64_t	lo, hi;
	pgcnt_t		pfnalign = 0;
	static pfn_t	startpfn;
	static pgcnt_t	lastctgcnt;
	uintptr_t	align;

	CONTIG_LOCK();

	if (mattr) {
		lo = mmu_btop((mattr->dma_attr_addr_lo + MMU_PAGEOFFSET));
		hi = mmu_btop(mattr->dma_attr_addr_hi);
		if (hi >= physmax)
			hi = physmax - 1;
		sgllen = mattr->dma_attr_sgllen;
		pfnseg = mmu_btop(mattr->dma_attr_seg);

		align = maxbit(mattr->dma_attr_align, mattr->dma_attr_minxfer);
		if (align > MMU_PAGESIZE)
			pfnalign = mmu_btop(align);

		/*
		 * in order to satisfy the request, must minimally
		 * acquire minctg contiguous pages
		 */
		minctg = howmany(*pgcnt, sgllen);

		ASSERT(hi >= lo);

		/*
		 * start from where last searched if the minctg >= lastctgcnt
		 */
		if (minctg < lastctgcnt || startpfn < lo || startpfn > hi)
			startpfn = lo;
	} else {
		hi = physmax - 1;
		lo = 0;
		sgllen = 1;
		pfnseg = mmu.highest_pfn;
		minctg = *pgcnt;

		if (minctg < lastctgcnt)
			startpfn = lo;
	}
	lastctgcnt = minctg;

	ASSERT(pfnseg + 1 >= (uint64_t)minctg);

	/* conserve 16m memory - start search above 16m when possible */
	if (hi > PFN_16M && startpfn < PFN_16M)
		startpfn = PFN_16M;

	pfn = startpfn;
	if (pfnalign)
		pfn = P2ROUNDUP(pfn, pfnalign);

	while (pfn + minctg - 1 <= hi) {

		plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock);
		if (plist) {
			page_list_concat(&pplist, &plist);
			sgllen--;
			/*
			 * return when contig pages no longer needed
			 */
			if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
				startpfn = pfn;
				CONTIG_UNLOCK();
				check_dma(mattr, pplist, *pgcnt);
				return (pplist);
			}
			minctg = howmany(*pgcnt, sgllen);
		}
		if (pfnalign)
			pfn = P2ROUNDUP(pfn, pfnalign);
	}

	/* cannot find contig pages in specified range */
	if (startpfn == lo) {
		CONTIG_UNLOCK();
		return (NULL);
	}

	/* did not start with lo previously */
	pfn = lo;
	if (pfnalign)
		pfn = P2ROUNDUP(pfn, pfnalign);

	/* allow search to go above startpfn */
	while (pfn < startpfn) {

		plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock);
		if (plist != NULL) {

			page_list_concat(&pplist, &plist);
			sgllen--;

			/*
			 * return when contig pages no longer needed
			 */
			if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
				startpfn = pfn;
				CONTIG_UNLOCK();
				check_dma(mattr, pplist, *pgcnt);
				return (pplist);
			}
			minctg = howmany(*pgcnt, sgllen);
		}
		if (pfnalign)
			pfn = P2ROUNDUP(pfn, pfnalign);
	}
	CONTIG_UNLOCK();
	return (NULL);
}

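/*
 * Worked example (editor's illustration): a driver asking for *pgcnt == 16
 * pages with dma_attr_sgllen == 4 only needs each scatter-gather element
 * to be contiguous, so minctg == howmany(16, 4) == 4.  If the first
 * contiguous run found supplies 6 pages, *pgcnt drops to 10, sgllen to 3,
 * and the requirement is recomputed as minctg == howmany(10, 3) == 4.
 * With sgllen == 1 (or no mattr at all), the entire allocation must be a
 * single contiguous run.
 */
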
/*
 * combine mem_node_config and memrange memory ranges into one data
 * structure to be used for page list management.
 *
 * mnode_range_cnt() calculates the number of memory ranges for mnode and
 * memranges[].  Used to determine the size of page lists and mnoderanges.
 *
 * mnode_range_setup() initializes mnoderanges.
 */
mnoderange_t	*mnoderanges;
int		mnoderangecnt;
int		mtype4g;

int
mnode_range_cnt(int mnode)
{
	int	mri;
	int	mnrcnt = 0;

	if (mem_node_config[mnode].exists != 0) {
		mri = nranges - 1;

		/* find the memranges index below contained in mnode range */

		while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
			mri--;

		/*
		 * increment mnode range counter when memranges or mnode
		 * boundary is reached.
		 */
		while (mri >= 0 &&
		    mem_node_config[mnode].physmax >= MEMRANGELO(mri)) {
			mnrcnt++;
			if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
				mri--;
			else
				break;
		}
	}
	ASSERT(mnrcnt <= MAX_MNODE_MRANGES);
	return (mnrcnt);
}

void
mnode_range_setup(mnoderange_t *mnoderanges)
{
	int	mnode, mri;

	for (mnode = 0; mnode < max_mem_nodes; mnode++) {
		if (mem_node_config[mnode].exists == 0)
			continue;

		mri = nranges - 1;

		while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
			mri--;

		while (mri >= 0 && mem_node_config[mnode].physmax >=
		    MEMRANGELO(mri)) {
			mnoderanges->mnr_pfnlo =
			    MAX(MEMRANGELO(mri),
			    mem_node_config[mnode].physbase);
			mnoderanges->mnr_pfnhi =
			    MIN(MEMRANGEHI(mri),
			    mem_node_config[mnode].physmax);
			mnoderanges->mnr_mnode = mnode;
			mnoderanges->mnr_memrange = mri;
			mnoderanges++;
			if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
				mri--;
			else
				break;
		}
	}
}

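/*
 * Worked example (editor's illustration): a single mnode spanning pfns
 * 0 through 0x17ffff (0 - 6G) intersects all four arch_memranges, so
 * mnode_range_cnt() returns 4 and mnode_range_setup() produces:
 *
 *	mnr_pfnlo	mnr_pfnhi	mnr_memrange
 *	0x000000	0x000fff	3	(0-16M)
 *	0x001000	0x07ffff	2	(16M-2G)
 *	0x080000	0x0fffff	1	(2G-4G)
 *	0x100000	0x17ffff	0	(4G and above)
 *
 * sorted from low to high pfn, which is why pfn_2_mtype() above and
 * mtype_func() below scan mnoderanges[] from the top down to find high
 * memory first.
 */
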
/*
 * Determine if the mnode range specified in mtype contains memory belonging
 * to memory node mnode.  If flags & PGI_MT_RANGE is set then mtype contains
 * the range of indices from high pfn to 0, 16m or 4g.
 *
 * Return first mnode range type index found otherwise return -1 if none found.
 */
int
mtype_func(int mnode, int mtype, uint_t flags)
{
	if (flags & PGI_MT_RANGE) {
		int	mtlim;

		if (flags & PGI_MT_NEXT)
			mtype--;
		if (flags & PGI_MT_RANGE0)
			mtlim = 0;
		else if (flags & PGI_MT_RANGE4G)
			mtlim = mtype4g + 1;	/* exclude 0-4g range */
		else if (flags & PGI_MT_RANGE16M)
			mtlim = 1;		/* exclude 0-16m range */
		while (mtype >= mtlim) {
			if (mnoderanges[mtype].mnr_mnode == mnode)
				return (mtype);
			mtype--;
		}
	} else {
		if (mnoderanges[mtype].mnr_mnode == mnode)
			return (mtype);
	}
	return (-1);
}

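/*
 * Usage note (editor's illustration): callers typically walk all of a
 * mnode's ranges from the top down by starting at mnoderangecnt - 1 with
 * PGI_MT_RANGE0 and passing PGI_MT_NEXT on each subsequent call, as
 * mnode_pgcnt() below does.  The PGI_MT_RANGE4G and PGI_MT_RANGE16M limits
 * let page allocation paths avoid dipping into the scarcer low-memory
 * ranges.
 */
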
11827c478bd9Sstevel@tonic-gate */ 11837c478bd9Sstevel@tonic-gate size_t 11847c478bd9Sstevel@tonic-gate page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc) 11857c478bd9Sstevel@tonic-gate { 11867c478bd9Sstevel@tonic-gate size_t colorsz = 0; 11877c478bd9Sstevel@tonic-gate int i; 11887c478bd9Sstevel@tonic-gate int colors; 11897c478bd9Sstevel@tonic-gate 11907c478bd9Sstevel@tonic-gate /* 11917c478bd9Sstevel@tonic-gate * Reduce the memory ranges lists if we don't have large amounts 11927c478bd9Sstevel@tonic-gate * of memory. This avoids searching known empty free lists. 11937c478bd9Sstevel@tonic-gate */ 11947c478bd9Sstevel@tonic-gate i = memrange_num(physmax); 11957c478bd9Sstevel@tonic-gate memranges += i; 11967c478bd9Sstevel@tonic-gate nranges -= i; 11977c478bd9Sstevel@tonic-gate #if defined(__i386) 11987c478bd9Sstevel@tonic-gate if (i > 0) 11997c478bd9Sstevel@tonic-gate restricted_kmemalloc = 0; 12007c478bd9Sstevel@tonic-gate #endif 12017c478bd9Sstevel@tonic-gate /* physmax greater than 4g */ 12027c478bd9Sstevel@tonic-gate if (i == 0) 12037c478bd9Sstevel@tonic-gate physmax4g = 1; 12047c478bd9Sstevel@tonic-gate 12057c478bd9Sstevel@tonic-gate ASSERT(ISP2(l2_sz)); 12067c478bd9Sstevel@tonic-gate ASSERT(ISP2(l2_linesz)); 12077c478bd9Sstevel@tonic-gate ASSERT(l2_sz > MMU_PAGESIZE); 12087c478bd9Sstevel@tonic-gate 12097c478bd9Sstevel@tonic-gate /* l2_assoc is 0 for fully associative l2 cache */ 12107c478bd9Sstevel@tonic-gate if (l2_assoc) 12117c478bd9Sstevel@tonic-gate l2_colors = MAX(1, l2_sz / (l2_assoc * MMU_PAGESIZE)); 12127c478bd9Sstevel@tonic-gate else 12137c478bd9Sstevel@tonic-gate l2_colors = 1; 12147c478bd9Sstevel@tonic-gate 12157c478bd9Sstevel@tonic-gate /* for scalability, configure at least PAGE_COLORS_MIN color bins */ 12167c478bd9Sstevel@tonic-gate page_colors = MAX(l2_colors, PAGE_COLORS_MIN); 12177c478bd9Sstevel@tonic-gate 12187c478bd9Sstevel@tonic-gate /* 12197c478bd9Sstevel@tonic-gate * cpu_page_colors is non-zero when a page color may be spread across 12207c478bd9Sstevel@tonic-gate * multiple bins. 12217c478bd9Sstevel@tonic-gate */ 12227c478bd9Sstevel@tonic-gate if (l2_colors < page_colors) 12237c478bd9Sstevel@tonic-gate cpu_page_colors = l2_colors; 12247c478bd9Sstevel@tonic-gate 12257c478bd9Sstevel@tonic-gate ASSERT(ISP2(page_colors)); 12267c478bd9Sstevel@tonic-gate 12277c478bd9Sstevel@tonic-gate page_colors_mask = page_colors - 1; 12287c478bd9Sstevel@tonic-gate 12297c478bd9Sstevel@tonic-gate ASSERT(ISP2(CPUSETSIZE())); 12307c478bd9Sstevel@tonic-gate page_coloring_shift = lowbit(CPUSETSIZE()); 12317c478bd9Sstevel@tonic-gate 12325d07b933Sdp /* initialize number of colors per page size */ 12335d07b933Sdp for (i = 0; i <= mmu.max_page_level; i++) { 12345d07b933Sdp hw_page_array[i].hp_size = LEVEL_SIZE(i); 12355d07b933Sdp hw_page_array[i].hp_shift = LEVEL_SHIFT(i); 12365d07b933Sdp hw_page_array[i].hp_pgcnt = LEVEL_SIZE(i) >> LEVEL_SHIFT(0); 12375d07b933Sdp hw_page_array[i].hp_colors = (page_colors_mask >> 12385d07b933Sdp (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift)) 12395d07b933Sdp + 1; 12405d07b933Sdp } 12415d07b933Sdp 12425d07b933Sdp /* 12435d07b933Sdp * The value of cpu_page_colors determines if additional color bins 12445d07b933Sdp * need to be checked for a particular color in the page_get routines. 
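 *
 * For instance (assumed values), with page_colors == 64 and
 * cpu_page_colors == 16:
 *
 *	a = lowbit(64) - lowbit(16) = 7 - 5 = 2
 *
 * so colorequivszc[] stores (a << 4) in its upper 4 bits and the
 * page_get routines treat each color as equivalent to 2^a == 4
 * neighboring bins.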
12455d07b933Sdp */ 12465d07b933Sdp if (cpu_page_colors != 0) { 12475d07b933Sdp 12485d07b933Sdp int a = lowbit(page_colors) - lowbit(cpu_page_colors); 12495d07b933Sdp ASSERT(a > 0); 12505d07b933Sdp ASSERT(a < 16); 12515d07b933Sdp 12525d07b933Sdp for (i = 0; i <= mmu.max_page_level; i++) { 12535d07b933Sdp if ((colors = hw_page_array[i].hp_colors) <= 1) { 12545d07b933Sdp colorequivszc[i] = 0; 12555d07b933Sdp continue; 12565d07b933Sdp } 12575d07b933Sdp while ((colors >> a) == 0) 12585d07b933Sdp a--; 12595d07b933Sdp ASSERT(a >= 0); 12605d07b933Sdp 12615d07b933Sdp /* higher 4 bits encodes color equiv mask */ 12625d07b933Sdp colorequivszc[i] = (a << 4); 12635d07b933Sdp } 12645d07b933Sdp } 12655d07b933Sdp 12665d07b933Sdp /* factor in colorequiv to check additional 'equivalent' bins. */ 12675d07b933Sdp if (colorequiv > 1) { 12685d07b933Sdp 12695d07b933Sdp int a = lowbit(colorequiv) - 1; 12705d07b933Sdp if (a > 15) 12715d07b933Sdp a = 15; 12725d07b933Sdp 12735d07b933Sdp for (i = 0; i <= mmu.max_page_level; i++) { 12745d07b933Sdp if ((colors = hw_page_array[i].hp_colors) <= 1) { 12755d07b933Sdp continue; 12765d07b933Sdp } 12775d07b933Sdp while ((colors >> a) == 0) 12785d07b933Sdp a--; 12795d07b933Sdp if ((a << 4) > colorequivszc[i]) { 12805d07b933Sdp colorequivszc[i] = (a << 4); 12815d07b933Sdp } 12825d07b933Sdp } 12835d07b933Sdp } 12845d07b933Sdp 12857c478bd9Sstevel@tonic-gate /* size for mnoderanges */ 12865d07b933Sdp for (mnoderangecnt = 0, i = 0; i < max_mem_nodes; i++) 12875d07b933Sdp mnoderangecnt += mnode_range_cnt(i); 12887c478bd9Sstevel@tonic-gate colorsz = mnoderangecnt * sizeof (mnoderange_t); 12897c478bd9Sstevel@tonic-gate 12907c478bd9Sstevel@tonic-gate /* size for fpc_mutex and cpc_mutex */ 12917c478bd9Sstevel@tonic-gate colorsz += (2 * max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX); 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate /* size of page_freelists */ 12947c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * sizeof (page_t ***); 12957c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * mmu_page_sizes * sizeof (page_t **); 12967c478bd9Sstevel@tonic-gate 12977c478bd9Sstevel@tonic-gate for (i = 0; i < mmu_page_sizes; i++) { 12987c478bd9Sstevel@tonic-gate colors = page_get_pagecolors(i); 12997c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * colors * sizeof (page_t *); 13007c478bd9Sstevel@tonic-gate } 13017c478bd9Sstevel@tonic-gate 13027c478bd9Sstevel@tonic-gate /* size of page_cachelists */ 13037c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * sizeof (page_t **); 13047c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * page_colors * sizeof (page_t *); 13057c478bd9Sstevel@tonic-gate 13067c478bd9Sstevel@tonic-gate return (colorsz); 13077c478bd9Sstevel@tonic-gate } 13087c478bd9Sstevel@tonic-gate 13097c478bd9Sstevel@tonic-gate /* 13107c478bd9Sstevel@tonic-gate * Called once at startup to configure page_coloring data structures and 13117c478bd9Sstevel@tonic-gate * does the 1st page_free()/page_freelist_add(). 
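 *
 * The pcmemaddr buffer handed in (sized by page_coloring_init() above)
 * is carved up in order: the mnoderanges array, the NPC_MUTEX fpc_mutex
 * and cpc_mutex arrays, then the page_freelists and page_cachelists
 * pointer arrays together with their per-size, per-color list heads.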
13127c478bd9Sstevel@tonic-gate */ 13137c478bd9Sstevel@tonic-gate void 13147c478bd9Sstevel@tonic-gate page_coloring_setup(caddr_t pcmemaddr) 13157c478bd9Sstevel@tonic-gate { 13167c478bd9Sstevel@tonic-gate int i; 13177c478bd9Sstevel@tonic-gate int j; 13187c478bd9Sstevel@tonic-gate int k; 13197c478bd9Sstevel@tonic-gate caddr_t addr; 13207c478bd9Sstevel@tonic-gate int colors; 13217c478bd9Sstevel@tonic-gate 13227c478bd9Sstevel@tonic-gate /* 13237c478bd9Sstevel@tonic-gate * do page coloring setup 13247c478bd9Sstevel@tonic-gate */ 13257c478bd9Sstevel@tonic-gate addr = pcmemaddr; 13267c478bd9Sstevel@tonic-gate 13277c478bd9Sstevel@tonic-gate mnoderanges = (mnoderange_t *)addr; 13287c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (mnoderange_t)); 13297c478bd9Sstevel@tonic-gate 13307c478bd9Sstevel@tonic-gate mnode_range_setup(mnoderanges); 13317c478bd9Sstevel@tonic-gate 13327c478bd9Sstevel@tonic-gate if (physmax4g) 13337c478bd9Sstevel@tonic-gate mtype4g = pfn_2_mtype(0xfffff); 13347c478bd9Sstevel@tonic-gate 13357c478bd9Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 13367c478bd9Sstevel@tonic-gate fpc_mutex[k] = (kmutex_t *)addr; 13377c478bd9Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 13387c478bd9Sstevel@tonic-gate } 13397c478bd9Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 13407c478bd9Sstevel@tonic-gate cpc_mutex[k] = (kmutex_t *)addr; 13417c478bd9Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 13427c478bd9Sstevel@tonic-gate } 13437c478bd9Sstevel@tonic-gate page_freelists = (page_t ****)addr; 13447c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t ***)); 13457c478bd9Sstevel@tonic-gate 13467c478bd9Sstevel@tonic-gate page_cachelists = (page_t ***)addr; 13477c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t **)); 13487c478bd9Sstevel@tonic-gate 13497c478bd9Sstevel@tonic-gate for (i = 0; i < mnoderangecnt; i++) { 13507c478bd9Sstevel@tonic-gate page_freelists[i] = (page_t ***)addr; 13517c478bd9Sstevel@tonic-gate addr += (mmu_page_sizes * sizeof (page_t **)); 13527c478bd9Sstevel@tonic-gate 13537c478bd9Sstevel@tonic-gate for (j = 0; j < mmu_page_sizes; j++) { 13547c478bd9Sstevel@tonic-gate colors = page_get_pagecolors(j); 13557c478bd9Sstevel@tonic-gate page_freelists[i][j] = (page_t **)addr; 13567c478bd9Sstevel@tonic-gate addr += (colors * sizeof (page_t *)); 13577c478bd9Sstevel@tonic-gate } 13587c478bd9Sstevel@tonic-gate page_cachelists[i] = (page_t **)addr; 13597c478bd9Sstevel@tonic-gate addr += (page_colors * sizeof (page_t *)); 13607c478bd9Sstevel@tonic-gate } 13617c478bd9Sstevel@tonic-gate } 13627c478bd9Sstevel@tonic-gate 13637c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 13647c478bd9Sstevel@tonic-gate int 13657c478bd9Sstevel@tonic-gate bp_color(struct buf *bp) 13667c478bd9Sstevel@tonic-gate { 13677c478bd9Sstevel@tonic-gate return (0); 13687c478bd9Sstevel@tonic-gate } 13697c478bd9Sstevel@tonic-gate 13707c478bd9Sstevel@tonic-gate /* 13717c478bd9Sstevel@tonic-gate * get a page from any list with the given mnode 13727c478bd9Sstevel@tonic-gate */ 13737c478bd9Sstevel@tonic-gate page_t * 13747c478bd9Sstevel@tonic-gate page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags, 13757c478bd9Sstevel@tonic-gate int mnode, int mtype, ddi_dma_attr_t *dma_attr) 13767c478bd9Sstevel@tonic-gate { 13775d07b933Sdp kmutex_t *pcm; 13785d07b933Sdp int i; 13795d07b933Sdp page_t *pp; 13805d07b933Sdp page_t *first_pp; 13815d07b933Sdp uint64_t pgaddr; 13825d07b933Sdp ulong_t bin; 13835d07b933Sdp int mtypestart; 13845d07b933Sdp int 
plw_initialized; 13855d07b933Sdp page_list_walker_t plw; 13867c478bd9Sstevel@tonic-gate 13877c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_alloc); 13887c478bd9Sstevel@tonic-gate 13897c478bd9Sstevel@tonic-gate ASSERT((flags & PG_MATCH_COLOR) == 0); 13907c478bd9Sstevel@tonic-gate ASSERT(szc == 0); 13917c478bd9Sstevel@tonic-gate ASSERT(dma_attr != NULL); 13927c478bd9Sstevel@tonic-gate 13937c478bd9Sstevel@tonic-gate MTYPE_START(mnode, mtype, flags); 13947c478bd9Sstevel@tonic-gate if (mtype < 0) { 13957c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocempty); 13967c478bd9Sstevel@tonic-gate return (NULL); 13977c478bd9Sstevel@tonic-gate } 13987c478bd9Sstevel@tonic-gate 13997c478bd9Sstevel@tonic-gate mtypestart = mtype; 14007c478bd9Sstevel@tonic-gate 14017c478bd9Sstevel@tonic-gate bin = origbin; 14027c478bd9Sstevel@tonic-gate 14037c478bd9Sstevel@tonic-gate /* 14047c478bd9Sstevel@tonic-gate * check up to page_colors + 1 bins - origbin may be checked twice 14057c478bd9Sstevel@tonic-gate * because of BIN_STEP skip 14067c478bd9Sstevel@tonic-gate */ 14077c478bd9Sstevel@tonic-gate do { 14085d07b933Sdp plw_initialized = 0; 14095d07b933Sdp 14105d07b933Sdp for (plw.plw_count = 0; 14115d07b933Sdp plw.plw_count < page_colors; plw.plw_count++) { 14125d07b933Sdp 14137c478bd9Sstevel@tonic-gate if (PAGE_FREELISTS(mnode, szc, bin, mtype) == NULL) 14147c478bd9Sstevel@tonic-gate goto nextfreebin; 14157c478bd9Sstevel@tonic-gate 14167c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); 14177c478bd9Sstevel@tonic-gate mutex_enter(pcm); 14187c478bd9Sstevel@tonic-gate pp = PAGE_FREELISTS(mnode, szc, bin, mtype); 14197c478bd9Sstevel@tonic-gate first_pp = pp; 14207c478bd9Sstevel@tonic-gate while (pp != NULL) { 14217c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 14227c478bd9Sstevel@tonic-gate pp = pp->p_next; 14237c478bd9Sstevel@tonic-gate if (pp == first_pp) { 14247c478bd9Sstevel@tonic-gate pp = NULL; 14257c478bd9Sstevel@tonic-gate } 14267c478bd9Sstevel@tonic-gate continue; 14277c478bd9Sstevel@tonic-gate } 14287c478bd9Sstevel@tonic-gate 14297c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 14307c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 14317c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL); 14327c478bd9Sstevel@tonic-gate ASSERT(pp->p_hash == NULL); 14337c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1); 14347c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 14357c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 14367c478bd9Sstevel@tonic-gate /* check if page within DMA attributes */ 1437*ae115bc7Smrj pgaddr = pa_to_ma(pfn_to_pa(pp->p_pagenum)); 14387c478bd9Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 14397c478bd9Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 14407c478bd9Sstevel@tonic-gate dma_attr->dma_attr_addr_hi)) { 14417c478bd9Sstevel@tonic-gate break; 14427c478bd9Sstevel@tonic-gate } 14437c478bd9Sstevel@tonic-gate 14447c478bd9Sstevel@tonic-gate /* continue looking */ 14457c478bd9Sstevel@tonic-gate page_unlock(pp); 14467c478bd9Sstevel@tonic-gate pp = pp->p_next; 14477c478bd9Sstevel@tonic-gate if (pp == first_pp) 14487c478bd9Sstevel@tonic-gate pp = NULL; 14497c478bd9Sstevel@tonic-gate 14507c478bd9Sstevel@tonic-gate } 14517c478bd9Sstevel@tonic-gate if (pp != NULL) { 14527c478bd9Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 14537c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 14547c478bd9Sstevel@tonic-gate 14557c478bd9Sstevel@tonic-gate /* found a page with specified DMA attributes */ 
14567c478bd9Sstevel@tonic-gate page_sub(&PAGE_FREELISTS(mnode, szc, bin, 14577c478bd9Sstevel@tonic-gate mtype), pp); 1458affbd3ccSkchow page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST); 14597c478bd9Sstevel@tonic-gate 14607c478bd9Sstevel@tonic-gate if ((PP_ISFREE(pp) == 0) || 14617c478bd9Sstevel@tonic-gate (PP_ISAGED(pp) == 0)) { 14627c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "page %p is not free", 14637c478bd9Sstevel@tonic-gate (void *)pp); 14647c478bd9Sstevel@tonic-gate } 14657c478bd9Sstevel@tonic-gate 14667c478bd9Sstevel@tonic-gate mutex_exit(pcm); 14677c478bd9Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 14687c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocok); 14697c478bd9Sstevel@tonic-gate return (pp); 14707c478bd9Sstevel@tonic-gate } 14717c478bd9Sstevel@tonic-gate mutex_exit(pcm); 14727c478bd9Sstevel@tonic-gate nextfreebin: 14735d07b933Sdp if (plw_initialized == 0) { 14745d07b933Sdp page_list_walk_init(szc, 0, bin, 1, 0, &plw); 14755d07b933Sdp ASSERT(plw.plw_ceq_dif == page_colors); 14765d07b933Sdp plw_initialized = 1; 14775d07b933Sdp } 14787c478bd9Sstevel@tonic-gate 14795d07b933Sdp if (plw.plw_do_split) { 14805d07b933Sdp pp = page_freelist_split(szc, bin, mnode, 14815d07b933Sdp mtype, 14825d07b933Sdp mmu_btop(dma_attr->dma_attr_addr_hi + 1), 14835d07b933Sdp &plw); 14845d07b933Sdp if (pp != NULL) 14855d07b933Sdp return (pp); 14865d07b933Sdp } 14875d07b933Sdp 14885d07b933Sdp bin = page_list_walk_next_bin(szc, bin, &plw); 14897c478bd9Sstevel@tonic-gate } 14905d07b933Sdp 1491affbd3ccSkchow MTYPE_NEXT(mnode, mtype, flags); 1492affbd3ccSkchow } while (mtype >= 0); 14937c478bd9Sstevel@tonic-gate 14947c478bd9Sstevel@tonic-gate /* failed to find a page in the freelist; try it in the cachelist */ 14957c478bd9Sstevel@tonic-gate 14967c478bd9Sstevel@tonic-gate /* reset mtype start for cachelist search */ 14977c478bd9Sstevel@tonic-gate mtype = mtypestart; 14987c478bd9Sstevel@tonic-gate ASSERT(mtype >= 0); 14997c478bd9Sstevel@tonic-gate 15007c478bd9Sstevel@tonic-gate /* start with the bin of matching color */ 15017c478bd9Sstevel@tonic-gate bin = origbin; 15027c478bd9Sstevel@tonic-gate 15037c478bd9Sstevel@tonic-gate do { 15047c478bd9Sstevel@tonic-gate for (i = 0; i <= page_colors; i++) { 15057c478bd9Sstevel@tonic-gate if (PAGE_CACHELISTS(mnode, bin, mtype) == NULL) 15067c478bd9Sstevel@tonic-gate goto nextcachebin; 15077c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST); 15087c478bd9Sstevel@tonic-gate mutex_enter(pcm); 15097c478bd9Sstevel@tonic-gate pp = PAGE_CACHELISTS(mnode, bin, mtype); 15107c478bd9Sstevel@tonic-gate first_pp = pp; 15117c478bd9Sstevel@tonic-gate while (pp != NULL) { 15127c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 15137c478bd9Sstevel@tonic-gate pp = pp->p_next; 15147c478bd9Sstevel@tonic-gate if (pp == first_pp) 15157c478bd9Sstevel@tonic-gate break; 15167c478bd9Sstevel@tonic-gate continue; 15177c478bd9Sstevel@tonic-gate } 15187c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode); 15197c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 15207c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 15217c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 15227c478bd9Sstevel@tonic-gate 15237c478bd9Sstevel@tonic-gate /* check if page within DMA attributes */ 15247c478bd9Sstevel@tonic-gate 1525*ae115bc7Smrj pgaddr = pa_to_ma(pfn_to_pa(pp->p_pagenum)); 15267c478bd9Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 15277c478bd9Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 15287c478bd9Sstevel@tonic-gate 
dma_attr->dma_attr_addr_hi)) {
15297c478bd9Sstevel@tonic-gate 					break;
15307c478bd9Sstevel@tonic-gate 				}
15317c478bd9Sstevel@tonic-gate 
15327c478bd9Sstevel@tonic-gate 				/* continue looking */
15337c478bd9Sstevel@tonic-gate 				page_unlock(pp);
15347c478bd9Sstevel@tonic-gate 				pp = pp->p_next;
15357c478bd9Sstevel@tonic-gate 				if (pp == first_pp)
15367c478bd9Sstevel@tonic-gate 					pp = NULL;
15377c478bd9Sstevel@tonic-gate 			}
15387c478bd9Sstevel@tonic-gate 
15397c478bd9Sstevel@tonic-gate 			if (pp != NULL) {
15407c478bd9Sstevel@tonic-gate 				ASSERT(mtype == PP_2_MTYPE(pp));
15417c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_szc == 0);
15427c478bd9Sstevel@tonic-gate 
15437c478bd9Sstevel@tonic-gate 				/* found a page with specified DMA attributes */
15447c478bd9Sstevel@tonic-gate 				page_sub(&PAGE_CACHELISTS(mnode, bin,
15457c478bd9Sstevel@tonic-gate 				    mtype), pp);
1546affbd3ccSkchow 				page_ctr_sub(mnode, mtype, pp, PG_CACHE_LIST);
15477c478bd9Sstevel@tonic-gate 
15487c478bd9Sstevel@tonic-gate 				mutex_exit(pcm);
15497c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_vnode);
15507c478bd9Sstevel@tonic-gate 				ASSERT(PP_ISAGED(pp) == 0);
15517c478bd9Sstevel@tonic-gate 				check_dma(dma_attr, pp, 1);
15527c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(pga_vmstats.pgma_allocok);
15537c478bd9Sstevel@tonic-gate 				return (pp);
15547c478bd9Sstevel@tonic-gate 			}
15557c478bd9Sstevel@tonic-gate 			mutex_exit(pcm);
15567c478bd9Sstevel@tonic-gate nextcachebin:
15577c478bd9Sstevel@tonic-gate 			bin += (i == 0) ? BIN_STEP : 1;
15587c478bd9Sstevel@tonic-gate 			bin &= page_colors_mask;
15597c478bd9Sstevel@tonic-gate 		}
1560affbd3ccSkchow 		MTYPE_NEXT(mnode, mtype, flags);
1561affbd3ccSkchow 	} while (mtype >= 0);
15627c478bd9Sstevel@tonic-gate 
15637c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pgma_allocfailed);
15647c478bd9Sstevel@tonic-gate 	return (NULL);
15657c478bd9Sstevel@tonic-gate }
15667c478bd9Sstevel@tonic-gate 
15677c478bd9Sstevel@tonic-gate /*
15687c478bd9Sstevel@tonic-gate  * This function is similar to page_get_freelist()/page_get_cachelist()
15697c478bd9Sstevel@tonic-gate  * but it searches both lists to find a page with the specified
15707c478bd9Sstevel@tonic-gate  * color (or no color) and DMA attributes.  The search is done in the
15717c478bd9Sstevel@tonic-gate  * freelist first and then in the cache list within the highest memory
15727c478bd9Sstevel@tonic-gate  * range (based on DMA attributes) before searching in the lower
15737c478bd9Sstevel@tonic-gate  * memory ranges.
15747c478bd9Sstevel@tonic-gate  *
15757c478bd9Sstevel@tonic-gate  * Note: This function is called only by page_create_io().
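 *
 * For example (hypothetical attributes), a dma_attr limited to 32-bit
 * addresses yields n = pfn_2_mtype(0) and m = pfn_2_mtype(0xfffff), so
 * the mtype loop below starts in the highest qualifying range and only
 * falls back toward low memory when the upper ranges come up empty.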
15767c478bd9Sstevel@tonic-gate  */
15777c478bd9Sstevel@tonic-gate /*ARGSUSED*/
15787c478bd9Sstevel@tonic-gate page_t *
15797c478bd9Sstevel@tonic-gate page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
15807c478bd9Sstevel@tonic-gate 	size_t size, uint_t flags, ddi_dma_attr_t *dma_attr, lgrp_t *lgrp)
15817c478bd9Sstevel@tonic-gate {
15827c478bd9Sstevel@tonic-gate 	uint_t bin;
15837c478bd9Sstevel@tonic-gate 	int mtype;
15847c478bd9Sstevel@tonic-gate 	page_t *pp;
15857c478bd9Sstevel@tonic-gate 	int n;
15867c478bd9Sstevel@tonic-gate 	int m;
15877c478bd9Sstevel@tonic-gate 	int szc;
15887c478bd9Sstevel@tonic-gate 	int fullrange;
15897c478bd9Sstevel@tonic-gate 	int mnode;
15907c478bd9Sstevel@tonic-gate 	int local_failed_stat = 0;
15917c478bd9Sstevel@tonic-gate 	lgrp_mnode_cookie_t	lgrp_cookie;
15927c478bd9Sstevel@tonic-gate 
15937c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pga_alloc);
15947c478bd9Sstevel@tonic-gate 
15957c478bd9Sstevel@tonic-gate 	/* only base pagesize currently supported */
15967c478bd9Sstevel@tonic-gate 	if (size != MMU_PAGESIZE)
15977c478bd9Sstevel@tonic-gate 		return (NULL);
15987c478bd9Sstevel@tonic-gate 
15997c478bd9Sstevel@tonic-gate 	/*
16007c478bd9Sstevel@tonic-gate 	 * If we're passed a specific lgroup, we use it.  Otherwise,
16017c478bd9Sstevel@tonic-gate 	 * assume first-touch placement is desired.
16027c478bd9Sstevel@tonic-gate 	 */
16037c478bd9Sstevel@tonic-gate 	if (!LGRP_EXISTS(lgrp))
16047c478bd9Sstevel@tonic-gate 		lgrp = lgrp_home_lgrp();
16057c478bd9Sstevel@tonic-gate 
16067c478bd9Sstevel@tonic-gate 	/* LINTED */
16075d07b933Sdp 	AS_2_BIN(as, seg, vp, vaddr, bin, 0);
16087c478bd9Sstevel@tonic-gate 
16097c478bd9Sstevel@tonic-gate 	/*
16107c478bd9Sstevel@tonic-gate 	 * Only hold one freelist or cachelist lock at a time, that way we
16117c478bd9Sstevel@tonic-gate 	 * can start anywhere and not have to worry about lock
16127c478bd9Sstevel@tonic-gate 	 * ordering.
16137c478bd9Sstevel@tonic-gate 	 */
16147c478bd9Sstevel@tonic-gate 	if (dma_attr == NULL) {
16157c478bd9Sstevel@tonic-gate 		n = 0;
16167c478bd9Sstevel@tonic-gate 		m = mnoderangecnt - 1;
16177c478bd9Sstevel@tonic-gate 		fullrange = 1;
16187c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(pga_vmstats.pga_nulldmaattr);
16197c478bd9Sstevel@tonic-gate 	} else {
16207c478bd9Sstevel@tonic-gate 		pfn_t pfnlo = mmu_btop(dma_attr->dma_attr_addr_lo);
16217c478bd9Sstevel@tonic-gate 		pfn_t pfnhi = mmu_btop(dma_attr->dma_attr_addr_hi);
16227c478bd9Sstevel@tonic-gate 
16237c478bd9Sstevel@tonic-gate 		/*
16247c478bd9Sstevel@tonic-gate 		 * We can only guarantee alignment to a page boundary.
16257c478bd9Sstevel@tonic-gate 		 */
16267c478bd9Sstevel@tonic-gate 		if (dma_attr->dma_attr_align > MMU_PAGESIZE)
16277c478bd9Sstevel@tonic-gate 			return (NULL);
16287c478bd9Sstevel@tonic-gate 
16297c478bd9Sstevel@tonic-gate 		n = pfn_2_mtype(pfnlo);
16307c478bd9Sstevel@tonic-gate 		m = pfn_2_mtype(pfnhi);
16317c478bd9Sstevel@tonic-gate 
16327c478bd9Sstevel@tonic-gate 		fullrange = ((pfnlo == mnoderanges[n].mnr_pfnlo) &&
16337c478bd9Sstevel@tonic-gate 		    (pfnhi >= mnoderanges[m].mnr_pfnhi));
16347c478bd9Sstevel@tonic-gate 	}
16357c478bd9Sstevel@tonic-gate 	VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange);
16367c478bd9Sstevel@tonic-gate 
16377c478bd9Sstevel@tonic-gate 	if (n > m)
16387c478bd9Sstevel@tonic-gate 		return (NULL);
16397c478bd9Sstevel@tonic-gate 
16407c478bd9Sstevel@tonic-gate 	szc = 0;
16417c478bd9Sstevel@tonic-gate 
16427c478bd9Sstevel@tonic-gate 	/* cycling through mtype handled by RANGE0 if n == 0 */
16437c478bd9Sstevel@tonic-gate 	if (n == 0) {
16447c478bd9Sstevel@tonic-gate 		flags |= PGI_MT_RANGE0;
16457c478bd9Sstevel@tonic-gate 		n = m;
16467c478bd9Sstevel@tonic-gate 	}
16477c478bd9Sstevel@tonic-gate 
16487c478bd9Sstevel@tonic-gate 	/*
16497c478bd9Sstevel@tonic-gate 	 * Try local memory node first, but try remote if we can't
16507c478bd9Sstevel@tonic-gate 	 * get a page of the right color.
16517c478bd9Sstevel@tonic-gate 	 */
16527c478bd9Sstevel@tonic-gate 	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_HIER);
16537c478bd9Sstevel@tonic-gate 	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
16547c478bd9Sstevel@tonic-gate 		/*
16557c478bd9Sstevel@tonic-gate 		 * allocate pages from high pfn to low.
16567c478bd9Sstevel@tonic-gate 		 */
16577c478bd9Sstevel@tonic-gate 		for (mtype = m; mtype >= n; mtype--) {
16587c478bd9Sstevel@tonic-gate 			if (fullrange != 0) {
16597c478bd9Sstevel@tonic-gate 				pp = page_get_mnode_freelist(mnode,
16607c478bd9Sstevel@tonic-gate 				    bin, mtype, szc, flags);
16617c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
16627c478bd9Sstevel@tonic-gate 					pp = page_get_mnode_cachelist(
16637c478bd9Sstevel@tonic-gate 					    bin, flags, mnode, mtype);
16647c478bd9Sstevel@tonic-gate 				}
16657c478bd9Sstevel@tonic-gate 			} else {
16667c478bd9Sstevel@tonic-gate 				pp = page_get_mnode_anylist(bin, szc,
16677c478bd9Sstevel@tonic-gate 				    flags, mnode, mtype, dma_attr);
16687c478bd9Sstevel@tonic-gate 			}
16697c478bd9Sstevel@tonic-gate 			if (pp != NULL) {
16707c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(pga_vmstats.pga_allocok);
16717c478bd9Sstevel@tonic-gate 				check_dma(dma_attr, pp, 1);
16727c478bd9Sstevel@tonic-gate 				return (pp);
16737c478bd9Sstevel@tonic-gate 			}
16747c478bd9Sstevel@tonic-gate 		}
16757c478bd9Sstevel@tonic-gate 		if (!local_failed_stat) {
16767c478bd9Sstevel@tonic-gate 			lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
16777c478bd9Sstevel@tonic-gate 			local_failed_stat = 1;
16787c478bd9Sstevel@tonic-gate 		}
16797c478bd9Sstevel@tonic-gate 	}
16807c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pga_allocfailed);
16817c478bd9Sstevel@tonic-gate 
16827c478bd9Sstevel@tonic-gate 	return (NULL);
16837c478bd9Sstevel@tonic-gate }
16847c478bd9Sstevel@tonic-gate 
16857c478bd9Sstevel@tonic-gate /*
16867c478bd9Sstevel@tonic-gate  * page_create_io()
16877c478bd9Sstevel@tonic-gate  *
16887c478bd9Sstevel@tonic-gate  * This function is a copy of page_create_va() with an additional
16897c478bd9Sstevel@tonic-gate  * argument 'mattr' that specifies DMA memory requirements to
16907c478bd9Sstevel@tonic-gate  * the page list functions.  This function is used by the segkmem
16917c478bd9Sstevel@tonic-gate  * allocator so it is only used to create new pages (i.e., PG_EXCL is
16927c478bd9Sstevel@tonic-gate  * set).
16937c478bd9Sstevel@tonic-gate  *
16947c478bd9Sstevel@tonic-gate  * Note: This interface is currently used by x86 PSM only and is
16957c478bd9Sstevel@tonic-gate  * not fully specified, so the commitment level is only that of a
16967c478bd9Sstevel@tonic-gate  * private interface specific to x86.  This interface uses the PSM
16977c478bd9Sstevel@tonic-gate  * specific page_get_anylist() interface.
16987c478bd9Sstevel@tonic-gate  */
16997c478bd9Sstevel@tonic-gate 
17007c478bd9Sstevel@tonic-gate #define	PAGE_HASH_SEARCH(index, pp, vp, off) { \
17017c478bd9Sstevel@tonic-gate 	for ((pp) = page_hash[(index)]; (pp); (pp) = (pp)->p_hash) { \
17027c478bd9Sstevel@tonic-gate 		if ((pp)->p_vnode == (vp) && (pp)->p_offset == (off)) \
17037c478bd9Sstevel@tonic-gate 			break; \
17047c478bd9Sstevel@tonic-gate 	} \
17057c478bd9Sstevel@tonic-gate }
17067c478bd9Sstevel@tonic-gate 
17077c478bd9Sstevel@tonic-gate 
17087c478bd9Sstevel@tonic-gate page_t *
17097c478bd9Sstevel@tonic-gate page_create_io(
17107c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
17117c478bd9Sstevel@tonic-gate 	u_offset_t	off,
17127c478bd9Sstevel@tonic-gate 	uint_t		bytes,
17137c478bd9Sstevel@tonic-gate 	uint_t		flags,
17147c478bd9Sstevel@tonic-gate 	struct as	*as,
17157c478bd9Sstevel@tonic-gate 	caddr_t		vaddr,
17167c478bd9Sstevel@tonic-gate 	ddi_dma_attr_t	*mattr)	/* DMA memory attributes if any */
17177c478bd9Sstevel@tonic-gate {
17187c478bd9Sstevel@tonic-gate 	page_t		*plist = NULL;
17197c478bd9Sstevel@tonic-gate 	uint_t		plist_len = 0;
17207c478bd9Sstevel@tonic-gate 	pgcnt_t		npages;
17217c478bd9Sstevel@tonic-gate 	page_t		*npp = NULL;
17227c478bd9Sstevel@tonic-gate 	uint_t		pages_req;
17237c478bd9Sstevel@tonic-gate 	page_t		*pp;
17247c478bd9Sstevel@tonic-gate 	kmutex_t	*phm = NULL;
17257c478bd9Sstevel@tonic-gate 	uint_t		index;
17267c478bd9Sstevel@tonic-gate 
17277c478bd9Sstevel@tonic-gate 	TRACE_4(TR_FAC_VM, TR_PAGE_CREATE_START,
17287c478bd9Sstevel@tonic-gate 	    "page_create_start:vp %p off %llx bytes %u flags %x",
17297c478bd9Sstevel@tonic-gate 	    vp, off, bytes, flags);
17307c478bd9Sstevel@tonic-gate 
17317c478bd9Sstevel@tonic-gate 	ASSERT((flags & ~(PG_EXCL | PG_WAIT | PG_PHYSCONTIG)) == 0);
17327c478bd9Sstevel@tonic-gate 
17337c478bd9Sstevel@tonic-gate 	pages_req = npages = mmu_btopr(bytes);
17347c478bd9Sstevel@tonic-gate 
17357c478bd9Sstevel@tonic-gate 	/*
17367c478bd9Sstevel@tonic-gate 	 * Do the freemem and pcf accounting.
17377c478bd9Sstevel@tonic-gate 	 */
17387c478bd9Sstevel@tonic-gate 	if (!page_create_wait(npages, flags)) {
17397c478bd9Sstevel@tonic-gate 		return (NULL);
17407c478bd9Sstevel@tonic-gate 	}
17417c478bd9Sstevel@tonic-gate 
17427c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SUCCESS,
17437c478bd9Sstevel@tonic-gate 	    "page_create_success:vp %p off %llx",
17447c478bd9Sstevel@tonic-gate 	    vp, off);
17457c478bd9Sstevel@tonic-gate 
17467c478bd9Sstevel@tonic-gate 	/*
17477c478bd9Sstevel@tonic-gate 	 * If satisfying this request has left us with too little
17487c478bd9Sstevel@tonic-gate 	 * memory, start the wheels turning to get some back.  The
17497c478bd9Sstevel@tonic-gate 	 * first clause of the test prevents waking up the pageout
17507c478bd9Sstevel@tonic-gate 	 * daemon in situations where it would decide that there's
17517c478bd9Sstevel@tonic-gate 	 * nothing to do.
17527c478bd9Sstevel@tonic-gate */ 17537c478bd9Sstevel@tonic-gate if (nscan < desscan && freemem < minfree) { 17547c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL, 17557c478bd9Sstevel@tonic-gate "pageout_cv_signal:freemem %ld", freemem); 17567c478bd9Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 17577c478bd9Sstevel@tonic-gate } 17587c478bd9Sstevel@tonic-gate 17597c478bd9Sstevel@tonic-gate if (flags & PG_PHYSCONTIG) { 17607c478bd9Sstevel@tonic-gate 17617c478bd9Sstevel@tonic-gate plist = page_get_contigpage(&npages, mattr, 1); 17627c478bd9Sstevel@tonic-gate if (plist == NULL) { 17637c478bd9Sstevel@tonic-gate page_create_putback(npages); 17647c478bd9Sstevel@tonic-gate return (NULL); 17657c478bd9Sstevel@tonic-gate } 17667c478bd9Sstevel@tonic-gate 17677c478bd9Sstevel@tonic-gate pp = plist; 17687c478bd9Sstevel@tonic-gate 17697c478bd9Sstevel@tonic-gate do { 17707c478bd9Sstevel@tonic-gate if (!page_hashin(pp, vp, off, NULL)) { 17717c478bd9Sstevel@tonic-gate panic("pg_creat_io: hashin failed %p %p %llx", 17727c478bd9Sstevel@tonic-gate (void *)pp, (void *)vp, off); 17737c478bd9Sstevel@tonic-gate } 17747c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_new); 17757c478bd9Sstevel@tonic-gate off += MMU_PAGESIZE; 17767c478bd9Sstevel@tonic-gate PP_CLRFREE(pp); 17777c478bd9Sstevel@tonic-gate PP_CLRAGED(pp); 17787c478bd9Sstevel@tonic-gate page_set_props(pp, P_REF); 17797c478bd9Sstevel@tonic-gate pp = pp->p_next; 17807c478bd9Sstevel@tonic-gate } while (pp != plist); 17817c478bd9Sstevel@tonic-gate 17827c478bd9Sstevel@tonic-gate if (!npages) { 17837c478bd9Sstevel@tonic-gate check_dma(mattr, plist, pages_req); 17847c478bd9Sstevel@tonic-gate return (plist); 17857c478bd9Sstevel@tonic-gate } else { 17867c478bd9Sstevel@tonic-gate vaddr += (pages_req - npages) << MMU_PAGESHIFT; 17877c478bd9Sstevel@tonic-gate } 17887c478bd9Sstevel@tonic-gate 17897c478bd9Sstevel@tonic-gate /* 17907c478bd9Sstevel@tonic-gate * fall-thru: 17917c478bd9Sstevel@tonic-gate * 17927c478bd9Sstevel@tonic-gate * page_get_contigpage returns when npages <= sgllen. 17937c478bd9Sstevel@tonic-gate * Grab the rest of the non-contig pages below from anylist. 17947c478bd9Sstevel@tonic-gate */ 17957c478bd9Sstevel@tonic-gate } 17967c478bd9Sstevel@tonic-gate 17977c478bd9Sstevel@tonic-gate /* 17987c478bd9Sstevel@tonic-gate * Loop around collecting the requested number of pages. 17997c478bd9Sstevel@tonic-gate * Most of the time, we have to `create' a new page. With 18007c478bd9Sstevel@tonic-gate * this in mind, pull the page off the free list before 18017c478bd9Sstevel@tonic-gate * getting the hash lock. This will minimize the hash 18027c478bd9Sstevel@tonic-gate * lock hold time, nesting, and the like. If it turns 18037c478bd9Sstevel@tonic-gate * out we don't need the page, we put it back at the end. 18047c478bd9Sstevel@tonic-gate */ 18057c478bd9Sstevel@tonic-gate while (npages--) { 18067c478bd9Sstevel@tonic-gate phm = NULL; 18077c478bd9Sstevel@tonic-gate 18087c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 18097c478bd9Sstevel@tonic-gate top: 18107c478bd9Sstevel@tonic-gate ASSERT(phm == NULL); 18117c478bd9Sstevel@tonic-gate ASSERT(index == PAGE_HASH_FUNC(vp, off)); 18127c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 18137c478bd9Sstevel@tonic-gate 18147c478bd9Sstevel@tonic-gate if (npp == NULL) { 18157c478bd9Sstevel@tonic-gate /* 18167c478bd9Sstevel@tonic-gate * Try to get the page of any color either from 18177c478bd9Sstevel@tonic-gate * the freelist or from the cache list. 
18187c478bd9Sstevel@tonic-gate 			 */
18197c478bd9Sstevel@tonic-gate 			npp = page_get_anylist(vp, off, as, vaddr, MMU_PAGESIZE,
18207c478bd9Sstevel@tonic-gate 			    flags & ~PG_MATCH_COLOR, mattr, NULL);
18217c478bd9Sstevel@tonic-gate 			if (npp == NULL) {
18227c478bd9Sstevel@tonic-gate 				if (mattr == NULL) {
18237c478bd9Sstevel@tonic-gate 					/*
18247c478bd9Sstevel@tonic-gate 					 * Not looking for a special page;
18257c478bd9Sstevel@tonic-gate 					 * panic!
18267c478bd9Sstevel@tonic-gate 					 */
18277c478bd9Sstevel@tonic-gate 					panic("no page found %d", (int)npages);
18287c478bd9Sstevel@tonic-gate 				}
18297c478bd9Sstevel@tonic-gate 				/*
18307c478bd9Sstevel@tonic-gate 				 * No page found!  This can happen
18317c478bd9Sstevel@tonic-gate 				 * if we are looking for a page
18327c478bd9Sstevel@tonic-gate 				 * within a specific memory range
18337c478bd9Sstevel@tonic-gate 				 * for DMA purposes.  If PG_WAIT is
18347c478bd9Sstevel@tonic-gate 				 * specified then we wait for a
18357c478bd9Sstevel@tonic-gate 				 * while and then try again.  The
18367c478bd9Sstevel@tonic-gate 				 * wait could be forever if we
18377c478bd9Sstevel@tonic-gate 				 * don't get the page(s) we need.
18387c478bd9Sstevel@tonic-gate 				 *
18397c478bd9Sstevel@tonic-gate 				 * Note: XXX We really need a mechanism
18407c478bd9Sstevel@tonic-gate 				 * to wait for pages in the desired
18417c478bd9Sstevel@tonic-gate 				 * range.  For now, we wait for any
18427c478bd9Sstevel@tonic-gate 				 * pages and see if we can use them.
18437c478bd9Sstevel@tonic-gate 				 */
18447c478bd9Sstevel@tonic-gate 
18457c478bd9Sstevel@tonic-gate 				if ((mattr != NULL) && (flags & PG_WAIT)) {
18467c478bd9Sstevel@tonic-gate 					delay(10);
18477c478bd9Sstevel@tonic-gate 					goto top;
18487c478bd9Sstevel@tonic-gate 				}
18497c478bd9Sstevel@tonic-gate 				goto fail; /* undo accounting stuff */
18507c478bd9Sstevel@tonic-gate 			}
18517c478bd9Sstevel@tonic-gate 
18527c478bd9Sstevel@tonic-gate 			if (PP_ISAGED(npp) == 0) {
18537c478bd9Sstevel@tonic-gate 				/*
18547c478bd9Sstevel@tonic-gate 				 * Since this page came from the
18557c478bd9Sstevel@tonic-gate 				 * cachelist, we must destroy the
18567c478bd9Sstevel@tonic-gate 				 * old vnode association.
18577c478bd9Sstevel@tonic-gate 				 */
18587c478bd9Sstevel@tonic-gate 				page_hashout(npp, (kmutex_t *)NULL);
18597c478bd9Sstevel@tonic-gate 			}
18607c478bd9Sstevel@tonic-gate 		}
18617c478bd9Sstevel@tonic-gate 
18627c478bd9Sstevel@tonic-gate 		/*
18637c478bd9Sstevel@tonic-gate 		 * We own this page!
18647c478bd9Sstevel@tonic-gate 		 */
18657c478bd9Sstevel@tonic-gate 		ASSERT(PAGE_EXCL(npp));
18667c478bd9Sstevel@tonic-gate 		ASSERT(npp->p_vnode == NULL);
18677c478bd9Sstevel@tonic-gate 		ASSERT(!hat_page_is_mapped(npp));
18687c478bd9Sstevel@tonic-gate 		PP_CLRFREE(npp);
18697c478bd9Sstevel@tonic-gate 		PP_CLRAGED(npp);
18707c478bd9Sstevel@tonic-gate 
18717c478bd9Sstevel@tonic-gate 		/*
18727c478bd9Sstevel@tonic-gate 		 * Here we have a page in our hot little mitts and are
18737c478bd9Sstevel@tonic-gate 		 * just waiting to stuff it on the appropriate lists.
18747c478bd9Sstevel@tonic-gate 		 * Get the mutex and check to see if it really does
18757c478bd9Sstevel@tonic-gate 		 * not exist.
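 *
 * This is the usual optimistic allocation dance: npp was pulled off
 * the free/cache lists before the hash mutex was taken, so if some
 * other thread raced us and already created a page at (vp, off), the
 * fail path below just puts npp back on the freelist.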
18767c478bd9Sstevel@tonic-gate 		 */
18777c478bd9Sstevel@tonic-gate 		phm = PAGE_HASH_MUTEX(index);
18787c478bd9Sstevel@tonic-gate 		mutex_enter(phm);
18797c478bd9Sstevel@tonic-gate 		PAGE_HASH_SEARCH(index, pp, vp, off);
18807c478bd9Sstevel@tonic-gate 		if (pp == NULL) {
18817c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_create_new);
18827c478bd9Sstevel@tonic-gate 			pp = npp;
18837c478bd9Sstevel@tonic-gate 			npp = NULL;
18847c478bd9Sstevel@tonic-gate 			if (!page_hashin(pp, vp, off, phm)) {
18857c478bd9Sstevel@tonic-gate 				/*
18867c478bd9Sstevel@tonic-gate 				 * Since we hold the page hash mutex and
18877c478bd9Sstevel@tonic-gate 				 * just searched for this page, page_hashin
18887c478bd9Sstevel@tonic-gate 				 * had better not fail.  If it does, that
18897c478bd9Sstevel@tonic-gate 				 * means some thread did not follow the
18907c478bd9Sstevel@tonic-gate 				 * page hash mutex rules.  Panic now and
18917c478bd9Sstevel@tonic-gate 				 * get it over with.  As usual, go down
18927c478bd9Sstevel@tonic-gate 				 * holding all the locks.
18937c478bd9Sstevel@tonic-gate 				 */
18947c478bd9Sstevel@tonic-gate 				ASSERT(MUTEX_HELD(phm));
18957c478bd9Sstevel@tonic-gate 				panic("page_create: hashin fail %p %p %llx %p",
18967c478bd9Sstevel@tonic-gate 				    (void *)pp, (void *)vp, off, (void *)phm);
18977c478bd9Sstevel@tonic-gate 
18987c478bd9Sstevel@tonic-gate 			}
18997c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(phm));
19007c478bd9Sstevel@tonic-gate 			mutex_exit(phm);
19017c478bd9Sstevel@tonic-gate 			phm = NULL;
19027c478bd9Sstevel@tonic-gate 
19037c478bd9Sstevel@tonic-gate 			/*
19047c478bd9Sstevel@tonic-gate 			 * Hat layer locking need not be done to set
19057c478bd9Sstevel@tonic-gate 			 * the following bits since the page is not hashed
19067c478bd9Sstevel@tonic-gate 			 * and was on the free list (i.e., had no mappings).
19077c478bd9Sstevel@tonic-gate 			 *
19087c478bd9Sstevel@tonic-gate 			 * Set the reference bit to protect
19097c478bd9Sstevel@tonic-gate 			 * against immediate pageout
19107c478bd9Sstevel@tonic-gate 			 *
19117c478bd9Sstevel@tonic-gate 			 * XXXmh modify freelist code to set reference
19127c478bd9Sstevel@tonic-gate 			 * bit so we don't have to do it here.
19137c478bd9Sstevel@tonic-gate 			 */
19147c478bd9Sstevel@tonic-gate 			page_set_props(pp, P_REF);
19157c478bd9Sstevel@tonic-gate 		} else {
19167c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(phm));
19177c478bd9Sstevel@tonic-gate 			mutex_exit(phm);
19187c478bd9Sstevel@tonic-gate 			phm = NULL;
19197c478bd9Sstevel@tonic-gate 			/*
19207c478bd9Sstevel@tonic-gate 			 * NOTE: This should not happen for pages associated
19217c478bd9Sstevel@tonic-gate 			 * with kernel vnode 'kvp'.
19227c478bd9Sstevel@tonic-gate 			 */
19237c478bd9Sstevel@tonic-gate 			/* XX64 - to debug why this happens! */
1924ad23a2dbSjohansen 			ASSERT(!VN_ISKAS(vp));
1925ad23a2dbSjohansen 			if (VN_ISKAS(vp))
19267c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE,
19277c478bd9Sstevel@tonic-gate 				    "page_create: page not expected "
19287c478bd9Sstevel@tonic-gate 				    "in hash list for kernel vnode - pp 0x%p",
19297c478bd9Sstevel@tonic-gate 				    (void *)pp);
19307c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_create_exists);
19317c478bd9Sstevel@tonic-gate 			goto fail;
19327c478bd9Sstevel@tonic-gate 		}
19337c478bd9Sstevel@tonic-gate 
19347c478bd9Sstevel@tonic-gate 		/*
19357c478bd9Sstevel@tonic-gate 		 * Got a page!  It is locked.  Acquire the i/o
19367c478bd9Sstevel@tonic-gate 		 * lock since we are going to use the p_next and
19377c478bd9Sstevel@tonic-gate 		 * p_prev fields to link the requested pages together.
19387c478bd9Sstevel@tonic-gate */ 19397c478bd9Sstevel@tonic-gate page_io_lock(pp); 19407c478bd9Sstevel@tonic-gate page_add(&plist, pp); 19417c478bd9Sstevel@tonic-gate plist = plist->p_next; 19427c478bd9Sstevel@tonic-gate off += MMU_PAGESIZE; 19437c478bd9Sstevel@tonic-gate vaddr += MMU_PAGESIZE; 19447c478bd9Sstevel@tonic-gate } 19457c478bd9Sstevel@tonic-gate 19467c478bd9Sstevel@tonic-gate check_dma(mattr, plist, pages_req); 19477c478bd9Sstevel@tonic-gate return (plist); 19487c478bd9Sstevel@tonic-gate 19497c478bd9Sstevel@tonic-gate fail: 19507c478bd9Sstevel@tonic-gate if (npp != NULL) { 19517c478bd9Sstevel@tonic-gate /* 19527c478bd9Sstevel@tonic-gate * Did not need this page after all. 19537c478bd9Sstevel@tonic-gate * Put it back on the free list. 19547c478bd9Sstevel@tonic-gate */ 19557c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_putbacks); 19567c478bd9Sstevel@tonic-gate PP_SETFREE(npp); 19577c478bd9Sstevel@tonic-gate PP_SETAGED(npp); 19587c478bd9Sstevel@tonic-gate npp->p_offset = (u_offset_t)-1; 19597c478bd9Sstevel@tonic-gate page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL); 19607c478bd9Sstevel@tonic-gate page_unlock(npp); 19617c478bd9Sstevel@tonic-gate } 19627c478bd9Sstevel@tonic-gate 19637c478bd9Sstevel@tonic-gate /* 19647c478bd9Sstevel@tonic-gate * Give up the pages we already got. 19657c478bd9Sstevel@tonic-gate */ 19667c478bd9Sstevel@tonic-gate while (plist != NULL) { 19677c478bd9Sstevel@tonic-gate pp = plist; 19687c478bd9Sstevel@tonic-gate page_sub(&plist, pp); 19697c478bd9Sstevel@tonic-gate page_io_unlock(pp); 19707c478bd9Sstevel@tonic-gate plist_len++; 19717c478bd9Sstevel@tonic-gate /*LINTED: constant in conditional ctx*/ 19727c478bd9Sstevel@tonic-gate VN_DISPOSE(pp, B_INVAL, 0, kcred); 19737c478bd9Sstevel@tonic-gate } 19747c478bd9Sstevel@tonic-gate 19757c478bd9Sstevel@tonic-gate /* 19767c478bd9Sstevel@tonic-gate * VN_DISPOSE does freemem accounting for the pages in plist 19777c478bd9Sstevel@tonic-gate * by calling page_free. So, we need to undo the pcf accounting 19787c478bd9Sstevel@tonic-gate * for only the remaining pages. 19797c478bd9Sstevel@tonic-gate */ 19807c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_putbacks); 19817c478bd9Sstevel@tonic-gate page_create_putback(pages_req - plist_len); 19827c478bd9Sstevel@tonic-gate 19837c478bd9Sstevel@tonic-gate return (NULL); 19847c478bd9Sstevel@tonic-gate } 19857c478bd9Sstevel@tonic-gate 19867c478bd9Sstevel@tonic-gate 19877c478bd9Sstevel@tonic-gate /* 19887c478bd9Sstevel@tonic-gate * Copy the data from the physical page represented by "frompp" to 19897c478bd9Sstevel@tonic-gate * that represented by "topp". ppcopy uses CPU->cpu_caddr1 and 19907c478bd9Sstevel@tonic-gate * CPU->cpu_caddr2. It assumes that no one uses either map at interrupt 19917c478bd9Sstevel@tonic-gate * level and no one sleeps with an active mapping there. 19927c478bd9Sstevel@tonic-gate * 19937c478bd9Sstevel@tonic-gate * Note that the ref/mod bits in the page_t's are not affected by 19947c478bd9Sstevel@tonic-gate * this operation, hence it is up to the caller to update them appropriately. 
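 *
 * A (hypothetical) caller sketch: a return of 0 means the copy took a
 * fault (e.g. a UE on the source page) and topp's contents are not
 * valid; on success the caller might do something like
 * page_set_props(topp, P_MOD | P_REF) to update the bits itself.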
19957c478bd9Sstevel@tonic-gate  */
19968b464eb8Smec int
19977c478bd9Sstevel@tonic-gate ppcopy(page_t *frompp, page_t *topp)
19987c478bd9Sstevel@tonic-gate {
19997c478bd9Sstevel@tonic-gate 	caddr_t		pp_addr1;
20007c478bd9Sstevel@tonic-gate 	caddr_t		pp_addr2;
2001*ae115bc7Smrj 	hat_mempte_t	pte1;
2002*ae115bc7Smrj 	hat_mempte_t	pte2;
20037c478bd9Sstevel@tonic-gate 	kmutex_t	*ppaddr_mutex;
20048b464eb8Smec 	label_t		ljb;
20058b464eb8Smec 	int		ret = 1;
20067c478bd9Sstevel@tonic-gate 
20077c478bd9Sstevel@tonic-gate 	ASSERT_STACK_ALIGNED();
20087c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(frompp));
20097c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(topp));
20107c478bd9Sstevel@tonic-gate 
20117c478bd9Sstevel@tonic-gate 	if (kpm_enable) {
20127c478bd9Sstevel@tonic-gate 		pp_addr1 = hat_kpm_page2va(frompp, 0);
20137c478bd9Sstevel@tonic-gate 		pp_addr2 = hat_kpm_page2va(topp, 0);
20147c478bd9Sstevel@tonic-gate 		kpreempt_disable();
20157c478bd9Sstevel@tonic-gate 	} else {
20167c478bd9Sstevel@tonic-gate 		/*
20177c478bd9Sstevel@tonic-gate 		 * disable pre-emption so that the CPU we are on can't change
20187c478bd9Sstevel@tonic-gate 		 */
20197c478bd9Sstevel@tonic-gate 		kpreempt_disable();
20207c478bd9Sstevel@tonic-gate 
20217c478bd9Sstevel@tonic-gate 		pp_addr1 = CPU->cpu_caddr1;
20227c478bd9Sstevel@tonic-gate 		pp_addr2 = CPU->cpu_caddr2;
2023*ae115bc7Smrj 		pte1 = CPU->cpu_caddr1pte;
2024*ae115bc7Smrj 		pte2 = CPU->cpu_caddr2pte;
20257c478bd9Sstevel@tonic-gate 
20267c478bd9Sstevel@tonic-gate 		ppaddr_mutex = &CPU->cpu_ppaddr_mutex;
20277c478bd9Sstevel@tonic-gate 		mutex_enter(ppaddr_mutex);
20287c478bd9Sstevel@tonic-gate 
20297c478bd9Sstevel@tonic-gate 		hat_mempte_remap(page_pptonum(frompp), pp_addr1, pte1,
20307c478bd9Sstevel@tonic-gate 		    PROT_READ | HAT_STORECACHING_OK, HAT_LOAD_NOCONSIST);
20317c478bd9Sstevel@tonic-gate 		hat_mempte_remap(page_pptonum(topp), pp_addr2, pte2,
20327c478bd9Sstevel@tonic-gate 		    PROT_READ | PROT_WRITE | HAT_STORECACHING_OK,
20337c478bd9Sstevel@tonic-gate 		    HAT_LOAD_NOCONSIST);
20347c478bd9Sstevel@tonic-gate 	}
20357c478bd9Sstevel@tonic-gate 
20368b464eb8Smec 	if (on_fault(&ljb)) {
20378b464eb8Smec 		ret = 0;
20388b464eb8Smec 		goto faulted;
20398b464eb8Smec 	}
20407c478bd9Sstevel@tonic-gate 	if (use_sse_pagecopy)
20417c478bd9Sstevel@tonic-gate 		hwblkpagecopy(pp_addr1, pp_addr2);
20427c478bd9Sstevel@tonic-gate 	else
20437c478bd9Sstevel@tonic-gate 		bcopy(pp_addr1, pp_addr2, PAGESIZE);
20447c478bd9Sstevel@tonic-gate 
20458b464eb8Smec 	no_fault();
20468b464eb8Smec faulted:
2047*ae115bc7Smrj 	if (!kpm_enable) {
20487c478bd9Sstevel@tonic-gate 		mutex_exit(ppaddr_mutex);
2049*ae115bc7Smrj 	}
20507c478bd9Sstevel@tonic-gate 	kpreempt_enable();
20518b464eb8Smec 	return (ret);
20527c478bd9Sstevel@tonic-gate }
20537c478bd9Sstevel@tonic-gate 
20547c478bd9Sstevel@tonic-gate /*
20557c478bd9Sstevel@tonic-gate  * Zero the physical page from off to off + len given by `pp'
20567c478bd9Sstevel@tonic-gate  * without changing the reference and modified bits of the page.
20577c478bd9Sstevel@tonic-gate  *
20587c478bd9Sstevel@tonic-gate  * We do this using CPU private page address #2; see ppcopy() for more info.
20597c478bd9Sstevel@tonic-gate  * pagezero() must not be called at interrupt level.
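 *
 * e.g. pagezero(pp, 0, MMU_PAGESIZE) clears the entire page; pagescrub()
 * below is just a wrapper around this routine.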
20607c478bd9Sstevel@tonic-gate */ 20617c478bd9Sstevel@tonic-gate void 20627c478bd9Sstevel@tonic-gate pagezero(page_t *pp, uint_t off, uint_t len) 20637c478bd9Sstevel@tonic-gate { 20647c478bd9Sstevel@tonic-gate caddr_t pp_addr2; 2065*ae115bc7Smrj hat_mempte_t pte2; 20667c478bd9Sstevel@tonic-gate kmutex_t *ppaddr_mutex; 20677c478bd9Sstevel@tonic-gate 20687c478bd9Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 20697c478bd9Sstevel@tonic-gate ASSERT(len <= MMU_PAGESIZE); 20707c478bd9Sstevel@tonic-gate ASSERT(off <= MMU_PAGESIZE); 20717c478bd9Sstevel@tonic-gate ASSERT(off + len <= MMU_PAGESIZE); 20727c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp)); 20737c478bd9Sstevel@tonic-gate 20747c478bd9Sstevel@tonic-gate if (kpm_enable) { 20757c478bd9Sstevel@tonic-gate pp_addr2 = hat_kpm_page2va(pp, 0); 20767c478bd9Sstevel@tonic-gate kpreempt_disable(); 20777c478bd9Sstevel@tonic-gate } else { 20787c478bd9Sstevel@tonic-gate kpreempt_disable(); 20797c478bd9Sstevel@tonic-gate 20807c478bd9Sstevel@tonic-gate pp_addr2 = CPU->cpu_caddr2; 2081*ae115bc7Smrj pte2 = CPU->cpu_caddr2pte; 20827c478bd9Sstevel@tonic-gate 20837c478bd9Sstevel@tonic-gate ppaddr_mutex = &CPU->cpu_ppaddr_mutex; 20847c478bd9Sstevel@tonic-gate mutex_enter(ppaddr_mutex); 20857c478bd9Sstevel@tonic-gate 20867c478bd9Sstevel@tonic-gate hat_mempte_remap(page_pptonum(pp), pp_addr2, pte2, 20877c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_STORECACHING_OK, 20887c478bd9Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 20897c478bd9Sstevel@tonic-gate } 20907c478bd9Sstevel@tonic-gate 2091*ae115bc7Smrj if (use_sse_pagezero) { 20927c478bd9Sstevel@tonic-gate hwblkclr(pp_addr2 + off, len); 2093*ae115bc7Smrj } else { 20947c478bd9Sstevel@tonic-gate bzero(pp_addr2 + off, len); 2095*ae115bc7Smrj } 20967c478bd9Sstevel@tonic-gate 20977c478bd9Sstevel@tonic-gate if (!kpm_enable) 20987c478bd9Sstevel@tonic-gate mutex_exit(ppaddr_mutex); 20997c478bd9Sstevel@tonic-gate kpreempt_enable(); 21007c478bd9Sstevel@tonic-gate } 21017c478bd9Sstevel@tonic-gate 21027c478bd9Sstevel@tonic-gate /* 21037c478bd9Sstevel@tonic-gate * Platform-dependent page scrub call. 21047c478bd9Sstevel@tonic-gate */ 21057c478bd9Sstevel@tonic-gate void 21067c478bd9Sstevel@tonic-gate pagescrub(page_t *pp, uint_t off, uint_t len) 21077c478bd9Sstevel@tonic-gate { 21087c478bd9Sstevel@tonic-gate /* 21097c478bd9Sstevel@tonic-gate * For now, we rely on the fact that pagezero() will 21107c478bd9Sstevel@tonic-gate * always clear UEs. 
21117c478bd9Sstevel@tonic-gate  */
21127c478bd9Sstevel@tonic-gate 	pagezero(pp, off, len);
21137c478bd9Sstevel@tonic-gate }
21147c478bd9Sstevel@tonic-gate 
21157c478bd9Sstevel@tonic-gate /*
21167c478bd9Sstevel@tonic-gate  * set up two private addresses on a given CPU for use in ppcopy()
21177c478bd9Sstevel@tonic-gate  */
21187c478bd9Sstevel@tonic-gate void
21197c478bd9Sstevel@tonic-gate setup_vaddr_for_ppcopy(struct cpu *cpup)
21207c478bd9Sstevel@tonic-gate {
21217c478bd9Sstevel@tonic-gate 	void *addr;
2122*ae115bc7Smrj 	hat_mempte_t pte_pa;
21237c478bd9Sstevel@tonic-gate 
21247c478bd9Sstevel@tonic-gate 	addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP);
2125*ae115bc7Smrj 	pte_pa = hat_mempte_setup(addr);
21267c478bd9Sstevel@tonic-gate 	cpup->cpu_caddr1 = addr;
2127*ae115bc7Smrj 	cpup->cpu_caddr1pte = pte_pa;
21287c478bd9Sstevel@tonic-gate 
21297c478bd9Sstevel@tonic-gate 	addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP);
2130*ae115bc7Smrj 	pte_pa = hat_mempte_setup(addr);
21317c478bd9Sstevel@tonic-gate 	cpup->cpu_caddr2 = addr;
2132*ae115bc7Smrj 	cpup->cpu_caddr2pte = pte_pa;
21337c478bd9Sstevel@tonic-gate 
21347c478bd9Sstevel@tonic-gate 	mutex_init(&cpup->cpu_ppaddr_mutex, NULL, MUTEX_DEFAULT, NULL);
21357c478bd9Sstevel@tonic-gate }
21367c478bd9Sstevel@tonic-gate 
2137*ae115bc7Smrj /*
2138*ae115bc7Smrj  * Undo setup_vaddr_for_ppcopy
2139*ae115bc7Smrj  */
2140*ae115bc7Smrj void
2141*ae115bc7Smrj teardown_vaddr_for_ppcopy(struct cpu *cpup)
2142*ae115bc7Smrj {
2143*ae115bc7Smrj 	mutex_destroy(&cpup->cpu_ppaddr_mutex);
2144*ae115bc7Smrj 
2145*ae115bc7Smrj 	hat_mempte_release(cpup->cpu_caddr2, cpup->cpu_caddr2pte);
2146*ae115bc7Smrj 	cpup->cpu_caddr2pte = 0;
2147*ae115bc7Smrj 	vmem_free(heap_arena, cpup->cpu_caddr2, mmu_ptob(1));
2148*ae115bc7Smrj 	cpup->cpu_caddr2 = 0;
2149*ae115bc7Smrj 
2150*ae115bc7Smrj 	hat_mempte_release(cpup->cpu_caddr1, cpup->cpu_caddr1pte);
2151*ae115bc7Smrj 	cpup->cpu_caddr1pte = 0;
2152*ae115bc7Smrj 	vmem_free(heap_arena, cpup->cpu_caddr1, mmu_ptob(1));
2153*ae115bc7Smrj 	cpup->cpu_caddr1 = 0;
2154*ae115bc7Smrj }
21557c478bd9Sstevel@tonic-gate 
21567c478bd9Sstevel@tonic-gate /*
21577c478bd9Sstevel@tonic-gate  * Create the pageout scanner thread.  The thread starts in routine
21587c478bd9Sstevel@tonic-gate  * 'procedure', belongs to process pp, and runs at priority pri.
21597c478bd9Sstevel@tonic-gate  */
21607c478bd9Sstevel@tonic-gate void
21617c478bd9Sstevel@tonic-gate pageout_init(void (*procedure)(), proc_t *pp, pri_t pri)
21627c478bd9Sstevel@tonic-gate {
21637c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri);
21647c478bd9Sstevel@tonic-gate }
21657c478bd9Sstevel@tonic-gate 
21667c478bd9Sstevel@tonic-gate /*
21677c478bd9Sstevel@tonic-gate  * Function for flushing D-cache when performing module relocations
21687c478bd9Sstevel@tonic-gate  * to an alternate mapping.  Unnecessary on Intel / AMD platforms.
21697c478bd9Sstevel@tonic-gate  */
21707c478bd9Sstevel@tonic-gate void
21717c478bd9Sstevel@tonic-gate dcache_flushall()
21727c478bd9Sstevel@tonic-gate {}
2173102033aaSdp 
2174102033aaSdp size_t
2175102033aaSdp exec_get_spslew(void)
2176102033aaSdp {
2177102033aaSdp 	return (0);
2178102033aaSdp }
2179*ae115bc7Smrj 
2180*ae115bc7Smrj /*
2181*ae115bc7Smrj  * Allocate a memory page.  The argument 'seed' can be any pseudo-random
2182*ae115bc7Smrj  * number to vary where the pages come from.  This is quite a hacked up
2183*ae115bc7Smrj  * method -- it works for now, but really needs to be fixed up a bit.
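 *
 * For instance (illustrative numbers only): on 64-bit, a seed of 0x123
 * ends up as offset = (0x123 << MMU_PAGESHIFT) + mmu.hole_start, an
 * offset no legitimate kvp page can be hashed at.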
2184*ae115bc7Smrj  *
2185*ae115bc7Smrj  * We currently use page_create_va() on the kvp with fake offsets,
2186*ae115bc7Smrj  * segments and virt address.  This is pretty bogus, but was copied from the
2187*ae115bc7Smrj  * old hat_i86.c code.  A better approach would be to specify either mnode
2188*ae115bc7Smrj  * random or mnode local and take a page from whatever color has the MOST
2189*ae115bc7Smrj  * available - this would have a minimal impact on page coloring.
2190*ae115bc7Smrj  */
2191*ae115bc7Smrj page_t *
2192*ae115bc7Smrj page_get_physical(uintptr_t seed)
2193*ae115bc7Smrj {
2194*ae115bc7Smrj 	page_t *pp;
2195*ae115bc7Smrj 	u_offset_t offset;
2196*ae115bc7Smrj 	static struct seg tmpseg;
2197*ae115bc7Smrj 	static uintptr_t ctr = 0;
2198*ae115bc7Smrj 
2199*ae115bc7Smrj 	/*
2200*ae115bc7Smrj 	 * This code is gross, we really need a simpler page allocator.
2201*ae115bc7Smrj 	 *
2202*ae115bc7Smrj 	 * We need to assign an offset for the page to call page_create_va().
2203*ae115bc7Smrj 	 * To avoid conflicts with other pages, we get creative with the offset.
2204*ae115bc7Smrj 	 * For 32 bits, we pick an offset > 4Gig
2205*ae115bc7Smrj 	 * For 64 bits, pick an offset somewhere in the VA hole.
2206*ae115bc7Smrj 	 */
2207*ae115bc7Smrj 	offset = seed;
2208*ae115bc7Smrj 	if (offset > kernelbase)
2209*ae115bc7Smrj 		offset -= kernelbase;
2210*ae115bc7Smrj 	offset <<= MMU_PAGESHIFT;
2211*ae115bc7Smrj #if defined(__amd64)
2212*ae115bc7Smrj 	offset += mmu.hole_start;	/* something in VA hole */
2213*ae115bc7Smrj #else
2214*ae115bc7Smrj 	offset += 1ULL << 40;	/* something > 4 Gig */
2215*ae115bc7Smrj #endif
2216*ae115bc7Smrj 
2217*ae115bc7Smrj 	if (page_resv(1, KM_NOSLEEP) == 0)
2218*ae115bc7Smrj 		return (NULL);
2219*ae115bc7Smrj 
2220*ae115bc7Smrj #ifdef DEBUG
2221*ae115bc7Smrj 	pp = page_exists(&kvp, offset);
2222*ae115bc7Smrj 	if (pp != NULL)
2223*ae115bc7Smrj 		panic("page already exists %p", pp);
2224*ae115bc7Smrj #endif
2225*ae115bc7Smrj 
2226*ae115bc7Smrj 	pp = page_create_va(&kvp, offset, MMU_PAGESIZE, PG_EXCL | PG_NORELOC,
2227*ae115bc7Smrj 	    &tmpseg, (caddr_t)(ctr += MMU_PAGESIZE));	/* changing VA usage */
2228*ae115bc7Smrj 	if (pp == NULL)
2229*ae115bc7Smrj 		return (NULL);
2230*ae115bc7Smrj 	page_io_unlock(pp);
2231*ae115bc7Smrj 	page_hashout(pp, NULL);
2232*ae115bc7Smrj 	return (pp);
2233*ae115bc7Smrj }
2234
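
/*
 * A (hypothetical) sketch of releasing a page obtained above, mirroring
 * the page_resv(1, KM_NOSLEEP) reservation taken in page_get_physical():
 *
 *	page_t *pp = page_get_physical(seed);
 *	if (pp != NULL) {
 *		... use the page ...
 *		page_free(pp, 1);
 *		page_unresv(1);
 *	}
 */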