17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 5aa042c4bSkchow * Common Development and Distribution License (the "License"). 6aa042c4bSkchow * You may not use this file except in compliance with the License. 77c478bd9Sstevel@tonic-gate * 87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 117c478bd9Sstevel@tonic-gate * and limitations under the License. 127c478bd9Sstevel@tonic-gate * 137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 187c478bd9Sstevel@tonic-gate * 197c478bd9Sstevel@tonic-gate * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate /* 22e21bae1bSkchow * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 237c478bd9Sstevel@tonic-gate * Use is subject to license terms. 247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 277c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 287c478bd9Sstevel@tonic-gate 297c478bd9Sstevel@tonic-gate /* 307c478bd9Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 317c478bd9Sstevel@tonic-gate * under license from the Regents of the University of California. 
327c478bd9Sstevel@tonic-gate */ 337c478bd9Sstevel@tonic-gate 347c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 357c478bd9Sstevel@tonic-gate 367c478bd9Sstevel@tonic-gate /* 377c478bd9Sstevel@tonic-gate * UNIX machine dependent virtual memory support. 387c478bd9Sstevel@tonic-gate */ 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate #include <sys/types.h> 417c478bd9Sstevel@tonic-gate #include <sys/param.h> 427c478bd9Sstevel@tonic-gate #include <sys/systm.h> 437c478bd9Sstevel@tonic-gate #include <sys/user.h> 447c478bd9Sstevel@tonic-gate #include <sys/proc.h> 457c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 467c478bd9Sstevel@tonic-gate #include <sys/vmem.h> 477c478bd9Sstevel@tonic-gate #include <sys/buf.h> 487c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 497c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 507c478bd9Sstevel@tonic-gate #include <sys/disp.h> 517c478bd9Sstevel@tonic-gate #include <sys/vm.h> 527c478bd9Sstevel@tonic-gate #include <sys/mman.h> 537c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 547c478bd9Sstevel@tonic-gate #include <sys/cred.h> 557c478bd9Sstevel@tonic-gate #include <sys/exec.h> 567c478bd9Sstevel@tonic-gate #include <sys/exechdr.h> 577c478bd9Sstevel@tonic-gate #include <sys/debug.h> 58*ec25b48fSsusans #include <sys/vmsystm.h> 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate #include <vm/hat.h> 617c478bd9Sstevel@tonic-gate #include <vm/as.h> 627c478bd9Sstevel@tonic-gate #include <vm/seg.h> 637c478bd9Sstevel@tonic-gate #include <vm/seg_kp.h> 647c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 657c478bd9Sstevel@tonic-gate #include <vm/page.h> 667c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 677c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h> 687c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h> 697c478bd9Sstevel@tonic-gate 707c478bd9Sstevel@tonic-gate #include <sys/cpu.h> 717c478bd9Sstevel@tonic-gate #include <sys/vm_machparam.h> 727c478bd9Sstevel@tonic-gate #include <sys/memlist.h> 
737c478bd9Sstevel@tonic-gate #include <sys/bootconf.h> /* XXX the memlist stuff belongs in memlist_plat.h */ 747c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 757c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> 767c478bd9Sstevel@tonic-gate #include <sys/elf_386.h> 777c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 787c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> 797c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 827c478bd9Sstevel@tonic-gate #include <sys/ddidmareq.h> 837c478bd9Sstevel@tonic-gate #include <sys/promif.h> 847c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 857c478bd9Sstevel@tonic-gate #include <sys/stack.h> 867c478bd9Sstevel@tonic-gate 875d07b933Sdp uint_t vac_colors = 1; 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate int largepagesupport = 0; 907c478bd9Sstevel@tonic-gate extern uint_t page_create_new; 917c478bd9Sstevel@tonic-gate extern uint_t page_create_exists; 927c478bd9Sstevel@tonic-gate extern uint_t page_create_putbacks; 937c478bd9Sstevel@tonic-gate extern uint_t page_create_putbacks; 947c478bd9Sstevel@tonic-gate extern uintptr_t eprom_kernelbase; 957c478bd9Sstevel@tonic-gate extern int use_sse_pagecopy, use_sse_pagezero; /* in ml/float.s */ 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate /* 4g memory management */ 987c478bd9Sstevel@tonic-gate pgcnt_t maxmem4g; 997c478bd9Sstevel@tonic-gate pgcnt_t freemem4g; 1007c478bd9Sstevel@tonic-gate int physmax4g; 1017c478bd9Sstevel@tonic-gate int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */ 1027c478bd9Sstevel@tonic-gate int lotsfree4gshift = 3; 1037c478bd9Sstevel@tonic-gate 10407ad560dSkchow /* 16m memory management: desired number of free pages below 16m. 
*/ 10507ad560dSkchow pgcnt_t desfree16m = 0x380; 10607ad560dSkchow 1077c478bd9Sstevel@tonic-gate #ifdef VM_STATS 1087c478bd9Sstevel@tonic-gate struct { 1097c478bd9Sstevel@tonic-gate ulong_t pga_alloc; 1107c478bd9Sstevel@tonic-gate ulong_t pga_notfullrange; 1117c478bd9Sstevel@tonic-gate ulong_t pga_nulldmaattr; 1127c478bd9Sstevel@tonic-gate ulong_t pga_allocok; 1137c478bd9Sstevel@tonic-gate ulong_t pga_allocfailed; 1147c478bd9Sstevel@tonic-gate ulong_t pgma_alloc; 1157c478bd9Sstevel@tonic-gate ulong_t pgma_allocok; 1167c478bd9Sstevel@tonic-gate ulong_t pgma_allocfailed; 1177c478bd9Sstevel@tonic-gate ulong_t pgma_allocempty; 1187c478bd9Sstevel@tonic-gate } pga_vmstats; 1197c478bd9Sstevel@tonic-gate #endif 1207c478bd9Sstevel@tonic-gate 1217c478bd9Sstevel@tonic-gate uint_t mmu_page_sizes; 1227c478bd9Sstevel@tonic-gate 1237c478bd9Sstevel@tonic-gate /* How many page sizes the users can see */ 1247c478bd9Sstevel@tonic-gate uint_t mmu_exported_page_sizes; 1257c478bd9Sstevel@tonic-gate 126beb1bda0Sdavemq /* 127beb1bda0Sdavemq * Number of pages in 1 GB. Don't enable automatic large pages if we have 128beb1bda0Sdavemq * fewer than this many pages. 129beb1bda0Sdavemq */ 130*ec25b48fSsusans pgcnt_t shm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); 131*ec25b48fSsusans pgcnt_t privm_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); 132*ec25b48fSsusans 133*ec25b48fSsusans /* 134*ec25b48fSsusans * Maximum and default segment size tunables for user private 135*ec25b48fSsusans * and shared anon memory, and user text and initialized data. 136*ec25b48fSsusans * These can be patched via /etc/system to allow large pages 137*ec25b48fSsusans * to be used for mapping application private and shared anon memory. 
 */
size_t mcntl0_lpsize = MMU_PAGESIZE;	/* cap for memcntl(2) MC_HAT_ADVISE */
size_t max_uheap_lpsize = MMU_PAGESIZE;	/* largest pagesize for user heap */
size_t default_uheap_lpsize = MMU_PAGESIZE; /* heap size rounded up to this */
size_t max_ustack_lpsize = MMU_PAGESIZE; /* largest pagesize for user stack */
size_t default_ustack_lpsize = MMU_PAGESIZE; /* stack size rounded up to this */
size_t max_privmap_lpsize = MMU_PAGESIZE; /* largest pagesize, private mmap */
size_t max_uidata_lpsize = MMU_PAGESIZE; /* largest pagesize, initialized data */
size_t max_utext_lpsize = MMU_PAGESIZE;	/* largest pagesize for user text */
size_t max_shm_lpsize = MMU_PAGESIZE;	/* largest pagesize for shared anon */

/*
 * Return the optimum page size for a given mapping.
 *
 * maptype selects the mapping class (MAPPGSZ_HEAP, MAPPGSZ_STK or
 * MAPPGSZ_ISM; MAPPGSZ_VA is asserted out below).  For heap/stack the
 * choice is bounded by the corresponding max_*_lpsize tunable (or
 * mcntl0_lpsize when invoked via memcntl(2)), and len defaults to the
 * current heap/stack size when the caller passes 0.  The result is the
 * largest hardware page size <= both the tunable cap and len.
 */
/*ARGSUSED*/
size_t
map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
{
	level_t l = 0;
	size_t pgsz = MMU_PAGESIZE;
	size_t max_lpsize;
	uint_t mszc;

	ASSERT(maptype != MAPPGSZ_VA);

	/* Too little physical memory: don't bother with large pages. */
	if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
		return (MMU_PAGESIZE);
	}

	switch (maptype) {
	case MAPPGSZ_HEAP:
	case MAPPGSZ_STK:
		max_lpsize = memcntl ? mcntl0_lpsize : (maptype ==
		    MAPPGSZ_HEAP ? max_uheap_lpsize : max_ustack_lpsize);
		if (max_lpsize == MMU_PAGESIZE) {
			return (MMU_PAGESIZE);
		}
		if (len == 0) {
			/* default to the current heap or stack size */
			len = (maptype == MAPPGSZ_HEAP) ? p->p_brkbase +
			    p->p_brksize - p->p_bssbase : p->p_stksize;
		}
		len = (maptype == MAPPGSZ_HEAP) ? MAX(len,
		    default_uheap_lpsize) : MAX(len, default_ustack_lpsize);

		/*
		 * use the page size that best fits len
		 */
		for (l = mmu.max_page_level; l > 0; --l) {
			if (LEVEL_SIZE(l) > max_lpsize || len < LEVEL_SIZE(l)) {
				continue;
			} else {
				pgsz = LEVEL_SIZE(l);
			}
			break;
		}

		/*
		 * If no address hint was given, don't shrink below the
		 * page size the segment already uses (p_brkpageszc /
		 * p_stkpageszc record the current size code).
		 */
		mszc = (maptype == MAPPGSZ_HEAP ? p->p_brkpageszc :
		    p->p_stkpageszc);
		if (addr == 0 && (pgsz < hw_page_array[mszc].hp_size)) {
			pgsz = hw_page_array[mszc].hp_size;
		}
		return (pgsz);

	/*
	 * for ISM use the 1st large page size.
	 */
	case MAPPGSZ_ISM:
		if (mmu.max_page_level == 0)
			return (MMU_PAGESIZE);
		return (LEVEL_SIZE(1));
	}
	return (pgsz);
}

/*
 * Common helper for map_pgszcvec(): compute the bit vector of page size
 * codes usable for [addr, addr + size) given the per-mapping-type cap
 * max_lpsize and the physical memory floor min_physmem.  A size qualifies
 * when the region contains at least one naturally aligned chunk of it and
 * addr is compatible with the mapping offset (so large pages can line up
 * with the backing object).
 */
static uint_t
map_szcvec(caddr_t addr, size_t size, uintptr_t off, size_t max_lpsize,
    size_t min_physmem)
{
	caddr_t eaddr = addr + size;
	uint_t szcvec = 0;
	caddr_t raddr;
	caddr_t readdr;
	size_t pgsz;
	int i;

	if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
		return (0);
	}

	/* walk from the largest supported page size down */
	for (i = mmu_page_sizes - 1; i > 0; i--) {
		pgsz = page_get_pagesize(i);
		if (pgsz > max_lpsize) {
			continue;
		}
		raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
		readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
		if (raddr < addr || raddr >= readdr) {
			/* no fully aligned pgsz chunk inside the region */
			continue;
		}
		if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
			/* addr and offset not congruent modulo pgsz */
			continue;
		}
		/*
		 * Set szcvec to the remaining page sizes.
		 * (all size codes 1..i; bit 0 — the base page — is clear)
		 */
		szcvec = ((1 << (i + 1)) - 1) & ~1;
		break;
	}
	return (szcvec);
}

/*
 * Return a bit vector of large page size codes that
 * can be used to map [addr, addr + len) region.
 *
 * The MAP_TEXT/MAP_INITDATA flags and the MAPPGSZC_* type select which
 * max_*_lpsize tunable and physmem floor apply; a memcntl(2) caller is
 * instead bounded by mcntl0_lpsize.
 */
/*ARGSUSED*/
uint_t
map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
    int memcntl)
{
	size_t max_lpsize = mcntl0_lpsize;

	if (mmu.max_page_level == 0)
		return (0);

	if (flags & MAP_TEXT) {
		if (!memcntl)
			max_lpsize = max_utext_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    shm_lpg_min_physmem));

	} else if (flags & MAP_INITDATA) {
		if (!memcntl)
			max_lpsize = max_uidata_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_SHM) {
		if (!memcntl)
			max_lpsize = max_shm_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    shm_lpg_min_physmem));

	} else if (type == MAPPGSZC_HEAP) {
		if (!memcntl)
			max_lpsize = max_uheap_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));

	} else if (type == MAPPGSZC_STACK) {
		if (!memcntl)
			max_lpsize = max_ustack_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));

	} else {
		if (!memcntl)
			max_lpsize = max_privmap_lpsize;
		return (map_szcvec(addr, size, off, max_lpsize,
		    privm_lpg_min_physmem));
	}
}
/*
 * Handle a pagefault.
 *
 * Resolves the fault at addr via as_fault() against either the kernel
 * address space (iskernel != 0) or the current process's.  If the user
 * fault lands in an unmapped hole of the UNIX data (brk) or stack
 * segment, an ILP32 process gets a zero-fill-on-demand mapping created
 * there and the fault is retried — a 3.X/4.X/5.X compatibility behavior.
 * Returns an FC_* fault code (0 on success).
 */
faultcode_t
pagefault(
	caddr_t addr,
	enum fault_type type,
	enum seg_rw rw,
	int iskernel)
{
	struct as *as;
	struct hat *hat;
	struct proc *p;
	kthread_t *t;
	faultcode_t res;
	caddr_t base;
	size_t len;
	int err;
	int mapped_red;
	uintptr_t ea;

	ASSERT_STACK_ALIGNED();

	if (INVALID_VADDR(addr))
		return (FC_NOMAP);

	/* map in the red zone page so deep stacks can still be handled */
	mapped_red = segkp_map_red();

	if (iskernel) {
		as = &kas;
		hat = as->a_hat;
	} else {
		t = curthread;
		p = ttoproc(t);
		as = p->p_as;
		hat = as->a_hat;
	}

	/*
	 * Dispatch pagefault.
	 */
	res = as_fault(hat, as, addr, 1, type, rw);

	/*
	 * If this isn't a potential unmapped hole in the user's
	 * UNIX data or stack segments, just return status info.
	 */
	if (res != FC_NOMAP || iskernel)
		goto out;

	/*
	 * Check to see if we happened to fault on a currently unmapped
	 * part of the UNIX data or stack segments.  If so, create a zfod
	 * mapping there and then try calling the fault routine again.
	 */
	base = p->p_brkbase;
	len = p->p_brksize;

	if (addr < base || addr >= base + len) {		/* data seg? */
		base = (caddr_t)p->p_usrstack - p->p_stksize;
		len = p->p_stksize;
		if (addr < base || addr >= p->p_usrstack) {	/* stack seg? */
			/* not in either UNIX data or stack segments */
			res = FC_NOMAP;
			goto out;
		}
	}

	/*
	 * the rest of this function implements a 3.X 4.X 5.X compatibility
	 * This code is probably not needed anymore
	 */
	if (p->p_model == DATAMODEL_ILP32) {

		/* expand the gap to the page boundaries on each side */
		ea = P2ROUNDUP((uintptr_t)base + len, MMU_PAGESIZE);
		base = (caddr_t)P2ALIGN((uintptr_t)base, MMU_PAGESIZE);
		len = ea - (uintptr_t)base;

		as_rangelock(as);
		if (as_gap(as, MMU_PAGESIZE, &base, &len, AH_CONTAIN, addr) ==
		    0) {
			err = as_map(as, base, len, segvn_create, zfod_argsp);
			as_rangeunlock(as);
			if (err) {
				res = FC_MAKE_ERR(err);
				goto out;
			}
		} else {
			/*
			 * This page is already mapped by another thread after
			 * we returned from as_fault() above.  We just fall
			 * through as_fault() below.
			 */
			as_rangeunlock(as);
		}

		res = as_fault(hat, as, addr, 1, F_INVAL, rw);
	}

out:
	if (mapped_red)
		segkp_unmap_red();

	return (res);
}

/*
 * Pick a user address for a mapping in the current process, bounded by
 * _userlimit32 when _MAP_LOW32 is requested.  Thin wrapper that supplies
 * curproc and the appropriate userlimit to map_addr_proc().
 */
void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
	struct proc *p = curproc;
	caddr_t userlimit = (flags & _MAP_LOW32) ?
	    (caddr_t)_userlimit32 : p->p_as->a_userlimit;

	map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags);
}

/*
 * No virtually addressed cache on x86, so there is never a VAC alignment
 * conflict: always report "no constraint".
 */
/*ARGSUSED*/
int
map_addr_vacalign_check(caddr_t addr, u_offset_t off)
{
	return (0);
}
/*
 * map_addr_proc() is the routine called when the system is to
 * choose an address for the user.  We will pick an address
 * range which is the highest available below kernelbase.
 *
 * addrp is a value/result parameter.
 *	On input it is a hint from the user to be used in a completely
 *	machine dependent fashion.  We decide to completely ignore this hint.
 *
 *	On output it is NULL if no address can be found in the current
 *	processes address space or else an address that is currently
 *	not mapped for len bytes with a page of red zone on either side.
 *
 * align is not needed on x86 (it's for virtually addressed caches)
 */
/*ARGSUSED*/
void
map_addr_proc(
	caddr_t *addrp,
	size_t len,
	offset_t off,
	int vacalign,
	caddr_t userlimit,
	struct proc *p,
	uint_t flags)
{
	struct as *as = p->p_as;
	caddr_t addr;
	caddr_t base;
	size_t slen;
	size_t align_amount;

	ASSERT32(userlimit == as->a_userlimit);

	base = p->p_brkbase;
#if defined(__amd64)
	/*
	 * XX64 Yes, this needs more work.
	 */
	if (p->p_model == DATAMODEL_NATIVE) {
		if (userlimit < as->a_userlimit) {
			/*
			 * This happens when a program wants to map
			 * something in a range that's accessible to a
			 * program in a smaller address space.  For example,
			 * a 64-bit program calling mmap32(2) to guarantee
			 * that the returned address is below 4Gbytes.
			 */
			ASSERT((uintptr_t)userlimit < ADDRESS_C(0xffffffff));

			if (userlimit > base)
				slen = userlimit - base;
			else {
				*addrp = NULL;
				return;
			}
		} else {
			/*
			 * XX64 This layout is probably wrong .. but in
			 * the event we make the amd64 address space look
			 * like sparcv9 i.e. with the stack -above- the
			 * heap, this bit of code might even be correct.
			 */
			slen = p->p_usrstack - base -
			    (((size_t)rctl_enforced_value(
			    rctlproc_legacy[RLIMIT_STACK],
			    p->p_rctls, p) + PAGEOFFSET) & PAGEMASK);
		}
	} else
#endif
		slen = userlimit - base;

	/* round the request up to a whole number of pages */
	len = (len + PAGEOFFSET) & PAGEMASK;

	/*
	 * Redzone for each side of the request. This is done to leave
	 * one page unmapped between segments. This is not required, but
	 * it's useful for the user because if their program strays across
	 * a segment boundary, it will catch a fault immediately making
	 * debugging a little easier.
	 */
	len += 2 * MMU_PAGESIZE;

	/*
	 * figure out what the alignment should be
	 *
	 * XX64 -- is there an ELF_AMD64_MAXPGSZ or is it the same????
	 */
	if (len <= ELF_386_MAXPGSZ) {
		/*
		 * Align virtual addresses to ensure that ELF shared libraries
		 * are mapped with the appropriate alignment constraints by
		 * the run-time linker.
		 */
		align_amount = ELF_386_MAXPGSZ;
	} else {
		/*
		 * For larger requests, align to the biggest hardware page
		 * size that still fits in len, to enable large page use.
		 */
		int l = mmu.max_page_level;

		while (l && len < LEVEL_SIZE(l))
			--l;

		align_amount = LEVEL_SIZE(l);
	}

	/* MAP_ALIGN: the caller passed the desired alignment in *addrp */
	if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount))
		align_amount = (uintptr_t)*addrp;

	len += align_amount;

	/*
	 * Look for a large enough hole starting below userlimit.
	 * After finding it, use the upper part.  Addition of PAGESIZE
	 * is for the redzone as described above.
	 */
	if (as_gap(as, len, &base, &slen, AH_HI, NULL) == 0) {
		caddr_t as_addr;

		addr = base + slen - len + MMU_PAGESIZE;
		as_addr = addr;
		/*
		 * Round address DOWN to the alignment amount,
		 * add the offset, and if this address is less
		 * than the original address, add alignment amount.
		 */
		addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1)));
		addr += (uintptr_t)(off & (align_amount - 1));
		if (addr < as_addr)
			addr += align_amount;

		ASSERT(addr <= (as_addr + align_amount));
		ASSERT(((uintptr_t)addr & (align_amount - 1)) ==
		    ((uintptr_t)(off & (align_amount - 1))));
		*addrp = addr;
	} else {
		*addrp = NULL;	/* no more virtual space */
	}
}
/*
 * Determine whether [base, base+len] contains a valid range of
 * addresses at least minlen long. base and len are adjusted if
 * required to provide a valid range.
 *
 * Returns 1 (and possibly narrowed *basep/*lenp) on success, 0 if no
 * minlen-sized subrange exists.  On amd64 this also carves the result
 * around the non-canonical VA hole [hole_start, hole_end), honoring
 * the caller's AH_LO/AH_HI preference when both sides qualify.
 */
/*ARGSUSED3*/
int
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
{
	uintptr_t hi, lo;

	lo = (uintptr_t)*basep;
	hi = lo + *lenp;

	/*
	 * If hi rolled over the top, try cutting back.
	 */
	if (hi < lo) {
		if (0 - lo + hi < minlen)
			return (0);
		if (0 - lo < minlen)
			return (0);
		*lenp = 0 - lo;
	} else if (hi - lo < minlen) {
		return (0);
	}
#if defined(__amd64)
	/*
	 * Deal with a possible hole in the address range between
	 * hole_start and hole_end that should never be mapped.
	 */
	if (lo < hole_start) {
		if (hi > hole_start) {
			if (hi < hole_end) {
				hi = hole_start;
			} else {
				/* lo < hole_start && hi >= hole_end */
				if (dir == AH_LO) {
					/*
					 * prefer lowest range
					 */
					if (hole_start - lo >= minlen)
						hi = hole_start;
					else if (hi - hole_end >= minlen)
						lo = hole_end;
					else
						return (0);
				} else {
					/*
					 * prefer highest range
					 */
					if (hi - hole_end >= minlen)
						lo = hole_end;
					else if (hole_start - lo >= minlen)
						hi = hole_start;
					else
						return (0);
				}
			}
		}
	} else {
		/* lo >= hole_start */
		if (hi < hole_end)
			return (0);
		if (lo < hole_end)
			lo = hole_end;
	}

	if (hi - lo < minlen)
		return (0);

	*basep = (caddr_t)lo;
	*lenp = hi - lo;
#endif
	return (1);
}

/*
 * Determine whether [addr, addr+len] are valid user addresses.
 * Returns RANGE_OKAY, or RANGE_BADADDR if the range is empty, wraps,
 * exceeds userlimit, or (amd64) intersects the non-canonical VA hole.
 */
/*ARGSUSED*/
int
valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
    caddr_t userlimit)
{
	caddr_t eaddr = addr + len;

	if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
		return (RANGE_BADADDR);

#if defined(__amd64)
	/*
	 * Check for the VA hole
	 */
	if (eaddr > (caddr_t)hole_start && addr < (caddr_t)hole_end)
		return (RANGE_BADADDR);
#endif

	return (RANGE_OKAY);
}

/*
 * Return 1 if the page frame is onboard memory, else 0.
 */
int
pf_is_memory(pfn_t pf)
{
	/* a frame is "memory" iff it appears in the installed-memory list */
	return (address_in_memlist(phys_install, mmu_ptob((uint64_t)pf), 1));
}
 */
uint_t page_colors;		/* number of color bins (power of 2, see ASSERT in page_coloring_init) */
uint_t page_colors_mask;	/* page_colors - 1 */
uint_t page_coloring_shift;	/* set from lowbit(CPUSETSIZE()) */
int cpu_page_colors;		/* nonzero when one color spreads over multiple bins */
static uint_t l2_colors;	/* colors implied by the l2 cache geometry */

/*
 * Page freelists and cachelists are dynamically allocated once mnoderangecnt
 * and page_colors are calculated from the l2 cache n-way set size. Within a
 * mnode range, the page freelist and cachelist are hashed into bins based on
 * color. This makes it easier to search for a page within a specific memory
 * range.
 */
#define	PAGE_COLORS_MIN	16

page_t ****page_freelists;	/* indexed [mnode range][page size][color] */
page_t ***page_cachelists;	/* indexed [mnode range][color] */

/*
 * As the PC architecture evolved memory up was clumped into several
 * ranges for various historical I/O devices to do DMA.
 * < 16Meg - ISA bus
 * < 2Gig - ???
 * < 4Gig - PCI bus or drivers that don't understand PAE mode
 *
 * The array is ordered from the highest range down to 0; each entry is
 * the lowest pfn belonging to that range.
 */
static pfn_t arch_memranges[NUM_MEM_RANGES] = {
	0x100000,	/* pfn range for 4G and above */
	0x80000,	/* pfn range for 2G-4G */
	0x01000,	/* pfn range for 16M-2G */
	0x00000,	/* pfn range for 0-16M */
};

/*
 * These are changed during startup if the machine has limited memory
 * (page_coloring_init() advances memranges and shrinks nranges).
 */
pfn_t *memranges = &arch_memranges[0];
int nranges = NUM_MEM_RANGES;

/*
 * Used by page layer to know about page sizes
 * (filled in by page_coloring_init()).
 */
hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1];

/*
 * This can be patched via /etc/system to allow old non-PAE aware device
 * drivers to use kmem_alloc'd memory on 32 bit systems with > 4Gig RAM.
7237c478bd9Sstevel@tonic-gate */ 7247c478bd9Sstevel@tonic-gate #if defined(__i386) 725aa042c4bSkchow int restricted_kmemalloc = 0; 7267c478bd9Sstevel@tonic-gate #elif defined(__amd64) 7277c478bd9Sstevel@tonic-gate int restricted_kmemalloc = 0; 7287c478bd9Sstevel@tonic-gate #endif 7297c478bd9Sstevel@tonic-gate 7307c478bd9Sstevel@tonic-gate kmutex_t *fpc_mutex[NPC_MUTEX]; 7317c478bd9Sstevel@tonic-gate kmutex_t *cpc_mutex[NPC_MUTEX]; 7327c478bd9Sstevel@tonic-gate 7337c478bd9Sstevel@tonic-gate 7347c478bd9Sstevel@tonic-gate /* 7357c478bd9Sstevel@tonic-gate * return the memrange containing pfn 7367c478bd9Sstevel@tonic-gate */ 7377c478bd9Sstevel@tonic-gate int 7387c478bd9Sstevel@tonic-gate memrange_num(pfn_t pfn) 7397c478bd9Sstevel@tonic-gate { 7407c478bd9Sstevel@tonic-gate int n; 7417c478bd9Sstevel@tonic-gate 7427c478bd9Sstevel@tonic-gate for (n = 0; n < nranges - 1; ++n) { 7437c478bd9Sstevel@tonic-gate if (pfn >= memranges[n]) 7447c478bd9Sstevel@tonic-gate break; 7457c478bd9Sstevel@tonic-gate } 7467c478bd9Sstevel@tonic-gate return (n); 7477c478bd9Sstevel@tonic-gate } 7487c478bd9Sstevel@tonic-gate 7497c478bd9Sstevel@tonic-gate /* 7507c478bd9Sstevel@tonic-gate * return the mnoderange containing pfn 7517c478bd9Sstevel@tonic-gate */ 7527c478bd9Sstevel@tonic-gate int 7537c478bd9Sstevel@tonic-gate pfn_2_mtype(pfn_t pfn) 7547c478bd9Sstevel@tonic-gate { 7557c478bd9Sstevel@tonic-gate int n; 7567c478bd9Sstevel@tonic-gate 7577c478bd9Sstevel@tonic-gate for (n = mnoderangecnt - 1; n >= 0; n--) { 7587c478bd9Sstevel@tonic-gate if (pfn >= mnoderanges[n].mnr_pfnlo) { 7597c478bd9Sstevel@tonic-gate break; 7607c478bd9Sstevel@tonic-gate } 7617c478bd9Sstevel@tonic-gate } 7627c478bd9Sstevel@tonic-gate return (n); 7637c478bd9Sstevel@tonic-gate } 7647c478bd9Sstevel@tonic-gate 7657c478bd9Sstevel@tonic-gate /* 7667c478bd9Sstevel@tonic-gate * is_contigpage_free: 7677c478bd9Sstevel@tonic-gate * returns a page list of contiguous pages. 
 *	It minimally has to return
 *	minctg pages. Caller determines minctg based on the scatter-gather
 *	list length.
 *
 *	pfnp is set to the next page frame to search on return.
 */
static page_t *
is_contigpage_free(
	pfn_t *pfnp,		/* in/out: first pfn to try / next pfn to search */
	pgcnt_t *pgcnt,		/* in/out: pages wanted; decremented on success */
	pgcnt_t minctg,		/* minimum contiguous run that must be found */
	uint64_t pfnseg,	/* DMA segment mask; a run may not cross it */
	int iolock)		/* nonzero: also acquire the page io lock */
{
	int	i = 0;
	pfn_t	pfn = *pfnp;
	page_t	*pp;
	page_t	*plist = NULL;

	/*
	 * fail if pfn + minctg crosses a segment boundary.
	 * Adjust for next starting pfn to begin at segment boundary.
	 */
	if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg)) {
		*pfnp = roundup(*pfnp, pfnseg + 1);
		return (NULL);
	}

	do {
retry:
		pp = page_numtopp_nolock(pfn + i);
		if ((pp == NULL) ||
		    (page_trylock(pp, SE_EXCL) == 0)) {
			(*pfnp)++;
			break;
		}
		/*
		 * The page_t may have been reassigned between the
		 * unlocked lookup and the trylock; verify identity
		 * and retry the lookup if it moved.
		 */
		if (page_pptonum(pp) != pfn + i) {
			page_unlock(pp);
			goto retry;
		}

		if (!(PP_ISFREE(pp))) {
			page_unlock(pp);
			(*pfnp)++;
			break;
		}

		/*
		 * A free page is either aged (free list) or still carries
		 * an identity (cache list); remove it from the proper
		 * list, discarding any identity.
		 */
		if (!PP_ISAGED(pp)) {
			page_list_sub(pp, PG_CACHE_LIST);
			page_hashout(pp, (kmutex_t *)NULL);
		} else {
			page_list_sub(pp, PG_FREE_LIST);
		}

		if (iolock)
			page_io_lock(pp);
		page_list_concat(&plist, &pp);

		/*
		 * exit loop when pgcnt satisfied or segment boundary reached.
		 */
	} while ((++i < *pgcnt) && ((pfn + i) & pfnseg));

	*pfnp += i;		/* set to next pfn to search */

	if (i >= minctg) {
		*pgcnt -= i;
		return (plist);
	}

	/*
	 * failure: minctg not satisfied.
	 *
	 * if next request crosses segment boundary, set next pfn
	 * to search from the segment boundary.
	 */
	if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg))
		*pfnp = roundup(*pfnp, pfnseg + 1);

	/* clean up any pages already allocated */
	while (plist) {
		pp = plist;
		page_sub(&plist, pp);
		page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
		if (iolock)
			page_io_unlock(pp);
		page_unlock(pp);
	}

	return (NULL);
}

/*
 * verify that pages being returned from allocator have correct DMA attribute
 */
#ifndef DEBUG
#define	check_dma(a, b, c) (0)
#else
static void
check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt)
{
	if (dma_attr == NULL)
		return;

	/* walk cnt entries of the page list, range-checking each pfn */
	while (cnt-- > 0) {
		if (mmu_ptob((uint64_t)pp->p_pagenum) <
		    dma_attr->dma_attr_addr_lo)
			panic("PFN (pp=%p) below dma_attr_addr_lo", pp);
		if (mmu_ptob((uint64_t)pp->p_pagenum) >=
		    dma_attr->dma_attr_addr_hi)
			panic("PFN (pp=%p) above dma_attr_addr_hi", pp);
		pp = pp->p_next;
	}
}
#endif

/* serializes page_get_contigpage()'s static search state (startpfn etc.) */
static kmutex_t	contig_lock;

#define	CONTIG_LOCK()	mutex_enter(&contig_lock);
#define	CONTIG_UNLOCK()	mutex_exit(&contig_lock);

#define	PFN_16M		(mmu_btop((uint64_t)0x1000000))

/*
 * Allocate *pgcnt pages as one or more runs of contiguous free pages
 * satisfying the DMA attributes in mattr (address range, segment mask,
 * alignment, scatter-gather length); with mattr == NULL any physical
 * pages will do but the whole count must be one contiguous run.
 * Returns the page list, or NULL on failure.  The search resumes from
 * the static startpfn cursor when profitable and wraps once to lo.
 */
static page_t *
page_get_contigpage(pgcnt_t *pgcnt, ddi_dma_attr_t *mattr, int iolock)
{
	pfn_t		pfn;
	int		sgllen;
	uint64_t	pfnseg;
	pgcnt_t		minctg;
	page_t		*pplist = NULL, *plist;
	uint64_t	lo, hi;
	pgcnt_t		pfnalign = 0;
	static pfn_t	startpfn;	/* resume cursor, protected by contig_lock */
	static pgcnt_t	lastctgcnt;	/* minctg of the previous search */
	uintptr_t	align;

	CONTIG_LOCK();

	if (mattr) {
		/* derive the pfn search window from the DMA limits */
		lo = mmu_btop((mattr->dma_attr_addr_lo + MMU_PAGEOFFSET));
		hi = mmu_btop(mattr->dma_attr_addr_hi);
		if (hi >= physmax)
			hi = physmax - 1;
		sgllen = mattr->dma_attr_sgllen;
		pfnseg = mmu_btop(mattr->dma_attr_seg);

		align = maxbit(mattr->dma_attr_align, mattr->dma_attr_minxfer);
		if (align > MMU_PAGESIZE)
			pfnalign = mmu_btop(align);

		/*
		 * in order to satisfy the request, must minimally
		 * acquire minctg contiguous pages
		 */
		minctg = howmany(*pgcnt, sgllen);

		ASSERT(hi >= lo);

		/*
		 * start from where last searched if the minctg >= lastctgcnt
		 */
		if (minctg < lastctgcnt || startpfn < lo || startpfn > hi)
			startpfn = lo;
	} else {
		hi = physmax - 1;
		lo = 0;
		sgllen = 1;
		pfnseg = mmu.highest_pfn;
		minctg = *pgcnt;

		if (minctg < lastctgcnt)
			startpfn = lo;
	}
	lastctgcnt = minctg;

	ASSERT(pfnseg + 1 >= (uint64_t)minctg);

	/* conserve 16m memory - start search above 16m when possible */
	if (hi > PFN_16M && startpfn < PFN_16M)
		startpfn = PFN_16M;

	pfn = startpfn;
	if (pfnalign)
		pfn = P2ROUNDUP(pfn, pfnalign);

	/* first pass: from the resume cursor up to hi */
	while (pfn + minctg - 1 <= hi) {

		plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock);
		if (plist) {
			page_list_concat(&pplist, &plist);
			sgllen--;
			/*
			 * return when contig pages no longer needed
			 */
			if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
				startpfn = pfn;
				CONTIG_UNLOCK();
				check_dma(mattr, pplist, *pgcnt);
				return (pplist);
			}
			minctg = howmany(*pgcnt, sgllen);
		}
		if (pfnalign)
			pfn = P2ROUNDUP(pfn, pfnalign);
	}

	/* cannot find contig pages in specified range */
	if (startpfn == lo) {
		CONTIG_UNLOCK();
		return (NULL);
	}

	/* did not start with lo previously */
	pfn = lo;
	if (pfnalign)
		pfn = P2ROUNDUP(pfn, pfnalign);

	/* allow search to go above startpfn */
	while (pfn < startpfn) {

		plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock);
		if (plist != NULL) {

			page_list_concat(&pplist, &plist);
			sgllen--;

			/*
			 * return when contig pages no longer needed
			 */
			if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
				startpfn = pfn;
				CONTIG_UNLOCK();
				check_dma(mattr, pplist, *pgcnt);
				return (pplist);
			}
			minctg = howmany(*pgcnt, sgllen);
		}
		if (pfnalign)
			pfn = P2ROUNDUP(pfn, pfnalign);
	}
	CONTIG_UNLOCK();
	return (NULL);
}

/*
 * combine mem_node_config and memrange memory ranges into one data
 * structure to be used for page list management.
 *
 * mnode_range_cnt() calculates the number of memory ranges for mnode and
 * memranges[]. Used to determine the size of page lists and mnoderanges.
 *
 * mnode_range_setup() initializes mnoderanges.
10237c478bd9Sstevel@tonic-gate */ 10247c478bd9Sstevel@tonic-gate mnoderange_t *mnoderanges; 10257c478bd9Sstevel@tonic-gate int mnoderangecnt; 10267c478bd9Sstevel@tonic-gate int mtype4g; 10277c478bd9Sstevel@tonic-gate 10287c478bd9Sstevel@tonic-gate int 10295d07b933Sdp mnode_range_cnt(int mnode) 10307c478bd9Sstevel@tonic-gate { 10317c478bd9Sstevel@tonic-gate int mri; 10327c478bd9Sstevel@tonic-gate int mnrcnt = 0; 10337c478bd9Sstevel@tonic-gate 10345d07b933Sdp if (mem_node_config[mnode].exists != 0) { 10357c478bd9Sstevel@tonic-gate mri = nranges - 1; 10367c478bd9Sstevel@tonic-gate 10377c478bd9Sstevel@tonic-gate /* find the memranges index below contained in mnode range */ 10387c478bd9Sstevel@tonic-gate 10397c478bd9Sstevel@tonic-gate while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase) 10407c478bd9Sstevel@tonic-gate mri--; 10417c478bd9Sstevel@tonic-gate 10427c478bd9Sstevel@tonic-gate /* 10437c478bd9Sstevel@tonic-gate * increment mnode range counter when memranges or mnode 10447c478bd9Sstevel@tonic-gate * boundary is reached. 
10457c478bd9Sstevel@tonic-gate */ 10467c478bd9Sstevel@tonic-gate while (mri >= 0 && 10477c478bd9Sstevel@tonic-gate mem_node_config[mnode].physmax >= MEMRANGELO(mri)) { 10487c478bd9Sstevel@tonic-gate mnrcnt++; 10497c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) 10507c478bd9Sstevel@tonic-gate mri--; 10517c478bd9Sstevel@tonic-gate else 10527c478bd9Sstevel@tonic-gate break; 10537c478bd9Sstevel@tonic-gate } 10547c478bd9Sstevel@tonic-gate } 10555d07b933Sdp ASSERT(mnrcnt <= MAX_MNODE_MRANGES); 10567c478bd9Sstevel@tonic-gate return (mnrcnt); 10577c478bd9Sstevel@tonic-gate } 10587c478bd9Sstevel@tonic-gate 10597c478bd9Sstevel@tonic-gate void 10607c478bd9Sstevel@tonic-gate mnode_range_setup(mnoderange_t *mnoderanges) 10617c478bd9Sstevel@tonic-gate { 10627c478bd9Sstevel@tonic-gate int mnode, mri; 10637c478bd9Sstevel@tonic-gate 10647c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 10657c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].exists == 0) 10667c478bd9Sstevel@tonic-gate continue; 10677c478bd9Sstevel@tonic-gate 10687c478bd9Sstevel@tonic-gate mri = nranges - 1; 10697c478bd9Sstevel@tonic-gate 10707c478bd9Sstevel@tonic-gate while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase) 10717c478bd9Sstevel@tonic-gate mri--; 10727c478bd9Sstevel@tonic-gate 10737c478bd9Sstevel@tonic-gate while (mri >= 0 && mem_node_config[mnode].physmax >= 10747c478bd9Sstevel@tonic-gate MEMRANGELO(mri)) { 10757c478bd9Sstevel@tonic-gate mnoderanges->mnr_pfnlo = 10767c478bd9Sstevel@tonic-gate MAX(MEMRANGELO(mri), 10777c478bd9Sstevel@tonic-gate mem_node_config[mnode].physbase); 10787c478bd9Sstevel@tonic-gate mnoderanges->mnr_pfnhi = 10797c478bd9Sstevel@tonic-gate MIN(MEMRANGEHI(mri), 10807c478bd9Sstevel@tonic-gate mem_node_config[mnode].physmax); 10817c478bd9Sstevel@tonic-gate mnoderanges->mnr_mnode = mnode; 10827c478bd9Sstevel@tonic-gate mnoderanges->mnr_memrange = mri; 10837c478bd9Sstevel@tonic-gate mnoderanges++; 
10847c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) 10857c478bd9Sstevel@tonic-gate mri--; 10867c478bd9Sstevel@tonic-gate else 10877c478bd9Sstevel@tonic-gate break; 10887c478bd9Sstevel@tonic-gate } 10897c478bd9Sstevel@tonic-gate } 10907c478bd9Sstevel@tonic-gate } 10917c478bd9Sstevel@tonic-gate 10927c478bd9Sstevel@tonic-gate /* 10937c478bd9Sstevel@tonic-gate * Determine if the mnode range specified in mtype contains memory belonging 10947c478bd9Sstevel@tonic-gate * to memory node mnode. If flags & PGI_MT_RANGE is set then mtype contains 109507ad560dSkchow * the range of indices from high pfn to 0, 16m or 4g. 10967c478bd9Sstevel@tonic-gate * 10977c478bd9Sstevel@tonic-gate * Return first mnode range type index found otherwise return -1 if none found. 10987c478bd9Sstevel@tonic-gate */ 10997c478bd9Sstevel@tonic-gate int 11007c478bd9Sstevel@tonic-gate mtype_func(int mnode, int mtype, uint_t flags) 11017c478bd9Sstevel@tonic-gate { 11027c478bd9Sstevel@tonic-gate if (flags & PGI_MT_RANGE) { 110307ad560dSkchow int mtlim; 11047c478bd9Sstevel@tonic-gate 11057c478bd9Sstevel@tonic-gate if (flags & PGI_MT_NEXT) 11067c478bd9Sstevel@tonic-gate mtype--; 110707ad560dSkchow if (flags & PGI_MT_RANGE0) 110807ad560dSkchow mtlim = 0; 110907ad560dSkchow else if (flags & PGI_MT_RANGE4G) 111007ad560dSkchow mtlim = mtype4g + 1; /* exclude 0-4g range */ 111107ad560dSkchow else if (flags & PGI_MT_RANGE16M) 111207ad560dSkchow mtlim = 1; /* exclude 0-16m range */ 11137c478bd9Sstevel@tonic-gate while (mtype >= mtlim) { 11147c478bd9Sstevel@tonic-gate if (mnoderanges[mtype].mnr_mnode == mnode) 11157c478bd9Sstevel@tonic-gate return (mtype); 11167c478bd9Sstevel@tonic-gate mtype--; 11177c478bd9Sstevel@tonic-gate } 11187c478bd9Sstevel@tonic-gate } else { 11197c478bd9Sstevel@tonic-gate if (mnoderanges[mtype].mnr_mnode == mnode) 11207c478bd9Sstevel@tonic-gate return (mtype); 11217c478bd9Sstevel@tonic-gate } 11227c478bd9Sstevel@tonic-gate return (-1); 
11237c478bd9Sstevel@tonic-gate } 11247c478bd9Sstevel@tonic-gate 1125e21bae1bSkchow /* 1126e21bae1bSkchow * Update the page list max counts with the pfn range specified by the 1127e21bae1bSkchow * input parameters. Called from add_physmem() when physical memory with 1128e21bae1bSkchow * page_t's are initially added to the page lists. 1129e21bae1bSkchow */ 1130e21bae1bSkchow void 1131e21bae1bSkchow mtype_modify_max(pfn_t startpfn, long cnt) 1132e21bae1bSkchow { 1133e21bae1bSkchow int mtype = 0; 1134e21bae1bSkchow pfn_t endpfn = startpfn + cnt, pfn; 1135e21bae1bSkchow pgcnt_t inc; 1136e21bae1bSkchow 1137e21bae1bSkchow ASSERT(cnt > 0); 1138e21bae1bSkchow 1139e21bae1bSkchow for (pfn = startpfn; pfn < endpfn; ) { 1140e21bae1bSkchow if (pfn <= mnoderanges[mtype].mnr_pfnhi) { 1141e21bae1bSkchow if (endpfn < mnoderanges[mtype].mnr_pfnhi) { 1142e21bae1bSkchow inc = endpfn - pfn; 1143e21bae1bSkchow } else { 1144e21bae1bSkchow inc = mnoderanges[mtype].mnr_pfnhi - pfn + 1; 1145e21bae1bSkchow } 1146e21bae1bSkchow mnoderanges[mtype].mnr_mt_pgmax += inc; 1147e21bae1bSkchow if (physmax4g && mtype <= mtype4g) 1148e21bae1bSkchow maxmem4g += inc; 1149e21bae1bSkchow pfn += inc; 1150e21bae1bSkchow } 1151e21bae1bSkchow mtype++; 1152e21bae1bSkchow ASSERT(mtype < mnoderangecnt || pfn >= endpfn); 1153e21bae1bSkchow } 1154e21bae1bSkchow } 1155e21bae1bSkchow 1156affbd3ccSkchow /* 1157affbd3ccSkchow * Returns the free page count for mnode 1158affbd3ccSkchow */ 1159affbd3ccSkchow int 1160affbd3ccSkchow mnode_pgcnt(int mnode) 1161affbd3ccSkchow { 1162affbd3ccSkchow int mtype = mnoderangecnt - 1; 1163affbd3ccSkchow int flags = PGI_MT_RANGE0; 1164affbd3ccSkchow pgcnt_t pgcnt = 0; 1165affbd3ccSkchow 1166affbd3ccSkchow mtype = mtype_func(mnode, mtype, flags); 1167affbd3ccSkchow 1168affbd3ccSkchow while (mtype != -1) { 116907ad560dSkchow pgcnt += MTYPE_FREEMEM(mtype); 1170affbd3ccSkchow mtype = mtype_func(mnode, mtype, flags | PGI_MT_NEXT); 1171affbd3ccSkchow } 1172affbd3ccSkchow return (pgcnt); 
}

/*
 * Initialize page coloring variables based on the l2 cache parameters.
 * Calculate and return memory needed for page coloring data structures.
 */
size_t
page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
{
	size_t	colorsz = 0;
	int	i;
	int	colors;

	/*
	 * Reduce the memory ranges lists if we don't have large amounts
	 * of memory. This avoids searching known empty free lists.
	 */
	i = memrange_num(physmax);
	memranges += i;
	nranges -= i;
#if defined(__i386)
	if (i > 0)
		restricted_kmemalloc = 0;
#endif
	/* physmax greater than 4g */
	if (i == 0)
		physmax4g = 1;

	ASSERT(ISP2(l2_sz));
	ASSERT(ISP2(l2_linesz));
	ASSERT(l2_sz > MMU_PAGESIZE);

	/* l2_assoc is 0 for fully associative l2 cache */
	if (l2_assoc)
		l2_colors = MAX(1, l2_sz / (l2_assoc * MMU_PAGESIZE));
	else
		l2_colors = 1;

	/* for scalability, configure at least PAGE_COLORS_MIN color bins */
	page_colors = MAX(l2_colors, PAGE_COLORS_MIN);

	/*
	 * cpu_page_colors is non-zero when a page color may be spread across
	 * multiple bins.
	 */
	if (l2_colors < page_colors)
		cpu_page_colors = l2_colors;

	ASSERT(ISP2(page_colors));

	page_colors_mask = page_colors - 1;

	ASSERT(ISP2(CPUSETSIZE()));
	page_coloring_shift = lowbit(CPUSETSIZE());

	/* initialize number of colors per page size */
	for (i = 0; i <= mmu.max_page_level; i++) {
		hw_page_array[i].hp_size = LEVEL_SIZE(i);
		hw_page_array[i].hp_shift = LEVEL_SHIFT(i);
		hw_page_array[i].hp_pgcnt = LEVEL_SIZE(i) >> LEVEL_SHIFT(0);
		/* larger pages span proportionally fewer distinct colors */
		hw_page_array[i].hp_colors = (page_colors_mask >>
		    (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift))
		    + 1;
	}

	/*
	 * The value of cpu_page_colors determines if additional color bins
	 * need to be checked for a particular color in the page_get routines.
	 */
	if (cpu_page_colors != 0) {

		int a = lowbit(page_colors) - lowbit(cpu_page_colors);
		ASSERT(a > 0);
		ASSERT(a < 16);

		for (i = 0; i <= mmu.max_page_level; i++) {
			if ((colors = hw_page_array[i].hp_colors) <= 1) {
				colorequivszc[i] = 0;
				continue;
			}
			/* clamp the shift so at least one bin remains */
			while ((colors >> a) == 0)
				a--;
			ASSERT(a >= 0);

			/* higher 4 bits encodes color equiv mask */
			colorequivszc[i] = (a << 4);
		}
	}

	/* factor in colorequiv to check additional 'equivalent' bins. */
	if (colorequiv > 1) {

		int a = lowbit(colorequiv) - 1;
		if (a > 15)
			a = 15;

		for (i = 0; i <= mmu.max_page_level; i++) {
			if ((colors = hw_page_array[i].hp_colors) <= 1) {
				continue;
			}
			while ((colors >> a) == 0)
				a--;
			/* keep whichever equiv mask is larger */
			if ((a << 4) > colorequivszc[i]) {
				colorequivszc[i] = (a << 4);
			}
		}
	}

	/* size for mnoderanges */
	for (mnoderangecnt = 0, i = 0; i < max_mem_nodes; i++)
		mnoderangecnt += mnode_range_cnt(i);
	colorsz = mnoderangecnt * sizeof (mnoderange_t);

	/* size for fpc_mutex and cpc_mutex */
	colorsz += (2 * max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX);

	/* size of page_freelists */
	colorsz += mnoderangecnt * sizeof (page_t ***);
	colorsz += mnoderangecnt * mmu_page_sizes * sizeof (page_t **);

	for (i = 0; i < mmu_page_sizes; i++) {
		colors = page_get_pagecolors(i);
		colorsz += mnoderangecnt * colors * sizeof (page_t *);
	}

	/* size of page_cachelists */
	colorsz += mnoderangecnt * sizeof (page_t **);
	colorsz += mnoderangecnt * page_colors * sizeof (page_t *);

	return (colorsz);
}

/*
 * Called once at startup to configure page_coloring data structures and
 * does the 1st page_free()/page_freelist_add().
 *
 * pcmemaddr points at a buffer of page_coloring_init()'s returned size;
 * the structures below are carved from it in the same order they were
 * sized there.
 */
void
page_coloring_setup(caddr_t pcmemaddr)
{
	int	i;
	int	j;
	int	k;
	caddr_t	addr;
	int	colors;

	/*
	 * do page coloring setup
	 */
	addr = pcmemaddr;

	/* mnoderanges[] comes first in the preallocated buffer */
	mnoderanges = (mnoderange_t *)addr;
	addr += (mnoderangecnt * sizeof (mnoderange_t));

	mnode_range_setup(mnoderanges);

	/* 0xfffff is the highest pfn below the 4G boundary */
	if (physmax4g)
		mtype4g = pfn_2_mtype(0xfffff);
13307c478bd9Sstevel@tonic-gate 13317c478bd9Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 13327c478bd9Sstevel@tonic-gate fpc_mutex[k] = (kmutex_t *)addr; 13337c478bd9Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 13347c478bd9Sstevel@tonic-gate } 13357c478bd9Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 13367c478bd9Sstevel@tonic-gate cpc_mutex[k] = (kmutex_t *)addr; 13377c478bd9Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 13387c478bd9Sstevel@tonic-gate } 13397c478bd9Sstevel@tonic-gate page_freelists = (page_t ****)addr; 13407c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t ***)); 13417c478bd9Sstevel@tonic-gate 13427c478bd9Sstevel@tonic-gate page_cachelists = (page_t ***)addr; 13437c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t **)); 13447c478bd9Sstevel@tonic-gate 13457c478bd9Sstevel@tonic-gate for (i = 0; i < mnoderangecnt; i++) { 13467c478bd9Sstevel@tonic-gate page_freelists[i] = (page_t ***)addr; 13477c478bd9Sstevel@tonic-gate addr += (mmu_page_sizes * sizeof (page_t **)); 13487c478bd9Sstevel@tonic-gate 13497c478bd9Sstevel@tonic-gate for (j = 0; j < mmu_page_sizes; j++) { 13507c478bd9Sstevel@tonic-gate colors = page_get_pagecolors(j); 13517c478bd9Sstevel@tonic-gate page_freelists[i][j] = (page_t **)addr; 13527c478bd9Sstevel@tonic-gate addr += (colors * sizeof (page_t *)); 13537c478bd9Sstevel@tonic-gate } 13547c478bd9Sstevel@tonic-gate page_cachelists[i] = (page_t **)addr; 13557c478bd9Sstevel@tonic-gate addr += (page_colors * sizeof (page_t *)); 13567c478bd9Sstevel@tonic-gate } 13577c478bd9Sstevel@tonic-gate } 13587c478bd9Sstevel@tonic-gate 13597c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 13607c478bd9Sstevel@tonic-gate int 13617c478bd9Sstevel@tonic-gate bp_color(struct buf *bp) 13627c478bd9Sstevel@tonic-gate { 13637c478bd9Sstevel@tonic-gate return (0); 13647c478bd9Sstevel@tonic-gate } 13657c478bd9Sstevel@tonic-gate 13667c478bd9Sstevel@tonic-gate /* 13677c478bd9Sstevel@tonic-gate * get a 
page from any list with the given mnode 13687c478bd9Sstevel@tonic-gate */ 13697c478bd9Sstevel@tonic-gate page_t * 13707c478bd9Sstevel@tonic-gate page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags, 13717c478bd9Sstevel@tonic-gate int mnode, int mtype, ddi_dma_attr_t *dma_attr) 13727c478bd9Sstevel@tonic-gate { 13735d07b933Sdp kmutex_t *pcm; 13745d07b933Sdp int i; 13755d07b933Sdp page_t *pp; 13765d07b933Sdp page_t *first_pp; 13775d07b933Sdp uint64_t pgaddr; 13785d07b933Sdp ulong_t bin; 13795d07b933Sdp int mtypestart; 13805d07b933Sdp int plw_initialized; 13815d07b933Sdp page_list_walker_t plw; 13827c478bd9Sstevel@tonic-gate 13837c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_alloc); 13847c478bd9Sstevel@tonic-gate 13857c478bd9Sstevel@tonic-gate ASSERT((flags & PG_MATCH_COLOR) == 0); 13867c478bd9Sstevel@tonic-gate ASSERT(szc == 0); 13877c478bd9Sstevel@tonic-gate ASSERT(dma_attr != NULL); 13887c478bd9Sstevel@tonic-gate 13897c478bd9Sstevel@tonic-gate MTYPE_START(mnode, mtype, flags); 13907c478bd9Sstevel@tonic-gate if (mtype < 0) { 13917c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocempty); 13927c478bd9Sstevel@tonic-gate return (NULL); 13937c478bd9Sstevel@tonic-gate } 13947c478bd9Sstevel@tonic-gate 13957c478bd9Sstevel@tonic-gate mtypestart = mtype; 13967c478bd9Sstevel@tonic-gate 13977c478bd9Sstevel@tonic-gate bin = origbin; 13987c478bd9Sstevel@tonic-gate 13997c478bd9Sstevel@tonic-gate /* 14007c478bd9Sstevel@tonic-gate * check up to page_colors + 1 bins - origbin may be checked twice 14017c478bd9Sstevel@tonic-gate * because of BIN_STEP skip 14027c478bd9Sstevel@tonic-gate */ 14037c478bd9Sstevel@tonic-gate do { 14045d07b933Sdp plw_initialized = 0; 14055d07b933Sdp 14065d07b933Sdp for (plw.plw_count = 0; 14075d07b933Sdp plw.plw_count < page_colors; plw.plw_count++) { 14085d07b933Sdp 14097c478bd9Sstevel@tonic-gate if (PAGE_FREELISTS(mnode, szc, bin, mtype) == NULL) 14107c478bd9Sstevel@tonic-gate goto nextfreebin; 14117c478bd9Sstevel@tonic-gate 
14127c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); 14137c478bd9Sstevel@tonic-gate mutex_enter(pcm); 14147c478bd9Sstevel@tonic-gate pp = PAGE_FREELISTS(mnode, szc, bin, mtype); 14157c478bd9Sstevel@tonic-gate first_pp = pp; 14167c478bd9Sstevel@tonic-gate while (pp != NULL) { 14177c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 14187c478bd9Sstevel@tonic-gate pp = pp->p_next; 14197c478bd9Sstevel@tonic-gate if (pp == first_pp) { 14207c478bd9Sstevel@tonic-gate pp = NULL; 14217c478bd9Sstevel@tonic-gate } 14227c478bd9Sstevel@tonic-gate continue; 14237c478bd9Sstevel@tonic-gate } 14247c478bd9Sstevel@tonic-gate 14257c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 14267c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 14277c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL); 14287c478bd9Sstevel@tonic-gate ASSERT(pp->p_hash == NULL); 14297c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1); 14307c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 14317c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 14327c478bd9Sstevel@tonic-gate /* check if page within DMA attributes */ 14337c478bd9Sstevel@tonic-gate pgaddr = mmu_ptob((uint64_t)(pp->p_pagenum)); 14347c478bd9Sstevel@tonic-gate 14357c478bd9Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 14367c478bd9Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 14377c478bd9Sstevel@tonic-gate dma_attr->dma_attr_addr_hi)) { 14387c478bd9Sstevel@tonic-gate break; 14397c478bd9Sstevel@tonic-gate } 14407c478bd9Sstevel@tonic-gate 14417c478bd9Sstevel@tonic-gate /* continue looking */ 14427c478bd9Sstevel@tonic-gate page_unlock(pp); 14437c478bd9Sstevel@tonic-gate pp = pp->p_next; 14447c478bd9Sstevel@tonic-gate if (pp == first_pp) 14457c478bd9Sstevel@tonic-gate pp = NULL; 14467c478bd9Sstevel@tonic-gate 14477c478bd9Sstevel@tonic-gate } 14487c478bd9Sstevel@tonic-gate if (pp != NULL) { 14497c478bd9Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 
14507c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 14517c478bd9Sstevel@tonic-gate 14527c478bd9Sstevel@tonic-gate /* found a page with specified DMA attributes */ 14537c478bd9Sstevel@tonic-gate page_sub(&PAGE_FREELISTS(mnode, szc, bin, 14547c478bd9Sstevel@tonic-gate mtype), pp); 1455affbd3ccSkchow page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST); 14567c478bd9Sstevel@tonic-gate 14577c478bd9Sstevel@tonic-gate if ((PP_ISFREE(pp) == 0) || 14587c478bd9Sstevel@tonic-gate (PP_ISAGED(pp) == 0)) { 14597c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "page %p is not free", 14607c478bd9Sstevel@tonic-gate (void *)pp); 14617c478bd9Sstevel@tonic-gate } 14627c478bd9Sstevel@tonic-gate 14637c478bd9Sstevel@tonic-gate mutex_exit(pcm); 14647c478bd9Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 14657c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocok); 14667c478bd9Sstevel@tonic-gate return (pp); 14677c478bd9Sstevel@tonic-gate } 14687c478bd9Sstevel@tonic-gate mutex_exit(pcm); 14697c478bd9Sstevel@tonic-gate nextfreebin: 14705d07b933Sdp if (plw_initialized == 0) { 14715d07b933Sdp page_list_walk_init(szc, 0, bin, 1, 0, &plw); 14725d07b933Sdp ASSERT(plw.plw_ceq_dif == page_colors); 14735d07b933Sdp plw_initialized = 1; 14745d07b933Sdp } 14757c478bd9Sstevel@tonic-gate 14765d07b933Sdp if (plw.plw_do_split) { 14775d07b933Sdp pp = page_freelist_split(szc, bin, mnode, 14785d07b933Sdp mtype, 14795d07b933Sdp mmu_btop(dma_attr->dma_attr_addr_hi + 1), 14805d07b933Sdp &plw); 14815d07b933Sdp if (pp != NULL) 14825d07b933Sdp return (pp); 14835d07b933Sdp } 14845d07b933Sdp 14855d07b933Sdp bin = page_list_walk_next_bin(szc, bin, &plw); 14867c478bd9Sstevel@tonic-gate } 14875d07b933Sdp 1488affbd3ccSkchow MTYPE_NEXT(mnode, mtype, flags); 1489affbd3ccSkchow } while (mtype >= 0); 14907c478bd9Sstevel@tonic-gate 14917c478bd9Sstevel@tonic-gate /* failed to find a page in the freelist; try it in the cachelist */ 14927c478bd9Sstevel@tonic-gate 14937c478bd9Sstevel@tonic-gate /* reset mtype start for cachelist 
search */ 14947c478bd9Sstevel@tonic-gate mtype = mtypestart; 14957c478bd9Sstevel@tonic-gate ASSERT(mtype >= 0); 14967c478bd9Sstevel@tonic-gate 14977c478bd9Sstevel@tonic-gate /* start with the bin of matching color */ 14987c478bd9Sstevel@tonic-gate bin = origbin; 14997c478bd9Sstevel@tonic-gate 15007c478bd9Sstevel@tonic-gate do { 15017c478bd9Sstevel@tonic-gate for (i = 0; i <= page_colors; i++) { 15027c478bd9Sstevel@tonic-gate if (PAGE_CACHELISTS(mnode, bin, mtype) == NULL) 15037c478bd9Sstevel@tonic-gate goto nextcachebin; 15047c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST); 15057c478bd9Sstevel@tonic-gate mutex_enter(pcm); 15067c478bd9Sstevel@tonic-gate pp = PAGE_CACHELISTS(mnode, bin, mtype); 15077c478bd9Sstevel@tonic-gate first_pp = pp; 15087c478bd9Sstevel@tonic-gate while (pp != NULL) { 15097c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 15107c478bd9Sstevel@tonic-gate pp = pp->p_next; 15117c478bd9Sstevel@tonic-gate if (pp == first_pp) 15127c478bd9Sstevel@tonic-gate break; 15137c478bd9Sstevel@tonic-gate continue; 15147c478bd9Sstevel@tonic-gate } 15157c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode); 15167c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 15177c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 15187c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 15197c478bd9Sstevel@tonic-gate 15207c478bd9Sstevel@tonic-gate /* check if page within DMA attributes */ 15217c478bd9Sstevel@tonic-gate 15227c478bd9Sstevel@tonic-gate pgaddr = ptob((uint64_t)(pp->p_pagenum)); 15237c478bd9Sstevel@tonic-gate 15247c478bd9Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 15257c478bd9Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 15267c478bd9Sstevel@tonic-gate dma_attr->dma_attr_addr_hi)) { 15277c478bd9Sstevel@tonic-gate break; 15287c478bd9Sstevel@tonic-gate } 15297c478bd9Sstevel@tonic-gate 15307c478bd9Sstevel@tonic-gate /* continue looking */ 15317c478bd9Sstevel@tonic-gate page_unlock(pp); 
15327c478bd9Sstevel@tonic-gate pp = pp->p_next; 15337c478bd9Sstevel@tonic-gate if (pp == first_pp) 15347c478bd9Sstevel@tonic-gate pp = NULL; 15357c478bd9Sstevel@tonic-gate } 15367c478bd9Sstevel@tonic-gate 15377c478bd9Sstevel@tonic-gate if (pp != NULL) { 15387c478bd9Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 15397c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 15407c478bd9Sstevel@tonic-gate 15417c478bd9Sstevel@tonic-gate /* found a page with specified DMA attributes */ 15427c478bd9Sstevel@tonic-gate page_sub(&PAGE_CACHELISTS(mnode, bin, 15437c478bd9Sstevel@tonic-gate mtype), pp); 1544affbd3ccSkchow page_ctr_sub(mnode, mtype, pp, PG_CACHE_LIST); 15457c478bd9Sstevel@tonic-gate 15467c478bd9Sstevel@tonic-gate mutex_exit(pcm); 15477c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode); 15487c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 15497c478bd9Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 15507c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocok); 15517c478bd9Sstevel@tonic-gate return (pp); 15527c478bd9Sstevel@tonic-gate } 15537c478bd9Sstevel@tonic-gate mutex_exit(pcm); 15547c478bd9Sstevel@tonic-gate nextcachebin: 15557c478bd9Sstevel@tonic-gate bin += (i == 0) ? BIN_STEP : 1; 15567c478bd9Sstevel@tonic-gate bin &= page_colors_mask; 15577c478bd9Sstevel@tonic-gate } 1558affbd3ccSkchow MTYPE_NEXT(mnode, mtype, flags); 1559affbd3ccSkchow } while (mtype >= 0); 15607c478bd9Sstevel@tonic-gate 15617c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocfailed); 15627c478bd9Sstevel@tonic-gate return (NULL); 15637c478bd9Sstevel@tonic-gate } 15647c478bd9Sstevel@tonic-gate 15657c478bd9Sstevel@tonic-gate /* 15667c478bd9Sstevel@tonic-gate * This function is similar to page_get_freelist()/page_get_cachelist() 15677c478bd9Sstevel@tonic-gate * but it searches both the lists to find a page with the specified 15687c478bd9Sstevel@tonic-gate * color (or no color) and DMA attributes. 
 The search is done in the
 * freelist first and then in the cache list within the highest memory
 * range (based on DMA attributes) before searching in the lower
 * memory ranges.
 *
 * Note: This function is called only by page_create_io().
 */
/*ARGSUSED*/
page_t *
page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
    size_t size, uint_t flags, ddi_dma_attr_t *dma_attr, lgrp_t *lgrp)
{
	uint_t		bin;
	int		mtype;
	page_t		*pp;
	int		n;		/* lowest usable mtype */
	int		m;		/* highest usable mtype */
	int		szc;
	int		fullrange;	/* DMA range spans whole mtype range */
	int		mnode;
	int		local_failed_stat = 0;
	lgrp_mnode_cookie_t	lgrp_cookie;

	VM_STAT_ADD(pga_vmstats.pga_alloc);

	/* only base pagesize currently supported */
	if (size != MMU_PAGESIZE)
		return (NULL);

	/*
	 * If we're passed a specific lgroup, we use it.  Otherwise,
	 * assume first-touch placement is desired.
	 */
	if (!LGRP_EXISTS(lgrp))
		lgrp = lgrp_home_lgrp();

	/*
	 * NOTE(review): 'seg' is not declared in this function; the x86
	 * AS_2_BIN macro presumably ignores that argument - verify against
	 * vm_dep.h.
	 */
	/* LINTED */
	AS_2_BIN(as, seg, vp, vaddr, bin, 0);

	/*
	 * Only hold one freelist or cachelist lock at a time, that way we
	 * can start anywhere and not have to worry about lock
	 * ordering.
	 */
	if (dma_attr == NULL) {
		/* no constraints: every mtype range is usable */
		n = 0;
		m = mnoderangecnt - 1;
		fullrange = 1;
		VM_STAT_ADD(pga_vmstats.pga_nulldmaattr);
	} else {
		pfn_t pfnlo = mmu_btop(dma_attr->dma_attr_addr_lo);
		pfn_t pfnhi = mmu_btop(dma_attr->dma_attr_addr_hi);

		/*
		 * We can guarantee alignment only for page boundary.
		 */
		if (dma_attr->dma_attr_align > MMU_PAGESIZE)
			return (NULL);

		n = pfn_2_mtype(pfnlo);
		m = pfn_2_mtype(pfnhi);

		/*
		 * fullrange means the DMA window fully covers every mtype
		 * range from n to m, so the cheaper non-DMA-checking
		 * allocators below can be used.
		 */
		fullrange = ((pfnlo == mnoderanges[n].mnr_pfnlo) &&
		    (pfnhi >= mnoderanges[m].mnr_pfnhi));
	}
	VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange);

	if (n > m)
		return (NULL);

	szc = 0;

	/* cycling thru mtype handled by RANGE0 if n == 0 */
	if (n == 0) {
		flags |= PGI_MT_RANGE0;
		n = m;
	}

	/*
	 * Try local memory node first, but try remote if we can't
	 * get a page of the right color.
	 */
	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_HIER);
	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
		/*
		 * allocate pages from high pfn to low.
		 */
		for (mtype = m; mtype >= n; mtype--) {
			if (fullrange != 0) {
				pp = page_get_mnode_freelist(mnode,
				    bin, mtype, szc, flags);
				if (pp == NULL) {
					pp = page_get_mnode_cachelist(
					    bin, flags, mnode, mtype);
				}
			} else {
				/* partial range: must check each pfn */
				pp = page_get_mnode_anylist(bin, szc,
				    flags, mnode, mtype, dma_attr);
			}
			if (pp != NULL) {
				VM_STAT_ADD(pga_vmstats.pga_allocok);
				check_dma(dma_attr, pp, 1);
				return (pp);
			}
		}
		/* record the local-allocation failure only once */
		if (!local_failed_stat) {
			lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
			local_failed_stat = 1;
		}
	}
	VM_STAT_ADD(pga_vmstats.pga_allocfailed);

	return (NULL);
}

/*
 * page_create_io()
 *
 * This function is a copy of page_create_va() with an additional
 * argument 'mattr' that specifies DMA memory requirements to
 * the page list functions. This function is used by the segkmem
 * allocator so it is only to create new pages (i.e PG_EXCL is
 * set).
 *
 * Note: This interface is currently used by x86 PSM only and is
 * not fully specified so the commitment level is only for
 * private interface specific to x86. This interface uses PSM
 * specific page_get_anylist() interface.
 */

/*
 * Linear search of the page_hash chain at 'index' for the page that
 * identity-matches (vp, off); leaves 'pp' pointing at it, or NULL.
 * Caller must hold the corresponding PAGE_HASH_MUTEX.
 */
#define	PAGE_HASH_SEARCH(index, pp, vp, off) { \
	for ((pp) = page_hash[(index)]; (pp); (pp) = (pp)->p_hash) { \
		if ((pp)->p_vnode == (vp) && (pp)->p_offset == (off)) \
			break; \
	} \
}


page_t *
page_create_io(
	struct vnode	*vp,
	u_offset_t	off,
	uint_t		bytes,
	uint_t		flags,
	struct as	*as,
	caddr_t		vaddr,
	ddi_dma_attr_t	*mattr)	/* DMA memory attributes if any */
{
	page_t		*plist = NULL;
	uint_t		plist_len = 0;
	pgcnt_t		npages;
	page_t		*npp = NULL;
	uint_t		pages_req;
	page_t		*pp;
	kmutex_t	*phm = NULL;
	uint_t		index;

	TRACE_4(TR_FAC_VM, TR_PAGE_CREATE_START,
	    "page_create_start:vp %p off %llx bytes %u flags %x",
	    vp, off, bytes, flags);

	ASSERT((flags & ~(PG_EXCL | PG_WAIT | PG_PHYSCONTIG)) == 0);

	pages_req = npages = mmu_btopr(bytes);

	/*
	 * Do the freemem and pcf accounting.
	 */
	if (!page_create_wait(npages, flags)) {
		return (NULL);
	}

	TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SUCCESS,
	    "page_create_success:vp %p off %llx",
	    vp, off);

	/*
	 * If satisfying this request has left us with too little
	 * memory, start the wheels turning to get some back.  The
	 * first clause of the test prevents waking up the pageout
	 * daemon in situations where it would decide that there's
	 * nothing to do.
	 */
	if (nscan < desscan && freemem < minfree) {
		TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL,
		    "pageout_cv_signal:freemem %ld", freemem);
		cv_signal(&proc_pageout->p_cv);
	}

	if (flags & PG_PHYSCONTIG) {

		/*
		 * Grab physically contiguous pages first; on success
		 * npages is decremented to the count still outstanding.
		 */
		plist = page_get_contigpage(&npages, mattr, 1);
		if (plist == NULL) {
			page_create_putback(npages);
			return (NULL);
		}

		pp = plist;

		/* hash each contiguous page in under (vp, off) */
		do {
			if (!page_hashin(pp, vp, off, NULL)) {
				panic("pg_creat_io: hashin failed %p %p %llx",
				    (void *)pp, (void *)vp, off);
			}
			VM_STAT_ADD(page_create_new);
			off += MMU_PAGESIZE;
			PP_CLRFREE(pp);
			PP_CLRAGED(pp);
			page_set_props(pp, P_REF);
			pp = pp->p_next;
		} while (pp != plist);

		if (!npages) {
			/* fully satisfied by the contiguous allocation */
			check_dma(mattr, plist, pages_req);
			return (plist);
		} else {
			vaddr += (pages_req - npages) << MMU_PAGESHIFT;
		}

		/*
		 * fall-thru:
		 *
		 * page_get_contigpage returns when npages <= sgllen.
		 * Grab the rest of the non-contig pages below from anylist.
		 */
	}

	/*
	 * Loop around collecting the requested number of pages.
	 * Most of the time, we have to `create' a new page. With
	 * this in mind, pull the page off the free list before
	 * getting the hash lock.  This will minimize the hash
	 * lock hold time, nesting, and the like.  If it turns
	 * out we don't need the page, we put it back at the end.
	 */
	while (npages--) {
		phm = NULL;

		index = PAGE_HASH_FUNC(vp, off);
top:
		ASSERT(phm == NULL);
		ASSERT(index == PAGE_HASH_FUNC(vp, off));
		ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp)));

		if (npp == NULL) {
			/*
			 * Try to get the page of any color either from
			 * the freelist or from the cache list.
			 */
			npp = page_get_anylist(vp, off, as, vaddr, MMU_PAGESIZE,
			    flags & ~PG_MATCH_COLOR, mattr, NULL);
			if (npp == NULL) {
				if (mattr == NULL) {
					/*
					 * Not looking for a special page;
					 * panic!
					 */
					panic("no page found %d", (int)npages);
				}
				/*
				 * No page found! This can happen
				 * if we are looking for a page
				 * within a specific memory range
				 * for DMA purposes. If PG_WAIT is
				 * specified then we wait for a
				 * while and then try again. The
				 * wait could be forever if we
				 * don't get the page(s) we need.
				 *
				 * Note: XXX We really need a mechanism
				 * to wait for pages in the desired
				 * range. For now, we wait for any
				 * pages and see if we can use it.
				 */

				if ((mattr != NULL) && (flags & PG_WAIT)) {
					delay(10);
					goto top;
				}

				goto fail; /* undo accounting stuff */
			}

			if (PP_ISAGED(npp) == 0) {
				/*
				 * Since this page came from the
				 * cachelist, we must destroy the
				 * old vnode association.
				 */
				page_hashout(npp, (kmutex_t *)NULL);
			}
		}

		/*
		 * We own this page!
		 */
		ASSERT(PAGE_EXCL(npp));
		ASSERT(npp->p_vnode == NULL);
		ASSERT(!hat_page_is_mapped(npp));
		PP_CLRFREE(npp);
		PP_CLRAGED(npp);

		/*
		 * Here we have a page in our hot little mits and are
		 * just waiting to stuff it on the appropriate lists.
		 * Get the mutex and check to see if it really does
		 * not exist.
		 */
		phm = PAGE_HASH_MUTEX(index);
		mutex_enter(phm);
		PAGE_HASH_SEARCH(index, pp, vp, off);
		if (pp == NULL) {
			VM_STAT_ADD(page_create_new);
			pp = npp;
			npp = NULL;
			if (!page_hashin(pp, vp, off, phm)) {
				/*
				 * Since we hold the page hash mutex and
				 * just searched for this page, page_hashin
				 * had better not fail.  If it does, that
				 * means somethread did not follow the
				 * page hash mutex rules.  Panic now and
				 * get it over with.  As usual, go down
				 * holding all the locks.
				 */
				ASSERT(MUTEX_HELD(phm));
				panic("page_create: hashin fail %p %p %llx %p",
				    (void *)pp, (void *)vp, off, (void *)phm);

			}
			ASSERT(MUTEX_HELD(phm));
			mutex_exit(phm);
			phm = NULL;

			/*
			 * Hat layer locking need not be done to set
			 * the following bits since the page is not hashed
			 * and was on the free list (i.e., had no mappings).
			 *
			 * Set the reference bit to protect
			 * against immediate pageout
			 *
			 * XXXmh modify freelist code to set reference
			 * bit so we don't have to do it here.
			 */
			page_set_props(pp, P_REF);
		} else {
			ASSERT(MUTEX_HELD(phm));
			mutex_exit(phm);
			phm = NULL;
			/*
			 * NOTE: This should not happen for pages associated
			 * with kernel vnode 'kvp'.
			 */
			/* XX64 - to debug why this happens! */
			ASSERT(vp != &kvp);
			if (vp == &kvp)
				cmn_err(CE_NOTE,
				    "page_create: page not expected "
				    "in hash list for kernel vnode - pp 0x%p",
				    (void *)pp);
			VM_STAT_ADD(page_create_exists);
			goto fail;
		}

		/*
		 * Got a page!  It is locked.  Acquire the i/o
		 * lock since we are going to use the p_next and
		 * p_prev fields to link the requested pages together.
		 */
		page_io_lock(pp);
		page_add(&plist, pp);
		plist = plist->p_next;
		off += MMU_PAGESIZE;
		vaddr += MMU_PAGESIZE;
	}

	check_dma(mattr, plist, pages_req);
	return (plist);

fail:
	if (npp != NULL) {
		/*
		 * Did not need this page after all.
		 * Put it back on the free list.
		 */
		VM_STAT_ADD(page_create_putbacks);
		PP_SETFREE(npp);
		PP_SETAGED(npp);
		npp->p_offset = (u_offset_t)-1;
		page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL);
		page_unlock(npp);
	}

	/*
	 * Give up the pages we already got.
	 */
	while (plist != NULL) {
		pp = plist;
		page_sub(&plist, pp);
		page_io_unlock(pp);
		plist_len++;
		/*LINTED: constant in conditional ctx*/
		VN_DISPOSE(pp, B_INVAL, 0, kcred);
	}

	/*
	 * VN_DISPOSE does freemem accounting for the pages in plist
	 * by calling page_free. So, we need to undo the pcf accounting
	 * for only the remaining pages.
	 */
	VM_STAT_ADD(page_create_putbacks);
	page_create_putback(pages_req - plist_len);

	return (NULL);
}


/*
 * Copy the data from the physical page represented by "frompp" to
 * that represented by "topp". ppcopy uses CPU->cpu_caddr1 and
 * CPU->cpu_caddr2.
It assumes that no one uses either map at interrupt 19907c478bd9Sstevel@tonic-gate * level and no one sleeps with an active mapping there. 19917c478bd9Sstevel@tonic-gate * 19927c478bd9Sstevel@tonic-gate * Note that the ref/mod bits in the page_t's are not affected by 19937c478bd9Sstevel@tonic-gate * this operation, hence it is up to the caller to update them appropriately. 19947c478bd9Sstevel@tonic-gate */ 19957c478bd9Sstevel@tonic-gate void 19967c478bd9Sstevel@tonic-gate ppcopy(page_t *frompp, page_t *topp) 19977c478bd9Sstevel@tonic-gate { 19987c478bd9Sstevel@tonic-gate caddr_t pp_addr1; 19997c478bd9Sstevel@tonic-gate caddr_t pp_addr2; 20007c478bd9Sstevel@tonic-gate void *pte1; 20017c478bd9Sstevel@tonic-gate void *pte2; 20027c478bd9Sstevel@tonic-gate kmutex_t *ppaddr_mutex; 20037c478bd9Sstevel@tonic-gate 20047c478bd9Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 20057c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(frompp)); 20067c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(topp)); 20077c478bd9Sstevel@tonic-gate 20087c478bd9Sstevel@tonic-gate if (kpm_enable) { 20097c478bd9Sstevel@tonic-gate pp_addr1 = hat_kpm_page2va(frompp, 0); 20107c478bd9Sstevel@tonic-gate pp_addr2 = hat_kpm_page2va(topp, 0); 20117c478bd9Sstevel@tonic-gate kpreempt_disable(); 20127c478bd9Sstevel@tonic-gate } else { 20137c478bd9Sstevel@tonic-gate /* 20147c478bd9Sstevel@tonic-gate * disable pre-emption so that CPU can't change 20157c478bd9Sstevel@tonic-gate */ 20167c478bd9Sstevel@tonic-gate kpreempt_disable(); 20177c478bd9Sstevel@tonic-gate 20187c478bd9Sstevel@tonic-gate pp_addr1 = CPU->cpu_caddr1; 20197c478bd9Sstevel@tonic-gate pp_addr2 = CPU->cpu_caddr2; 20207c478bd9Sstevel@tonic-gate pte1 = (void *)CPU->cpu_caddr1pte; 20217c478bd9Sstevel@tonic-gate pte2 = (void *)CPU->cpu_caddr2pte; 20227c478bd9Sstevel@tonic-gate 20237c478bd9Sstevel@tonic-gate ppaddr_mutex = &CPU->cpu_ppaddr_mutex; 20247c478bd9Sstevel@tonic-gate mutex_enter(ppaddr_mutex); 20257c478bd9Sstevel@tonic-gate 20267c478bd9Sstevel@tonic-gate 
hat_mempte_remap(page_pptonum(frompp), pp_addr1, pte1, 20277c478bd9Sstevel@tonic-gate PROT_READ | HAT_STORECACHING_OK, HAT_LOAD_NOCONSIST); 20287c478bd9Sstevel@tonic-gate hat_mempte_remap(page_pptonum(topp), pp_addr2, pte2, 20297c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_STORECACHING_OK, 20307c478bd9Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 20317c478bd9Sstevel@tonic-gate } 20327c478bd9Sstevel@tonic-gate 20337c478bd9Sstevel@tonic-gate if (use_sse_pagecopy) 20347c478bd9Sstevel@tonic-gate hwblkpagecopy(pp_addr1, pp_addr2); 20357c478bd9Sstevel@tonic-gate else 20367c478bd9Sstevel@tonic-gate bcopy(pp_addr1, pp_addr2, PAGESIZE); 20377c478bd9Sstevel@tonic-gate 20387c478bd9Sstevel@tonic-gate if (!kpm_enable) 20397c478bd9Sstevel@tonic-gate mutex_exit(ppaddr_mutex); 20407c478bd9Sstevel@tonic-gate kpreempt_enable(); 20417c478bd9Sstevel@tonic-gate } 20427c478bd9Sstevel@tonic-gate 20437c478bd9Sstevel@tonic-gate /* 20447c478bd9Sstevel@tonic-gate * Zero the physical page from off to off + len given by `pp' 20457c478bd9Sstevel@tonic-gate * without changing the reference and modified bits of page. 20467c478bd9Sstevel@tonic-gate * 20477c478bd9Sstevel@tonic-gate * We use this using CPU private page address #2, see ppcopy() for more info. 20487c478bd9Sstevel@tonic-gate * pagezero() must not be called at interrupt level. 
20497c478bd9Sstevel@tonic-gate */ 20507c478bd9Sstevel@tonic-gate void 20517c478bd9Sstevel@tonic-gate pagezero(page_t *pp, uint_t off, uint_t len) 20527c478bd9Sstevel@tonic-gate { 20537c478bd9Sstevel@tonic-gate caddr_t pp_addr2; 20547c478bd9Sstevel@tonic-gate void *pte2; 20557c478bd9Sstevel@tonic-gate kmutex_t *ppaddr_mutex; 20567c478bd9Sstevel@tonic-gate 20577c478bd9Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 20587c478bd9Sstevel@tonic-gate ASSERT(len <= MMU_PAGESIZE); 20597c478bd9Sstevel@tonic-gate ASSERT(off <= MMU_PAGESIZE); 20607c478bd9Sstevel@tonic-gate ASSERT(off + len <= MMU_PAGESIZE); 20617c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp)); 20627c478bd9Sstevel@tonic-gate 20637c478bd9Sstevel@tonic-gate if (kpm_enable) { 20647c478bd9Sstevel@tonic-gate pp_addr2 = hat_kpm_page2va(pp, 0); 20657c478bd9Sstevel@tonic-gate kpreempt_disable(); 20667c478bd9Sstevel@tonic-gate } else { 20677c478bd9Sstevel@tonic-gate kpreempt_disable(); 20687c478bd9Sstevel@tonic-gate 20697c478bd9Sstevel@tonic-gate pp_addr2 = CPU->cpu_caddr2; 20707c478bd9Sstevel@tonic-gate pte2 = (void *)CPU->cpu_caddr2pte; 20717c478bd9Sstevel@tonic-gate 20727c478bd9Sstevel@tonic-gate ppaddr_mutex = &CPU->cpu_ppaddr_mutex; 20737c478bd9Sstevel@tonic-gate mutex_enter(ppaddr_mutex); 20747c478bd9Sstevel@tonic-gate 20757c478bd9Sstevel@tonic-gate hat_mempte_remap(page_pptonum(pp), pp_addr2, pte2, 20767c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_STORECACHING_OK, 20777c478bd9Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 20787c478bd9Sstevel@tonic-gate } 20797c478bd9Sstevel@tonic-gate 20807c478bd9Sstevel@tonic-gate if (use_sse_pagezero) 20817c478bd9Sstevel@tonic-gate hwblkclr(pp_addr2 + off, len); 20827c478bd9Sstevel@tonic-gate else 20837c478bd9Sstevel@tonic-gate bzero(pp_addr2 + off, len); 20847c478bd9Sstevel@tonic-gate 20857c478bd9Sstevel@tonic-gate if (!kpm_enable) 20867c478bd9Sstevel@tonic-gate mutex_exit(ppaddr_mutex); 20877c478bd9Sstevel@tonic-gate kpreempt_enable(); 20887c478bd9Sstevel@tonic-gate } 
20897c478bd9Sstevel@tonic-gate 20907c478bd9Sstevel@tonic-gate /* 20917c478bd9Sstevel@tonic-gate * Platform-dependent page scrub call. 20927c478bd9Sstevel@tonic-gate */ 20937c478bd9Sstevel@tonic-gate void 20947c478bd9Sstevel@tonic-gate pagescrub(page_t *pp, uint_t off, uint_t len) 20957c478bd9Sstevel@tonic-gate { 20967c478bd9Sstevel@tonic-gate /* 20977c478bd9Sstevel@tonic-gate * For now, we rely on the fact that pagezero() will 20987c478bd9Sstevel@tonic-gate * always clear UEs. 20997c478bd9Sstevel@tonic-gate */ 21007c478bd9Sstevel@tonic-gate pagezero(pp, off, len); 21017c478bd9Sstevel@tonic-gate } 21027c478bd9Sstevel@tonic-gate 21037c478bd9Sstevel@tonic-gate /* 21047c478bd9Sstevel@tonic-gate * set up two private addresses for use on a given CPU for use in ppcopy() 21057c478bd9Sstevel@tonic-gate */ 21067c478bd9Sstevel@tonic-gate void 21077c478bd9Sstevel@tonic-gate setup_vaddr_for_ppcopy(struct cpu *cpup) 21087c478bd9Sstevel@tonic-gate { 21097c478bd9Sstevel@tonic-gate void *addr; 21107c478bd9Sstevel@tonic-gate void *pte; 21117c478bd9Sstevel@tonic-gate 21127c478bd9Sstevel@tonic-gate addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP); 21137c478bd9Sstevel@tonic-gate pte = hat_mempte_setup(addr); 21147c478bd9Sstevel@tonic-gate cpup->cpu_caddr1 = addr; 21157c478bd9Sstevel@tonic-gate cpup->cpu_caddr1pte = (pteptr_t)pte; 21167c478bd9Sstevel@tonic-gate 21177c478bd9Sstevel@tonic-gate addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP); 21187c478bd9Sstevel@tonic-gate pte = hat_mempte_setup(addr); 21197c478bd9Sstevel@tonic-gate cpup->cpu_caddr2 = addr; 21207c478bd9Sstevel@tonic-gate cpup->cpu_caddr2pte = (pteptr_t)pte; 21217c478bd9Sstevel@tonic-gate 21227c478bd9Sstevel@tonic-gate mutex_init(&cpup->cpu_ppaddr_mutex, NULL, MUTEX_DEFAULT, NULL); 21237c478bd9Sstevel@tonic-gate } 21247c478bd9Sstevel@tonic-gate 21257c478bd9Sstevel@tonic-gate 21267c478bd9Sstevel@tonic-gate /* 21277c478bd9Sstevel@tonic-gate * Create the pageout scanner thread. 
The thread has to 21287c478bd9Sstevel@tonic-gate * start at procedure with process pp and priority pri. 21297c478bd9Sstevel@tonic-gate */ 21307c478bd9Sstevel@tonic-gate void 21317c478bd9Sstevel@tonic-gate pageout_init(void (*procedure)(), proc_t *pp, pri_t pri) 21327c478bd9Sstevel@tonic-gate { 21337c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri); 21347c478bd9Sstevel@tonic-gate } 21357c478bd9Sstevel@tonic-gate 21367c478bd9Sstevel@tonic-gate /* 21377c478bd9Sstevel@tonic-gate * Function for flushing D-cache when performing module relocations 21387c478bd9Sstevel@tonic-gate * to an alternate mapping. Unnecessary on Intel / AMD platforms. 21397c478bd9Sstevel@tonic-gate */ 21407c478bd9Sstevel@tonic-gate void 21417c478bd9Sstevel@tonic-gate dcache_flushall() 21427c478bd9Sstevel@tonic-gate {} 2143