17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate /* 23e21bae1bSkchow * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 287c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate /* 317c478bd9Sstevel@tonic-gate * Portions of this source code were derived from Berkeley 4.3 BSD 327c478bd9Sstevel@tonic-gate * under license from the Regents of the University of California. 337c478bd9Sstevel@tonic-gate */ 347c478bd9Sstevel@tonic-gate 357c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" 367c478bd9Sstevel@tonic-gate 377c478bd9Sstevel@tonic-gate /* 387c478bd9Sstevel@tonic-gate * UNIX machine dependent virtual memory support. 397c478bd9Sstevel@tonic-gate */ 407c478bd9Sstevel@tonic-gate 417c478bd9Sstevel@tonic-gate #include <sys/types.h> 427c478bd9Sstevel@tonic-gate #include <sys/param.h> 437c478bd9Sstevel@tonic-gate #include <sys/systm.h> 447c478bd9Sstevel@tonic-gate #include <sys/user.h> 457c478bd9Sstevel@tonic-gate #include <sys/proc.h> 467c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 477c478bd9Sstevel@tonic-gate #include <sys/vmem.h> 487c478bd9Sstevel@tonic-gate #include <sys/buf.h> 497c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 507c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 517c478bd9Sstevel@tonic-gate #include <sys/disp.h> 527c478bd9Sstevel@tonic-gate #include <sys/vm.h> 537c478bd9Sstevel@tonic-gate #include <sys/mman.h> 547c478bd9Sstevel@tonic-gate #include <sys/vnode.h> 557c478bd9Sstevel@tonic-gate #include <sys/cred.h> 567c478bd9Sstevel@tonic-gate #include <sys/exec.h> 577c478bd9Sstevel@tonic-gate #include <sys/exechdr.h> 587c478bd9Sstevel@tonic-gate #include <sys/debug.h> 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate #include <vm/hat.h> 617c478bd9Sstevel@tonic-gate #include <vm/as.h> 627c478bd9Sstevel@tonic-gate #include <vm/seg.h> 637c478bd9Sstevel@tonic-gate #include <vm/seg_kp.h> 647c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h> 657c478bd9Sstevel@tonic-gate #include <vm/page.h> 667c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h> 677c478bd9Sstevel@tonic-gate #include <vm/seg_kpm.h> 687c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h> 697c478bd9Sstevel@tonic-gate 707c478bd9Sstevel@tonic-gate #include <sys/cpu.h> 717c478bd9Sstevel@tonic-gate #include <sys/vm_machparam.h> 727c478bd9Sstevel@tonic-gate #include <sys/memlist.h> 737c478bd9Sstevel@tonic-gate #include <sys/bootconf.h> /* XXX the memlist stuff belongs in memlist_plat.h */ 747c478bd9Sstevel@tonic-gate #include <vm/hat_i86.h> 757c478bd9Sstevel@tonic-gate #include <sys/x86_archext.h> 767c478bd9Sstevel@tonic-gate #include <sys/elf_386.h> 777c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 787c478bd9Sstevel@tonic-gate #include <sys/archsystm.h> 797c478bd9Sstevel@tonic-gate #include <sys/machsystm.h> 807c478bd9Sstevel@tonic-gate 817c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 827c478bd9Sstevel@tonic-gate #include <sys/ddidmareq.h> 837c478bd9Sstevel@tonic-gate #include <sys/promif.h> 847c478bd9Sstevel@tonic-gate #include <sys/memnode.h> 857c478bd9Sstevel@tonic-gate #include <sys/stack.h> 867c478bd9Sstevel@tonic-gate 877c478bd9Sstevel@tonic-gate uint_t vac_colors = 0; 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate int largepagesupport = 0; 907c478bd9Sstevel@tonic-gate extern uint_t page_create_new; 917c478bd9Sstevel@tonic-gate extern uint_t page_create_exists; 927c478bd9Sstevel@tonic-gate extern uint_t page_create_putbacks; 937c478bd9Sstevel@tonic-gate extern uint_t page_create_putbacks; 947c478bd9Sstevel@tonic-gate extern uintptr_t eprom_kernelbase; 957c478bd9Sstevel@tonic-gate extern int use_sse_pagecopy, use_sse_pagezero; /* in ml/float.s */ 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate /* 4g memory management */ 987c478bd9Sstevel@tonic-gate pgcnt_t maxmem4g; 997c478bd9Sstevel@tonic-gate pgcnt_t freemem4g; 1007c478bd9Sstevel@tonic-gate int physmax4g; 1017c478bd9Sstevel@tonic-gate int desfree4gshift = 4; /* maxmem4g shift to derive DESFREE4G */ 1027c478bd9Sstevel@tonic-gate int lotsfree4gshift = 3; 1037c478bd9Sstevel@tonic-gate 104*07ad560dSkchow /* 16m memory management: desired number of free pages below 16m. */ 105*07ad560dSkchow pgcnt_t desfree16m = 0x380; 106*07ad560dSkchow 1077c478bd9Sstevel@tonic-gate #ifdef VM_STATS 1087c478bd9Sstevel@tonic-gate struct { 1097c478bd9Sstevel@tonic-gate ulong_t pga_alloc; 1107c478bd9Sstevel@tonic-gate ulong_t pga_notfullrange; 1117c478bd9Sstevel@tonic-gate ulong_t pga_nulldmaattr; 1127c478bd9Sstevel@tonic-gate ulong_t pga_allocok; 1137c478bd9Sstevel@tonic-gate ulong_t pga_allocfailed; 1147c478bd9Sstevel@tonic-gate ulong_t pgma_alloc; 1157c478bd9Sstevel@tonic-gate ulong_t pgma_allocok; 1167c478bd9Sstevel@tonic-gate ulong_t pgma_allocfailed; 1177c478bd9Sstevel@tonic-gate ulong_t pgma_allocempty; 1187c478bd9Sstevel@tonic-gate } pga_vmstats; 1197c478bd9Sstevel@tonic-gate #endif 1207c478bd9Sstevel@tonic-gate 1217c478bd9Sstevel@tonic-gate uint_t mmu_page_sizes; 1227c478bd9Sstevel@tonic-gate 1237c478bd9Sstevel@tonic-gate /* How many page sizes the users can see */ 1247c478bd9Sstevel@tonic-gate uint_t mmu_exported_page_sizes; 1257c478bd9Sstevel@tonic-gate 1267c478bd9Sstevel@tonic-gate size_t auto_lpg_va_default = MMU_PAGESIZE; /* used by zmap() */ 127beb1bda0Sdavemq /* 128beb1bda0Sdavemq * Number of pages in 1 GB. Don't enable automatic large pages if we have 129beb1bda0Sdavemq * fewer than this many pages. 130beb1bda0Sdavemq */ 131beb1bda0Sdavemq pgcnt_t auto_lpg_min_physmem = 1 << (30 - MMU_PAGESHIFT); 1327c478bd9Sstevel@tonic-gate 1337c478bd9Sstevel@tonic-gate /* 1347c478bd9Sstevel@tonic-gate * Return the optimum page size for a given mapping 1357c478bd9Sstevel@tonic-gate */ 1367c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1377c478bd9Sstevel@tonic-gate size_t 1387c478bd9Sstevel@tonic-gate map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap) 1397c478bd9Sstevel@tonic-gate { 1407c478bd9Sstevel@tonic-gate level_t l; 1417c478bd9Sstevel@tonic-gate 1427c478bd9Sstevel@tonic-gate if (remap) 1437c478bd9Sstevel@tonic-gate *remap = 0; 1447c478bd9Sstevel@tonic-gate 1457c478bd9Sstevel@tonic-gate switch (maptype) { 1467c478bd9Sstevel@tonic-gate 1477c478bd9Sstevel@tonic-gate case MAPPGSZ_STK: 1487c478bd9Sstevel@tonic-gate case MAPPGSZ_HEAP: 1497c478bd9Sstevel@tonic-gate case MAPPGSZ_VA: 1507c478bd9Sstevel@tonic-gate /* 1517c478bd9Sstevel@tonic-gate * use the pages size that best fits len 1527c478bd9Sstevel@tonic-gate */ 1537c478bd9Sstevel@tonic-gate for (l = mmu.max_page_level; l > 0; --l) { 1547c478bd9Sstevel@tonic-gate if (len < LEVEL_SIZE(l)) 1557c478bd9Sstevel@tonic-gate continue; 1567c478bd9Sstevel@tonic-gate break; 1577c478bd9Sstevel@tonic-gate } 1587c478bd9Sstevel@tonic-gate return (LEVEL_SIZE(l)); 1597c478bd9Sstevel@tonic-gate 1607c478bd9Sstevel@tonic-gate /* 1617c478bd9Sstevel@tonic-gate * for ISM use the 1st large page size. 1627c478bd9Sstevel@tonic-gate */ 1637c478bd9Sstevel@tonic-gate case MAPPGSZ_ISM: 1647c478bd9Sstevel@tonic-gate if (mmu.max_page_level == 0) 1657c478bd9Sstevel@tonic-gate return (MMU_PAGESIZE); 1667c478bd9Sstevel@tonic-gate return (LEVEL_SIZE(1)); 1677c478bd9Sstevel@tonic-gate } 1687c478bd9Sstevel@tonic-gate return (0); 1697c478bd9Sstevel@tonic-gate } 1707c478bd9Sstevel@tonic-gate 1717c478bd9Sstevel@tonic-gate /* 1727c478bd9Sstevel@tonic-gate * This can be patched via /etc/system to allow large pages 1737c478bd9Sstevel@tonic-gate * to be used for mapping application and libraries text segments. 1747c478bd9Sstevel@tonic-gate */ 1757c478bd9Sstevel@tonic-gate int use_text_largepages = 0; 1767c478bd9Sstevel@tonic-gate 1777c478bd9Sstevel@tonic-gate /* 1787c478bd9Sstevel@tonic-gate * Return a bit vector of large page size codes that 1797c478bd9Sstevel@tonic-gate * can be used to map [addr, addr + len) region. 1807c478bd9Sstevel@tonic-gate */ 1817c478bd9Sstevel@tonic-gate 1827c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 1837c478bd9Sstevel@tonic-gate uint_t 1847c478bd9Sstevel@tonic-gate map_execseg_pgszcvec(int text, caddr_t addr, size_t len) 1857c478bd9Sstevel@tonic-gate { 1867c478bd9Sstevel@tonic-gate size_t pgsz; 1877c478bd9Sstevel@tonic-gate caddr_t a; 1887c478bd9Sstevel@tonic-gate 1897c478bd9Sstevel@tonic-gate if (!text || !use_text_largepages || 1907c478bd9Sstevel@tonic-gate mmu.max_page_level == 0) 1917c478bd9Sstevel@tonic-gate return (0); 1927c478bd9Sstevel@tonic-gate 1937c478bd9Sstevel@tonic-gate pgsz = LEVEL_SIZE(1); 1947c478bd9Sstevel@tonic-gate a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz); 1957c478bd9Sstevel@tonic-gate if (a < addr || a >= addr + len) { 1967c478bd9Sstevel@tonic-gate return (0); 1977c478bd9Sstevel@tonic-gate } 1987c478bd9Sstevel@tonic-gate len -= (a - addr); 1997c478bd9Sstevel@tonic-gate if (len < pgsz) { 2007c478bd9Sstevel@tonic-gate return (0); 2017c478bd9Sstevel@tonic-gate } 2027c478bd9Sstevel@tonic-gate return (1 << 1); 2037c478bd9Sstevel@tonic-gate } 2047c478bd9Sstevel@tonic-gate 2057c478bd9Sstevel@tonic-gate /* 2067c478bd9Sstevel@tonic-gate * Handle a pagefault. 2077c478bd9Sstevel@tonic-gate */ 2087c478bd9Sstevel@tonic-gate faultcode_t 2097c478bd9Sstevel@tonic-gate pagefault( 2107c478bd9Sstevel@tonic-gate caddr_t addr, 2117c478bd9Sstevel@tonic-gate enum fault_type type, 2127c478bd9Sstevel@tonic-gate enum seg_rw rw, 2137c478bd9Sstevel@tonic-gate int iskernel) 2147c478bd9Sstevel@tonic-gate { 2157c478bd9Sstevel@tonic-gate struct as *as; 2167c478bd9Sstevel@tonic-gate struct hat *hat; 2177c478bd9Sstevel@tonic-gate struct proc *p; 2187c478bd9Sstevel@tonic-gate kthread_t *t; 2197c478bd9Sstevel@tonic-gate faultcode_t res; 2207c478bd9Sstevel@tonic-gate caddr_t base; 2217c478bd9Sstevel@tonic-gate size_t len; 2227c478bd9Sstevel@tonic-gate int err; 2237c478bd9Sstevel@tonic-gate int mapped_red; 2247c478bd9Sstevel@tonic-gate uintptr_t ea; 2257c478bd9Sstevel@tonic-gate 2267c478bd9Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 2277c478bd9Sstevel@tonic-gate 2287c478bd9Sstevel@tonic-gate if (INVALID_VADDR(addr)) 2297c478bd9Sstevel@tonic-gate return (FC_NOMAP); 2307c478bd9Sstevel@tonic-gate 2317c478bd9Sstevel@tonic-gate mapped_red = segkp_map_red(); 2327c478bd9Sstevel@tonic-gate 2337c478bd9Sstevel@tonic-gate if (iskernel) { 2347c478bd9Sstevel@tonic-gate as = &kas; 2357c478bd9Sstevel@tonic-gate hat = as->a_hat; 2367c478bd9Sstevel@tonic-gate } else { 2377c478bd9Sstevel@tonic-gate t = curthread; 2387c478bd9Sstevel@tonic-gate p = ttoproc(t); 2397c478bd9Sstevel@tonic-gate as = p->p_as; 2407c478bd9Sstevel@tonic-gate hat = as->a_hat; 2417c478bd9Sstevel@tonic-gate } 2427c478bd9Sstevel@tonic-gate 2437c478bd9Sstevel@tonic-gate /* 2447c478bd9Sstevel@tonic-gate * Dispatch pagefault. 2457c478bd9Sstevel@tonic-gate */ 2467c478bd9Sstevel@tonic-gate res = as_fault(hat, as, addr, 1, type, rw); 2477c478bd9Sstevel@tonic-gate 2487c478bd9Sstevel@tonic-gate /* 2497c478bd9Sstevel@tonic-gate * If this isn't a potential unmapped hole in the user's 2507c478bd9Sstevel@tonic-gate * UNIX data or stack segments, just return status info. 2517c478bd9Sstevel@tonic-gate */ 2527c478bd9Sstevel@tonic-gate if (res != FC_NOMAP || iskernel) 2537c478bd9Sstevel@tonic-gate goto out; 2547c478bd9Sstevel@tonic-gate 2557c478bd9Sstevel@tonic-gate /* 2567c478bd9Sstevel@tonic-gate * Check to see if we happened to faulted on a currently unmapped 2577c478bd9Sstevel@tonic-gate * part of the UNIX data or stack segments. If so, create a zfod 2587c478bd9Sstevel@tonic-gate * mapping there and then try calling the fault routine again. 2597c478bd9Sstevel@tonic-gate */ 2607c478bd9Sstevel@tonic-gate base = p->p_brkbase; 2617c478bd9Sstevel@tonic-gate len = p->p_brksize; 2627c478bd9Sstevel@tonic-gate 2637c478bd9Sstevel@tonic-gate if (addr < base || addr >= base + len) { /* data seg? */ 2647c478bd9Sstevel@tonic-gate base = (caddr_t)p->p_usrstack - p->p_stksize; 2657c478bd9Sstevel@tonic-gate len = p->p_stksize; 2667c478bd9Sstevel@tonic-gate if (addr < base || addr >= p->p_usrstack) { /* stack seg? */ 2677c478bd9Sstevel@tonic-gate /* not in either UNIX data or stack segments */ 2687c478bd9Sstevel@tonic-gate res = FC_NOMAP; 2697c478bd9Sstevel@tonic-gate goto out; 2707c478bd9Sstevel@tonic-gate } 2717c478bd9Sstevel@tonic-gate } 2727c478bd9Sstevel@tonic-gate 2737c478bd9Sstevel@tonic-gate /* 2747c478bd9Sstevel@tonic-gate * the rest of this function implements a 3.X 4.X 5.X compatibility 2757c478bd9Sstevel@tonic-gate * This code is probably not needed anymore 2767c478bd9Sstevel@tonic-gate */ 2777c478bd9Sstevel@tonic-gate if (p->p_model == DATAMODEL_ILP32) { 2787c478bd9Sstevel@tonic-gate 2797c478bd9Sstevel@tonic-gate /* expand the gap to the page boundaries on each side */ 2807c478bd9Sstevel@tonic-gate ea = P2ROUNDUP((uintptr_t)base + len, MMU_PAGESIZE); 2817c478bd9Sstevel@tonic-gate base = (caddr_t)P2ALIGN((uintptr_t)base, MMU_PAGESIZE); 2827c478bd9Sstevel@tonic-gate len = ea - (uintptr_t)base; 2837c478bd9Sstevel@tonic-gate 2847c478bd9Sstevel@tonic-gate as_rangelock(as); 2857c478bd9Sstevel@tonic-gate if (as_gap(as, MMU_PAGESIZE, &base, &len, AH_CONTAIN, addr) == 2867c478bd9Sstevel@tonic-gate 0) { 2877c478bd9Sstevel@tonic-gate err = as_map(as, base, len, segvn_create, zfod_argsp); 2887c478bd9Sstevel@tonic-gate as_rangeunlock(as); 2897c478bd9Sstevel@tonic-gate if (err) { 2907c478bd9Sstevel@tonic-gate res = FC_MAKE_ERR(err); 2917c478bd9Sstevel@tonic-gate goto out; 2927c478bd9Sstevel@tonic-gate } 2937c478bd9Sstevel@tonic-gate } else { 2947c478bd9Sstevel@tonic-gate /* 2957c478bd9Sstevel@tonic-gate * This page is already mapped by another thread after 2967c478bd9Sstevel@tonic-gate * we returned from as_fault() above. We just fall 2977c478bd9Sstevel@tonic-gate * through as_fault() below. 2987c478bd9Sstevel@tonic-gate */ 2997c478bd9Sstevel@tonic-gate as_rangeunlock(as); 3007c478bd9Sstevel@tonic-gate } 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate res = as_fault(hat, as, addr, 1, F_INVAL, rw); 3037c478bd9Sstevel@tonic-gate } 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate out: 3067c478bd9Sstevel@tonic-gate if (mapped_red) 3077c478bd9Sstevel@tonic-gate segkp_unmap_red(); 3087c478bd9Sstevel@tonic-gate 3097c478bd9Sstevel@tonic-gate return (res); 3107c478bd9Sstevel@tonic-gate } 3117c478bd9Sstevel@tonic-gate 3127c478bd9Sstevel@tonic-gate void 3137c478bd9Sstevel@tonic-gate map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags) 3147c478bd9Sstevel@tonic-gate { 3157c478bd9Sstevel@tonic-gate struct proc *p = curproc; 3167c478bd9Sstevel@tonic-gate caddr_t userlimit = (flags & _MAP_LOW32) ? 3177c478bd9Sstevel@tonic-gate (caddr_t)_userlimit32 : p->p_as->a_userlimit; 3187c478bd9Sstevel@tonic-gate 3197c478bd9Sstevel@tonic-gate map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags); 3207c478bd9Sstevel@tonic-gate } 3217c478bd9Sstevel@tonic-gate 3227c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3237c478bd9Sstevel@tonic-gate int 3247c478bd9Sstevel@tonic-gate map_addr_vacalign_check(caddr_t addr, u_offset_t off) 3257c478bd9Sstevel@tonic-gate { 3267c478bd9Sstevel@tonic-gate return (0); 3277c478bd9Sstevel@tonic-gate } 3287c478bd9Sstevel@tonic-gate 3297c478bd9Sstevel@tonic-gate /* 3307c478bd9Sstevel@tonic-gate * map_addr_proc() is the routine called when the system is to 3317c478bd9Sstevel@tonic-gate * choose an address for the user. We will pick an address 3327c478bd9Sstevel@tonic-gate * range which is the highest available below kernelbase. 3337c478bd9Sstevel@tonic-gate * 3347c478bd9Sstevel@tonic-gate * addrp is a value/result parameter. 3357c478bd9Sstevel@tonic-gate * On input it is a hint from the user to be used in a completely 3367c478bd9Sstevel@tonic-gate * machine dependent fashion. We decide to completely ignore this hint. 3377c478bd9Sstevel@tonic-gate * 3387c478bd9Sstevel@tonic-gate * On output it is NULL if no address can be found in the current 3397c478bd9Sstevel@tonic-gate * processes address space or else an address that is currently 3407c478bd9Sstevel@tonic-gate * not mapped for len bytes with a page of red zone on either side. 3417c478bd9Sstevel@tonic-gate * 3427c478bd9Sstevel@tonic-gate * align is not needed on x86 (it's for viturally addressed caches) 3437c478bd9Sstevel@tonic-gate */ 3447c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 3457c478bd9Sstevel@tonic-gate void 3467c478bd9Sstevel@tonic-gate map_addr_proc( 3477c478bd9Sstevel@tonic-gate caddr_t *addrp, 3487c478bd9Sstevel@tonic-gate size_t len, 3497c478bd9Sstevel@tonic-gate offset_t off, 3507c478bd9Sstevel@tonic-gate int vacalign, 3517c478bd9Sstevel@tonic-gate caddr_t userlimit, 3527c478bd9Sstevel@tonic-gate struct proc *p, 3537c478bd9Sstevel@tonic-gate uint_t flags) 3547c478bd9Sstevel@tonic-gate { 3557c478bd9Sstevel@tonic-gate struct as *as = p->p_as; 3567c478bd9Sstevel@tonic-gate caddr_t addr; 3577c478bd9Sstevel@tonic-gate caddr_t base; 3587c478bd9Sstevel@tonic-gate size_t slen; 3597c478bd9Sstevel@tonic-gate size_t align_amount; 3607c478bd9Sstevel@tonic-gate 3617c478bd9Sstevel@tonic-gate ASSERT32(userlimit == as->a_userlimit); 3627c478bd9Sstevel@tonic-gate 3637c478bd9Sstevel@tonic-gate base = p->p_brkbase; 3647c478bd9Sstevel@tonic-gate #if defined(__amd64) 3657c478bd9Sstevel@tonic-gate /* 3667c478bd9Sstevel@tonic-gate * XX64 Yes, this needs more work. 3677c478bd9Sstevel@tonic-gate */ 3687c478bd9Sstevel@tonic-gate if (p->p_model == DATAMODEL_NATIVE) { 3697c478bd9Sstevel@tonic-gate if (userlimit < as->a_userlimit) { 3707c478bd9Sstevel@tonic-gate /* 3717c478bd9Sstevel@tonic-gate * This happens when a program wants to map 3727c478bd9Sstevel@tonic-gate * something in a range that's accessible to a 3737c478bd9Sstevel@tonic-gate * program in a smaller address space. For example, 3747c478bd9Sstevel@tonic-gate * a 64-bit program calling mmap32(2) to guarantee 3757c478bd9Sstevel@tonic-gate * that the returned address is below 4Gbytes. 3767c478bd9Sstevel@tonic-gate */ 3777c478bd9Sstevel@tonic-gate ASSERT((uintptr_t)userlimit < ADDRESS_C(0xffffffff)); 3787c478bd9Sstevel@tonic-gate 3797c478bd9Sstevel@tonic-gate if (userlimit > base) 3807c478bd9Sstevel@tonic-gate slen = userlimit - base; 3817c478bd9Sstevel@tonic-gate else { 3827c478bd9Sstevel@tonic-gate *addrp = NULL; 3837c478bd9Sstevel@tonic-gate return; 3847c478bd9Sstevel@tonic-gate } 3857c478bd9Sstevel@tonic-gate } else { 3867c478bd9Sstevel@tonic-gate /* 3877c478bd9Sstevel@tonic-gate * XX64 This layout is probably wrong .. but in 3887c478bd9Sstevel@tonic-gate * the event we make the amd64 address space look 3897c478bd9Sstevel@tonic-gate * like sparcv9 i.e. with the stack -above- the 3907c478bd9Sstevel@tonic-gate * heap, this bit of code might even be correct. 3917c478bd9Sstevel@tonic-gate */ 3927c478bd9Sstevel@tonic-gate slen = p->p_usrstack - base - 3937c478bd9Sstevel@tonic-gate (((size_t)rctl_enforced_value( 3947c478bd9Sstevel@tonic-gate rctlproc_legacy[RLIMIT_STACK], 3957c478bd9Sstevel@tonic-gate p->p_rctls, p) + PAGEOFFSET) & PAGEMASK); 3967c478bd9Sstevel@tonic-gate } 3977c478bd9Sstevel@tonic-gate } else 3987c478bd9Sstevel@tonic-gate #endif 3997c478bd9Sstevel@tonic-gate slen = userlimit - base; 4007c478bd9Sstevel@tonic-gate 4017c478bd9Sstevel@tonic-gate len = (len + PAGEOFFSET) & PAGEMASK; 4027c478bd9Sstevel@tonic-gate 4037c478bd9Sstevel@tonic-gate /* 4047c478bd9Sstevel@tonic-gate * Redzone for each side of the request. This is done to leave 4057c478bd9Sstevel@tonic-gate * one page unmapped between segments. This is not required, but 4067c478bd9Sstevel@tonic-gate * it's useful for the user because if their program strays across 4077c478bd9Sstevel@tonic-gate * a segment boundary, it will catch a fault immediately making 4087c478bd9Sstevel@tonic-gate * debugging a little easier. 4097c478bd9Sstevel@tonic-gate */ 4107c478bd9Sstevel@tonic-gate len += 2 * MMU_PAGESIZE; 4117c478bd9Sstevel@tonic-gate 4127c478bd9Sstevel@tonic-gate /* 4137c478bd9Sstevel@tonic-gate * figure out what the alignment should be 4147c478bd9Sstevel@tonic-gate * 4157c478bd9Sstevel@tonic-gate * XX64 -- is there an ELF_AMD64_MAXPGSZ or is it the same???? 4167c478bd9Sstevel@tonic-gate */ 4177c478bd9Sstevel@tonic-gate if (len <= ELF_386_MAXPGSZ) { 4187c478bd9Sstevel@tonic-gate /* 4197c478bd9Sstevel@tonic-gate * Align virtual addresses to ensure that ELF shared libraries 4207c478bd9Sstevel@tonic-gate * are mapped with the appropriate alignment constraints by 4217c478bd9Sstevel@tonic-gate * the run-time linker. 4227c478bd9Sstevel@tonic-gate */ 4237c478bd9Sstevel@tonic-gate align_amount = ELF_386_MAXPGSZ; 4247c478bd9Sstevel@tonic-gate } else { 4257c478bd9Sstevel@tonic-gate int l = mmu.max_page_level; 4267c478bd9Sstevel@tonic-gate 4277c478bd9Sstevel@tonic-gate while (l && len < LEVEL_SIZE(l)) 4287c478bd9Sstevel@tonic-gate --l; 4297c478bd9Sstevel@tonic-gate 4307c478bd9Sstevel@tonic-gate align_amount = LEVEL_SIZE(l); 4317c478bd9Sstevel@tonic-gate } 4327c478bd9Sstevel@tonic-gate 4337c478bd9Sstevel@tonic-gate if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount)) 4347c478bd9Sstevel@tonic-gate align_amount = (uintptr_t)*addrp; 4357c478bd9Sstevel@tonic-gate 4367c478bd9Sstevel@tonic-gate len += align_amount; 4377c478bd9Sstevel@tonic-gate 4387c478bd9Sstevel@tonic-gate /* 4397c478bd9Sstevel@tonic-gate * Look for a large enough hole starting below userlimit. 4407c478bd9Sstevel@tonic-gate * After finding it, use the upper part. Addition of PAGESIZE 4417c478bd9Sstevel@tonic-gate * is for the redzone as described above. 4427c478bd9Sstevel@tonic-gate */ 4437c478bd9Sstevel@tonic-gate if (as_gap(as, len, &base, &slen, AH_HI, NULL) == 0) { 4447c478bd9Sstevel@tonic-gate caddr_t as_addr; 4457c478bd9Sstevel@tonic-gate 4467c478bd9Sstevel@tonic-gate addr = base + slen - len + MMU_PAGESIZE; 4477c478bd9Sstevel@tonic-gate as_addr = addr; 4487c478bd9Sstevel@tonic-gate /* 4497c478bd9Sstevel@tonic-gate * Round address DOWN to the alignment amount, 4507c478bd9Sstevel@tonic-gate * add the offset, and if this address is less 4517c478bd9Sstevel@tonic-gate * than the original address, add alignment amount. 4527c478bd9Sstevel@tonic-gate */ 4537c478bd9Sstevel@tonic-gate addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1))); 4547c478bd9Sstevel@tonic-gate addr += (uintptr_t)(off & (align_amount - 1)); 4557c478bd9Sstevel@tonic-gate if (addr < as_addr) 4567c478bd9Sstevel@tonic-gate addr += align_amount; 4577c478bd9Sstevel@tonic-gate 4587c478bd9Sstevel@tonic-gate ASSERT(addr <= (as_addr + align_amount)); 4597c478bd9Sstevel@tonic-gate ASSERT(((uintptr_t)addr & (align_amount - 1)) == 4607c478bd9Sstevel@tonic-gate ((uintptr_t)(off & (align_amount - 1)))); 4617c478bd9Sstevel@tonic-gate *addrp = addr; 4627c478bd9Sstevel@tonic-gate } else { 4637c478bd9Sstevel@tonic-gate *addrp = NULL; /* no more virtual space */ 4647c478bd9Sstevel@tonic-gate } 4657c478bd9Sstevel@tonic-gate } 4667c478bd9Sstevel@tonic-gate 4677c478bd9Sstevel@tonic-gate /* 4687c478bd9Sstevel@tonic-gate * Determine whether [base, base+len] contains a valid range of 4697c478bd9Sstevel@tonic-gate * addresses at least minlen long. base and len are adjusted if 4707c478bd9Sstevel@tonic-gate * required to provide a valid range. 4717c478bd9Sstevel@tonic-gate */ 4727c478bd9Sstevel@tonic-gate /*ARGSUSED3*/ 4737c478bd9Sstevel@tonic-gate int 4747c478bd9Sstevel@tonic-gate valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir) 4757c478bd9Sstevel@tonic-gate { 4767c478bd9Sstevel@tonic-gate uintptr_t hi, lo; 4777c478bd9Sstevel@tonic-gate 4787c478bd9Sstevel@tonic-gate lo = (uintptr_t)*basep; 4797c478bd9Sstevel@tonic-gate hi = lo + *lenp; 4807c478bd9Sstevel@tonic-gate 4817c478bd9Sstevel@tonic-gate /* 4827c478bd9Sstevel@tonic-gate * If hi rolled over the top, try cutting back. 4837c478bd9Sstevel@tonic-gate */ 4847c478bd9Sstevel@tonic-gate if (hi < lo) { 4857c478bd9Sstevel@tonic-gate if (0 - lo + hi < minlen) 4867c478bd9Sstevel@tonic-gate return (0); 4877c478bd9Sstevel@tonic-gate if (0 - lo < minlen) 4887c478bd9Sstevel@tonic-gate return (0); 4897c478bd9Sstevel@tonic-gate *lenp = 0 - lo; 4907c478bd9Sstevel@tonic-gate } else if (hi - lo < minlen) { 4917c478bd9Sstevel@tonic-gate return (0); 4927c478bd9Sstevel@tonic-gate } 4937c478bd9Sstevel@tonic-gate #if defined(__amd64) 4947c478bd9Sstevel@tonic-gate /* 4957c478bd9Sstevel@tonic-gate * Deal with a possible hole in the address range between 4967c478bd9Sstevel@tonic-gate * hole_start and hole_end that should never be mapped. 4977c478bd9Sstevel@tonic-gate */ 4987c478bd9Sstevel@tonic-gate if (lo < hole_start) { 4997c478bd9Sstevel@tonic-gate if (hi > hole_start) { 5007c478bd9Sstevel@tonic-gate if (hi < hole_end) { 5017c478bd9Sstevel@tonic-gate hi = hole_start; 5027c478bd9Sstevel@tonic-gate } else { 5037c478bd9Sstevel@tonic-gate /* lo < hole_start && hi >= hole_end */ 5047c478bd9Sstevel@tonic-gate if (dir == AH_LO) { 5057c478bd9Sstevel@tonic-gate /* 5067c478bd9Sstevel@tonic-gate * prefer lowest range 5077c478bd9Sstevel@tonic-gate */ 5087c478bd9Sstevel@tonic-gate if (hole_start - lo >= minlen) 5097c478bd9Sstevel@tonic-gate hi = hole_start; 5107c478bd9Sstevel@tonic-gate else if (hi - hole_end >= minlen) 5117c478bd9Sstevel@tonic-gate lo = hole_end; 5127c478bd9Sstevel@tonic-gate else 5137c478bd9Sstevel@tonic-gate return (0); 5147c478bd9Sstevel@tonic-gate } else { 5157c478bd9Sstevel@tonic-gate /* 5167c478bd9Sstevel@tonic-gate * prefer highest range 5177c478bd9Sstevel@tonic-gate */ 5187c478bd9Sstevel@tonic-gate if (hi - hole_end >= minlen) 5197c478bd9Sstevel@tonic-gate lo = hole_end; 5207c478bd9Sstevel@tonic-gate else if (hole_start - lo >= minlen) 5217c478bd9Sstevel@tonic-gate hi = hole_start; 5227c478bd9Sstevel@tonic-gate else 5237c478bd9Sstevel@tonic-gate return (0); 5247c478bd9Sstevel@tonic-gate } 5257c478bd9Sstevel@tonic-gate } 5267c478bd9Sstevel@tonic-gate } 5277c478bd9Sstevel@tonic-gate } else { 5287c478bd9Sstevel@tonic-gate /* lo >= hole_start */ 5297c478bd9Sstevel@tonic-gate if (hi < hole_end) 5307c478bd9Sstevel@tonic-gate return (0); 5317c478bd9Sstevel@tonic-gate if (lo < hole_end) 5327c478bd9Sstevel@tonic-gate lo = hole_end; 5337c478bd9Sstevel@tonic-gate } 5347c478bd9Sstevel@tonic-gate 5357c478bd9Sstevel@tonic-gate if (hi - lo < minlen) 5367c478bd9Sstevel@tonic-gate return (0); 5377c478bd9Sstevel@tonic-gate 5387c478bd9Sstevel@tonic-gate *basep = (caddr_t)lo; 5397c478bd9Sstevel@tonic-gate *lenp = hi - lo; 5407c478bd9Sstevel@tonic-gate #endif 5417c478bd9Sstevel@tonic-gate return (1); 5427c478bd9Sstevel@tonic-gate } 5437c478bd9Sstevel@tonic-gate 5447c478bd9Sstevel@tonic-gate /* 5457c478bd9Sstevel@tonic-gate * Determine whether [addr, addr+len] are valid user addresses. 5467c478bd9Sstevel@tonic-gate */ 5477c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 5487c478bd9Sstevel@tonic-gate int 5497c478bd9Sstevel@tonic-gate valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as, 5507c478bd9Sstevel@tonic-gate caddr_t userlimit) 5517c478bd9Sstevel@tonic-gate { 5527c478bd9Sstevel@tonic-gate caddr_t eaddr = addr + len; 5537c478bd9Sstevel@tonic-gate 5547c478bd9Sstevel@tonic-gate if (eaddr <= addr || addr >= userlimit || eaddr > userlimit) 5557c478bd9Sstevel@tonic-gate return (RANGE_BADADDR); 5567c478bd9Sstevel@tonic-gate 5577c478bd9Sstevel@tonic-gate #if defined(__amd64) 5587c478bd9Sstevel@tonic-gate /* 5597c478bd9Sstevel@tonic-gate * Check for the VA hole 5607c478bd9Sstevel@tonic-gate */ 5617c478bd9Sstevel@tonic-gate if (eaddr > (caddr_t)hole_start && addr < (caddr_t)hole_end) 5627c478bd9Sstevel@tonic-gate return (RANGE_BADADDR); 5637c478bd9Sstevel@tonic-gate #endif 5647c478bd9Sstevel@tonic-gate 5657c478bd9Sstevel@tonic-gate return (RANGE_OKAY); 5667c478bd9Sstevel@tonic-gate } 5677c478bd9Sstevel@tonic-gate 5687c478bd9Sstevel@tonic-gate /* 5697c478bd9Sstevel@tonic-gate * Return 1 if the page frame is onboard memory, else 0. 5707c478bd9Sstevel@tonic-gate */ 5717c478bd9Sstevel@tonic-gate int 5727c478bd9Sstevel@tonic-gate pf_is_memory(pfn_t pf) 5737c478bd9Sstevel@tonic-gate { 5747c478bd9Sstevel@tonic-gate return (address_in_memlist(phys_install, mmu_ptob((uint64_t)pf), 1)); 5757c478bd9Sstevel@tonic-gate } 5767c478bd9Sstevel@tonic-gate 5777c478bd9Sstevel@tonic-gate 5787c478bd9Sstevel@tonic-gate /* 5797c478bd9Sstevel@tonic-gate * initialized by page_coloring_init(). 5807c478bd9Sstevel@tonic-gate */ 5817c478bd9Sstevel@tonic-gate uint_t page_colors; 5827c478bd9Sstevel@tonic-gate uint_t page_colors_mask; 5837c478bd9Sstevel@tonic-gate uint_t page_coloring_shift; 5847c478bd9Sstevel@tonic-gate int cpu_page_colors; 5857c478bd9Sstevel@tonic-gate static uint_t l2_colors; 5867c478bd9Sstevel@tonic-gate 5877c478bd9Sstevel@tonic-gate /* 5887c478bd9Sstevel@tonic-gate * Page freelists and cachelists are dynamically allocated once mnoderangecnt 5897c478bd9Sstevel@tonic-gate * and page_colors are calculated from the l2 cache n-way set size. Within a 5907c478bd9Sstevel@tonic-gate * mnode range, the page freelist and cachelist are hashed into bins based on 5917c478bd9Sstevel@tonic-gate * color. This makes it easier to search for a page within a specific memory 5927c478bd9Sstevel@tonic-gate * range. 5937c478bd9Sstevel@tonic-gate */ 5947c478bd9Sstevel@tonic-gate #define PAGE_COLORS_MIN 16 5957c478bd9Sstevel@tonic-gate 5967c478bd9Sstevel@tonic-gate page_t ****page_freelists; 5977c478bd9Sstevel@tonic-gate page_t ***page_cachelists; 5987c478bd9Sstevel@tonic-gate 5997c478bd9Sstevel@tonic-gate /* 6007c478bd9Sstevel@tonic-gate * As the PC architecture evolved memory up was clumped into several 6017c478bd9Sstevel@tonic-gate * ranges for various historical I/O devices to do DMA. 6027c478bd9Sstevel@tonic-gate * < 16Meg - ISA bus 6037c478bd9Sstevel@tonic-gate * < 2Gig - ??? 6047c478bd9Sstevel@tonic-gate * < 4Gig - PCI bus or drivers that don't understand PAE mode 6057c478bd9Sstevel@tonic-gate */ 6067c478bd9Sstevel@tonic-gate static pfn_t arch_memranges[NUM_MEM_RANGES] = { 6077c478bd9Sstevel@tonic-gate 0x100000, /* pfn range for 4G and above */ 6087c478bd9Sstevel@tonic-gate 0x80000, /* pfn range for 2G-4G */ 6097c478bd9Sstevel@tonic-gate 0x01000, /* pfn range for 16M-2G */ 6107c478bd9Sstevel@tonic-gate 0x00000, /* pfn range for 0-16M */ 6117c478bd9Sstevel@tonic-gate }; 6127c478bd9Sstevel@tonic-gate 6137c478bd9Sstevel@tonic-gate /* 6147c478bd9Sstevel@tonic-gate * These are changed during startup if the machine has limited memory. 6157c478bd9Sstevel@tonic-gate */ 6167c478bd9Sstevel@tonic-gate pfn_t *memranges = &arch_memranges[0]; 6177c478bd9Sstevel@tonic-gate int nranges = NUM_MEM_RANGES; 6187c478bd9Sstevel@tonic-gate 6197c478bd9Sstevel@tonic-gate /* 6207c478bd9Sstevel@tonic-gate * Used by page layer to know about page sizes 6217c478bd9Sstevel@tonic-gate */ 6227c478bd9Sstevel@tonic-gate hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1]; 6237c478bd9Sstevel@tonic-gate 6247c478bd9Sstevel@tonic-gate /* 6257c478bd9Sstevel@tonic-gate * This can be patched via /etc/system to allow old non-PAE aware device 6267c478bd9Sstevel@tonic-gate * drivers to use kmem_alloc'd memory on 32 bit systems with > 4Gig RAM. 6277c478bd9Sstevel@tonic-gate */ 6287c478bd9Sstevel@tonic-gate #if defined(__i386) 6297c478bd9Sstevel@tonic-gate int restricted_kmemalloc = 1; /* XX64 re-examine with PSARC 2004/405 */ 6307c478bd9Sstevel@tonic-gate #elif defined(__amd64) 6317c478bd9Sstevel@tonic-gate int restricted_kmemalloc = 0; 6327c478bd9Sstevel@tonic-gate #endif 6337c478bd9Sstevel@tonic-gate 6347c478bd9Sstevel@tonic-gate kmutex_t *fpc_mutex[NPC_MUTEX]; 6357c478bd9Sstevel@tonic-gate kmutex_t *cpc_mutex[NPC_MUTEX]; 6367c478bd9Sstevel@tonic-gate 6377c478bd9Sstevel@tonic-gate 6387c478bd9Sstevel@tonic-gate /* 6397c478bd9Sstevel@tonic-gate * return the memrange containing pfn 6407c478bd9Sstevel@tonic-gate */ 6417c478bd9Sstevel@tonic-gate int 6427c478bd9Sstevel@tonic-gate memrange_num(pfn_t pfn) 6437c478bd9Sstevel@tonic-gate { 6447c478bd9Sstevel@tonic-gate int n; 6457c478bd9Sstevel@tonic-gate 6467c478bd9Sstevel@tonic-gate for (n = 0; n < nranges - 1; ++n) { 6477c478bd9Sstevel@tonic-gate if (pfn >= memranges[n]) 6487c478bd9Sstevel@tonic-gate break; 6497c478bd9Sstevel@tonic-gate } 6507c478bd9Sstevel@tonic-gate return (n); 6517c478bd9Sstevel@tonic-gate } 6527c478bd9Sstevel@tonic-gate 6537c478bd9Sstevel@tonic-gate /* 6547c478bd9Sstevel@tonic-gate * return the mnoderange containing pfn 6557c478bd9Sstevel@tonic-gate */ 6567c478bd9Sstevel@tonic-gate int 6577c478bd9Sstevel@tonic-gate pfn_2_mtype(pfn_t pfn) 6587c478bd9Sstevel@tonic-gate { 6597c478bd9Sstevel@tonic-gate int n; 6607c478bd9Sstevel@tonic-gate 6617c478bd9Sstevel@tonic-gate for (n = mnoderangecnt - 1; n >= 0; n--) { 6627c478bd9Sstevel@tonic-gate if (pfn >= mnoderanges[n].mnr_pfnlo) { 6637c478bd9Sstevel@tonic-gate break; 6647c478bd9Sstevel@tonic-gate } 6657c478bd9Sstevel@tonic-gate } 6667c478bd9Sstevel@tonic-gate return (n); 6677c478bd9Sstevel@tonic-gate } 6687c478bd9Sstevel@tonic-gate 6697c478bd9Sstevel@tonic-gate /* 6707c478bd9Sstevel@tonic-gate * is_contigpage_free: 6717c478bd9Sstevel@tonic-gate * returns a page list of contiguous pages. It minimally has to return 6727c478bd9Sstevel@tonic-gate * minctg pages. Caller determines minctg based on the scatter-gather 6737c478bd9Sstevel@tonic-gate * list length. 6747c478bd9Sstevel@tonic-gate * 6757c478bd9Sstevel@tonic-gate * pfnp is set to the next page frame to search on return. 6767c478bd9Sstevel@tonic-gate */ 6777c478bd9Sstevel@tonic-gate static page_t * 6787c478bd9Sstevel@tonic-gate is_contigpage_free( 6797c478bd9Sstevel@tonic-gate pfn_t *pfnp, 6807c478bd9Sstevel@tonic-gate pgcnt_t *pgcnt, 6817c478bd9Sstevel@tonic-gate pgcnt_t minctg, 6827c478bd9Sstevel@tonic-gate uint64_t pfnseg, 6837c478bd9Sstevel@tonic-gate int iolock) 6847c478bd9Sstevel@tonic-gate { 6857c478bd9Sstevel@tonic-gate int i = 0; 6867c478bd9Sstevel@tonic-gate pfn_t pfn = *pfnp; 6877c478bd9Sstevel@tonic-gate page_t *pp; 6887c478bd9Sstevel@tonic-gate page_t *plist = NULL; 6897c478bd9Sstevel@tonic-gate 6907c478bd9Sstevel@tonic-gate /* 6917c478bd9Sstevel@tonic-gate * fail if pfn + minctg crosses a segment boundary. 6927c478bd9Sstevel@tonic-gate * Adjust for next starting pfn to begin at segment boundary. 6937c478bd9Sstevel@tonic-gate */ 6947c478bd9Sstevel@tonic-gate 6957c478bd9Sstevel@tonic-gate if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg)) { 6967c478bd9Sstevel@tonic-gate *pfnp = roundup(*pfnp, pfnseg + 1); 6977c478bd9Sstevel@tonic-gate return (NULL); 6987c478bd9Sstevel@tonic-gate } 6997c478bd9Sstevel@tonic-gate 7007c478bd9Sstevel@tonic-gate do { 7017c478bd9Sstevel@tonic-gate retry: 7027c478bd9Sstevel@tonic-gate pp = page_numtopp_nolock(pfn + i); 7037c478bd9Sstevel@tonic-gate if ((pp == NULL) || 7047c478bd9Sstevel@tonic-gate (page_trylock(pp, SE_EXCL) == 0)) { 7057c478bd9Sstevel@tonic-gate (*pfnp)++; 7067c478bd9Sstevel@tonic-gate break; 7077c478bd9Sstevel@tonic-gate } 7087c478bd9Sstevel@tonic-gate if (page_pptonum(pp) != pfn + i) { 7097c478bd9Sstevel@tonic-gate page_unlock(pp); 7107c478bd9Sstevel@tonic-gate goto retry; 7117c478bd9Sstevel@tonic-gate } 7127c478bd9Sstevel@tonic-gate 7137c478bd9Sstevel@tonic-gate if (!(PP_ISFREE(pp))) { 7147c478bd9Sstevel@tonic-gate page_unlock(pp); 7157c478bd9Sstevel@tonic-gate (*pfnp)++; 7167c478bd9Sstevel@tonic-gate break; 7177c478bd9Sstevel@tonic-gate } 7187c478bd9Sstevel@tonic-gate 7197c478bd9Sstevel@tonic-gate if (!PP_ISAGED(pp)) { 7207c478bd9Sstevel@tonic-gate page_list_sub(pp, PG_CACHE_LIST); 7217c478bd9Sstevel@tonic-gate page_hashout(pp, (kmutex_t *)NULL); 7227c478bd9Sstevel@tonic-gate } else { 7237c478bd9Sstevel@tonic-gate page_list_sub(pp, PG_FREE_LIST); 7247c478bd9Sstevel@tonic-gate } 7257c478bd9Sstevel@tonic-gate 7267c478bd9Sstevel@tonic-gate if (iolock) 7277c478bd9Sstevel@tonic-gate page_io_lock(pp); 7287c478bd9Sstevel@tonic-gate page_list_concat(&plist, &pp); 7297c478bd9Sstevel@tonic-gate 7307c478bd9Sstevel@tonic-gate /* 7317c478bd9Sstevel@tonic-gate * exit loop when pgcnt satisfied or segment boundary reached. 7327c478bd9Sstevel@tonic-gate */ 7337c478bd9Sstevel@tonic-gate 7347c478bd9Sstevel@tonic-gate } while ((++i < *pgcnt) && ((pfn + i) & pfnseg)); 7357c478bd9Sstevel@tonic-gate 7367c478bd9Sstevel@tonic-gate *pfnp += i; /* set to next pfn to search */ 7377c478bd9Sstevel@tonic-gate 7387c478bd9Sstevel@tonic-gate if (i >= minctg) { 7397c478bd9Sstevel@tonic-gate *pgcnt -= i; 7407c478bd9Sstevel@tonic-gate return (plist); 7417c478bd9Sstevel@tonic-gate } 7427c478bd9Sstevel@tonic-gate 7437c478bd9Sstevel@tonic-gate /* 7447c478bd9Sstevel@tonic-gate * failure: minctg not satisfied. 7457c478bd9Sstevel@tonic-gate * 7467c478bd9Sstevel@tonic-gate * if next request crosses segment boundary, set next pfn 7477c478bd9Sstevel@tonic-gate * to search from the segment boundary. 7487c478bd9Sstevel@tonic-gate */ 7497c478bd9Sstevel@tonic-gate if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg)) 7507c478bd9Sstevel@tonic-gate *pfnp = roundup(*pfnp, pfnseg + 1); 7517c478bd9Sstevel@tonic-gate 7527c478bd9Sstevel@tonic-gate /* clean up any pages already allocated */ 7537c478bd9Sstevel@tonic-gate 7547c478bd9Sstevel@tonic-gate while (plist) { 7557c478bd9Sstevel@tonic-gate pp = plist; 7567c478bd9Sstevel@tonic-gate page_sub(&plist, pp); 7577c478bd9Sstevel@tonic-gate page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL); 7587c478bd9Sstevel@tonic-gate if (iolock) 7597c478bd9Sstevel@tonic-gate page_io_unlock(pp); 7607c478bd9Sstevel@tonic-gate page_unlock(pp); 7617c478bd9Sstevel@tonic-gate } 7627c478bd9Sstevel@tonic-gate 7637c478bd9Sstevel@tonic-gate return (NULL); 7647c478bd9Sstevel@tonic-gate } 7657c478bd9Sstevel@tonic-gate 7667c478bd9Sstevel@tonic-gate /* 7677c478bd9Sstevel@tonic-gate * verify that pages being returned from allocator have correct DMA attribute 7687c478bd9Sstevel@tonic-gate */ 7697c478bd9Sstevel@tonic-gate #ifndef DEBUG 7707c478bd9Sstevel@tonic-gate #define check_dma(a, b, c) (0) 7717c478bd9Sstevel@tonic-gate #else 7727c478bd9Sstevel@tonic-gate static void 7737c478bd9Sstevel@tonic-gate check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt) 7747c478bd9Sstevel@tonic-gate { 7757c478bd9Sstevel@tonic-gate if (dma_attr == NULL) 7767c478bd9Sstevel@tonic-gate return; 7777c478bd9Sstevel@tonic-gate 7787c478bd9Sstevel@tonic-gate while (cnt-- > 0) { 7797c478bd9Sstevel@tonic-gate if (mmu_ptob((uint64_t)pp->p_pagenum) < 7807c478bd9Sstevel@tonic-gate dma_attr->dma_attr_addr_lo) 7817c478bd9Sstevel@tonic-gate panic("PFN (pp=%p) below dma_attr_addr_lo", pp); 7827c478bd9Sstevel@tonic-gate if (mmu_ptob((uint64_t)pp->p_pagenum) >= 7837c478bd9Sstevel@tonic-gate dma_attr->dma_attr_addr_hi) 7847c478bd9Sstevel@tonic-gate panic("PFN (pp=%p) above dma_attr_addr_hi", pp); 7857c478bd9Sstevel@tonic-gate pp = pp->p_next; 7867c478bd9Sstevel@tonic-gate } 7877c478bd9Sstevel@tonic-gate } 7887c478bd9Sstevel@tonic-gate #endif 7897c478bd9Sstevel@tonic-gate 7907c478bd9Sstevel@tonic-gate static kmutex_t contig_lock; 7917c478bd9Sstevel@tonic-gate 7927c478bd9Sstevel@tonic-gate #define CONTIG_LOCK() mutex_enter(&contig_lock); 7937c478bd9Sstevel@tonic-gate #define CONTIG_UNLOCK() mutex_exit(&contig_lock); 7947c478bd9Sstevel@tonic-gate 7957c478bd9Sstevel@tonic-gate #define PFN_16M (mmu_btop((uint64_t)0x1000000)) 7967c478bd9Sstevel@tonic-gate 7977c478bd9Sstevel@tonic-gate static page_t * 7987c478bd9Sstevel@tonic-gate page_get_contigpage(pgcnt_t *pgcnt, ddi_dma_attr_t *mattr, int iolock) 7997c478bd9Sstevel@tonic-gate { 8007c478bd9Sstevel@tonic-gate pfn_t pfn; 8017c478bd9Sstevel@tonic-gate int sgllen; 8027c478bd9Sstevel@tonic-gate uint64_t pfnseg; 8037c478bd9Sstevel@tonic-gate pgcnt_t minctg; 8047c478bd9Sstevel@tonic-gate page_t *pplist = NULL, *plist; 8057c478bd9Sstevel@tonic-gate uint64_t lo, hi; 8067c478bd9Sstevel@tonic-gate pgcnt_t pfnalign = 0; 8077c478bd9Sstevel@tonic-gate static pfn_t startpfn; 8087c478bd9Sstevel@tonic-gate static pgcnt_t lastctgcnt; 8097c478bd9Sstevel@tonic-gate uintptr_t align; 8107c478bd9Sstevel@tonic-gate 8117c478bd9Sstevel@tonic-gate CONTIG_LOCK(); 8127c478bd9Sstevel@tonic-gate 8137c478bd9Sstevel@tonic-gate if (mattr) { 8147c478bd9Sstevel@tonic-gate lo = mmu_btop((mattr->dma_attr_addr_lo + MMU_PAGEOFFSET)); 8157c478bd9Sstevel@tonic-gate hi = mmu_btop(mattr->dma_attr_addr_hi); 8167c478bd9Sstevel@tonic-gate if (hi >= physmax) 8177c478bd9Sstevel@tonic-gate hi = physmax - 1; 8187c478bd9Sstevel@tonic-gate sgllen = mattr->dma_attr_sgllen; 8197c478bd9Sstevel@tonic-gate pfnseg = mmu_btop(mattr->dma_attr_seg); 8207c478bd9Sstevel@tonic-gate 8217c478bd9Sstevel@tonic-gate align = maxbit(mattr->dma_attr_align, mattr->dma_attr_minxfer); 8227c478bd9Sstevel@tonic-gate if (align > MMU_PAGESIZE) 8237c478bd9Sstevel@tonic-gate pfnalign = mmu_btop(align); 8247c478bd9Sstevel@tonic-gate 8257c478bd9Sstevel@tonic-gate /* 8267c478bd9Sstevel@tonic-gate * in order to satisfy the request, must minimally 8277c478bd9Sstevel@tonic-gate * acquire minctg contiguous pages 8287c478bd9Sstevel@tonic-gate */ 8297c478bd9Sstevel@tonic-gate minctg = howmany(*pgcnt, sgllen); 8307c478bd9Sstevel@tonic-gate 8317c478bd9Sstevel@tonic-gate ASSERT(hi >= lo); 8327c478bd9Sstevel@tonic-gate 8337c478bd9Sstevel@tonic-gate /* 8347c478bd9Sstevel@tonic-gate * start from where last searched if the minctg >= lastctgcnt 8357c478bd9Sstevel@tonic-gate */ 8367c478bd9Sstevel@tonic-gate if (minctg < lastctgcnt || startpfn < lo || startpfn > hi) 8377c478bd9Sstevel@tonic-gate startpfn = lo; 8387c478bd9Sstevel@tonic-gate } else { 8397c478bd9Sstevel@tonic-gate hi = physmax - 1; 8407c478bd9Sstevel@tonic-gate lo = 0; 8417c478bd9Sstevel@tonic-gate sgllen = 1; 8427c478bd9Sstevel@tonic-gate pfnseg = mmu.highest_pfn; 8437c478bd9Sstevel@tonic-gate minctg = *pgcnt; 8447c478bd9Sstevel@tonic-gate 8457c478bd9Sstevel@tonic-gate if (minctg < lastctgcnt) 8467c478bd9Sstevel@tonic-gate startpfn = lo; 8477c478bd9Sstevel@tonic-gate } 8487c478bd9Sstevel@tonic-gate lastctgcnt = minctg; 8497c478bd9Sstevel@tonic-gate 8507c478bd9Sstevel@tonic-gate ASSERT(pfnseg + 1 >= (uint64_t)minctg); 8517c478bd9Sstevel@tonic-gate 8527c478bd9Sstevel@tonic-gate /* conserve 16m memory - start search above 16m when possible */ 8537c478bd9Sstevel@tonic-gate if (hi > PFN_16M && startpfn < PFN_16M) 8547c478bd9Sstevel@tonic-gate startpfn = PFN_16M; 8557c478bd9Sstevel@tonic-gate 8567c478bd9Sstevel@tonic-gate pfn = startpfn; 8577c478bd9Sstevel@tonic-gate if (pfnalign) 8587c478bd9Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 8597c478bd9Sstevel@tonic-gate 8607c478bd9Sstevel@tonic-gate while (pfn + minctg - 1 <= hi) { 8617c478bd9Sstevel@tonic-gate 8627c478bd9Sstevel@tonic-gate plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock); 8637c478bd9Sstevel@tonic-gate if (plist) { 8647c478bd9Sstevel@tonic-gate page_list_concat(&pplist, &plist); 8657c478bd9Sstevel@tonic-gate sgllen--; 8667c478bd9Sstevel@tonic-gate /* 8677c478bd9Sstevel@tonic-gate * return when contig pages no longer needed 8687c478bd9Sstevel@tonic-gate */ 8697c478bd9Sstevel@tonic-gate if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) { 8707c478bd9Sstevel@tonic-gate startpfn = pfn; 8717c478bd9Sstevel@tonic-gate CONTIG_UNLOCK(); 8727c478bd9Sstevel@tonic-gate check_dma(mattr, pplist, *pgcnt); 8737c478bd9Sstevel@tonic-gate return (pplist); 8747c478bd9Sstevel@tonic-gate } 8757c478bd9Sstevel@tonic-gate minctg = howmany(*pgcnt, sgllen); 8767c478bd9Sstevel@tonic-gate } 8777c478bd9Sstevel@tonic-gate if (pfnalign) 8787c478bd9Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 8797c478bd9Sstevel@tonic-gate } 8807c478bd9Sstevel@tonic-gate 8817c478bd9Sstevel@tonic-gate /* cannot find contig pages in specified range */ 8827c478bd9Sstevel@tonic-gate if (startpfn == lo) { 8837c478bd9Sstevel@tonic-gate CONTIG_UNLOCK(); 8847c478bd9Sstevel@tonic-gate return (NULL); 8857c478bd9Sstevel@tonic-gate } 8867c478bd9Sstevel@tonic-gate 8877c478bd9Sstevel@tonic-gate /* did not start with lo previously */ 8887c478bd9Sstevel@tonic-gate pfn = lo; 8897c478bd9Sstevel@tonic-gate if (pfnalign) 8907c478bd9Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 8917c478bd9Sstevel@tonic-gate 8927c478bd9Sstevel@tonic-gate /* allow search to go above startpfn */ 8937c478bd9Sstevel@tonic-gate while (pfn < startpfn) { 8947c478bd9Sstevel@tonic-gate 8957c478bd9Sstevel@tonic-gate plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock); 8967c478bd9Sstevel@tonic-gate if (plist != NULL) { 8977c478bd9Sstevel@tonic-gate 8987c478bd9Sstevel@tonic-gate page_list_concat(&pplist, &plist); 8997c478bd9Sstevel@tonic-gate sgllen--; 9007c478bd9Sstevel@tonic-gate 9017c478bd9Sstevel@tonic-gate /* 9027c478bd9Sstevel@tonic-gate * return when contig pages no longer needed 9037c478bd9Sstevel@tonic-gate */ 9047c478bd9Sstevel@tonic-gate if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) { 9057c478bd9Sstevel@tonic-gate startpfn = pfn; 9067c478bd9Sstevel@tonic-gate CONTIG_UNLOCK(); 9077c478bd9Sstevel@tonic-gate check_dma(mattr, pplist, *pgcnt); 9087c478bd9Sstevel@tonic-gate return (pplist); 9097c478bd9Sstevel@tonic-gate } 9107c478bd9Sstevel@tonic-gate minctg = howmany(*pgcnt, sgllen); 9117c478bd9Sstevel@tonic-gate } 9127c478bd9Sstevel@tonic-gate if (pfnalign) 9137c478bd9Sstevel@tonic-gate pfn = P2ROUNDUP(pfn, pfnalign); 9147c478bd9Sstevel@tonic-gate } 9157c478bd9Sstevel@tonic-gate CONTIG_UNLOCK(); 9167c478bd9Sstevel@tonic-gate return (NULL); 9177c478bd9Sstevel@tonic-gate } 9187c478bd9Sstevel@tonic-gate 9197c478bd9Sstevel@tonic-gate /* 9207c478bd9Sstevel@tonic-gate * combine mem_node_config and memrange memory ranges into one data 9217c478bd9Sstevel@tonic-gate * structure to be used for page list management. 9227c478bd9Sstevel@tonic-gate * 9237c478bd9Sstevel@tonic-gate * mnode_range_cnt() calculates the number of memory ranges for mnode and 9247c478bd9Sstevel@tonic-gate * memranges[]. Used to determine the size of page lists and mnoderanges. 9257c478bd9Sstevel@tonic-gate * 9267c478bd9Sstevel@tonic-gate * mnode_range_setup() initializes mnoderanges. 9277c478bd9Sstevel@tonic-gate */ 9287c478bd9Sstevel@tonic-gate mnoderange_t *mnoderanges; 9297c478bd9Sstevel@tonic-gate int mnoderangecnt; 9307c478bd9Sstevel@tonic-gate int mtype4g; 9317c478bd9Sstevel@tonic-gate 9327c478bd9Sstevel@tonic-gate int 9337c478bd9Sstevel@tonic-gate mnode_range_cnt() 9347c478bd9Sstevel@tonic-gate { 9357c478bd9Sstevel@tonic-gate int mri; 9367c478bd9Sstevel@tonic-gate int mnrcnt = 0; 9377c478bd9Sstevel@tonic-gate int mnode; 9387c478bd9Sstevel@tonic-gate 9397c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 9407c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].exists == 0) 9417c478bd9Sstevel@tonic-gate continue; 9427c478bd9Sstevel@tonic-gate 9437c478bd9Sstevel@tonic-gate mri = nranges - 1; 9447c478bd9Sstevel@tonic-gate 9457c478bd9Sstevel@tonic-gate /* find the memranges index below contained in mnode range */ 9467c478bd9Sstevel@tonic-gate 9477c478bd9Sstevel@tonic-gate while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase) 9487c478bd9Sstevel@tonic-gate mri--; 9497c478bd9Sstevel@tonic-gate 9507c478bd9Sstevel@tonic-gate /* 9517c478bd9Sstevel@tonic-gate * increment mnode range counter when memranges or mnode 9527c478bd9Sstevel@tonic-gate * boundary is reached. 9537c478bd9Sstevel@tonic-gate */ 9547c478bd9Sstevel@tonic-gate while (mri >= 0 && 9557c478bd9Sstevel@tonic-gate mem_node_config[mnode].physmax >= MEMRANGELO(mri)) { 9567c478bd9Sstevel@tonic-gate mnrcnt++; 9577c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) 9587c478bd9Sstevel@tonic-gate mri--; 9597c478bd9Sstevel@tonic-gate else 9607c478bd9Sstevel@tonic-gate break; 9617c478bd9Sstevel@tonic-gate } 9627c478bd9Sstevel@tonic-gate } 9637c478bd9Sstevel@tonic-gate return (mnrcnt); 9647c478bd9Sstevel@tonic-gate } 9657c478bd9Sstevel@tonic-gate 9667c478bd9Sstevel@tonic-gate void 9677c478bd9Sstevel@tonic-gate mnode_range_setup(mnoderange_t *mnoderanges) 9687c478bd9Sstevel@tonic-gate { 9697c478bd9Sstevel@tonic-gate int mnode, mri; 9707c478bd9Sstevel@tonic-gate 9717c478bd9Sstevel@tonic-gate for (mnode = 0; mnode < max_mem_nodes; mnode++) { 9727c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].exists == 0) 9737c478bd9Sstevel@tonic-gate continue; 9747c478bd9Sstevel@tonic-gate 9757c478bd9Sstevel@tonic-gate mri = nranges - 1; 9767c478bd9Sstevel@tonic-gate 9777c478bd9Sstevel@tonic-gate while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase) 9787c478bd9Sstevel@tonic-gate mri--; 9797c478bd9Sstevel@tonic-gate 9807c478bd9Sstevel@tonic-gate while (mri >= 0 && mem_node_config[mnode].physmax >= 9817c478bd9Sstevel@tonic-gate MEMRANGELO(mri)) { 9827c478bd9Sstevel@tonic-gate mnoderanges->mnr_pfnlo = 9837c478bd9Sstevel@tonic-gate MAX(MEMRANGELO(mri), 9847c478bd9Sstevel@tonic-gate mem_node_config[mnode].physbase); 9857c478bd9Sstevel@tonic-gate mnoderanges->mnr_pfnhi = 9867c478bd9Sstevel@tonic-gate MIN(MEMRANGEHI(mri), 9877c478bd9Sstevel@tonic-gate mem_node_config[mnode].physmax); 9887c478bd9Sstevel@tonic-gate mnoderanges->mnr_mnode = mnode; 9897c478bd9Sstevel@tonic-gate mnoderanges->mnr_memrange = mri; 9907c478bd9Sstevel@tonic-gate mnoderanges++; 9917c478bd9Sstevel@tonic-gate if (mem_node_config[mnode].physmax > MEMRANGEHI(mri)) 9927c478bd9Sstevel@tonic-gate mri--; 9937c478bd9Sstevel@tonic-gate else 9947c478bd9Sstevel@tonic-gate break; 9957c478bd9Sstevel@tonic-gate } 9967c478bd9Sstevel@tonic-gate } 9977c478bd9Sstevel@tonic-gate } 9987c478bd9Sstevel@tonic-gate 9997c478bd9Sstevel@tonic-gate /* 10007c478bd9Sstevel@tonic-gate * Determine if the mnode range specified in mtype contains memory belonging 10017c478bd9Sstevel@tonic-gate * to memory node mnode. If flags & PGI_MT_RANGE is set then mtype contains 1002*07ad560dSkchow * the range of indices from high pfn to 0, 16m or 4g. 10037c478bd9Sstevel@tonic-gate * 10047c478bd9Sstevel@tonic-gate * Return first mnode range type index found otherwise return -1 if none found. 10057c478bd9Sstevel@tonic-gate */ 10067c478bd9Sstevel@tonic-gate int 10077c478bd9Sstevel@tonic-gate mtype_func(int mnode, int mtype, uint_t flags) 10087c478bd9Sstevel@tonic-gate { 10097c478bd9Sstevel@tonic-gate if (flags & PGI_MT_RANGE) { 1010*07ad560dSkchow int mtlim; 10117c478bd9Sstevel@tonic-gate 10127c478bd9Sstevel@tonic-gate if (flags & PGI_MT_NEXT) 10137c478bd9Sstevel@tonic-gate mtype--; 1014*07ad560dSkchow if (flags & PGI_MT_RANGE0) 1015*07ad560dSkchow mtlim = 0; 1016*07ad560dSkchow else if (flags & PGI_MT_RANGE4G) 1017*07ad560dSkchow mtlim = mtype4g + 1; /* exclude 0-4g range */ 1018*07ad560dSkchow else if (flags & PGI_MT_RANGE16M) 1019*07ad560dSkchow mtlim = 1; /* exclude 0-16m range */ 10207c478bd9Sstevel@tonic-gate while (mtype >= mtlim) { 10217c478bd9Sstevel@tonic-gate if (mnoderanges[mtype].mnr_mnode == mnode) 10227c478bd9Sstevel@tonic-gate return (mtype); 10237c478bd9Sstevel@tonic-gate mtype--; 10247c478bd9Sstevel@tonic-gate } 10257c478bd9Sstevel@tonic-gate } else { 10267c478bd9Sstevel@tonic-gate if (mnoderanges[mtype].mnr_mnode == mnode) 10277c478bd9Sstevel@tonic-gate return (mtype); 10287c478bd9Sstevel@tonic-gate } 10297c478bd9Sstevel@tonic-gate return (-1); 10307c478bd9Sstevel@tonic-gate } 10317c478bd9Sstevel@tonic-gate 1032e21bae1bSkchow /* 1033e21bae1bSkchow * Update the page list max counts with the pfn range specified by the 1034e21bae1bSkchow * input parameters. Called from add_physmem() when physical memory with 1035e21bae1bSkchow * page_t's are initially added to the page lists. 1036e21bae1bSkchow */ 1037e21bae1bSkchow void 1038e21bae1bSkchow mtype_modify_max(pfn_t startpfn, long cnt) 1039e21bae1bSkchow { 1040e21bae1bSkchow int mtype = 0; 1041e21bae1bSkchow pfn_t endpfn = startpfn + cnt, pfn; 1042e21bae1bSkchow pgcnt_t inc; 1043e21bae1bSkchow 1044e21bae1bSkchow ASSERT(cnt > 0); 1045e21bae1bSkchow 1046e21bae1bSkchow for (pfn = startpfn; pfn < endpfn; ) { 1047e21bae1bSkchow if (pfn <= mnoderanges[mtype].mnr_pfnhi) { 1048e21bae1bSkchow if (endpfn < mnoderanges[mtype].mnr_pfnhi) { 1049e21bae1bSkchow inc = endpfn - pfn; 1050e21bae1bSkchow } else { 1051e21bae1bSkchow inc = mnoderanges[mtype].mnr_pfnhi - pfn + 1; 1052e21bae1bSkchow } 1053e21bae1bSkchow mnoderanges[mtype].mnr_mt_pgmax += inc; 1054e21bae1bSkchow if (physmax4g && mtype <= mtype4g) 1055e21bae1bSkchow maxmem4g += inc; 1056e21bae1bSkchow pfn += inc; 1057e21bae1bSkchow } 1058e21bae1bSkchow mtype++; 1059e21bae1bSkchow ASSERT(mtype < mnoderangecnt || pfn >= endpfn); 1060e21bae1bSkchow } 1061e21bae1bSkchow } 1062e21bae1bSkchow 1063affbd3ccSkchow /* 1064affbd3ccSkchow * Returns the free page count for mnode 1065affbd3ccSkchow */ 1066affbd3ccSkchow int 1067affbd3ccSkchow mnode_pgcnt(int mnode) 1068affbd3ccSkchow { 1069affbd3ccSkchow int mtype = mnoderangecnt - 1; 1070affbd3ccSkchow int flags = PGI_MT_RANGE0; 1071affbd3ccSkchow pgcnt_t pgcnt = 0; 1072affbd3ccSkchow 1073affbd3ccSkchow mtype = mtype_func(mnode, mtype, flags); 1074affbd3ccSkchow 1075affbd3ccSkchow while (mtype != -1) { 1076*07ad560dSkchow pgcnt += MTYPE_FREEMEM(mtype); 1077affbd3ccSkchow mtype = mtype_func(mnode, mtype, flags | PGI_MT_NEXT); 1078affbd3ccSkchow } 1079affbd3ccSkchow return (pgcnt); 1080affbd3ccSkchow } 1081affbd3ccSkchow 10827c478bd9Sstevel@tonic-gate /* 10837c478bd9Sstevel@tonic-gate * Initialize page coloring variables based on the l2 cache parameters. 10847c478bd9Sstevel@tonic-gate * Calculate and return memory needed for page coloring data structures. 10857c478bd9Sstevel@tonic-gate */ 10867c478bd9Sstevel@tonic-gate size_t 10877c478bd9Sstevel@tonic-gate page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc) 10887c478bd9Sstevel@tonic-gate { 10897c478bd9Sstevel@tonic-gate size_t colorsz = 0; 10907c478bd9Sstevel@tonic-gate int i; 10917c478bd9Sstevel@tonic-gate int colors; 10927c478bd9Sstevel@tonic-gate 10937c478bd9Sstevel@tonic-gate /* 10947c478bd9Sstevel@tonic-gate * Reduce the memory ranges lists if we don't have large amounts 10957c478bd9Sstevel@tonic-gate * of memory. This avoids searching known empty free lists. 10967c478bd9Sstevel@tonic-gate */ 10977c478bd9Sstevel@tonic-gate i = memrange_num(physmax); 10987c478bd9Sstevel@tonic-gate memranges += i; 10997c478bd9Sstevel@tonic-gate nranges -= i; 11007c478bd9Sstevel@tonic-gate #if defined(__i386) 11017c478bd9Sstevel@tonic-gate if (i > 0) 11027c478bd9Sstevel@tonic-gate restricted_kmemalloc = 0; 11037c478bd9Sstevel@tonic-gate #endif 11047c478bd9Sstevel@tonic-gate /* physmax greater than 4g */ 11057c478bd9Sstevel@tonic-gate if (i == 0) 11067c478bd9Sstevel@tonic-gate physmax4g = 1; 11077c478bd9Sstevel@tonic-gate 11087c478bd9Sstevel@tonic-gate /* 11097c478bd9Sstevel@tonic-gate * setup pagesize for generic page layer 11107c478bd9Sstevel@tonic-gate */ 11117c478bd9Sstevel@tonic-gate for (i = 0; i <= mmu.max_page_level; ++i) { 11127c478bd9Sstevel@tonic-gate hw_page_array[i].hp_size = LEVEL_SIZE(i); 11137c478bd9Sstevel@tonic-gate hw_page_array[i].hp_shift = LEVEL_SHIFT(i); 11147c478bd9Sstevel@tonic-gate hw_page_array[i].hp_pgcnt = LEVEL_SIZE(i) >> LEVEL_SHIFT(0); 11157c478bd9Sstevel@tonic-gate } 11167c478bd9Sstevel@tonic-gate 11177c478bd9Sstevel@tonic-gate ASSERT(ISP2(l2_sz)); 11187c478bd9Sstevel@tonic-gate ASSERT(ISP2(l2_linesz)); 11197c478bd9Sstevel@tonic-gate ASSERT(l2_sz > MMU_PAGESIZE); 11207c478bd9Sstevel@tonic-gate 11217c478bd9Sstevel@tonic-gate /* l2_assoc is 0 for fully associative l2 cache */ 11227c478bd9Sstevel@tonic-gate if (l2_assoc) 11237c478bd9Sstevel@tonic-gate l2_colors = MAX(1, l2_sz / (l2_assoc * MMU_PAGESIZE)); 11247c478bd9Sstevel@tonic-gate else 11257c478bd9Sstevel@tonic-gate l2_colors = 1; 11267c478bd9Sstevel@tonic-gate 11277c478bd9Sstevel@tonic-gate /* for scalability, configure at least PAGE_COLORS_MIN color bins */ 11287c478bd9Sstevel@tonic-gate page_colors = MAX(l2_colors, PAGE_COLORS_MIN); 11297c478bd9Sstevel@tonic-gate 11307c478bd9Sstevel@tonic-gate /* 11317c478bd9Sstevel@tonic-gate * cpu_page_colors is non-zero when a page color may be spread across 11327c478bd9Sstevel@tonic-gate * multiple bins. 11337c478bd9Sstevel@tonic-gate */ 11347c478bd9Sstevel@tonic-gate if (l2_colors < page_colors) 11357c478bd9Sstevel@tonic-gate cpu_page_colors = l2_colors; 11367c478bd9Sstevel@tonic-gate 11377c478bd9Sstevel@tonic-gate ASSERT(ISP2(page_colors)); 11387c478bd9Sstevel@tonic-gate 11397c478bd9Sstevel@tonic-gate page_colors_mask = page_colors - 1; 11407c478bd9Sstevel@tonic-gate 11417c478bd9Sstevel@tonic-gate ASSERT(ISP2(CPUSETSIZE())); 11427c478bd9Sstevel@tonic-gate page_coloring_shift = lowbit(CPUSETSIZE()); 11437c478bd9Sstevel@tonic-gate 11447c478bd9Sstevel@tonic-gate /* size for mnoderanges */ 11457c478bd9Sstevel@tonic-gate mnoderangecnt = mnode_range_cnt(); 11467c478bd9Sstevel@tonic-gate colorsz = mnoderangecnt * sizeof (mnoderange_t); 11477c478bd9Sstevel@tonic-gate 11487c478bd9Sstevel@tonic-gate /* size for fpc_mutex and cpc_mutex */ 11497c478bd9Sstevel@tonic-gate colorsz += (2 * max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX); 11507c478bd9Sstevel@tonic-gate 11517c478bd9Sstevel@tonic-gate /* size of page_freelists */ 11527c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * sizeof (page_t ***); 11537c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * mmu_page_sizes * sizeof (page_t **); 11547c478bd9Sstevel@tonic-gate 11557c478bd9Sstevel@tonic-gate for (i = 0; i < mmu_page_sizes; i++) { 11567c478bd9Sstevel@tonic-gate colors = page_get_pagecolors(i); 11577c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * colors * sizeof (page_t *); 11587c478bd9Sstevel@tonic-gate } 11597c478bd9Sstevel@tonic-gate 11607c478bd9Sstevel@tonic-gate /* size of page_cachelists */ 11617c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * sizeof (page_t **); 11627c478bd9Sstevel@tonic-gate colorsz += mnoderangecnt * page_colors * sizeof (page_t *); 11637c478bd9Sstevel@tonic-gate 11647c478bd9Sstevel@tonic-gate return (colorsz); 11657c478bd9Sstevel@tonic-gate } 11667c478bd9Sstevel@tonic-gate 11677c478bd9Sstevel@tonic-gate /* 11687c478bd9Sstevel@tonic-gate * Called once at startup to configure page_coloring data structures and 11697c478bd9Sstevel@tonic-gate * does the 1st page_free()/page_freelist_add(). 11707c478bd9Sstevel@tonic-gate */ 11717c478bd9Sstevel@tonic-gate void 11727c478bd9Sstevel@tonic-gate page_coloring_setup(caddr_t pcmemaddr) 11737c478bd9Sstevel@tonic-gate { 11747c478bd9Sstevel@tonic-gate int i; 11757c478bd9Sstevel@tonic-gate int j; 11767c478bd9Sstevel@tonic-gate int k; 11777c478bd9Sstevel@tonic-gate caddr_t addr; 11787c478bd9Sstevel@tonic-gate int colors; 11797c478bd9Sstevel@tonic-gate 11807c478bd9Sstevel@tonic-gate /* 11817c478bd9Sstevel@tonic-gate * do page coloring setup 11827c478bd9Sstevel@tonic-gate */ 11837c478bd9Sstevel@tonic-gate addr = pcmemaddr; 11847c478bd9Sstevel@tonic-gate 11857c478bd9Sstevel@tonic-gate mnoderanges = (mnoderange_t *)addr; 11867c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (mnoderange_t)); 11877c478bd9Sstevel@tonic-gate 11887c478bd9Sstevel@tonic-gate mnode_range_setup(mnoderanges); 11897c478bd9Sstevel@tonic-gate 11907c478bd9Sstevel@tonic-gate if (physmax4g) 11917c478bd9Sstevel@tonic-gate mtype4g = pfn_2_mtype(0xfffff); 11927c478bd9Sstevel@tonic-gate 11937c478bd9Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 11947c478bd9Sstevel@tonic-gate fpc_mutex[k] = (kmutex_t *)addr; 11957c478bd9Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 11967c478bd9Sstevel@tonic-gate } 11977c478bd9Sstevel@tonic-gate for (k = 0; k < NPC_MUTEX; k++) { 11987c478bd9Sstevel@tonic-gate cpc_mutex[k] = (kmutex_t *)addr; 11997c478bd9Sstevel@tonic-gate addr += (max_mem_nodes * sizeof (kmutex_t)); 12007c478bd9Sstevel@tonic-gate } 12017c478bd9Sstevel@tonic-gate page_freelists = (page_t ****)addr; 12027c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t ***)); 12037c478bd9Sstevel@tonic-gate 12047c478bd9Sstevel@tonic-gate page_cachelists = (page_t ***)addr; 12057c478bd9Sstevel@tonic-gate addr += (mnoderangecnt * sizeof (page_t **)); 12067c478bd9Sstevel@tonic-gate 12077c478bd9Sstevel@tonic-gate for (i = 0; i < mnoderangecnt; i++) { 12087c478bd9Sstevel@tonic-gate page_freelists[i] = (page_t ***)addr; 12097c478bd9Sstevel@tonic-gate addr += (mmu_page_sizes * sizeof (page_t **)); 12107c478bd9Sstevel@tonic-gate 12117c478bd9Sstevel@tonic-gate for (j = 0; j < mmu_page_sizes; j++) { 12127c478bd9Sstevel@tonic-gate colors = page_get_pagecolors(j); 12137c478bd9Sstevel@tonic-gate page_freelists[i][j] = (page_t **)addr; 12147c478bd9Sstevel@tonic-gate addr += (colors * sizeof (page_t *)); 12157c478bd9Sstevel@tonic-gate } 12167c478bd9Sstevel@tonic-gate page_cachelists[i] = (page_t **)addr; 12177c478bd9Sstevel@tonic-gate addr += (page_colors * sizeof (page_t *)); 12187c478bd9Sstevel@tonic-gate } 12197c478bd9Sstevel@tonic-gate } 12207c478bd9Sstevel@tonic-gate 12217c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 12227c478bd9Sstevel@tonic-gate int 12237c478bd9Sstevel@tonic-gate bp_color(struct buf *bp) 12247c478bd9Sstevel@tonic-gate { 12257c478bd9Sstevel@tonic-gate return (0); 12267c478bd9Sstevel@tonic-gate } 12277c478bd9Sstevel@tonic-gate 12287c478bd9Sstevel@tonic-gate /* 12297c478bd9Sstevel@tonic-gate * get a page from any list with the given mnode 12307c478bd9Sstevel@tonic-gate */ 12317c478bd9Sstevel@tonic-gate page_t * 12327c478bd9Sstevel@tonic-gate page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags, 12337c478bd9Sstevel@tonic-gate int mnode, int mtype, ddi_dma_attr_t *dma_attr) 12347c478bd9Sstevel@tonic-gate { 12357c478bd9Sstevel@tonic-gate kmutex_t *pcm; 12367c478bd9Sstevel@tonic-gate int i; 12377c478bd9Sstevel@tonic-gate page_t *pp; 12387c478bd9Sstevel@tonic-gate page_t *first_pp; 12397c478bd9Sstevel@tonic-gate uint64_t pgaddr; 12407c478bd9Sstevel@tonic-gate ulong_t bin; 12417c478bd9Sstevel@tonic-gate int mtypestart; 12427c478bd9Sstevel@tonic-gate 12437c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_alloc); 12447c478bd9Sstevel@tonic-gate 12457c478bd9Sstevel@tonic-gate ASSERT((flags & PG_MATCH_COLOR) == 0); 12467c478bd9Sstevel@tonic-gate ASSERT(szc == 0); 12477c478bd9Sstevel@tonic-gate ASSERT(dma_attr != NULL); 12487c478bd9Sstevel@tonic-gate 12497c478bd9Sstevel@tonic-gate 12507c478bd9Sstevel@tonic-gate MTYPE_START(mnode, mtype, flags); 12517c478bd9Sstevel@tonic-gate if (mtype < 0) { 12527c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocempty); 12537c478bd9Sstevel@tonic-gate return (NULL); 12547c478bd9Sstevel@tonic-gate } 12557c478bd9Sstevel@tonic-gate 12567c478bd9Sstevel@tonic-gate mtypestart = mtype; 12577c478bd9Sstevel@tonic-gate 12587c478bd9Sstevel@tonic-gate bin = origbin; 12597c478bd9Sstevel@tonic-gate 12607c478bd9Sstevel@tonic-gate /* 12617c478bd9Sstevel@tonic-gate * check up to page_colors + 1 bins - origbin may be checked twice 12627c478bd9Sstevel@tonic-gate * because of BIN_STEP skip 12637c478bd9Sstevel@tonic-gate */ 12647c478bd9Sstevel@tonic-gate do { 12657c478bd9Sstevel@tonic-gate i = 0; 12667c478bd9Sstevel@tonic-gate while (i <= page_colors) { 12677c478bd9Sstevel@tonic-gate if (PAGE_FREELISTS(mnode, szc, bin, mtype) == NULL) 12687c478bd9Sstevel@tonic-gate goto nextfreebin; 12697c478bd9Sstevel@tonic-gate 12707c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST); 12717c478bd9Sstevel@tonic-gate mutex_enter(pcm); 12727c478bd9Sstevel@tonic-gate pp = PAGE_FREELISTS(mnode, szc, bin, mtype); 12737c478bd9Sstevel@tonic-gate first_pp = pp; 12747c478bd9Sstevel@tonic-gate while (pp != NULL) { 12757c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 12767c478bd9Sstevel@tonic-gate pp = pp->p_next; 12777c478bd9Sstevel@tonic-gate if (pp == first_pp) { 12787c478bd9Sstevel@tonic-gate pp = NULL; 12797c478bd9Sstevel@tonic-gate } 12807c478bd9Sstevel@tonic-gate continue; 12817c478bd9Sstevel@tonic-gate } 12827c478bd9Sstevel@tonic-gate 12837c478bd9Sstevel@tonic-gate ASSERT(PP_ISFREE(pp)); 12847c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp)); 12857c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode == NULL); 12867c478bd9Sstevel@tonic-gate ASSERT(pp->p_hash == NULL); 12877c478bd9Sstevel@tonic-gate ASSERT(pp->p_offset == (u_offset_t)-1); 12887c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == szc); 12897c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 12907c478bd9Sstevel@tonic-gate /* check if page within DMA attributes */ 12917c478bd9Sstevel@tonic-gate pgaddr = mmu_ptob((uint64_t)(pp->p_pagenum)); 12927c478bd9Sstevel@tonic-gate 12937c478bd9Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 12947c478bd9Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 12957c478bd9Sstevel@tonic-gate dma_attr->dma_attr_addr_hi)) { 12967c478bd9Sstevel@tonic-gate break; 12977c478bd9Sstevel@tonic-gate } 12987c478bd9Sstevel@tonic-gate 12997c478bd9Sstevel@tonic-gate /* continue looking */ 13007c478bd9Sstevel@tonic-gate page_unlock(pp); 13017c478bd9Sstevel@tonic-gate pp = pp->p_next; 13027c478bd9Sstevel@tonic-gate if (pp == first_pp) 13037c478bd9Sstevel@tonic-gate pp = NULL; 13047c478bd9Sstevel@tonic-gate 13057c478bd9Sstevel@tonic-gate } 13067c478bd9Sstevel@tonic-gate if (pp != NULL) { 13077c478bd9Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 13087c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 13097c478bd9Sstevel@tonic-gate 13107c478bd9Sstevel@tonic-gate /* found a page with specified DMA attributes */ 13117c478bd9Sstevel@tonic-gate page_sub(&PAGE_FREELISTS(mnode, szc, bin, 13127c478bd9Sstevel@tonic-gate mtype), pp); 1313affbd3ccSkchow page_ctr_sub(mnode, mtype, pp, PG_FREE_LIST); 13147c478bd9Sstevel@tonic-gate 13157c478bd9Sstevel@tonic-gate if ((PP_ISFREE(pp) == 0) || 13167c478bd9Sstevel@tonic-gate (PP_ISAGED(pp) == 0)) { 13177c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "page %p is not free", 13187c478bd9Sstevel@tonic-gate (void *)pp); 13197c478bd9Sstevel@tonic-gate } 13207c478bd9Sstevel@tonic-gate 13217c478bd9Sstevel@tonic-gate mutex_exit(pcm); 13227c478bd9Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 13237c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocok); 13247c478bd9Sstevel@tonic-gate return (pp); 13257c478bd9Sstevel@tonic-gate } 13267c478bd9Sstevel@tonic-gate mutex_exit(pcm); 13277c478bd9Sstevel@tonic-gate nextfreebin: 13287c478bd9Sstevel@tonic-gate pp = page_freelist_fill(szc, bin, mnode, mtype, 13297c478bd9Sstevel@tonic-gate mmu_btop(dma_attr->dma_attr_addr_hi + 1)); 13307c478bd9Sstevel@tonic-gate if (pp) 13317c478bd9Sstevel@tonic-gate return (pp); 13327c478bd9Sstevel@tonic-gate 13337c478bd9Sstevel@tonic-gate /* try next bin */ 13347c478bd9Sstevel@tonic-gate bin += (i == 0) ? BIN_STEP : 1; 13357c478bd9Sstevel@tonic-gate bin &= page_colors_mask; 13367c478bd9Sstevel@tonic-gate i++; 13377c478bd9Sstevel@tonic-gate } 1338affbd3ccSkchow MTYPE_NEXT(mnode, mtype, flags); 1339affbd3ccSkchow } while (mtype >= 0); 13407c478bd9Sstevel@tonic-gate 13417c478bd9Sstevel@tonic-gate /* failed to find a page in the freelist; try it in the cachelist */ 13427c478bd9Sstevel@tonic-gate 13437c478bd9Sstevel@tonic-gate /* reset mtype start for cachelist search */ 13447c478bd9Sstevel@tonic-gate mtype = mtypestart; 13457c478bd9Sstevel@tonic-gate ASSERT(mtype >= 0); 13467c478bd9Sstevel@tonic-gate 13477c478bd9Sstevel@tonic-gate /* start with the bin of matching color */ 13487c478bd9Sstevel@tonic-gate bin = origbin; 13497c478bd9Sstevel@tonic-gate 13507c478bd9Sstevel@tonic-gate do { 13517c478bd9Sstevel@tonic-gate for (i = 0; i <= page_colors; i++) { 13527c478bd9Sstevel@tonic-gate if (PAGE_CACHELISTS(mnode, bin, mtype) == NULL) 13537c478bd9Sstevel@tonic-gate goto nextcachebin; 13547c478bd9Sstevel@tonic-gate pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST); 13557c478bd9Sstevel@tonic-gate mutex_enter(pcm); 13567c478bd9Sstevel@tonic-gate pp = PAGE_CACHELISTS(mnode, bin, mtype); 13577c478bd9Sstevel@tonic-gate first_pp = pp; 13587c478bd9Sstevel@tonic-gate while (pp != NULL) { 13597c478bd9Sstevel@tonic-gate if (page_trylock(pp, SE_EXCL) == 0) { 13607c478bd9Sstevel@tonic-gate pp = pp->p_next; 13617c478bd9Sstevel@tonic-gate if (pp == first_pp) 13627c478bd9Sstevel@tonic-gate break; 13637c478bd9Sstevel@tonic-gate continue; 13647c478bd9Sstevel@tonic-gate } 13657c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode); 13667c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 13677c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 13687c478bd9Sstevel@tonic-gate ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode); 13697c478bd9Sstevel@tonic-gate 13707c478bd9Sstevel@tonic-gate /* check if page within DMA attributes */ 13717c478bd9Sstevel@tonic-gate 13727c478bd9Sstevel@tonic-gate pgaddr = ptob((uint64_t)(pp->p_pagenum)); 13737c478bd9Sstevel@tonic-gate 13747c478bd9Sstevel@tonic-gate if ((pgaddr >= dma_attr->dma_attr_addr_lo) && 13757c478bd9Sstevel@tonic-gate (pgaddr + MMU_PAGESIZE - 1 <= 13767c478bd9Sstevel@tonic-gate dma_attr->dma_attr_addr_hi)) { 13777c478bd9Sstevel@tonic-gate break; 13787c478bd9Sstevel@tonic-gate } 13797c478bd9Sstevel@tonic-gate 13807c478bd9Sstevel@tonic-gate /* continue looking */ 13817c478bd9Sstevel@tonic-gate page_unlock(pp); 13827c478bd9Sstevel@tonic-gate pp = pp->p_next; 13837c478bd9Sstevel@tonic-gate if (pp == first_pp) 13847c478bd9Sstevel@tonic-gate pp = NULL; 13857c478bd9Sstevel@tonic-gate } 13867c478bd9Sstevel@tonic-gate 13877c478bd9Sstevel@tonic-gate if (pp != NULL) { 13887c478bd9Sstevel@tonic-gate ASSERT(mtype == PP_2_MTYPE(pp)); 13897c478bd9Sstevel@tonic-gate ASSERT(pp->p_szc == 0); 13907c478bd9Sstevel@tonic-gate 13917c478bd9Sstevel@tonic-gate /* found a page with specified DMA attributes */ 13927c478bd9Sstevel@tonic-gate page_sub(&PAGE_CACHELISTS(mnode, bin, 13937c478bd9Sstevel@tonic-gate mtype), pp); 1394affbd3ccSkchow page_ctr_sub(mnode, mtype, pp, PG_CACHE_LIST); 13957c478bd9Sstevel@tonic-gate 13967c478bd9Sstevel@tonic-gate mutex_exit(pcm); 13977c478bd9Sstevel@tonic-gate ASSERT(pp->p_vnode); 13987c478bd9Sstevel@tonic-gate ASSERT(PP_ISAGED(pp) == 0); 13997c478bd9Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 14007c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocok); 14017c478bd9Sstevel@tonic-gate return (pp); 14027c478bd9Sstevel@tonic-gate } 14037c478bd9Sstevel@tonic-gate mutex_exit(pcm); 14047c478bd9Sstevel@tonic-gate nextcachebin: 14057c478bd9Sstevel@tonic-gate bin += (i == 0) ? BIN_STEP : 1; 14067c478bd9Sstevel@tonic-gate bin &= page_colors_mask; 14077c478bd9Sstevel@tonic-gate } 1408affbd3ccSkchow MTYPE_NEXT(mnode, mtype, flags); 1409affbd3ccSkchow } while (mtype >= 0); 14107c478bd9Sstevel@tonic-gate 14117c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pgma_allocfailed); 14127c478bd9Sstevel@tonic-gate return (NULL); 14137c478bd9Sstevel@tonic-gate } 14147c478bd9Sstevel@tonic-gate 14157c478bd9Sstevel@tonic-gate /* 14167c478bd9Sstevel@tonic-gate * This function is similar to page_get_freelist()/page_get_cachelist() 14177c478bd9Sstevel@tonic-gate * but it searches both the lists to find a page with the specified 14187c478bd9Sstevel@tonic-gate * color (or no color) and DMA attributes. The search is done in the 14197c478bd9Sstevel@tonic-gate * freelist first and then in the cache list within the highest memory 14207c478bd9Sstevel@tonic-gate * range (based on DMA attributes) before searching in the lower 14217c478bd9Sstevel@tonic-gate * memory ranges. 14227c478bd9Sstevel@tonic-gate * 14237c478bd9Sstevel@tonic-gate * Note: This function is called only by page_create_io(). 14247c478bd9Sstevel@tonic-gate */ 14257c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 14267c478bd9Sstevel@tonic-gate page_t * 14277c478bd9Sstevel@tonic-gate page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr, 14287c478bd9Sstevel@tonic-gate size_t size, uint_t flags, ddi_dma_attr_t *dma_attr, lgrp_t *lgrp) 14297c478bd9Sstevel@tonic-gate { 14307c478bd9Sstevel@tonic-gate uint_t bin; 14317c478bd9Sstevel@tonic-gate int mtype; 14327c478bd9Sstevel@tonic-gate page_t *pp; 14337c478bd9Sstevel@tonic-gate int n; 14347c478bd9Sstevel@tonic-gate int m; 14357c478bd9Sstevel@tonic-gate int szc; 14367c478bd9Sstevel@tonic-gate int fullrange; 14377c478bd9Sstevel@tonic-gate int mnode; 14387c478bd9Sstevel@tonic-gate int local_failed_stat = 0; 14397c478bd9Sstevel@tonic-gate lgrp_mnode_cookie_t lgrp_cookie; 14407c478bd9Sstevel@tonic-gate 14417c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pga_alloc); 14427c478bd9Sstevel@tonic-gate 14437c478bd9Sstevel@tonic-gate /* only base pagesize currently supported */ 14447c478bd9Sstevel@tonic-gate if (size != MMU_PAGESIZE) 14457c478bd9Sstevel@tonic-gate return (NULL); 14467c478bd9Sstevel@tonic-gate 14477c478bd9Sstevel@tonic-gate /* 14487c478bd9Sstevel@tonic-gate * If we're passed a specific lgroup, we use it. Otherwise, 14497c478bd9Sstevel@tonic-gate * assume first-touch placement is desired. 14507c478bd9Sstevel@tonic-gate */ 14517c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp)) 14527c478bd9Sstevel@tonic-gate lgrp = lgrp_home_lgrp(); 14537c478bd9Sstevel@tonic-gate 14547c478bd9Sstevel@tonic-gate /* LINTED */ 14557c478bd9Sstevel@tonic-gate AS_2_BIN(as, seg, vp, vaddr, bin); 14567c478bd9Sstevel@tonic-gate 14577c478bd9Sstevel@tonic-gate /* 14587c478bd9Sstevel@tonic-gate * Only hold one freelist or cachelist lock at a time, that way we 14597c478bd9Sstevel@tonic-gate * can start anywhere and not have to worry about lock 14607c478bd9Sstevel@tonic-gate * ordering. 14617c478bd9Sstevel@tonic-gate */ 14627c478bd9Sstevel@tonic-gate if (dma_attr == NULL) { 14637c478bd9Sstevel@tonic-gate n = 0; 14647c478bd9Sstevel@tonic-gate m = mnoderangecnt - 1; 14657c478bd9Sstevel@tonic-gate fullrange = 1; 14667c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pga_nulldmaattr); 14677c478bd9Sstevel@tonic-gate } else { 14687c478bd9Sstevel@tonic-gate pfn_t pfnlo = mmu_btop(dma_attr->dma_attr_addr_lo); 14697c478bd9Sstevel@tonic-gate pfn_t pfnhi = mmu_btop(dma_attr->dma_attr_addr_hi); 14707c478bd9Sstevel@tonic-gate 14717c478bd9Sstevel@tonic-gate /* 14727c478bd9Sstevel@tonic-gate * We can guarantee alignment only for page boundary. 14737c478bd9Sstevel@tonic-gate */ 14747c478bd9Sstevel@tonic-gate if (dma_attr->dma_attr_align > MMU_PAGESIZE) 14757c478bd9Sstevel@tonic-gate return (NULL); 14767c478bd9Sstevel@tonic-gate 14777c478bd9Sstevel@tonic-gate n = pfn_2_mtype(pfnlo); 14787c478bd9Sstevel@tonic-gate m = pfn_2_mtype(pfnhi); 14797c478bd9Sstevel@tonic-gate 14807c478bd9Sstevel@tonic-gate fullrange = ((pfnlo == mnoderanges[n].mnr_pfnlo) && 14817c478bd9Sstevel@tonic-gate (pfnhi >= mnoderanges[m].mnr_pfnhi)); 14827c478bd9Sstevel@tonic-gate } 14837c478bd9Sstevel@tonic-gate VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange); 14847c478bd9Sstevel@tonic-gate 14857c478bd9Sstevel@tonic-gate if (n > m) 14867c478bd9Sstevel@tonic-gate return (NULL); 14877c478bd9Sstevel@tonic-gate 14887c478bd9Sstevel@tonic-gate szc = 0; 14897c478bd9Sstevel@tonic-gate 14907c478bd9Sstevel@tonic-gate /* cylcing thru mtype handled by RANGE0 if n == 0 */ 14917c478bd9Sstevel@tonic-gate if (n == 0) { 14927c478bd9Sstevel@tonic-gate flags |= PGI_MT_RANGE0; 14937c478bd9Sstevel@tonic-gate n = m; 14947c478bd9Sstevel@tonic-gate } 14957c478bd9Sstevel@tonic-gate 14967c478bd9Sstevel@tonic-gate /* 14977c478bd9Sstevel@tonic-gate * Try local memory node first, but try remote if we can't 14987c478bd9Sstevel@tonic-gate * get a page of the right color. 14997c478bd9Sstevel@tonic-gate */ 15007c478bd9Sstevel@tonic-gate LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_HIER); 15017c478bd9Sstevel@tonic-gate while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) { 15027c478bd9Sstevel@tonic-gate /* 15037c478bd9Sstevel@tonic-gate * allocate pages from high pfn to low. 15047c478bd9Sstevel@tonic-gate */ 15057c478bd9Sstevel@tonic-gate for (mtype = m; mtype >= n; mtype--) { 15067c478bd9Sstevel@tonic-gate if (fullrange != 0) { 15077c478bd9Sstevel@tonic-gate pp = page_get_mnode_freelist(mnode, 15087c478bd9Sstevel@tonic-gate bin, mtype, szc, flags); 15097c478bd9Sstevel@tonic-gate if (pp == NULL) { 15107c478bd9Sstevel@tonic-gate pp = page_get_mnode_cachelist( 15117c478bd9Sstevel@tonic-gate bin, flags, mnode, mtype); 15127c478bd9Sstevel@tonic-gate } 15137c478bd9Sstevel@tonic-gate } else { 15147c478bd9Sstevel@tonic-gate pp = page_get_mnode_anylist(bin, szc, 15157c478bd9Sstevel@tonic-gate flags, mnode, mtype, dma_attr); 15167c478bd9Sstevel@tonic-gate } 15177c478bd9Sstevel@tonic-gate if (pp != NULL) { 15187c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pga_allocok); 15197c478bd9Sstevel@tonic-gate check_dma(dma_attr, pp, 1); 15207c478bd9Sstevel@tonic-gate return (pp); 15217c478bd9Sstevel@tonic-gate } 15227c478bd9Sstevel@tonic-gate } 15237c478bd9Sstevel@tonic-gate if (!local_failed_stat) { 15247c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1); 15257c478bd9Sstevel@tonic-gate local_failed_stat = 1; 15267c478bd9Sstevel@tonic-gate } 15277c478bd9Sstevel@tonic-gate } 15287c478bd9Sstevel@tonic-gate VM_STAT_ADD(pga_vmstats.pga_allocfailed); 15297c478bd9Sstevel@tonic-gate 15307c478bd9Sstevel@tonic-gate return (NULL); 15317c478bd9Sstevel@tonic-gate } 15327c478bd9Sstevel@tonic-gate 15337c478bd9Sstevel@tonic-gate /* 15347c478bd9Sstevel@tonic-gate * page_create_io() 15357c478bd9Sstevel@tonic-gate * 15367c478bd9Sstevel@tonic-gate * This function is a copy of page_create_va() with an additional 15377c478bd9Sstevel@tonic-gate * argument 'mattr' that specifies DMA memory requirements to 15387c478bd9Sstevel@tonic-gate * the page list functions. This function is used by the segkmem 15397c478bd9Sstevel@tonic-gate * allocator so it is only to create new pages (i.e PG_EXCL is 15407c478bd9Sstevel@tonic-gate * set). 15417c478bd9Sstevel@tonic-gate * 15427c478bd9Sstevel@tonic-gate * Note: This interface is currently used by x86 PSM only and is 15437c478bd9Sstevel@tonic-gate * not fully specified so the commitment level is only for 15447c478bd9Sstevel@tonic-gate * private interface specific to x86. This interface uses PSM 15457c478bd9Sstevel@tonic-gate * specific page_get_anylist() interface. 15467c478bd9Sstevel@tonic-gate */ 15477c478bd9Sstevel@tonic-gate 15487c478bd9Sstevel@tonic-gate #define PAGE_HASH_SEARCH(index, pp, vp, off) { \ 15497c478bd9Sstevel@tonic-gate for ((pp) = page_hash[(index)]; (pp); (pp) = (pp)->p_hash) { \ 15507c478bd9Sstevel@tonic-gate if ((pp)->p_vnode == (vp) && (pp)->p_offset == (off)) \ 15517c478bd9Sstevel@tonic-gate break; \ 15527c478bd9Sstevel@tonic-gate } \ 15537c478bd9Sstevel@tonic-gate } 15547c478bd9Sstevel@tonic-gate 15557c478bd9Sstevel@tonic-gate 15567c478bd9Sstevel@tonic-gate page_t * 15577c478bd9Sstevel@tonic-gate page_create_io( 15587c478bd9Sstevel@tonic-gate struct vnode *vp, 15597c478bd9Sstevel@tonic-gate u_offset_t off, 15607c478bd9Sstevel@tonic-gate uint_t bytes, 15617c478bd9Sstevel@tonic-gate uint_t flags, 15627c478bd9Sstevel@tonic-gate struct as *as, 15637c478bd9Sstevel@tonic-gate caddr_t vaddr, 15647c478bd9Sstevel@tonic-gate ddi_dma_attr_t *mattr) /* DMA memory attributes if any */ 15657c478bd9Sstevel@tonic-gate { 15667c478bd9Sstevel@tonic-gate page_t *plist = NULL; 15677c478bd9Sstevel@tonic-gate uint_t plist_len = 0; 15687c478bd9Sstevel@tonic-gate pgcnt_t npages; 15697c478bd9Sstevel@tonic-gate page_t *npp = NULL; 15707c478bd9Sstevel@tonic-gate uint_t pages_req; 15717c478bd9Sstevel@tonic-gate page_t *pp; 15727c478bd9Sstevel@tonic-gate kmutex_t *phm = NULL; 15737c478bd9Sstevel@tonic-gate uint_t index; 15747c478bd9Sstevel@tonic-gate 15757c478bd9Sstevel@tonic-gate TRACE_4(TR_FAC_VM, TR_PAGE_CREATE_START, 15767c478bd9Sstevel@tonic-gate "page_create_start:vp %p off %llx bytes %u flags %x", 15777c478bd9Sstevel@tonic-gate vp, off, bytes, flags); 15787c478bd9Sstevel@tonic-gate 15797c478bd9Sstevel@tonic-gate ASSERT((flags & ~(PG_EXCL | PG_WAIT | PG_PHYSCONTIG)) == 0); 15807c478bd9Sstevel@tonic-gate 15817c478bd9Sstevel@tonic-gate pages_req = npages = mmu_btopr(bytes); 15827c478bd9Sstevel@tonic-gate 15837c478bd9Sstevel@tonic-gate /* 15847c478bd9Sstevel@tonic-gate * Do the freemem and pcf accounting. 15857c478bd9Sstevel@tonic-gate */ 15867c478bd9Sstevel@tonic-gate if (!page_create_wait(npages, flags)) { 15877c478bd9Sstevel@tonic-gate return (NULL); 15887c478bd9Sstevel@tonic-gate } 15897c478bd9Sstevel@tonic-gate 15907c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SUCCESS, 15917c478bd9Sstevel@tonic-gate "page_create_success:vp %p off %llx", 15927c478bd9Sstevel@tonic-gate vp, off); 15937c478bd9Sstevel@tonic-gate 15947c478bd9Sstevel@tonic-gate /* 15957c478bd9Sstevel@tonic-gate * If satisfying this request has left us with too little 15967c478bd9Sstevel@tonic-gate * memory, start the wheels turning to get some back. The 15977c478bd9Sstevel@tonic-gate * first clause of the test prevents waking up the pageout 15987c478bd9Sstevel@tonic-gate * daemon in situations where it would decide that there's 15997c478bd9Sstevel@tonic-gate * nothing to do. 16007c478bd9Sstevel@tonic-gate */ 16017c478bd9Sstevel@tonic-gate if (nscan < desscan && freemem < minfree) { 16027c478bd9Sstevel@tonic-gate TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL, 16037c478bd9Sstevel@tonic-gate "pageout_cv_signal:freemem %ld", freemem); 16047c478bd9Sstevel@tonic-gate cv_signal(&proc_pageout->p_cv); 16057c478bd9Sstevel@tonic-gate } 16067c478bd9Sstevel@tonic-gate 16077c478bd9Sstevel@tonic-gate if (flags & PG_PHYSCONTIG) { 16087c478bd9Sstevel@tonic-gate 16097c478bd9Sstevel@tonic-gate plist = page_get_contigpage(&npages, mattr, 1); 16107c478bd9Sstevel@tonic-gate if (plist == NULL) { 16117c478bd9Sstevel@tonic-gate page_create_putback(npages); 16127c478bd9Sstevel@tonic-gate return (NULL); 16137c478bd9Sstevel@tonic-gate } 16147c478bd9Sstevel@tonic-gate 16157c478bd9Sstevel@tonic-gate pp = plist; 16167c478bd9Sstevel@tonic-gate 16177c478bd9Sstevel@tonic-gate do { 16187c478bd9Sstevel@tonic-gate if (!page_hashin(pp, vp, off, NULL)) { 16197c478bd9Sstevel@tonic-gate panic("pg_creat_io: hashin failed %p %p %llx", 16207c478bd9Sstevel@tonic-gate (void *)pp, (void *)vp, off); 16217c478bd9Sstevel@tonic-gate } 16227c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_new); 16237c478bd9Sstevel@tonic-gate off += MMU_PAGESIZE; 16247c478bd9Sstevel@tonic-gate PP_CLRFREE(pp); 16257c478bd9Sstevel@tonic-gate PP_CLRAGED(pp); 16267c478bd9Sstevel@tonic-gate page_set_props(pp, P_REF); 16277c478bd9Sstevel@tonic-gate pp = pp->p_next; 16287c478bd9Sstevel@tonic-gate } while (pp != plist); 16297c478bd9Sstevel@tonic-gate 16307c478bd9Sstevel@tonic-gate if (!npages) { 16317c478bd9Sstevel@tonic-gate check_dma(mattr, plist, pages_req); 16327c478bd9Sstevel@tonic-gate return (plist); 16337c478bd9Sstevel@tonic-gate } else { 16347c478bd9Sstevel@tonic-gate vaddr += (pages_req - npages) << MMU_PAGESHIFT; 16357c478bd9Sstevel@tonic-gate } 16367c478bd9Sstevel@tonic-gate 16377c478bd9Sstevel@tonic-gate /* 16387c478bd9Sstevel@tonic-gate * fall-thru: 16397c478bd9Sstevel@tonic-gate * 16407c478bd9Sstevel@tonic-gate * page_get_contigpage returns when npages <= sgllen. 16417c478bd9Sstevel@tonic-gate * Grab the rest of the non-contig pages below from anylist. 16427c478bd9Sstevel@tonic-gate */ 16437c478bd9Sstevel@tonic-gate } 16447c478bd9Sstevel@tonic-gate 16457c478bd9Sstevel@tonic-gate /* 16467c478bd9Sstevel@tonic-gate * Loop around collecting the requested number of pages. 16477c478bd9Sstevel@tonic-gate * Most of the time, we have to `create' a new page. With 16487c478bd9Sstevel@tonic-gate * this in mind, pull the page off the free list before 16497c478bd9Sstevel@tonic-gate * getting the hash lock. This will minimize the hash 16507c478bd9Sstevel@tonic-gate * lock hold time, nesting, and the like. If it turns 16517c478bd9Sstevel@tonic-gate * out we don't need the page, we put it back at the end. 16527c478bd9Sstevel@tonic-gate */ 16537c478bd9Sstevel@tonic-gate while (npages--) { 16547c478bd9Sstevel@tonic-gate phm = NULL; 16557c478bd9Sstevel@tonic-gate 16567c478bd9Sstevel@tonic-gate index = PAGE_HASH_FUNC(vp, off); 16577c478bd9Sstevel@tonic-gate top: 16587c478bd9Sstevel@tonic-gate ASSERT(phm == NULL); 16597c478bd9Sstevel@tonic-gate ASSERT(index == PAGE_HASH_FUNC(vp, off)); 16607c478bd9Sstevel@tonic-gate ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp))); 16617c478bd9Sstevel@tonic-gate 16627c478bd9Sstevel@tonic-gate if (npp == NULL) { 16637c478bd9Sstevel@tonic-gate /* 16647c478bd9Sstevel@tonic-gate * Try to get the page of any color either from 16657c478bd9Sstevel@tonic-gate * the freelist or from the cache list. 16667c478bd9Sstevel@tonic-gate */ 16677c478bd9Sstevel@tonic-gate npp = page_get_anylist(vp, off, as, vaddr, MMU_PAGESIZE, 16687c478bd9Sstevel@tonic-gate flags & ~PG_MATCH_COLOR, mattr, NULL); 16697c478bd9Sstevel@tonic-gate if (npp == NULL) { 16707c478bd9Sstevel@tonic-gate if (mattr == NULL) { 16717c478bd9Sstevel@tonic-gate /* 16727c478bd9Sstevel@tonic-gate * Not looking for a special page; 16737c478bd9Sstevel@tonic-gate * panic! 16747c478bd9Sstevel@tonic-gate */ 16757c478bd9Sstevel@tonic-gate panic("no page found %d", (int)npages); 16767c478bd9Sstevel@tonic-gate } 16777c478bd9Sstevel@tonic-gate /* 16787c478bd9Sstevel@tonic-gate * No page found! This can happen 16797c478bd9Sstevel@tonic-gate * if we are looking for a page 16807c478bd9Sstevel@tonic-gate * within a specific memory range 16817c478bd9Sstevel@tonic-gate * for DMA purposes. If PG_WAIT is 16827c478bd9Sstevel@tonic-gate * specified then we wait for a 16837c478bd9Sstevel@tonic-gate * while and then try again. The 16847c478bd9Sstevel@tonic-gate * wait could be forever if we 16857c478bd9Sstevel@tonic-gate * don't get the page(s) we need. 16867c478bd9Sstevel@tonic-gate * 16877c478bd9Sstevel@tonic-gate * Note: XXX We really need a mechanism 16887c478bd9Sstevel@tonic-gate * to wait for pages in the desired 16897c478bd9Sstevel@tonic-gate * range. For now, we wait for any 16907c478bd9Sstevel@tonic-gate * pages and see if we can use it. 16917c478bd9Sstevel@tonic-gate */ 16927c478bd9Sstevel@tonic-gate 16937c478bd9Sstevel@tonic-gate if ((mattr != NULL) && (flags & PG_WAIT)) { 16947c478bd9Sstevel@tonic-gate delay(10); 16957c478bd9Sstevel@tonic-gate goto top; 16967c478bd9Sstevel@tonic-gate } 16977c478bd9Sstevel@tonic-gate 16987c478bd9Sstevel@tonic-gate goto fail; /* undo accounting stuff */ 16997c478bd9Sstevel@tonic-gate } 17007c478bd9Sstevel@tonic-gate 17017c478bd9Sstevel@tonic-gate if (PP_ISAGED(npp) == 0) { 17027c478bd9Sstevel@tonic-gate /* 17037c478bd9Sstevel@tonic-gate * Since this page came from the 17047c478bd9Sstevel@tonic-gate * cachelist, we must destroy the 17057c478bd9Sstevel@tonic-gate * old vnode association. 17067c478bd9Sstevel@tonic-gate */ 17077c478bd9Sstevel@tonic-gate page_hashout(npp, (kmutex_t *)NULL); 17087c478bd9Sstevel@tonic-gate } 17097c478bd9Sstevel@tonic-gate } 17107c478bd9Sstevel@tonic-gate 17117c478bd9Sstevel@tonic-gate /* 17127c478bd9Sstevel@tonic-gate * We own this page! 17137c478bd9Sstevel@tonic-gate */ 17147c478bd9Sstevel@tonic-gate ASSERT(PAGE_EXCL(npp)); 17157c478bd9Sstevel@tonic-gate ASSERT(npp->p_vnode == NULL); 17167c478bd9Sstevel@tonic-gate ASSERT(!hat_page_is_mapped(npp)); 17177c478bd9Sstevel@tonic-gate PP_CLRFREE(npp); 17187c478bd9Sstevel@tonic-gate PP_CLRAGED(npp); 17197c478bd9Sstevel@tonic-gate 17207c478bd9Sstevel@tonic-gate /* 17217c478bd9Sstevel@tonic-gate * Here we have a page in our hot little mits and are 17227c478bd9Sstevel@tonic-gate * just waiting to stuff it on the appropriate lists. 17237c478bd9Sstevel@tonic-gate * Get the mutex and check to see if it really does 17247c478bd9Sstevel@tonic-gate * not exist. 17257c478bd9Sstevel@tonic-gate */ 17267c478bd9Sstevel@tonic-gate phm = PAGE_HASH_MUTEX(index); 17277c478bd9Sstevel@tonic-gate mutex_enter(phm); 17287c478bd9Sstevel@tonic-gate PAGE_HASH_SEARCH(index, pp, vp, off); 17297c478bd9Sstevel@tonic-gate if (pp == NULL) { 17307c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_new); 17317c478bd9Sstevel@tonic-gate pp = npp; 17327c478bd9Sstevel@tonic-gate npp = NULL; 17337c478bd9Sstevel@tonic-gate if (!page_hashin(pp, vp, off, phm)) { 17347c478bd9Sstevel@tonic-gate /* 17357c478bd9Sstevel@tonic-gate * Since we hold the page hash mutex and 17367c478bd9Sstevel@tonic-gate * just searched for this page, page_hashin 17377c478bd9Sstevel@tonic-gate * had better not fail. If it does, that 17387c478bd9Sstevel@tonic-gate * means somethread did not follow the 17397c478bd9Sstevel@tonic-gate * page hash mutex rules. Panic now and 17407c478bd9Sstevel@tonic-gate * get it over with. As usual, go down 17417c478bd9Sstevel@tonic-gate * holding all the locks. 17427c478bd9Sstevel@tonic-gate */ 17437c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 17447c478bd9Sstevel@tonic-gate panic("page_create: hashin fail %p %p %llx %p", 17457c478bd9Sstevel@tonic-gate (void *)pp, (void *)vp, off, (void *)phm); 17467c478bd9Sstevel@tonic-gate 17477c478bd9Sstevel@tonic-gate } 17487c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 17497c478bd9Sstevel@tonic-gate mutex_exit(phm); 17507c478bd9Sstevel@tonic-gate phm = NULL; 17517c478bd9Sstevel@tonic-gate 17527c478bd9Sstevel@tonic-gate /* 17537c478bd9Sstevel@tonic-gate * Hat layer locking need not be done to set 17547c478bd9Sstevel@tonic-gate * the following bits since the page is not hashed 17557c478bd9Sstevel@tonic-gate * and was on the free list (i.e., had no mappings). 17567c478bd9Sstevel@tonic-gate * 17577c478bd9Sstevel@tonic-gate * Set the reference bit to protect 17587c478bd9Sstevel@tonic-gate * against immediate pageout 17597c478bd9Sstevel@tonic-gate * 17607c478bd9Sstevel@tonic-gate * XXXmh modify freelist code to set reference 17617c478bd9Sstevel@tonic-gate * bit so we don't have to do it here. 17627c478bd9Sstevel@tonic-gate */ 17637c478bd9Sstevel@tonic-gate page_set_props(pp, P_REF); 17647c478bd9Sstevel@tonic-gate } else { 17657c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(phm)); 17667c478bd9Sstevel@tonic-gate mutex_exit(phm); 17677c478bd9Sstevel@tonic-gate phm = NULL; 17687c478bd9Sstevel@tonic-gate /* 17697c478bd9Sstevel@tonic-gate * NOTE: This should not happen for pages associated 17707c478bd9Sstevel@tonic-gate * with kernel vnode 'kvp'. 17717c478bd9Sstevel@tonic-gate */ 17727c478bd9Sstevel@tonic-gate /* XX64 - to debug why this happens! */ 17737c478bd9Sstevel@tonic-gate ASSERT(vp != &kvp); 17747c478bd9Sstevel@tonic-gate if (vp == &kvp) 17757c478bd9Sstevel@tonic-gate cmn_err(CE_NOTE, 17767c478bd9Sstevel@tonic-gate "page_create: page not expected " 17777c478bd9Sstevel@tonic-gate "in hash list for kernel vnode - pp 0x%p", 17787c478bd9Sstevel@tonic-gate (void *)pp); 17797c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_exists); 17807c478bd9Sstevel@tonic-gate goto fail; 17817c478bd9Sstevel@tonic-gate } 17827c478bd9Sstevel@tonic-gate 17837c478bd9Sstevel@tonic-gate /* 17847c478bd9Sstevel@tonic-gate * Got a page! It is locked. Acquire the i/o 17857c478bd9Sstevel@tonic-gate * lock since we are going to use the p_next and 17867c478bd9Sstevel@tonic-gate * p_prev fields to link the requested pages together. 17877c478bd9Sstevel@tonic-gate */ 17887c478bd9Sstevel@tonic-gate page_io_lock(pp); 17897c478bd9Sstevel@tonic-gate page_add(&plist, pp); 17907c478bd9Sstevel@tonic-gate plist = plist->p_next; 17917c478bd9Sstevel@tonic-gate off += MMU_PAGESIZE; 17927c478bd9Sstevel@tonic-gate vaddr += MMU_PAGESIZE; 17937c478bd9Sstevel@tonic-gate } 17947c478bd9Sstevel@tonic-gate 17957c478bd9Sstevel@tonic-gate check_dma(mattr, plist, pages_req); 17967c478bd9Sstevel@tonic-gate return (plist); 17977c478bd9Sstevel@tonic-gate 17987c478bd9Sstevel@tonic-gate fail: 17997c478bd9Sstevel@tonic-gate if (npp != NULL) { 18007c478bd9Sstevel@tonic-gate /* 18017c478bd9Sstevel@tonic-gate * Did not need this page after all. 18027c478bd9Sstevel@tonic-gate * Put it back on the free list. 18037c478bd9Sstevel@tonic-gate */ 18047c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_putbacks); 18057c478bd9Sstevel@tonic-gate PP_SETFREE(npp); 18067c478bd9Sstevel@tonic-gate PP_SETAGED(npp); 18077c478bd9Sstevel@tonic-gate npp->p_offset = (u_offset_t)-1; 18087c478bd9Sstevel@tonic-gate page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL); 18097c478bd9Sstevel@tonic-gate page_unlock(npp); 18107c478bd9Sstevel@tonic-gate } 18117c478bd9Sstevel@tonic-gate 18127c478bd9Sstevel@tonic-gate /* 18137c478bd9Sstevel@tonic-gate * Give up the pages we already got. 18147c478bd9Sstevel@tonic-gate */ 18157c478bd9Sstevel@tonic-gate while (plist != NULL) { 18167c478bd9Sstevel@tonic-gate pp = plist; 18177c478bd9Sstevel@tonic-gate page_sub(&plist, pp); 18187c478bd9Sstevel@tonic-gate page_io_unlock(pp); 18197c478bd9Sstevel@tonic-gate plist_len++; 18207c478bd9Sstevel@tonic-gate /*LINTED: constant in conditional ctx*/ 18217c478bd9Sstevel@tonic-gate VN_DISPOSE(pp, B_INVAL, 0, kcred); 18227c478bd9Sstevel@tonic-gate } 18237c478bd9Sstevel@tonic-gate 18247c478bd9Sstevel@tonic-gate /* 18257c478bd9Sstevel@tonic-gate * VN_DISPOSE does freemem accounting for the pages in plist 18267c478bd9Sstevel@tonic-gate * by calling page_free. So, we need to undo the pcf accounting 18277c478bd9Sstevel@tonic-gate * for only the remaining pages. 18287c478bd9Sstevel@tonic-gate */ 18297c478bd9Sstevel@tonic-gate VM_STAT_ADD(page_create_putbacks); 18307c478bd9Sstevel@tonic-gate page_create_putback(pages_req - plist_len); 18317c478bd9Sstevel@tonic-gate 18327c478bd9Sstevel@tonic-gate return (NULL); 18337c478bd9Sstevel@tonic-gate } 18347c478bd9Sstevel@tonic-gate 18357c478bd9Sstevel@tonic-gate 18367c478bd9Sstevel@tonic-gate /* 18377c478bd9Sstevel@tonic-gate * Copy the data from the physical page represented by "frompp" to 18387c478bd9Sstevel@tonic-gate * that represented by "topp". ppcopy uses CPU->cpu_caddr1 and 18397c478bd9Sstevel@tonic-gate * CPU->cpu_caddr2. It assumes that no one uses either map at interrupt 18407c478bd9Sstevel@tonic-gate * level and no one sleeps with an active mapping there. 18417c478bd9Sstevel@tonic-gate * 18427c478bd9Sstevel@tonic-gate * Note that the ref/mod bits in the page_t's are not affected by 18437c478bd9Sstevel@tonic-gate * this operation, hence it is up to the caller to update them appropriately. 18447c478bd9Sstevel@tonic-gate */ 18457c478bd9Sstevel@tonic-gate void 18467c478bd9Sstevel@tonic-gate ppcopy(page_t *frompp, page_t *topp) 18477c478bd9Sstevel@tonic-gate { 18487c478bd9Sstevel@tonic-gate caddr_t pp_addr1; 18497c478bd9Sstevel@tonic-gate caddr_t pp_addr2; 18507c478bd9Sstevel@tonic-gate void *pte1; 18517c478bd9Sstevel@tonic-gate void *pte2; 18527c478bd9Sstevel@tonic-gate kmutex_t *ppaddr_mutex; 18537c478bd9Sstevel@tonic-gate 18547c478bd9Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 18557c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(frompp)); 18567c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(topp)); 18577c478bd9Sstevel@tonic-gate 18587c478bd9Sstevel@tonic-gate if (kpm_enable) { 18597c478bd9Sstevel@tonic-gate pp_addr1 = hat_kpm_page2va(frompp, 0); 18607c478bd9Sstevel@tonic-gate pp_addr2 = hat_kpm_page2va(topp, 0); 18617c478bd9Sstevel@tonic-gate kpreempt_disable(); 18627c478bd9Sstevel@tonic-gate } else { 18637c478bd9Sstevel@tonic-gate /* 18647c478bd9Sstevel@tonic-gate * disable pre-emption so that CPU can't change 18657c478bd9Sstevel@tonic-gate */ 18667c478bd9Sstevel@tonic-gate kpreempt_disable(); 18677c478bd9Sstevel@tonic-gate 18687c478bd9Sstevel@tonic-gate pp_addr1 = CPU->cpu_caddr1; 18697c478bd9Sstevel@tonic-gate pp_addr2 = CPU->cpu_caddr2; 18707c478bd9Sstevel@tonic-gate pte1 = (void *)CPU->cpu_caddr1pte; 18717c478bd9Sstevel@tonic-gate pte2 = (void *)CPU->cpu_caddr2pte; 18727c478bd9Sstevel@tonic-gate 18737c478bd9Sstevel@tonic-gate ppaddr_mutex = &CPU->cpu_ppaddr_mutex; 18747c478bd9Sstevel@tonic-gate mutex_enter(ppaddr_mutex); 18757c478bd9Sstevel@tonic-gate 18767c478bd9Sstevel@tonic-gate hat_mempte_remap(page_pptonum(frompp), pp_addr1, pte1, 18777c478bd9Sstevel@tonic-gate PROT_READ | HAT_STORECACHING_OK, HAT_LOAD_NOCONSIST); 18787c478bd9Sstevel@tonic-gate hat_mempte_remap(page_pptonum(topp), pp_addr2, pte2, 18797c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_STORECACHING_OK, 18807c478bd9Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 18817c478bd9Sstevel@tonic-gate } 18827c478bd9Sstevel@tonic-gate 18837c478bd9Sstevel@tonic-gate if (use_sse_pagecopy) 18847c478bd9Sstevel@tonic-gate hwblkpagecopy(pp_addr1, pp_addr2); 18857c478bd9Sstevel@tonic-gate else 18867c478bd9Sstevel@tonic-gate bcopy(pp_addr1, pp_addr2, PAGESIZE); 18877c478bd9Sstevel@tonic-gate 18887c478bd9Sstevel@tonic-gate if (!kpm_enable) 18897c478bd9Sstevel@tonic-gate mutex_exit(ppaddr_mutex); 18907c478bd9Sstevel@tonic-gate kpreempt_enable(); 18917c478bd9Sstevel@tonic-gate } 18927c478bd9Sstevel@tonic-gate 18937c478bd9Sstevel@tonic-gate /* 18947c478bd9Sstevel@tonic-gate * Zero the physical page from off to off + len given by `pp' 18957c478bd9Sstevel@tonic-gate * without changing the reference and modified bits of page. 18967c478bd9Sstevel@tonic-gate * 18977c478bd9Sstevel@tonic-gate * We use this using CPU private page address #2, see ppcopy() for more info. 18987c478bd9Sstevel@tonic-gate * pagezero() must not be called at interrupt level. 18997c478bd9Sstevel@tonic-gate */ 19007c478bd9Sstevel@tonic-gate void 19017c478bd9Sstevel@tonic-gate pagezero(page_t *pp, uint_t off, uint_t len) 19027c478bd9Sstevel@tonic-gate { 19037c478bd9Sstevel@tonic-gate caddr_t pp_addr2; 19047c478bd9Sstevel@tonic-gate void *pte2; 19057c478bd9Sstevel@tonic-gate kmutex_t *ppaddr_mutex; 19067c478bd9Sstevel@tonic-gate 19077c478bd9Sstevel@tonic-gate ASSERT_STACK_ALIGNED(); 19087c478bd9Sstevel@tonic-gate ASSERT(len <= MMU_PAGESIZE); 19097c478bd9Sstevel@tonic-gate ASSERT(off <= MMU_PAGESIZE); 19107c478bd9Sstevel@tonic-gate ASSERT(off + len <= MMU_PAGESIZE); 19117c478bd9Sstevel@tonic-gate ASSERT(PAGE_LOCKED(pp)); 19127c478bd9Sstevel@tonic-gate 19137c478bd9Sstevel@tonic-gate if (kpm_enable) { 19147c478bd9Sstevel@tonic-gate pp_addr2 = hat_kpm_page2va(pp, 0); 19157c478bd9Sstevel@tonic-gate kpreempt_disable(); 19167c478bd9Sstevel@tonic-gate } else { 19177c478bd9Sstevel@tonic-gate kpreempt_disable(); 19187c478bd9Sstevel@tonic-gate 19197c478bd9Sstevel@tonic-gate pp_addr2 = CPU->cpu_caddr2; 19207c478bd9Sstevel@tonic-gate pte2 = (void *)CPU->cpu_caddr2pte; 19217c478bd9Sstevel@tonic-gate 19227c478bd9Sstevel@tonic-gate ppaddr_mutex = &CPU->cpu_ppaddr_mutex; 19237c478bd9Sstevel@tonic-gate mutex_enter(ppaddr_mutex); 19247c478bd9Sstevel@tonic-gate 19257c478bd9Sstevel@tonic-gate hat_mempte_remap(page_pptonum(pp), pp_addr2, pte2, 19267c478bd9Sstevel@tonic-gate PROT_READ | PROT_WRITE | HAT_STORECACHING_OK, 19277c478bd9Sstevel@tonic-gate HAT_LOAD_NOCONSIST); 19287c478bd9Sstevel@tonic-gate } 19297c478bd9Sstevel@tonic-gate 19307c478bd9Sstevel@tonic-gate if (use_sse_pagezero) 19317c478bd9Sstevel@tonic-gate hwblkclr(pp_addr2 + off, len); 19327c478bd9Sstevel@tonic-gate else 19337c478bd9Sstevel@tonic-gate bzero(pp_addr2 + off, len); 19347c478bd9Sstevel@tonic-gate 19357c478bd9Sstevel@tonic-gate if (!kpm_enable) 19367c478bd9Sstevel@tonic-gate mutex_exit(ppaddr_mutex); 19377c478bd9Sstevel@tonic-gate kpreempt_enable(); 19387c478bd9Sstevel@tonic-gate } 19397c478bd9Sstevel@tonic-gate 19407c478bd9Sstevel@tonic-gate /* 19417c478bd9Sstevel@tonic-gate * Platform-dependent page scrub call. 19427c478bd9Sstevel@tonic-gate */ 19437c478bd9Sstevel@tonic-gate void 19447c478bd9Sstevel@tonic-gate pagescrub(page_t *pp, uint_t off, uint_t len) 19457c478bd9Sstevel@tonic-gate { 19467c478bd9Sstevel@tonic-gate /* 19477c478bd9Sstevel@tonic-gate * For now, we rely on the fact that pagezero() will 19487c478bd9Sstevel@tonic-gate * always clear UEs. 19497c478bd9Sstevel@tonic-gate */ 19507c478bd9Sstevel@tonic-gate pagezero(pp, off, len); 19517c478bd9Sstevel@tonic-gate } 19527c478bd9Sstevel@tonic-gate 19537c478bd9Sstevel@tonic-gate /* 19547c478bd9Sstevel@tonic-gate * set up two private addresses for use on a given CPU for use in ppcopy() 19557c478bd9Sstevel@tonic-gate */ 19567c478bd9Sstevel@tonic-gate void 19577c478bd9Sstevel@tonic-gate setup_vaddr_for_ppcopy(struct cpu *cpup) 19587c478bd9Sstevel@tonic-gate { 19597c478bd9Sstevel@tonic-gate void *addr; 19607c478bd9Sstevel@tonic-gate void *pte; 19617c478bd9Sstevel@tonic-gate 19627c478bd9Sstevel@tonic-gate addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP); 19637c478bd9Sstevel@tonic-gate pte = hat_mempte_setup(addr); 19647c478bd9Sstevel@tonic-gate cpup->cpu_caddr1 = addr; 19657c478bd9Sstevel@tonic-gate cpup->cpu_caddr1pte = (pteptr_t)pte; 19667c478bd9Sstevel@tonic-gate 19677c478bd9Sstevel@tonic-gate addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP); 19687c478bd9Sstevel@tonic-gate pte = hat_mempte_setup(addr); 19697c478bd9Sstevel@tonic-gate cpup->cpu_caddr2 = addr; 19707c478bd9Sstevel@tonic-gate cpup->cpu_caddr2pte = (pteptr_t)pte; 19717c478bd9Sstevel@tonic-gate 19727c478bd9Sstevel@tonic-gate mutex_init(&cpup->cpu_ppaddr_mutex, NULL, MUTEX_DEFAULT, NULL); 19737c478bd9Sstevel@tonic-gate } 19747c478bd9Sstevel@tonic-gate 19757c478bd9Sstevel@tonic-gate 19767c478bd9Sstevel@tonic-gate /* 19777c478bd9Sstevel@tonic-gate * Create the pageout scanner thread. The thread has to 19787c478bd9Sstevel@tonic-gate * start at procedure with process pp and priority pri. 19797c478bd9Sstevel@tonic-gate */ 19807c478bd9Sstevel@tonic-gate void 19817c478bd9Sstevel@tonic-gate pageout_init(void (*procedure)(), proc_t *pp, pri_t pri) 19827c478bd9Sstevel@tonic-gate { 19837c478bd9Sstevel@tonic-gate (void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri); 19847c478bd9Sstevel@tonic-gate } 19857c478bd9Sstevel@tonic-gate 19867c478bd9Sstevel@tonic-gate /* 19877c478bd9Sstevel@tonic-gate * Function for flushing D-cache when performing module relocations 19887c478bd9Sstevel@tonic-gate * to an alternate mapping. Unnecessary on Intel / AMD platforms. 19897c478bd9Sstevel@tonic-gate */ 19907c478bd9Sstevel@tonic-gate void 19917c478bd9Sstevel@tonic-gate dcache_flushall() 19927c478bd9Sstevel@tonic-gate {} 1993