/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 * Copyright 2016 Joyent, Inc.
 * Copyright 2022 Garrett D'Amore <garrett@damore.org>
 */
277c478bd9Sstevel@tonic-gate
/*
 * UNIX machine dependent virtual memory support.
 */
317c478bd9Sstevel@tonic-gate
327c478bd9Sstevel@tonic-gate #include <sys/vm.h>
337c478bd9Sstevel@tonic-gate #include <sys/exec.h>
347c478bd9Sstevel@tonic-gate
357c478bd9Sstevel@tonic-gate #include <sys/exechdr.h>
367c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
377c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
387c478bd9Sstevel@tonic-gate #include <sys/archsystm.h>
397c478bd9Sstevel@tonic-gate #include <sys/machsystm.h>
407c478bd9Sstevel@tonic-gate #include <sys/kdi.h>
417c478bd9Sstevel@tonic-gate #include <sys/cpu_module.h>
42d2a70789SRichard Lowe #include <sys/secflags.h>
437c478bd9Sstevel@tonic-gate
447c478bd9Sstevel@tonic-gate #include <vm/hat_sfmmu.h>
457c478bd9Sstevel@tonic-gate
467c478bd9Sstevel@tonic-gate #include <sys/memnode.h>
477c478bd9Sstevel@tonic-gate
487c478bd9Sstevel@tonic-gate #include <sys/mem_config.h>
497c478bd9Sstevel@tonic-gate #include <sys/mem_cage.h>
507c478bd9Sstevel@tonic-gate #include <vm/vm_dep.h>
515d07b933Sdp #include <vm/page.h>
527c478bd9Sstevel@tonic-gate #include <sys/platform_module.h>
537c478bd9Sstevel@tonic-gate
547c478bd9Sstevel@tonic-gate /*
557c478bd9Sstevel@tonic-gate * These variables are set by module specific config routines.
5605d3dc4bSpaulsan * They are only set by modules which will use physical cache page coloring.
577c478bd9Sstevel@tonic-gate */
587c478bd9Sstevel@tonic-gate int do_pg_coloring = 0;
597c478bd9Sstevel@tonic-gate
607c478bd9Sstevel@tonic-gate /*
617c478bd9Sstevel@tonic-gate * These variables can be conveniently patched at kernel load time to
6205d3dc4bSpaulsan * prevent do_pg_coloring from being enabled by
637c478bd9Sstevel@tonic-gate * module specific config routines.
647c478bd9Sstevel@tonic-gate */
657c478bd9Sstevel@tonic-gate
667c478bd9Sstevel@tonic-gate int use_page_coloring = 1;
677c478bd9Sstevel@tonic-gate
687c478bd9Sstevel@tonic-gate /*
697c478bd9Sstevel@tonic-gate * initialized by page_coloring_init()
707c478bd9Sstevel@tonic-gate */
717c478bd9Sstevel@tonic-gate extern uint_t page_colors;
727c478bd9Sstevel@tonic-gate extern uint_t page_colors_mask;
737c478bd9Sstevel@tonic-gate extern uint_t page_coloring_shift;
747c478bd9Sstevel@tonic-gate int cpu_page_colors;
757c478bd9Sstevel@tonic-gate uint_t vac_colors = 0;
767c478bd9Sstevel@tonic-gate uint_t vac_colors_mask = 0;
777c478bd9Sstevel@tonic-gate
785d07b933Sdp /* cpu specific coloring initialization */
795d07b933Sdp extern void page_coloring_init_cpu();
805d07b933Sdp #pragma weak page_coloring_init_cpu
815d07b933Sdp
827c478bd9Sstevel@tonic-gate /*
837c478bd9Sstevel@tonic-gate * get the ecache setsize for the current cpu.
847c478bd9Sstevel@tonic-gate */
857c478bd9Sstevel@tonic-gate #define CPUSETSIZE() (cpunodes[CPU->cpu_id].ecache_setsize)
867c478bd9Sstevel@tonic-gate
877c478bd9Sstevel@tonic-gate plcnt_t plcnt; /* page list count */
887c478bd9Sstevel@tonic-gate
897c478bd9Sstevel@tonic-gate /*
907c478bd9Sstevel@tonic-gate * This variable is set by the cpu module to contain the lowest
917c478bd9Sstevel@tonic-gate * address not affected by the SF_ERRATA_57 workaround. It should
927c478bd9Sstevel@tonic-gate * remain 0 if the workaround is not needed.
937c478bd9Sstevel@tonic-gate */
947c478bd9Sstevel@tonic-gate #if defined(SF_ERRATA_57)
957c478bd9Sstevel@tonic-gate caddr_t errata57_limit;
967c478bd9Sstevel@tonic-gate #endif
977c478bd9Sstevel@tonic-gate
987c478bd9Sstevel@tonic-gate extern void page_relocate_hash(page_t *, page_t *);
997c478bd9Sstevel@tonic-gate
1007c478bd9Sstevel@tonic-gate /*
1017c478bd9Sstevel@tonic-gate * these must be defined in platform specific areas
1027c478bd9Sstevel@tonic-gate */
1037c478bd9Sstevel@tonic-gate extern void map_addr_proc(caddr_t *, size_t, offset_t, int, caddr_t,
1047c478bd9Sstevel@tonic-gate struct proc *, uint_t);
1057c478bd9Sstevel@tonic-gate extern page_t *page_get_freelist(struct vnode *, u_offset_t, struct seg *,
1067c478bd9Sstevel@tonic-gate caddr_t, size_t, uint_t, struct lgrp *);
1077c478bd9Sstevel@tonic-gate /*
1087c478bd9Sstevel@tonic-gate * Convert page frame number to an OBMEM page frame number
1097c478bd9Sstevel@tonic-gate * (i.e. put in the type bits -- zero for this implementation)
1107c478bd9Sstevel@tonic-gate */
1117c478bd9Sstevel@tonic-gate pfn_t
impl_obmem_pfnum(pfn_t pf)1127c478bd9Sstevel@tonic-gate impl_obmem_pfnum(pfn_t pf)
1137c478bd9Sstevel@tonic-gate {
1147c478bd9Sstevel@tonic-gate return (pf);
1157c478bd9Sstevel@tonic-gate }
1167c478bd9Sstevel@tonic-gate
1177c478bd9Sstevel@tonic-gate /*
1187c478bd9Sstevel@tonic-gate * Use physmax to determine the highest physical page of DRAM memory
1197c478bd9Sstevel@tonic-gate * It is assumed that any physical addresses above physmax is in IO space.
1207c478bd9Sstevel@tonic-gate * We don't bother checking the low end because we assume that memory space
1217c478bd9Sstevel@tonic-gate * begins at physical page frame 0.
1227c478bd9Sstevel@tonic-gate *
1237c478bd9Sstevel@tonic-gate * Return 1 if the page frame is onboard DRAM memory, else 0.
1247c478bd9Sstevel@tonic-gate * Returns 0 for nvram so it won't be cached.
1257c478bd9Sstevel@tonic-gate */
1267c478bd9Sstevel@tonic-gate int
pf_is_memory(pfn_t pf)1277c478bd9Sstevel@tonic-gate pf_is_memory(pfn_t pf)
1287c478bd9Sstevel@tonic-gate {
1297c478bd9Sstevel@tonic-gate /* We must be IO space */
1307c478bd9Sstevel@tonic-gate if (pf > physmax)
1317c478bd9Sstevel@tonic-gate return (0);
1327c478bd9Sstevel@tonic-gate
1337c478bd9Sstevel@tonic-gate /* We must be memory space */
1347c478bd9Sstevel@tonic-gate return (1);
1357c478bd9Sstevel@tonic-gate }
1367c478bd9Sstevel@tonic-gate
1377c478bd9Sstevel@tonic-gate /*
1387c478bd9Sstevel@tonic-gate * Handle a pagefault.
1397c478bd9Sstevel@tonic-gate */
1407c478bd9Sstevel@tonic-gate faultcode_t
pagefault(caddr_t addr,enum fault_type type,enum seg_rw rw,int iskernel)1417c478bd9Sstevel@tonic-gate pagefault(caddr_t addr, enum fault_type type, enum seg_rw rw, int iskernel)
1427c478bd9Sstevel@tonic-gate {
1437c478bd9Sstevel@tonic-gate struct as *as;
1447c478bd9Sstevel@tonic-gate struct proc *p;
1457c478bd9Sstevel@tonic-gate faultcode_t res;
1467c478bd9Sstevel@tonic-gate caddr_t base;
1477c478bd9Sstevel@tonic-gate size_t len;
1487c478bd9Sstevel@tonic-gate int err;
1497c478bd9Sstevel@tonic-gate
1507c478bd9Sstevel@tonic-gate if (INVALID_VADDR(addr))
1517c478bd9Sstevel@tonic-gate return (FC_NOMAP);
1527c478bd9Sstevel@tonic-gate
1537c478bd9Sstevel@tonic-gate if (iskernel) {
1547c478bd9Sstevel@tonic-gate as = &kas;
1557c478bd9Sstevel@tonic-gate } else {
1567c478bd9Sstevel@tonic-gate p = curproc;
1577c478bd9Sstevel@tonic-gate as = p->p_as;
1587c478bd9Sstevel@tonic-gate #if defined(SF_ERRATA_57)
1597c478bd9Sstevel@tonic-gate /*
1607c478bd9Sstevel@tonic-gate * Prevent infinite loops due to a segment driver
1617c478bd9Sstevel@tonic-gate * setting the execute permissions and the sfmmu hat
1627c478bd9Sstevel@tonic-gate * silently ignoring them.
1637c478bd9Sstevel@tonic-gate */
1647c478bd9Sstevel@tonic-gate if (rw == S_EXEC && AS_TYPE_64BIT(as) &&
1657c478bd9Sstevel@tonic-gate addr < errata57_limit) {
1667c478bd9Sstevel@tonic-gate res = FC_NOMAP;
1677c478bd9Sstevel@tonic-gate goto out;
1687c478bd9Sstevel@tonic-gate }
1697c478bd9Sstevel@tonic-gate #endif
1707c478bd9Sstevel@tonic-gate }
1717c478bd9Sstevel@tonic-gate
1727c478bd9Sstevel@tonic-gate /*
1737c478bd9Sstevel@tonic-gate * Dispatch pagefault.
1747c478bd9Sstevel@tonic-gate */
1757c478bd9Sstevel@tonic-gate res = as_fault(as->a_hat, as, addr, 1, type, rw);
1767c478bd9Sstevel@tonic-gate
1777c478bd9Sstevel@tonic-gate /*
1787c478bd9Sstevel@tonic-gate * If this isn't a potential unmapped hole in the user's
1797c478bd9Sstevel@tonic-gate * UNIX data or stack segments, just return status info.
1807c478bd9Sstevel@tonic-gate */
1817c478bd9Sstevel@tonic-gate if (!(res == FC_NOMAP && iskernel == 0))
1827c478bd9Sstevel@tonic-gate goto out;
1837c478bd9Sstevel@tonic-gate
1847c478bd9Sstevel@tonic-gate /*
1857c478bd9Sstevel@tonic-gate * Check to see if we happened to faulted on a currently unmapped
1867c478bd9Sstevel@tonic-gate * part of the UNIX data or stack segments. If so, create a zfod
1877c478bd9Sstevel@tonic-gate * mapping there and then try calling the fault routine again.
1887c478bd9Sstevel@tonic-gate */
1897c478bd9Sstevel@tonic-gate base = p->p_brkbase;
1907c478bd9Sstevel@tonic-gate len = p->p_brksize;
1917c478bd9Sstevel@tonic-gate
1927c478bd9Sstevel@tonic-gate if (addr < base || addr >= base + len) { /* data seg? */
1937c478bd9Sstevel@tonic-gate base = (caddr_t)(p->p_usrstack - p->p_stksize);
1947c478bd9Sstevel@tonic-gate len = p->p_stksize;
1957c478bd9Sstevel@tonic-gate if (addr < base || addr >= p->p_usrstack) { /* stack seg? */
1967c478bd9Sstevel@tonic-gate /* not in either UNIX data or stack segments */
1977c478bd9Sstevel@tonic-gate res = FC_NOMAP;
1987c478bd9Sstevel@tonic-gate goto out;
1997c478bd9Sstevel@tonic-gate }
2007c478bd9Sstevel@tonic-gate }
2017c478bd9Sstevel@tonic-gate
2027c478bd9Sstevel@tonic-gate /* the rest of this function implements a 3.X 4.X 5.X compatibility */
2037c478bd9Sstevel@tonic-gate /* This code is probably not needed anymore */
2047c478bd9Sstevel@tonic-gate
2057c478bd9Sstevel@tonic-gate /* expand the gap to the page boundaries on each side */
2067c478bd9Sstevel@tonic-gate len = (((uintptr_t)base + len + PAGEOFFSET) & PAGEMASK) -
2077c478bd9Sstevel@tonic-gate ((uintptr_t)base & PAGEMASK);
2087c478bd9Sstevel@tonic-gate base = (caddr_t)((uintptr_t)base & PAGEMASK);
2097c478bd9Sstevel@tonic-gate
2107c478bd9Sstevel@tonic-gate as_rangelock(as);
2117c478bd9Sstevel@tonic-gate as_purge(as);
2127c478bd9Sstevel@tonic-gate if (as_gap(as, PAGESIZE, &base, &len, AH_CONTAIN, addr) == 0) {
2137c478bd9Sstevel@tonic-gate err = as_map(as, base, len, segvn_create, zfod_argsp);
2147c478bd9Sstevel@tonic-gate as_rangeunlock(as);
2157c478bd9Sstevel@tonic-gate if (err) {
2167c478bd9Sstevel@tonic-gate res = FC_MAKE_ERR(err);
2177c478bd9Sstevel@tonic-gate goto out;
2187c478bd9Sstevel@tonic-gate }
2197c478bd9Sstevel@tonic-gate } else {
2207c478bd9Sstevel@tonic-gate /*
2217c478bd9Sstevel@tonic-gate * This page is already mapped by another thread after we
2227c478bd9Sstevel@tonic-gate * returned from as_fault() above. We just fallthrough
2237c478bd9Sstevel@tonic-gate * as_fault() below.
2247c478bd9Sstevel@tonic-gate */
2257c478bd9Sstevel@tonic-gate as_rangeunlock(as);
2267c478bd9Sstevel@tonic-gate }
2277c478bd9Sstevel@tonic-gate
2287c478bd9Sstevel@tonic-gate res = as_fault(as->a_hat, as, addr, 1, F_INVAL, rw);
2297c478bd9Sstevel@tonic-gate
2307c478bd9Sstevel@tonic-gate out:
2317c478bd9Sstevel@tonic-gate
2327c478bd9Sstevel@tonic-gate return (res);
2337c478bd9Sstevel@tonic-gate }
2347c478bd9Sstevel@tonic-gate
2357c478bd9Sstevel@tonic-gate /*
2367c478bd9Sstevel@tonic-gate * This is the routine which defines the address limit implied
2377c478bd9Sstevel@tonic-gate * by the flag '_MAP_LOW32'. USERLIMIT32 matches the highest
2387c478bd9Sstevel@tonic-gate * mappable address in a 32-bit process on this platform (though
2397c478bd9Sstevel@tonic-gate * perhaps we should make it be UINT32_MAX here?)
2407c478bd9Sstevel@tonic-gate */
2417c478bd9Sstevel@tonic-gate void
map_addr(caddr_t * addrp,size_t len,offset_t off,int vacalign,uint_t flags)2427c478bd9Sstevel@tonic-gate map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
2437c478bd9Sstevel@tonic-gate {
2447c478bd9Sstevel@tonic-gate struct proc *p = curproc;
2457c478bd9Sstevel@tonic-gate caddr_t userlimit = flags & _MAP_LOW32 ?
246986fd29aSsetje (caddr_t)USERLIMIT32 : p->p_as->a_userlimit;
2477c478bd9Sstevel@tonic-gate map_addr_proc(addrp, len, off, vacalign, userlimit, p, flags);
2487c478bd9Sstevel@tonic-gate }
2497c478bd9Sstevel@tonic-gate
2507c478bd9Sstevel@tonic-gate /*
2517c478bd9Sstevel@tonic-gate * Some V9 CPUs have holes in the middle of the 64-bit virtual address range.
2527c478bd9Sstevel@tonic-gate */
2537c478bd9Sstevel@tonic-gate caddr_t hole_start, hole_end;
2547c478bd9Sstevel@tonic-gate
2557c478bd9Sstevel@tonic-gate /*
2567c478bd9Sstevel@tonic-gate * kpm mapping window
2577c478bd9Sstevel@tonic-gate */
2587c478bd9Sstevel@tonic-gate caddr_t kpm_vbase;
2597c478bd9Sstevel@tonic-gate size_t kpm_size;
2607c478bd9Sstevel@tonic-gate uchar_t kpm_size_shift;
2617c478bd9Sstevel@tonic-gate
26246ab9534Smec int valid_va_range_aligned_wraparound;
2637c478bd9Sstevel@tonic-gate /*
26446ab9534Smec * Determine whether [*basep, *basep + *lenp) contains a mappable range of
26546ab9534Smec * addresses at least "minlen" long, where the base of the range is at "off"
26646ab9534Smec * phase from an "align" boundary and there is space for a "redzone"-sized
26746ab9534Smec * redzone on either side of the range. On success, 1 is returned and *basep
26846ab9534Smec * and *lenp are adjusted to describe the acceptable range (including
26946ab9534Smec * the redzone). On failure, 0 is returned.
2707c478bd9Sstevel@tonic-gate */
2717c478bd9Sstevel@tonic-gate int
valid_va_range_aligned(caddr_t * basep,size_t * lenp,size_t minlen,int dir,size_t align,size_t redzone,size_t off)27246ab9534Smec valid_va_range_aligned(caddr_t *basep, size_t *lenp, size_t minlen, int dir,
27346ab9534Smec size_t align, size_t redzone, size_t off)
2747c478bd9Sstevel@tonic-gate {
2757c478bd9Sstevel@tonic-gate caddr_t hi, lo;
27646ab9534Smec size_t tot_len;
27746ab9534Smec
27846ab9534Smec ASSERT(align == 0 ? off == 0 : off < align);
27946ab9534Smec ASSERT(ISP2(align));
28046ab9534Smec ASSERT(align == 0 || align >= PAGESIZE);
2817c478bd9Sstevel@tonic-gate
2827c478bd9Sstevel@tonic-gate lo = *basep;
2837c478bd9Sstevel@tonic-gate hi = lo + *lenp;
28446ab9534Smec tot_len = minlen + 2 * redzone; /* need at least this much space */
2857c478bd9Sstevel@tonic-gate
28646ab9534Smec /* If hi rolled over the top try cutting back. */
2877c478bd9Sstevel@tonic-gate if (hi < lo) {
28846ab9534Smec *lenp = 0UL - (uintptr_t)lo - 1UL;
28946ab9534Smec /* Trying to see if this really happens, and then if so, why */
29046ab9534Smec valid_va_range_aligned_wraparound++;
29146ab9534Smec hi = lo + *lenp;
29246ab9534Smec }
29346ab9534Smec if (*lenp < tot_len) {
2947c478bd9Sstevel@tonic-gate return (0);
29546ab9534Smec }
2967c478bd9Sstevel@tonic-gate
2977c478bd9Sstevel@tonic-gate /*
2987c478bd9Sstevel@tonic-gate * Deal with a possible hole in the address range between
2997c478bd9Sstevel@tonic-gate * hole_start and hole_end that should never be mapped by the MMU.
3007c478bd9Sstevel@tonic-gate */
3017c478bd9Sstevel@tonic-gate
3027c478bd9Sstevel@tonic-gate if (lo < hole_start) {
3037c478bd9Sstevel@tonic-gate if (hi > hole_start)
3047c478bd9Sstevel@tonic-gate if (hi < hole_end)
3057c478bd9Sstevel@tonic-gate hi = hole_start;
3067c478bd9Sstevel@tonic-gate else
3077c478bd9Sstevel@tonic-gate /* lo < hole_start && hi >= hole_end */
3087c478bd9Sstevel@tonic-gate if (dir == AH_LO) {
3097c478bd9Sstevel@tonic-gate /*
3107c478bd9Sstevel@tonic-gate * prefer lowest range
3117c478bd9Sstevel@tonic-gate */
31246ab9534Smec if (hole_start - lo >= tot_len)
3137c478bd9Sstevel@tonic-gate hi = hole_start;
31446ab9534Smec else if (hi - hole_end >= tot_len)
3157c478bd9Sstevel@tonic-gate lo = hole_end;
3167c478bd9Sstevel@tonic-gate else
3177c478bd9Sstevel@tonic-gate return (0);
3187c478bd9Sstevel@tonic-gate } else {
3197c478bd9Sstevel@tonic-gate /*
3207c478bd9Sstevel@tonic-gate * prefer highest range
3217c478bd9Sstevel@tonic-gate */
32246ab9534Smec if (hi - hole_end >= tot_len)
3237c478bd9Sstevel@tonic-gate lo = hole_end;
32446ab9534Smec else if (hole_start - lo >= tot_len)
3257c478bd9Sstevel@tonic-gate hi = hole_start;
3267c478bd9Sstevel@tonic-gate else
3277c478bd9Sstevel@tonic-gate return (0);
3287c478bd9Sstevel@tonic-gate }
3297c478bd9Sstevel@tonic-gate } else {
3307c478bd9Sstevel@tonic-gate /* lo >= hole_start */
3317c478bd9Sstevel@tonic-gate if (hi < hole_end)
3327c478bd9Sstevel@tonic-gate return (0);
3337c478bd9Sstevel@tonic-gate if (lo < hole_end)
3347c478bd9Sstevel@tonic-gate lo = hole_end;
3357c478bd9Sstevel@tonic-gate }
3367c478bd9Sstevel@tonic-gate
33746ab9534Smec /* Check if remaining length is too small */
33846ab9534Smec if (hi - lo < tot_len) {
3397c478bd9Sstevel@tonic-gate return (0);
34046ab9534Smec }
34146ab9534Smec if (align > 1) {
34246ab9534Smec caddr_t tlo = lo + redzone;
34346ab9534Smec caddr_t thi = hi - redzone;
34446ab9534Smec tlo = (caddr_t)P2PHASEUP((uintptr_t)tlo, align, off);
34546ab9534Smec if (tlo < lo + redzone) {
34646ab9534Smec return (0);
34746ab9534Smec }
34846ab9534Smec if (thi < tlo || thi - tlo < minlen) {
34946ab9534Smec return (0);
35046ab9534Smec }
35146ab9534Smec }
3527c478bd9Sstevel@tonic-gate *basep = lo;
3537c478bd9Sstevel@tonic-gate *lenp = hi - lo;
3547c478bd9Sstevel@tonic-gate return (1);
3557c478bd9Sstevel@tonic-gate }
3567c478bd9Sstevel@tonic-gate
35746ab9534Smec /*
35846ab9534Smec * Determine whether [*basep, *basep + *lenp) contains a mappable range of
35946ab9534Smec * addresses at least "minlen" long. On success, 1 is returned and *basep
36046ab9534Smec * and *lenp are adjusted to describe the acceptable range. On failure, 0
36146ab9534Smec * is returned.
36246ab9534Smec */
36346ab9534Smec int
valid_va_range(caddr_t * basep,size_t * lenp,size_t minlen,int dir)36446ab9534Smec valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
36546ab9534Smec {
36646ab9534Smec return (valid_va_range_aligned(basep, lenp, minlen, dir, 0, 0, 0));
36746ab9534Smec }
36846ab9534Smec
/*
 * Size of the region at the bottom of the address space that
 * valid_usr_range() refuses when PROC_SEC_FORBIDNULLMAP is enabled.
 * Default to forbidding the first 64k of address space.  This protects most
 * reasonably sized structures from dereferences through NULL:
 *     ((foo_t *)0)->bar
 */
uintptr_t forbidden_null_mapping_sz = 0x10000;
375d2a70789SRichard Lowe
3767c478bd9Sstevel@tonic-gate /*
3777c478bd9Sstevel@tonic-gate * Determine whether [addr, addr+len] with protections `prot' are valid
3787c478bd9Sstevel@tonic-gate * for a user address space.
3797c478bd9Sstevel@tonic-gate */
3807c478bd9Sstevel@tonic-gate /*ARGSUSED*/
3817c478bd9Sstevel@tonic-gate int
valid_usr_range(caddr_t addr,size_t len,uint_t prot,struct as * as,caddr_t userlimit)3827c478bd9Sstevel@tonic-gate valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
3837c478bd9Sstevel@tonic-gate caddr_t userlimit)
3847c478bd9Sstevel@tonic-gate {
3857c478bd9Sstevel@tonic-gate caddr_t eaddr = addr + len;
3867c478bd9Sstevel@tonic-gate
3877c478bd9Sstevel@tonic-gate if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
3887c478bd9Sstevel@tonic-gate return (RANGE_BADADDR);
3897c478bd9Sstevel@tonic-gate
390d2a70789SRichard Lowe if ((addr <= (caddr_t)forbidden_null_mapping_sz) &&
391a02406b9SPatrick Mooney as->a_proc != NULL &&
392d2a70789SRichard Lowe secflag_enabled(as->a_proc, PROC_SEC_FORBIDNULLMAP))
393d2a70789SRichard Lowe return (RANGE_BADADDR);
394d2a70789SRichard Lowe
3957c478bd9Sstevel@tonic-gate /*
3967c478bd9Sstevel@tonic-gate * Determine if the address range falls within an illegal
3977c478bd9Sstevel@tonic-gate * range of the MMU.
3987c478bd9Sstevel@tonic-gate */
3997c478bd9Sstevel@tonic-gate if (eaddr > hole_start && addr < hole_end)
4007c478bd9Sstevel@tonic-gate return (RANGE_BADADDR);
4017c478bd9Sstevel@tonic-gate
4027c478bd9Sstevel@tonic-gate #if defined(SF_ERRATA_57)
4037c478bd9Sstevel@tonic-gate /*
4047c478bd9Sstevel@tonic-gate * Make sure USERLIMIT isn't raised too high
4057c478bd9Sstevel@tonic-gate */
4067c478bd9Sstevel@tonic-gate ASSERT64(addr <= (caddr_t)0xffffffff80000000ul ||
4077c478bd9Sstevel@tonic-gate errata57_limit == 0);
4087c478bd9Sstevel@tonic-gate
4097c478bd9Sstevel@tonic-gate if (AS_TYPE_64BIT(as) &&
4107c478bd9Sstevel@tonic-gate (addr < errata57_limit) &&
4117c478bd9Sstevel@tonic-gate (prot & PROT_EXEC))
4127c478bd9Sstevel@tonic-gate return (RANGE_BADPROT);
4137c478bd9Sstevel@tonic-gate #endif /* SF_ERRATA57 */
4147c478bd9Sstevel@tonic-gate return (RANGE_OKAY);
4157c478bd9Sstevel@tonic-gate }
4167c478bd9Sstevel@tonic-gate
4177c478bd9Sstevel@tonic-gate /*
4187c478bd9Sstevel@tonic-gate * Routine used to check to see if an a.out can be executed
4197c478bd9Sstevel@tonic-gate * by the current machine/architecture.
4207c478bd9Sstevel@tonic-gate */
4217c478bd9Sstevel@tonic-gate int
chkaout(struct exdata * exp)4227c478bd9Sstevel@tonic-gate chkaout(struct exdata *exp)
4237c478bd9Sstevel@tonic-gate {
4247c478bd9Sstevel@tonic-gate if (exp->ux_mach == M_SPARC)
4257c478bd9Sstevel@tonic-gate return (0);
4267c478bd9Sstevel@tonic-gate else
4277c478bd9Sstevel@tonic-gate return (ENOEXEC);
4287c478bd9Sstevel@tonic-gate }
4297c478bd9Sstevel@tonic-gate
4307c478bd9Sstevel@tonic-gate
431ec25b48fSsusans /*
432ec25b48fSsusans * Return non 0 value if the address may cause a VAC alias with KPM mappings.
433ec25b48fSsusans * KPM selects an address such that it's equal offset modulo shm_alignment and
434ec25b48fSsusans * assumes it can't be in VAC conflict with any larger than PAGESIZE mapping.
435ec25b48fSsusans */
436ec25b48fSsusans int
map_addr_vacalign_check(caddr_t addr,u_offset_t off)437ec25b48fSsusans map_addr_vacalign_check(caddr_t addr, u_offset_t off)
4387c478bd9Sstevel@tonic-gate {
439ec25b48fSsusans if (vac) {
440ec25b48fSsusans return (((uintptr_t)addr ^ off) & shm_alignment - 1);
441ec25b48fSsusans } else {
442ec25b48fSsusans return (0);
4437c478bd9Sstevel@tonic-gate }
4447c478bd9Sstevel@tonic-gate }
4457c478bd9Sstevel@tonic-gate
446ec25b48fSsusans /*
447ec25b48fSsusans * Sanity control. Don't use large pages regardless of user
448ec25b48fSsusans * settings if there's less than priv or shm_lpg_min_physmem memory installed.
449ec25b48fSsusans * The units for this variable is 8K pages.
450ec25b48fSsusans */
451ec25b48fSsusans pgcnt_t shm_lpg_min_physmem = 131072; /* 1GB */
452ec25b48fSsusans pgcnt_t privm_lpg_min_physmem = 131072; /* 1GB */
453ec25b48fSsusans
454e12a8a13Ssusans static size_t
map_pgszheap(struct proc * p,caddr_t addr,size_t len)4557c478bd9Sstevel@tonic-gate map_pgszheap(struct proc *p, caddr_t addr, size_t len)
4567c478bd9Sstevel@tonic-gate {
457ec25b48fSsusans size_t pgsz = MMU_PAGESIZE;
458ec25b48fSsusans int szc;
4597c478bd9Sstevel@tonic-gate
4607c478bd9Sstevel@tonic-gate /*
4617c478bd9Sstevel@tonic-gate * If len is zero, retrieve from proc and don't demote the page size.
462ec25b48fSsusans * Use atleast the default pagesize.
4637c478bd9Sstevel@tonic-gate */
4647c478bd9Sstevel@tonic-gate if (len == 0) {
465ec25b48fSsusans len = p->p_brkbase + p->p_brksize - p->p_bssbase;
4667c478bd9Sstevel@tonic-gate }
467ec25b48fSsusans len = MAX(len, default_uheap_lpsize);
4687c478bd9Sstevel@tonic-gate
469ec25b48fSsusans for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
470ec25b48fSsusans pgsz = hw_page_array[szc].hp_size;
471ec25b48fSsusans if ((disable_auto_data_large_pages & (1 << szc)) ||
472ec25b48fSsusans pgsz > max_uheap_lpsize)
473ec25b48fSsusans continue;
474ec25b48fSsusans if (len >= pgsz) {
475ec25b48fSsusans break;
476ec25b48fSsusans }
4777c478bd9Sstevel@tonic-gate }
4787c478bd9Sstevel@tonic-gate
4797c478bd9Sstevel@tonic-gate /*
480ec25b48fSsusans * If addr == 0 we were called by memcntl() when the
4817c478bd9Sstevel@tonic-gate * size code is 0. Don't set pgsz less than current size.
4827c478bd9Sstevel@tonic-gate */
4837c478bd9Sstevel@tonic-gate if (addr == 0 && (pgsz < hw_page_array[p->p_brkpageszc].hp_size)) {
4847c478bd9Sstevel@tonic-gate pgsz = hw_page_array[p->p_brkpageszc].hp_size;
4857c478bd9Sstevel@tonic-gate }
4867c478bd9Sstevel@tonic-gate
4877c478bd9Sstevel@tonic-gate return (pgsz);
4887c478bd9Sstevel@tonic-gate }
4897c478bd9Sstevel@tonic-gate
490e12a8a13Ssusans static size_t
map_pgszstk(struct proc * p,caddr_t addr,size_t len)4917c478bd9Sstevel@tonic-gate map_pgszstk(struct proc *p, caddr_t addr, size_t len)
4927c478bd9Sstevel@tonic-gate {
493ec25b48fSsusans size_t pgsz = MMU_PAGESIZE;
494ec25b48fSsusans int szc;
4957c478bd9Sstevel@tonic-gate
4967c478bd9Sstevel@tonic-gate /*
4977c478bd9Sstevel@tonic-gate * If len is zero, retrieve from proc and don't demote the page size.
498ec25b48fSsusans * Use atleast the default pagesize.
4997c478bd9Sstevel@tonic-gate */
5007c478bd9Sstevel@tonic-gate if (len == 0) {
5017c478bd9Sstevel@tonic-gate len = p->p_stksize;
5027c478bd9Sstevel@tonic-gate }
503ec25b48fSsusans len = MAX(len, default_ustack_lpsize);
5047c478bd9Sstevel@tonic-gate
505ec25b48fSsusans for (szc = mmu_page_sizes - 1; szc >= 0; szc--) {
506ec25b48fSsusans pgsz = hw_page_array[szc].hp_size;
507ec25b48fSsusans if ((disable_auto_data_large_pages & (1 << szc)) ||
508ec25b48fSsusans pgsz > max_ustack_lpsize)
509ec25b48fSsusans continue;
510ec25b48fSsusans if (len >= pgsz) {
511ec25b48fSsusans break;
512ec25b48fSsusans }
5137c478bd9Sstevel@tonic-gate }
5147c478bd9Sstevel@tonic-gate
5157c478bd9Sstevel@tonic-gate /*
5167c478bd9Sstevel@tonic-gate * If addr == 0 we were called by memcntl() or exec_args() when the
5177c478bd9Sstevel@tonic-gate * size code is 0. Don't set pgsz less than current size.
5187c478bd9Sstevel@tonic-gate */
5197c478bd9Sstevel@tonic-gate if (addr == 0 && (pgsz < hw_page_array[p->p_stkpageszc].hp_size)) {
5207c478bd9Sstevel@tonic-gate pgsz = hw_page_array[p->p_stkpageszc].hp_size;
5217c478bd9Sstevel@tonic-gate }
5227c478bd9Sstevel@tonic-gate
5237c478bd9Sstevel@tonic-gate return (pgsz);
5247c478bd9Sstevel@tonic-gate }
5257c478bd9Sstevel@tonic-gate
526e12a8a13Ssusans static size_t
map_pgszism(caddr_t addr,size_t len)527e12a8a13Ssusans map_pgszism(caddr_t addr, size_t len)
528e12a8a13Ssusans {
529e12a8a13Ssusans uint_t szc;
530e12a8a13Ssusans size_t pgsz;
531e12a8a13Ssusans
532e12a8a13Ssusans for (szc = mmu_page_sizes - 1; szc >= TTE4M; szc--) {
533e12a8a13Ssusans if (disable_ism_large_pages & (1 << szc))
534e12a8a13Ssusans continue;
535e12a8a13Ssusans
536e12a8a13Ssusans pgsz = hw_page_array[szc].hp_size;
537e12a8a13Ssusans if ((len >= pgsz) && IS_P2ALIGNED(addr, pgsz))
538e12a8a13Ssusans return (pgsz);
539e12a8a13Ssusans }
540ec25b48fSsusans
541e12a8a13Ssusans return (DEFAULT_ISM_PAGESIZE);
542e12a8a13Ssusans }
543e12a8a13Ssusans
544e12a8a13Ssusans /*
545e12a8a13Ssusans * Suggest a page size to be used to map a segment of type maptype and length
546e12a8a13Ssusans * len. Returns a page size (not a size code).
547e12a8a13Ssusans */
548ec25b48fSsusans /* ARGSUSED */
549e12a8a13Ssusans size_t
map_pgsz(int maptype,struct proc * p,caddr_t addr,size_t len,int memcntl)550ec25b48fSsusans map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int memcntl)
551e12a8a13Ssusans {
552ec25b48fSsusans size_t pgsz = MMU_PAGESIZE;
553ec25b48fSsusans
554ec25b48fSsusans ASSERT(maptype != MAPPGSZ_VA);
555e12a8a13Ssusans
556ec25b48fSsusans if (maptype != MAPPGSZ_ISM && physmem < privm_lpg_min_physmem) {
557ec25b48fSsusans return (MMU_PAGESIZE);
558ec25b48fSsusans }
559e12a8a13Ssusans
560e12a8a13Ssusans switch (maptype) {
561e12a8a13Ssusans case MAPPGSZ_ISM:
562e12a8a13Ssusans pgsz = map_pgszism(addr, len);
563e12a8a13Ssusans break;
564e12a8a13Ssusans
565e12a8a13Ssusans case MAPPGSZ_STK:
566ec25b48fSsusans if (max_ustack_lpsize > MMU_PAGESIZE) {
567ec25b48fSsusans pgsz = map_pgszstk(p, addr, len);
568ec25b48fSsusans }
569e12a8a13Ssusans break;
570e12a8a13Ssusans
571e12a8a13Ssusans case MAPPGSZ_HEAP:
572ec25b48fSsusans if (max_uheap_lpsize > MMU_PAGESIZE) {
573ec25b48fSsusans pgsz = map_pgszheap(p, addr, len);
574ec25b48fSsusans }
575e12a8a13Ssusans break;
576e12a8a13Ssusans }
577e12a8a13Ssusans return (pgsz);
578e12a8a13Ssusans }
5797c478bd9Sstevel@tonic-gate
5807c478bd9Sstevel@tonic-gate
5817c478bd9Sstevel@tonic-gate /* assumes TTE8K...TTE4M == szc */
5827c478bd9Sstevel@tonic-gate
5837c478bd9Sstevel@tonic-gate static uint_t
map_szcvec(caddr_t addr,size_t size,uintptr_t off,int disable_lpgs,size_t max_lpsize,size_t min_physmem)584ec25b48fSsusans map_szcvec(caddr_t addr, size_t size, uintptr_t off, int disable_lpgs,
585ec25b48fSsusans size_t max_lpsize, size_t min_physmem)
58607b65a64Saguzovsk {
58707b65a64Saguzovsk caddr_t eaddr = addr + size;
58807b65a64Saguzovsk uint_t szcvec = 0;
58907b65a64Saguzovsk caddr_t raddr;
59007b65a64Saguzovsk caddr_t readdr;
59107b65a64Saguzovsk size_t pgsz;
592ec25b48fSsusans int i;
59307b65a64Saguzovsk
594ec25b48fSsusans if (physmem < min_physmem || max_lpsize <= MMU_PAGESIZE) {
59507b65a64Saguzovsk return (0);
59607b65a64Saguzovsk }
59707b65a64Saguzovsk for (i = mmu_page_sizes - 1; i > 0; i--) {
598ec25b48fSsusans if (disable_lpgs & (1 << i)) {
59907b65a64Saguzovsk continue;
60007b65a64Saguzovsk }
60107b65a64Saguzovsk pgsz = page_get_pagesize(i);
602ec25b48fSsusans if (pgsz > max_lpsize) {
60307b65a64Saguzovsk continue;
60407b65a64Saguzovsk }
60507b65a64Saguzovsk raddr = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
60607b65a64Saguzovsk readdr = (caddr_t)P2ALIGN((uintptr_t)eaddr, pgsz);
60707b65a64Saguzovsk if (raddr < addr || raddr >= readdr) {
60807b65a64Saguzovsk continue;
60907b65a64Saguzovsk }
61007b65a64Saguzovsk if (P2PHASE((uintptr_t)addr ^ off, pgsz)) {
61107b65a64Saguzovsk continue;
61207b65a64Saguzovsk }
61307b65a64Saguzovsk szcvec |= (1 << i);
61407b65a64Saguzovsk /*
61507b65a64Saguzovsk * And or in the remaining enabled page sizes.
61607b65a64Saguzovsk */
617ec25b48fSsusans szcvec |= P2PHASE(~disable_lpgs, (1 << i));
61807b65a64Saguzovsk szcvec &= ~1; /* no need to return 8K pagesize */
61907b65a64Saguzovsk break;
62007b65a64Saguzovsk }
62107b65a64Saguzovsk return (szcvec);
62207b65a64Saguzovsk }
62307b65a64Saguzovsk
624ec25b48fSsusans /*
625ec25b48fSsusans * Return a bit vector of large page size codes that
626ec25b48fSsusans * can be used to map [addr, addr + len) region.
627ec25b48fSsusans */
628ec25b48fSsusans /* ARGSUSED */
629ec25b48fSsusans uint_t
map_pgszcvec(caddr_t addr,size_t size,uintptr_t off,int flags,int type,int memcntl)630ec25b48fSsusans map_pgszcvec(caddr_t addr, size_t size, uintptr_t off, int flags, int type,
631ec25b48fSsusans int memcntl)
632ec25b48fSsusans {
633ec25b48fSsusans if (flags & MAP_TEXT) {
634986fd29aSsetje return (map_szcvec(addr, size, off,
635986fd29aSsetje disable_auto_text_large_pages,
636ec25b48fSsusans max_utext_lpsize, shm_lpg_min_physmem));
637ec25b48fSsusans
638ec25b48fSsusans } else if (flags & MAP_INITDATA) {
639986fd29aSsetje return (map_szcvec(addr, size, off,
640986fd29aSsetje disable_auto_data_large_pages,
641ec25b48fSsusans max_uidata_lpsize, privm_lpg_min_physmem));
642ec25b48fSsusans
643ec25b48fSsusans } else if (type == MAPPGSZC_SHM) {
644986fd29aSsetje return (map_szcvec(addr, size, off,
645986fd29aSsetje disable_auto_data_large_pages,
646ec25b48fSsusans max_shm_lpsize, shm_lpg_min_physmem));
647ec25b48fSsusans
648ec25b48fSsusans } else if (type == MAPPGSZC_HEAP) {
649986fd29aSsetje return (map_szcvec(addr, size, off,
650986fd29aSsetje disable_auto_data_large_pages,
651ec25b48fSsusans max_uheap_lpsize, privm_lpg_min_physmem));
652ec25b48fSsusans
653ec25b48fSsusans } else if (type == MAPPGSZC_STACK) {
654986fd29aSsetje return (map_szcvec(addr, size, off,
655986fd29aSsetje disable_auto_data_large_pages,
656ec25b48fSsusans max_ustack_lpsize, privm_lpg_min_physmem));
657ec25b48fSsusans
658ec25b48fSsusans } else {
659986fd29aSsetje return (map_szcvec(addr, size, off,
660986fd29aSsetje disable_auto_data_large_pages,
661ec25b48fSsusans max_privmap_lpsize, privm_lpg_min_physmem));
662ec25b48fSsusans }
663ec25b48fSsusans }
664ec25b48fSsusans
665d94ffb28Sjmcp /*
666d94ffb28Sjmcp * Anchored in the table below are counters used to keep track
667d94ffb28Sjmcp * of free contiguous physical memory. Each element of the table contains
668d94ffb28Sjmcp * the array of counters, the size of array which is allocated during
669d94ffb28Sjmcp * startup based on physmax and a shift value used to convert a pagenum
670d94ffb28Sjmcp * into a counter array index or vice versa. The table has page size
671d94ffb28Sjmcp * for rows and region size for columns:
672d94ffb28Sjmcp *
673d94ffb28Sjmcp * page_counters[page_size][region_size]
674d94ffb28Sjmcp *
675*9174bfaaSGarrett D'Amore * page_size: TTE size code of pages on page_size freelist.
676d94ffb28Sjmcp *
 *	region_size:	TTE size code of a candidate larger page made up
 *			of contiguous free page_size pages.
679d94ffb28Sjmcp *
 * As you go across a page_size row increasing region_size each
 * element keeps track of how many (region_size - 1) size groups
 * made up of page_size free pages can be coalesced into a
 * region_size page. Yuck! Let's try an example:
684d94ffb28Sjmcp *
 *	page_counters[1][3] is the table element used for identifying
 *	candidate 4M pages from contiguous pages off the 64K free list.
 *	Each index in the page_counters[1][3].array spans 4M. It's the
 *	number of free 512K size (region_size - 1) groups of contiguous
 *	64K free pages.	So when page_counters[1][3].counters[n] == 8
 *	we know we have a candidate 4M page made up of 512K size groups
 *	of 64K free pages.
692d94ffb28Sjmcp */
693d94ffb28Sjmcp
694d94ffb28Sjmcp /*
695d94ffb28Sjmcp * Per page size free lists. 3rd (max_mem_nodes) and 4th (page coloring bins)
696d94ffb28Sjmcp * dimensions are allocated dynamically.
697d94ffb28Sjmcp */
698d94ffb28Sjmcp page_t ***page_freelists[MMU_PAGE_SIZES][MAX_MEM_TYPES];
699d94ffb28Sjmcp
7007c478bd9Sstevel@tonic-gate /*
7017c478bd9Sstevel@tonic-gate * For now there is only a single size cache list.
7027c478bd9Sstevel@tonic-gate * Allocated dynamically.
7037c478bd9Sstevel@tonic-gate */
7047c478bd9Sstevel@tonic-gate page_t ***page_cachelists[MAX_MEM_TYPES];
7057c478bd9Sstevel@tonic-gate
/*
 * Page list mutex arrays.  ndata_alloc_page_mutexs() points each of the
 * NPC_MUTEX entries of both arrays at its own run of max_mem_nodes
 * mutexes.
 * NOTE(review): presumably fpc_mutex guards the page freelists and
 * cpc_mutex the page cachelists -- confirm against the users.
 */
kmutex_t *fpc_mutex[NPC_MUTEX];
kmutex_t *cpc_mutex[NPC_MUTEX];
7087c478bd9Sstevel@tonic-gate
709986fd29aSsetje /*
710986fd29aSsetje * Calculate space needed for page freelists and counters
711986fd29aSsetje */
712986fd29aSsetje size_t
calc_free_pagelist_sz(void)713986fd29aSsetje calc_free_pagelist_sz(void)
7147c478bd9Sstevel@tonic-gate {
715986fd29aSsetje int szc;
716986fd29aSsetje size_t alloc_sz, cache_sz, free_sz;
7177c478bd9Sstevel@tonic-gate
718986fd29aSsetje /*
719986fd29aSsetje * one cachelist per color, node, and type
720986fd29aSsetje */
721986fd29aSsetje cache_sz = (page_get_pagecolors(0) * sizeof (page_t *)) +
722986fd29aSsetje sizeof (page_t **);
723986fd29aSsetje cache_sz *= max_mem_nodes * MAX_MEM_TYPES;
724986fd29aSsetje
725986fd29aSsetje /*
726986fd29aSsetje * one freelist per size, color, node, and type
727986fd29aSsetje */
728986fd29aSsetje free_sz = sizeof (page_t **);
729986fd29aSsetje for (szc = 0; szc < mmu_page_sizes; szc++)
730986fd29aSsetje free_sz += sizeof (page_t *) * page_get_pagecolors(szc);
731986fd29aSsetje free_sz *= max_mem_nodes * MAX_MEM_TYPES;
732986fd29aSsetje
733986fd29aSsetje alloc_sz = cache_sz + free_sz + page_ctrs_sz();
734986fd29aSsetje return (alloc_sz);
735986fd29aSsetje }
736986fd29aSsetje
737986fd29aSsetje caddr_t
alloc_page_freelists(caddr_t alloc_base)738986fd29aSsetje alloc_page_freelists(caddr_t alloc_base)
739986fd29aSsetje {
740986fd29aSsetje int mnode, mtype;
741986fd29aSsetje int szc, clrs;
7427c478bd9Sstevel@tonic-gate
7437c478bd9Sstevel@tonic-gate /*
7447c478bd9Sstevel@tonic-gate * We only support small pages in the cachelist.
7457c478bd9Sstevel@tonic-gate */
7467c478bd9Sstevel@tonic-gate for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
747986fd29aSsetje page_cachelists[mtype] = (page_t ***)alloc_base;
748986fd29aSsetje alloc_base += (max_mem_nodes * sizeof (page_t **));
749986fd29aSsetje for (mnode = 0; mnode < max_mem_nodes; mnode++) {
750986fd29aSsetje page_cachelists[mtype][mnode] = (page_t **)alloc_base;
751986fd29aSsetje alloc_base +=
752986fd29aSsetje (page_get_pagecolors(0) * sizeof (page_t *));
7537c478bd9Sstevel@tonic-gate }
7547c478bd9Sstevel@tonic-gate }
7557c478bd9Sstevel@tonic-gate
756986fd29aSsetje /*
757986fd29aSsetje * Allocate freelists bins for all
758986fd29aSsetje * supported page sizes.
759986fd29aSsetje */
760986fd29aSsetje for (szc = 0; szc < mmu_page_sizes; szc++) {
761986fd29aSsetje clrs = page_get_pagecolors(szc);
762986fd29aSsetje for (mtype = 0; mtype < MAX_MEM_TYPES; mtype++) {
763d94ffb28Sjmcp page_freelists[szc][mtype] = (page_t ***)alloc_base;
764986fd29aSsetje alloc_base += (max_mem_nodes * sizeof (page_t **));
765986fd29aSsetje for (mnode = 0; mnode < max_mem_nodes; mnode++) {
766d94ffb28Sjmcp page_freelists[szc][mtype][mnode] =
767986fd29aSsetje (page_t **)alloc_base;
768986fd29aSsetje alloc_base += (clrs * (sizeof (page_t *)));
769986fd29aSsetje }
770986fd29aSsetje }
771986fd29aSsetje }
7727c478bd9Sstevel@tonic-gate
773986fd29aSsetje alloc_base = page_ctrs_alloc(alloc_base);
7747c478bd9Sstevel@tonic-gate return (alloc_base);
7757c478bd9Sstevel@tonic-gate }
7767c478bd9Sstevel@tonic-gate
7777c478bd9Sstevel@tonic-gate /*
778986fd29aSsetje * Allocate page_freelists locks for a memnode from the nucleus data
779986fd29aSsetje * area. This is the first time that mmu_page_sizes is used during
780986fd29aSsetje * bootup, so check mmu_page_sizes initialization.
7817c478bd9Sstevel@tonic-gate */
7827c478bd9Sstevel@tonic-gate int
ndata_alloc_page_mutexs(struct memlist * ndata)783986fd29aSsetje ndata_alloc_page_mutexs(struct memlist *ndata)
7847c478bd9Sstevel@tonic-gate {
7857c478bd9Sstevel@tonic-gate size_t alloc_sz;
7867c478bd9Sstevel@tonic-gate caddr_t alloc_base;
787986fd29aSsetje int i;
788986fd29aSsetje void page_coloring_init();
7897c478bd9Sstevel@tonic-gate
790986fd29aSsetje page_coloring_init();
7917c478bd9Sstevel@tonic-gate if (&mmu_init_mmu_page_sizes) {
792986fd29aSsetje if (!mmu_init_mmu_page_sizes(0)) {
7937c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "mmu_page_sizes %d not initialized",
7947c478bd9Sstevel@tonic-gate mmu_page_sizes);
7957c478bd9Sstevel@tonic-gate }
7967c478bd9Sstevel@tonic-gate }
7977c478bd9Sstevel@tonic-gate ASSERT(mmu_page_sizes >= DEFAULT_MMU_PAGE_SIZES);
7987c478bd9Sstevel@tonic-gate
799986fd29aSsetje /* fpc_mutex and cpc_mutex */
800986fd29aSsetje alloc_sz = 2 * NPC_MUTEX * max_mem_nodes * sizeof (kmutex_t);
8017c478bd9Sstevel@tonic-gate
8027c478bd9Sstevel@tonic-gate alloc_base = ndata_alloc(ndata, alloc_sz, ecache_alignsize);
8037c478bd9Sstevel@tonic-gate if (alloc_base == NULL)
8047c478bd9Sstevel@tonic-gate return (-1);
8057c478bd9Sstevel@tonic-gate
806986fd29aSsetje ASSERT(((uintptr_t)alloc_base & (ecache_alignsize - 1)) == 0);
8077c478bd9Sstevel@tonic-gate
808986fd29aSsetje for (i = 0; i < NPC_MUTEX; i++) {
809986fd29aSsetje fpc_mutex[i] = (kmutex_t *)alloc_base;
810986fd29aSsetje alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
811986fd29aSsetje cpc_mutex[i] = (kmutex_t *)alloc_base;
812986fd29aSsetje alloc_base += (sizeof (kmutex_t) * max_mem_nodes);
813986fd29aSsetje }
8147c478bd9Sstevel@tonic-gate return (0);
8157c478bd9Sstevel@tonic-gate }
8167c478bd9Sstevel@tonic-gate
8177c478bd9Sstevel@tonic-gate /*
8187c478bd9Sstevel@tonic-gate * To select our starting bin, we stride through the bins with a stride
8197c478bd9Sstevel@tonic-gate * of 337. Why 337? It's prime, it's largeish, and it performs well both
8207c478bd9Sstevel@tonic-gate * in simulation and practice for different workloads on varying cache sizes.
8217c478bd9Sstevel@tonic-gate */
8227c478bd9Sstevel@tonic-gate uint32_t color_start_current = 0;
8237c478bd9Sstevel@tonic-gate uint32_t color_start_stride = 337;
8247c478bd9Sstevel@tonic-gate int color_start_random = 0;
8257c478bd9Sstevel@tonic-gate
8267c478bd9Sstevel@tonic-gate /* ARGSUSED */
8277c478bd9Sstevel@tonic-gate uint_t
get_color_start(struct as * as)8287c478bd9Sstevel@tonic-gate get_color_start(struct as *as)
8297c478bd9Sstevel@tonic-gate {
8307c478bd9Sstevel@tonic-gate uint32_t old, new;
8317c478bd9Sstevel@tonic-gate
8327c478bd9Sstevel@tonic-gate if (consistent_coloring == 2 || color_start_random) {
8337c478bd9Sstevel@tonic-gate return ((uint_t)(((gettick()) << (vac_shift - MMU_PAGESHIFT)) &
8345d07b933Sdp (hw_page_array[0].hp_colors - 1)));
8357c478bd9Sstevel@tonic-gate }
8367c478bd9Sstevel@tonic-gate
8377c478bd9Sstevel@tonic-gate do {
8387c478bd9Sstevel@tonic-gate old = color_start_current;
8397c478bd9Sstevel@tonic-gate new = old + (color_start_stride << (vac_shift - MMU_PAGESHIFT));
84075d94465SJosef 'Jeff' Sipek } while (atomic_cas_32(&color_start_current, old, new) != old);
8417c478bd9Sstevel@tonic-gate
8427c478bd9Sstevel@tonic-gate return ((uint_t)(new));
8437c478bd9Sstevel@tonic-gate }
8447c478bd9Sstevel@tonic-gate
8457c478bd9Sstevel@tonic-gate /*
8467c478bd9Sstevel@tonic-gate * Called once at startup from kphysm_init() -- before memialloc()
8477c478bd9Sstevel@tonic-gate * is invoked to do the 1st page_free()/page_freelist_add().
8487c478bd9Sstevel@tonic-gate *
8497c478bd9Sstevel@tonic-gate * initializes page_colors and page_colors_mask based on ecache_setsize.
8507c478bd9Sstevel@tonic-gate *
8517c478bd9Sstevel@tonic-gate * Also initializes the counter locks.
8527c478bd9Sstevel@tonic-gate */
8537c478bd9Sstevel@tonic-gate void
page_coloring_init()8547c478bd9Sstevel@tonic-gate page_coloring_init()
8557c478bd9Sstevel@tonic-gate {
8565d07b933Sdp int a, i;
8575d07b933Sdp uint_t colors;
8587c478bd9Sstevel@tonic-gate
8597c478bd9Sstevel@tonic-gate if (do_pg_coloring == 0) {
8607c478bd9Sstevel@tonic-gate page_colors = 1;
861102033aaSdp for (i = 0; i < mmu_page_sizes; i++) {
862102033aaSdp colorequivszc[i] = 0;
8635d07b933Sdp hw_page_array[i].hp_colors = 1;
864102033aaSdp }
8657c478bd9Sstevel@tonic-gate return;
8667c478bd9Sstevel@tonic-gate }
8677c478bd9Sstevel@tonic-gate
8687c478bd9Sstevel@tonic-gate /*
8697c478bd9Sstevel@tonic-gate * Calculate page_colors from ecache_setsize. ecache_setsize contains
8707c478bd9Sstevel@tonic-gate * the max ecache setsize of all cpus configured in the system or, for
8717c478bd9Sstevel@tonic-gate * cheetah+ systems, the max possible ecache setsize for all possible
8727c478bd9Sstevel@tonic-gate * cheetah+ cpus.
8737c478bd9Sstevel@tonic-gate */
8747c478bd9Sstevel@tonic-gate page_colors = ecache_setsize / MMU_PAGESIZE;
8757c478bd9Sstevel@tonic-gate page_colors_mask = page_colors - 1;
8767c478bd9Sstevel@tonic-gate
8775d07b933Sdp vac_colors = vac_size / MMU_PAGESIZE;
8785d07b933Sdp vac_colors_mask = vac_colors -1;
8795d07b933Sdp
8805d07b933Sdp page_coloring_shift = 0;
8815d07b933Sdp a = ecache_setsize;
8825d07b933Sdp while (a >>= 1) {
8835d07b933Sdp page_coloring_shift++;
8845d07b933Sdp }
8855d07b933Sdp
8865d07b933Sdp /* initialize number of colors per page size */
8875d07b933Sdp for (i = 0; i < mmu_page_sizes; i++) {
8885d07b933Sdp hw_page_array[i].hp_colors = (page_colors_mask >>
8895d07b933Sdp (hw_page_array[i].hp_shift - hw_page_array[0].hp_shift))
8905d07b933Sdp + 1;
891102033aaSdp colorequivszc[i] = 0;
8925d07b933Sdp }
8935d07b933Sdp
8947c478bd9Sstevel@tonic-gate /*
8957c478bd9Sstevel@tonic-gate * initialize cpu_page_colors if ecache setsizes are homogenous.
8967c478bd9Sstevel@tonic-gate * cpu_page_colors set to -1 during DR operation or during startup
8977c478bd9Sstevel@tonic-gate * if setsizes are heterogenous.
8987c478bd9Sstevel@tonic-gate *
8997c478bd9Sstevel@tonic-gate * The value of cpu_page_colors determines if additional color bins
9007c478bd9Sstevel@tonic-gate * need to be checked for a particular color in the page_get routines.
9017c478bd9Sstevel@tonic-gate */
902102033aaSdp if (cpu_setsize > 0 && cpu_page_colors == 0 &&
903102033aaSdp cpu_setsize < ecache_setsize) {
9047c478bd9Sstevel@tonic-gate cpu_page_colors = cpu_setsize / MMU_PAGESIZE;
9055d07b933Sdp a = lowbit(page_colors) - lowbit(cpu_page_colors);
9065d07b933Sdp ASSERT(a > 0);
9075d07b933Sdp ASSERT(a < 16);
9085d07b933Sdp
9095d07b933Sdp for (i = 0; i < mmu_page_sizes; i++) {
9105d07b933Sdp if ((colors = hw_page_array[i].hp_colors) <= 1) {
9115d07b933Sdp continue;
9125d07b933Sdp }
9135d07b933Sdp while ((colors >> a) == 0)
9145d07b933Sdp a--;
9155d07b933Sdp ASSERT(a >= 0);
9167c478bd9Sstevel@tonic-gate
9175d07b933Sdp /* higher 4 bits encodes color equiv mask */
9185d07b933Sdp colorequivszc[i] = (a << 4);
9195d07b933Sdp }
9205d07b933Sdp }
9217c478bd9Sstevel@tonic-gate
9225d07b933Sdp /* do cpu specific color initialization */
9235d07b933Sdp if (&page_coloring_init_cpu) {
9245d07b933Sdp page_coloring_init_cpu();
9257c478bd9Sstevel@tonic-gate }
9267c478bd9Sstevel@tonic-gate }
9277c478bd9Sstevel@tonic-gate
9287c478bd9Sstevel@tonic-gate int
bp_color(struct buf * bp)9297c478bd9Sstevel@tonic-gate bp_color(struct buf *bp)
9307c478bd9Sstevel@tonic-gate {
9317c478bd9Sstevel@tonic-gate int color = -1;
9327c478bd9Sstevel@tonic-gate
9337c478bd9Sstevel@tonic-gate if (vac) {
9347c478bd9Sstevel@tonic-gate if ((bp->b_flags & B_PAGEIO) != 0) {
9357c478bd9Sstevel@tonic-gate color = sfmmu_get_ppvcolor(bp->b_pages);
9367c478bd9Sstevel@tonic-gate } else if (bp->b_un.b_addr != NULL) {
9377c478bd9Sstevel@tonic-gate color = sfmmu_get_addrvcolor(bp->b_un.b_addr);
9387c478bd9Sstevel@tonic-gate }
9397c478bd9Sstevel@tonic-gate }
9407c478bd9Sstevel@tonic-gate return (color < 0 ? 0 : ptob(color));
9417c478bd9Sstevel@tonic-gate }
9427c478bd9Sstevel@tonic-gate
/*
 * D-cache flush hook used when module relocations are performed through
 * an alternate mapping.  For now this is stubbed out on every platform
 * except sun4u; here it simply defers to sfmmu_cache_flushall().
 */
void
dcache_flushall()
{
	sfmmu_cache_flushall();
}
9537c478bd9Sstevel@tonic-gate
/*
 * Report whether the ranges [va1, va1 + sz1) and [va2, va2 + sz2)
 * overlap.  Returns 0 when one range ends at or before the start of the
 * other, and 1 otherwise (two ranges with the same start address are
 * always reported as overlapping).
 *
 * The end of the lower range is never computed as va + sz, because that
 * sum can wrap around the top of the address space and make a huge range
 * look as if it ended below the other one.  The size is compared against
 * the gap between the two start addresses instead, which cannot wrap
 * since the subtraction is only done once the ordering is known.
 */
static int
kdi_range_overlap(uintptr_t va1, size_t sz1, uintptr_t va2, size_t sz2)
{
	/* range 1 starts lower and fits entirely in the gap before range 2 */
	if (va1 < va2 && sz1 <= va2 - va1)
		return (0);

	/* range 2 starts lower and fits entirely in the gap before range 1 */
	if (va2 < va1 && sz2 <= va1 - va2)
		return (0);

	return (1);
}
9657c478bd9Sstevel@tonic-gate
9667c478bd9Sstevel@tonic-gate /*
9677c478bd9Sstevel@tonic-gate * Return the number of bytes, relative to the beginning of a given range, that
9687c478bd9Sstevel@tonic-gate * are non-toxic (can be read from and written to with relative impunity).
9697c478bd9Sstevel@tonic-gate */
9707c478bd9Sstevel@tonic-gate size_t
kdi_range_is_nontoxic(uintptr_t va,size_t sz,int write)9717c478bd9Sstevel@tonic-gate kdi_range_is_nontoxic(uintptr_t va, size_t sz, int write)
9727c478bd9Sstevel@tonic-gate {
9737c478bd9Sstevel@tonic-gate /* OBP reads are harmless, but we don't want people writing there */
9747c478bd9Sstevel@tonic-gate if (write && kdi_range_overlap(va, sz, OFW_START_ADDR, OFW_END_ADDR -
9757c478bd9Sstevel@tonic-gate OFW_START_ADDR + 1))
9767c478bd9Sstevel@tonic-gate return (va < OFW_START_ADDR ? OFW_START_ADDR - va : 0);
9777c478bd9Sstevel@tonic-gate
9787c478bd9Sstevel@tonic-gate if (kdi_range_overlap(va, sz, PIOMAPBASE, PIOMAPSIZE))
9797c478bd9Sstevel@tonic-gate return (va < PIOMAPBASE ? PIOMAPBASE - va : 0);
9807c478bd9Sstevel@tonic-gate
9817c478bd9Sstevel@tonic-gate return (sz); /* no overlap */
9827c478bd9Sstevel@tonic-gate }
9837c478bd9Sstevel@tonic-gate
9847c478bd9Sstevel@tonic-gate /*
9857c478bd9Sstevel@tonic-gate * Minimum physmem required for enabling large pages for kernel heap
9867c478bd9Sstevel@tonic-gate * Currently we do not enable lp for kmem on systems with less
9877c478bd9Sstevel@tonic-gate * than 1GB of memory. This value can be changed via /etc/system
9887c478bd9Sstevel@tonic-gate */
9897c478bd9Sstevel@tonic-gate size_t segkmem_lpminphysmem = 0x40000000; /* 1GB */
9907c478bd9Sstevel@tonic-gate
9917c478bd9Sstevel@tonic-gate /*
9927c478bd9Sstevel@tonic-gate * this function chooses large page size for kernel heap
9937c478bd9Sstevel@tonic-gate */
9947c478bd9Sstevel@tonic-gate size_t
get_segkmem_lpsize(size_t lpsize)9957c478bd9Sstevel@tonic-gate get_segkmem_lpsize(size_t lpsize)
9967c478bd9Sstevel@tonic-gate {
9977c478bd9Sstevel@tonic-gate size_t memtotal = physmem * PAGESIZE;
998d0662dbfSelowe size_t mmusz;
999d0662dbfSelowe uint_t szc;
10007c478bd9Sstevel@tonic-gate
10017c478bd9Sstevel@tonic-gate if (memtotal < segkmem_lpminphysmem)
10027c478bd9Sstevel@tonic-gate return (PAGESIZE);
10037c478bd9Sstevel@tonic-gate
10047c478bd9Sstevel@tonic-gate if (plat_lpkmem_is_supported != NULL &&
10057c478bd9Sstevel@tonic-gate plat_lpkmem_is_supported() == 0)
10067c478bd9Sstevel@tonic-gate return (PAGESIZE);
10077c478bd9Sstevel@tonic-gate
1008d0662dbfSelowe mmusz = mmu_get_kernel_lpsize(lpsize);
1009d0662dbfSelowe szc = page_szc(mmusz);
1010d0662dbfSelowe
1011d0662dbfSelowe while (szc) {
1012d0662dbfSelowe if (!(disable_large_pages & (1 << szc)))
1013d0662dbfSelowe return (page_get_pagesize(szc));
1014d0662dbfSelowe szc--;
1015d0662dbfSelowe }
1016d0662dbfSelowe return (PAGESIZE);
10177c478bd9Sstevel@tonic-gate }
1018