/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/*	All Rights Reserved   */

/*
 * Portions of this source code were derived from Berkeley 4.3 BSD
 * under license from the Regents of the University of California.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * UNIX machine dependent virtual memory support.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/user.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/vmem.h>
#include <sys/buf.h>
#include <sys/cpuvar.h>
#include <sys/lgrp.h>
#include <sys/disp.h>
#include <sys/vm.h>
#include <sys/mman.h>
#include <sys/vnode.h>
#include <sys/cred.h>
#include <sys/exec.h>
#include <sys/exechdr.h>
#include <sys/debug.h>

#include <vm/hat.h>
#include <vm/as.h>
#include <vm/seg.h>
#include <vm/seg_kp.h>
#include <vm/seg_vn.h>
#include <vm/page.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/vm_dep.h>

#include <sys/cpu.h>
#include <sys/vm_machparam.h>
#include <sys/memlist.h>
#include <sys/bootconf.h> /* XXX the memlist stuff belongs in memlist_plat.h */
#include <vm/hat_i86.h>
#include <sys/x86_archext.h>
#include <sys/elf_386.h>
#include <sys/cmn_err.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>

#include <sys/vtrace.h>
#include <sys/ddidmareq.h>
#include <sys/promif.h>
#include <sys/memnode.h>
#include <sys/stack.h>

uint_t vac_colors = 0;

int largepagesupport = 0;
extern uint_t page_create_new;
extern uint_t page_create_exists;
extern uint_t page_create_putbacks;
extern uintptr_t eprom_kernelbase;
extern int use_sse_pagecopy, use_sse_pagezero;	/* in ml/float.s */

/* 4g memory management */
pgcnt_t		maxmem4g;
pgcnt_t		freemem4g;
int		physmax4g;
int		desfree4gshift = 4;	/* maxmem4g shift to derive DESFREE4G */
int		lotsfree4gshift = 3;

#ifdef VM_STATS
struct {
	ulong_t	pga_alloc;
	ulong_t	pga_notfullrange;
	ulong_t	pga_nulldmaattr;
	ulong_t	pga_allocok;
	ulong_t	pga_allocfailed;
	ulong_t	pgma_alloc;
	ulong_t	pgma_allocok;
	ulong_t	pgma_allocfailed;
	ulong_t	pgma_allocempty;
} pga_vmstats;
#endif

uint_t mmu_page_sizes;

/* How many page sizes the users can see */
uint_t mmu_exported_page_sizes;

size_t auto_lpg_va_default = MMU_PAGESIZE; /* used by zmap() */

/*
 * Return the optimum page size for a given mapping.
 */
/*ARGSUSED*/
size_t
map_pgsz(int maptype, struct proc *p, caddr_t addr, size_t len, int *remap)
{
	level_t l;

	if (remap)
		*remap = 0;

	switch (maptype) {

	case MAPPGSZ_STK:
	case MAPPGSZ_HEAP:
	case MAPPGSZ_VA:
		/*
		 * use the page size that best fits len
		 */
		for (l = mmu.max_page_level; l > 0; --l) {
			if (len < LEVEL_SIZE(l))
				continue;
			break;
		}
		return (LEVEL_SIZE(l));

	/*
	 * for ISM use the 1st large page size.
	 */
	case MAPPGSZ_ISM:
		if (mmu.max_page_level == 0)
			return (MMU_PAGESIZE);
		return (LEVEL_SIZE(1));
	}
	return (0);
}
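
/*
 * For example, with 2MB large pages (LEVEL_SIZE(1) == 2MB under PAE or
 * on amd64), a 3MB MAPPGSZ_HEAP request returns 2MB, while any request
 * smaller than 2MB falls through to level 0 and returns MMU_PAGESIZE
 * (4KB).  The exact sizes are illustrative; they come from the mmu
 * level tables.
 */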

/*
 * This can be patched via /etc/system to allow large pages
 * to be used for mapping application and library text segments.
 */
int	use_text_largepages = 0;

/*
 * Return a bit vector of large page size codes that
 * can be used to map the [addr, addr + len) region.
 */

/*ARGSUSED*/
uint_t
map_execseg_pgszcvec(int text, caddr_t addr, size_t len)
{
	size_t	pgsz;
	caddr_t a;

	if (!text || !use_text_largepages ||
	    mmu.max_page_level == 0)
		return (0);

	pgsz = LEVEL_SIZE(1);
	a = (caddr_t)P2ROUNDUP((uintptr_t)addr, pgsz);
	if (a < addr || a >= addr + len) {
		return (0);
	}
	len -= (a - addr);
	if (len < pgsz) {
		return (0);
	}
	return (1 << 1);
}
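
/*
 * Each set bit in the returned vector names a usable page size code;
 * (1 << 1) advertises only size code 1, the first large page size
 * (2MB under PAE or on amd64, 4MB otherwise).  As an illustration, a
 * text segment at 0x10300000 of length 5MB rounds up to the 2MB
 * boundary at 0x10400000, leaving 4MB -- at least one full large
 * page -- so the function returns 0x2.
 */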

/*
 * Handle a pagefault.
 */
faultcode_t
pagefault(
	caddr_t addr,
	enum fault_type type,
	enum seg_rw rw,
	int iskernel)
{
	struct as *as;
	struct hat *hat;
	struct proc *p;
	kthread_t *t;
	faultcode_t res;
	caddr_t base;
	size_t len;
	int err;
	int mapped_red;
	uintptr_t ea;

	ASSERT_STACK_ALIGNED();

	if (INVALID_VADDR(addr))
		return (FC_NOMAP);

	mapped_red = segkp_map_red();

	if (iskernel) {
		as = &kas;
		hat = as->a_hat;
	} else {
		t = curthread;
		p = ttoproc(t);
		as = p->p_as;
		hat = as->a_hat;
	}

	/*
	 * Dispatch pagefault.
	 */
	res = as_fault(hat, as, addr, 1, type, rw);

	/*
	 * If this isn't a potential unmapped hole in the user's
	 * UNIX data or stack segments, just return status info.
	 */
	if (res != FC_NOMAP || iskernel)
		goto out;

	/*
	 * Check to see if we happened to fault on a currently unmapped
	 * part of the UNIX data or stack segments.  If so, create a zfod
	 * mapping there and then try calling the fault routine again.
	 */
	base = p->p_brkbase;
	len = p->p_brksize;

	if (addr < base || addr >= base + len) {		/* data seg? */
		base = (caddr_t)p->p_usrstack - p->p_stksize;
		len = p->p_stksize;
		if (addr < base || addr >= p->p_usrstack) {	/* stack seg? */
			/* not in either UNIX data or stack segments */
			res = FC_NOMAP;
			goto out;
		}
	}

	/*
	 * The rest of this function implements 3.X/4.X/5.X compatibility.
	 * This code is probably not needed anymore.
	 */
	if (p->p_model == DATAMODEL_ILP32) {

		/* expand the gap to the page boundaries on each side */
		ea = P2ROUNDUP((uintptr_t)base + len, MMU_PAGESIZE);
		base = (caddr_t)P2ALIGN((uintptr_t)base, MMU_PAGESIZE);
		len = ea - (uintptr_t)base;

		as_rangelock(as);
		if (as_gap(as, MMU_PAGESIZE, &base, &len, AH_CONTAIN, addr) ==
		    0) {
			err = as_map(as, base, len, segvn_create, zfod_argsp);
			as_rangeunlock(as);
			if (err) {
				res = FC_MAKE_ERR(err);
				goto out;
			}
		} else {
			/*
			 * This page was already mapped by another thread after
			 * we returned from as_fault() above.  We just fall
			 * through to as_fault() below.
			 */
			as_rangeunlock(as);
		}

		res = as_fault(hat, as, addr, 1, F_INVAL, rw);
	}

out:
	if (mapped_red)
		segkp_unmap_red();

	return (res);
}
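
/*
 * The ILP32 branch above preserves legacy auto-grow behavior: when a
 * 32-bit process faults in an unmapped hole inside its brk or stack
 * range, the hole is page-aligned, as_gap(..., AH_CONTAIN, addr)
 * locates the gap containing the fault address, as_map() backs it with
 * a zero-fill-on-demand (zfod) segment, and the fault is retried with
 * F_INVAL.  If another thread mapped the page first, the retry simply
 * resolves against that mapping.
 */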

void
map_addr(caddr_t *addrp, size_t len, offset_t off, int vacalign, uint_t flags)
{
	struct proc *p = curproc;
	caddr_t userlimit = (flags & _MAP_LOW32) ?
	    (caddr_t)_userlimit32 : p->p_as->a_userlimit;

	map_addr_proc(addrp, len, off, vacalign, userlimit, curproc, flags);
}

/*ARGSUSED*/
int
map_addr_vacalign_check(caddr_t addr, u_offset_t off)
{
	return (0);
}

/*
 * map_addr_proc() is the routine called when the system is to
 * choose an address for the user.  We will pick an address
 * range which is the highest available below kernelbase.
 *
 * addrp is a value/result parameter.
 *	On input it is a hint from the user to be used in a completely
 *	machine dependent fashion.  We decide to completely ignore this hint.
 *
 *	On output it is NULL if no address can be found in the current
 *	process's address space or else an address that is currently
 *	not mapped for len bytes with a page of red zone on either side.
 *
 *	vacalign is not needed on x86 (it's for virtually addressed caches).
 */
/*ARGSUSED*/
void
map_addr_proc(
	caddr_t *addrp,
	size_t len,
	offset_t off,
	int vacalign,
	caddr_t userlimit,
	struct proc *p,
	uint_t flags)
{
	struct as *as = p->p_as;
	caddr_t addr;
	caddr_t base;
	size_t slen;
	size_t align_amount;

	ASSERT32(userlimit == as->a_userlimit);

	base = p->p_brkbase;
#if defined(__amd64)
	/*
	 * XX64 Yes, this needs more work.
	 */
	if (p->p_model == DATAMODEL_NATIVE) {
		if (userlimit < as->a_userlimit) {
			/*
			 * This happens when a program wants to map
			 * something in a range that's accessible to a
			 * program in a smaller address space.  For example,
			 * a 64-bit program calling mmap32(2) to guarantee
			 * that the returned address is below 4Gbytes.
			 */
			ASSERT((uintptr_t)userlimit < ADDRESS_C(0xffffffff));

			if (userlimit > base)
				slen = userlimit - base;
			else {
				*addrp = NULL;
				return;
			}
		} else {
			/*
			 * XX64 This layout is probably wrong .. but in
			 * the event we make the amd64 address space look
			 * like sparcv9 i.e. with the stack -above- the
			 * heap, this bit of code might even be correct.
			 */
			slen = p->p_usrstack - base -
			    (((size_t)rctl_enforced_value(
			    rctlproc_legacy[RLIMIT_STACK],
			    p->p_rctls, p) + PAGEOFFSET) & PAGEMASK);
		}
	} else
#endif
		slen = userlimit - base;

	len = (len + PAGEOFFSET) & PAGEMASK;

	/*
	 * Redzone for each side of the request. This is done to leave
	 * one page unmapped between segments. This is not required, but
	 * it's useful for the user because if their program strays across
	 * a segment boundary, it will catch a fault immediately making
	 * debugging a little easier.
	 */
	len += 2 * MMU_PAGESIZE;

	/*
	 * figure out what the alignment should be
	 *
	 * XX64 -- is there an ELF_AMD64_MAXPGSZ or is it the same????
	 */
	if (len <= ELF_386_MAXPGSZ) {
		/*
		 * Align virtual addresses to ensure that ELF shared libraries
		 * are mapped with the appropriate alignment constraints by
		 * the run-time linker.
		 */
		align_amount = ELF_386_MAXPGSZ;
	} else {
		int l = mmu.max_page_level;

		while (l && len < LEVEL_SIZE(l))
			--l;

		align_amount = LEVEL_SIZE(l);
	}

	if ((flags & MAP_ALIGN) && ((uintptr_t)*addrp > align_amount))
		align_amount = (uintptr_t)*addrp;

	len += align_amount;

	/*
	 * Look for a large enough hole starting below userlimit.
	 * After finding it, use the upper part.  Addition of PAGESIZE
	 * is for the redzone as described above.
	 */
	if (as_gap(as, len, &base, &slen, AH_HI, NULL) == 0) {
		caddr_t as_addr;

		addr = base + slen - len + MMU_PAGESIZE;
		as_addr = addr;
		/*
		 * Round address DOWN to the alignment amount,
		 * add the offset, and if this address is less
		 * than the original address, add alignment amount.
		 */
		addr = (caddr_t)((uintptr_t)addr & (~(align_amount - 1)));
		addr += (uintptr_t)(off & (align_amount - 1));
		if (addr < as_addr)
			addr += align_amount;

		ASSERT(addr <= (as_addr + align_amount));
		ASSERT(((uintptr_t)addr & (align_amount - 1)) ==
		    ((uintptr_t)(off & (align_amount - 1))));
		*addrp = addr;
	} else {
		*addrp = NULL;	/* no more virtual space */
	}
}
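
/*
 * A worked example of the alignment step above: suppose align_amount is
 * 4MB (LEVEL_SIZE(1) on a non-PAE system), off is 0x1000, and the
 * top-of-hole candidate is 0x7fbff000.  Rounding down to a 4MB boundary
 * gives 0x7f800000; adding (off & (align_amount - 1)) gives 0x7f801000.
 * That is below the candidate, so align_amount is added back, yielding
 * 0x7fc01000 -- at most align_amount above the candidate and congruent
 * to off modulo 4MB, exactly what the two ASSERTs check.
 */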

/*
 * Determine whether [base, base+len] contains a valid range of
 * addresses at least minlen long. base and len are adjusted if
 * required to provide a valid range.
 */
/*ARGSUSED3*/
int
valid_va_range(caddr_t *basep, size_t *lenp, size_t minlen, int dir)
{
	uintptr_t hi, lo;

	lo = (uintptr_t)*basep;
	hi = lo + *lenp;

	/*
	 * If hi rolled over the top, try cutting back.
	 */
	if (hi < lo) {
		if (0 - lo + hi < minlen)
			return (0);
		if (0 - lo < minlen)
			return (0);
		*lenp = 0 - lo;
	} else if (hi - lo < minlen) {
		return (0);
	}
#if defined(__amd64)
	/*
	 * Deal with a possible hole in the address range between
	 * hole_start and hole_end that should never be mapped.
	 */
	if (lo < hole_start) {
		if (hi > hole_start) {
			if (hi < hole_end) {
				hi = hole_start;
			} else {
				/* lo < hole_start && hi >= hole_end */
				if (dir == AH_LO) {
					/*
					 * prefer lowest range
					 */
					if (hole_start - lo >= minlen)
						hi = hole_start;
					else if (hi - hole_end >= minlen)
						lo = hole_end;
					else
						return (0);
				} else {
					/*
					 * prefer highest range
					 */
					if (hi - hole_end >= minlen)
						lo = hole_end;
					else if (hole_start - lo >= minlen)
						hi = hole_start;
					else
						return (0);
				}
			}
		}
	} else {
		/* lo >= hole_start */
		if (hi < hole_end)
			return (0);
		if (lo < hole_end)
			lo = hole_end;
	}

	if (hi - lo < minlen)
		return (0);

	*basep = (caddr_t)lo;
	*lenp = hi - lo;
#endif
	return (1);
}
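
/*
 * On amd64 the hole handled above is the gap between hole_start and
 * hole_end -- the processor's non-canonical address range, where the
 * upper virtual address bits are not a sign extension of bit 47.  A
 * request straddling the hole is clipped to whichever side still
 * satisfies minlen, with the dir argument selecting the preferred side
 * when both fit.
 */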

/*
 * Determine whether [addr, addr+len] are valid user addresses.
 */
/*ARGSUSED*/
int
valid_usr_range(caddr_t addr, size_t len, uint_t prot, struct as *as,
    caddr_t userlimit)
{
	caddr_t eaddr = addr + len;

	if (eaddr <= addr || addr >= userlimit || eaddr > userlimit)
		return (RANGE_BADADDR);

#if defined(__amd64)
	/*
	 * Check for the VA hole
	 */
	if (eaddr > (caddr_t)hole_start && addr < (caddr_t)hole_end)
		return (RANGE_BADADDR);
#endif

	return (RANGE_OKAY);
}

/*
 * Return 1 if the page frame is onboard memory, else 0.
 */
int
pf_is_memory(pfn_t pf)
{
	return (address_in_memlist(phys_install, mmu_ptob((uint64_t)pf), 1));
}


/*
 * initialized by page_coloring_init().
 */
uint_t	page_colors;
uint_t	page_colors_mask;
uint_t	page_coloring_shift;
int	cpu_page_colors;
static uint_t	l2_colors;

/*
 * Page freelists and cachelists are dynamically allocated once mnoderangecnt
 * and page_colors are calculated from the l2 cache n-way set size.  Within a
 * mnode range, the page freelist and cachelist are hashed into bins based on
 * color. This makes it easier to search for a page within a specific memory
 * range.
 */
#define	PAGE_COLORS_MIN	16

page_t ****page_freelists;
page_t ***page_cachelists;

/*
 * As the PC architecture evolved, memory was clumped into several
 * ranges that various historical I/O devices need for DMA:
 * < 16Meg - ISA bus
 * < 2Gig - ???
 * < 4Gig - PCI bus or drivers that don't understand PAE mode
 */
static pfn_t arch_memranges[NUM_MEM_RANGES] = {
    0x100000,	/* pfn range for 4G and above */
    0x80000,	/* pfn range for 2G-4G */
    0x01000,	/* pfn range for 16M-2G */
    0x00000,	/* pfn range for 0-16M */
};

/*
 * These are changed during startup if the machine has limited memory.
 */
pfn_t *memranges = &arch_memranges[0];
int nranges = NUM_MEM_RANGES;

/*
 * Used by page layer to know about page sizes
 */
hw_pagesize_t hw_page_array[MAX_NUM_LEVEL + 1];

/*
 * This can be patched via /etc/system to allow old non-PAE aware device
 * drivers to use kmem_alloc'd memory on 32 bit systems with > 4Gig RAM.
 */
#if defined(__i386)
int restricted_kmemalloc = 1;	/* XX64 re-examine with PSARC 2004/405 */
#elif defined(__amd64)
int restricted_kmemalloc = 0;
#endif

kmutex_t	*fpc_mutex[NPC_MUTEX];
kmutex_t	*cpc_mutex[NPC_MUTEX];


/*
 * return the memrange containing pfn
 */
int
memrange_num(pfn_t pfn)
{
	int n;

	for (n = 0; n < nranges - 1; ++n) {
		if (pfn >= memranges[n])
			break;
	}
	return (n);
}
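
/*
 * memranges[] is sorted by descending base pfn, so the scan above stops
 * at the first entry at or below pfn.  For example, with the default
 * arch_memranges, a pfn of 0xc0000 (the 3GB mark with 4KB pages) fails
 * the 0x100000 test but passes 0x80000, so memrange_num() returns 1,
 * the 2G-4G range.
 */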

/*
 * return the mnoderange containing pfn
 */
int
pfn_2_mtype(pfn_t pfn)
{
	int	n;

	for (n = mnoderangecnt - 1; n >= 0; n--) {
		if (pfn >= mnoderanges[n].mnr_pfnlo) {
			break;
		}
	}
	return (n);
}

/*
 * is_contigpage_free:
 *	returns a page list of contiguous pages. It minimally has to return
 *	minctg pages. Caller determines minctg based on the scatter-gather
 *	list length.
 *
 *	pfnp is set to the next page frame to search on return.
 */
static page_t *
is_contigpage_free(
	pfn_t *pfnp,
	pgcnt_t *pgcnt,
	pgcnt_t minctg,
	uint64_t pfnseg,
	int iolock)
{
	int	i = 0;
	pfn_t	pfn = *pfnp;
	page_t	*pp;
	page_t	*plist = NULL;

	/*
	 * fail if pfn + minctg crosses a segment boundary.
	 * Adjust for next starting pfn to begin at segment boundary.
	 */

	if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg)) {
		*pfnp = roundup(*pfnp, pfnseg + 1);
		return (NULL);
	}

	do {
retry:
		pp = page_numtopp_nolock(pfn + i);
		if ((pp == NULL) ||
		    (page_trylock(pp, SE_EXCL) == 0)) {
			(*pfnp)++;
			break;
		}
		if (page_pptonum(pp) != pfn + i) {
			page_unlock(pp);
			goto retry;
		}

		if (!(PP_ISFREE(pp))) {
			page_unlock(pp);
			(*pfnp)++;
			break;
		}

		if (!PP_ISAGED(pp)) {
			page_list_sub(pp, PG_CACHE_LIST);
			page_hashout(pp, (kmutex_t *)NULL);
		} else {
			page_list_sub(pp, PG_FREE_LIST);
		}

		if (iolock)
			page_io_lock(pp);
		page_list_concat(&plist, &pp);

		/*
		 * exit loop when pgcnt satisfied or segment boundary reached.
		 */

	} while ((++i < *pgcnt) && ((pfn + i) & pfnseg));

	*pfnp += i;		/* set to next pfn to search */

	if (i >= minctg) {
		*pgcnt -= i;
		return (plist);
	}

	/*
	 * failure: minctg not satisfied.
	 *
	 * if next request crosses segment boundary, set next pfn
	 * to search from the segment boundary.
	 */
	if (((*pfnp + minctg - 1) & pfnseg) < (*pfnp & pfnseg))
		*pfnp = roundup(*pfnp, pfnseg + 1);

	/* clean up any pages already allocated */

	while (plist) {
		pp = plist;
		page_sub(&plist, pp);
		page_list_add(pp, PG_FREE_LIST | PG_LIST_TAIL);
		if (iolock)
			page_io_unlock(pp);
		page_unlock(pp);
	}

	return (NULL);
}
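
/*
 * The pfnseg mask encodes the DMA segment boundary in pages; crossing
 * it is detected by comparing masked offsets.  For example, with a 16MB
 * boundary (dma_attr_seg == 0xffffff) and 4KB pages, pfnseg is 0xfff:
 * a run starting at pfn 0xffe with minctg 4 would end at pfn 0x1001,
 * whose masked offset 0x001 is below 0xffe, so the search restarts at
 * the boundary, pfn 0x1000.
 */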

/*
 * verify that pages being returned from allocator have correct DMA attribute
 */
#ifndef DEBUG
#define	check_dma(a, b, c) (0)
#else
static void
check_dma(ddi_dma_attr_t *dma_attr, page_t *pp, int cnt)
{
	if (dma_attr == NULL)
		return;

	while (cnt-- > 0) {
		if (mmu_ptob((uint64_t)pp->p_pagenum) <
		    dma_attr->dma_attr_addr_lo)
			panic("PFN (pp=%p) below dma_attr_addr_lo", pp);
		if (mmu_ptob((uint64_t)pp->p_pagenum) >=
		    dma_attr->dma_attr_addr_hi)
			panic("PFN (pp=%p) above dma_attr_addr_hi", pp);
		pp = pp->p_next;
	}
}
#endif

static kmutex_t	contig_lock;

#define	CONTIG_LOCK()	mutex_enter(&contig_lock);
#define	CONTIG_UNLOCK()	mutex_exit(&contig_lock);

#define	PFN_16M		(mmu_btop((uint64_t)0x1000000))

static page_t *
page_get_contigpage(pgcnt_t *pgcnt, ddi_dma_attr_t *mattr, int iolock)
{
	pfn_t		pfn;
	int		sgllen;
	uint64_t	pfnseg;
	pgcnt_t		minctg;
	page_t		*pplist = NULL, *plist;
	uint64_t	lo, hi;
	pgcnt_t		pfnalign = 0;
	static pfn_t	startpfn;
	static pgcnt_t	lastctgcnt;
	uintptr_t	align;

	CONTIG_LOCK();

	if (mattr) {
		lo = mmu_btop((mattr->dma_attr_addr_lo + MMU_PAGEOFFSET));
		hi = mmu_btop(mattr->dma_attr_addr_hi);
		if (hi >= physmax)
			hi = physmax - 1;
		sgllen = mattr->dma_attr_sgllen;
		pfnseg = mmu_btop(mattr->dma_attr_seg);

		align = maxbit(mattr->dma_attr_align, mattr->dma_attr_minxfer);
		if (align > MMU_PAGESIZE)
			pfnalign = mmu_btop(align);

		/*
		 * in order to satisfy the request, must minimally
		 * acquire minctg contiguous pages
		 */
		minctg = howmany(*pgcnt, sgllen);

		ASSERT(hi >= lo);

		/*
		 * start from where last searched if the minctg >= lastctgcnt
		 */
		if (minctg < lastctgcnt || startpfn < lo || startpfn > hi)
			startpfn = lo;
	} else {
		hi = physmax - 1;
		lo = 0;
		sgllen = 1;
		pfnseg = mmu.highest_pfn;
		minctg = *pgcnt;

		if (minctg < lastctgcnt)
			startpfn = lo;
	}
	lastctgcnt = minctg;

	ASSERT(pfnseg + 1 >= (uint64_t)minctg);

	/* conserve 16m memory - start search above 16m when possible */
	if (hi > PFN_16M && startpfn < PFN_16M)
		startpfn = PFN_16M;

	pfn = startpfn;
	if (pfnalign)
		pfn = P2ROUNDUP(pfn, pfnalign);

	while (pfn + minctg - 1 <= hi) {

		plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock);
		if (plist) {
			page_list_concat(&pplist, &plist);
			sgllen--;
			/*
			 * return when contig pages no longer needed
			 */
			if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
				startpfn = pfn;
				CONTIG_UNLOCK();
				check_dma(mattr, pplist, *pgcnt);
				return (pplist);
			}
			minctg = howmany(*pgcnt, sgllen);
		}
		if (pfnalign)
			pfn = P2ROUNDUP(pfn, pfnalign);
	}

	/* cannot find contig pages in specified range */
	if (startpfn == lo) {
		CONTIG_UNLOCK();
		return (NULL);
	}

	/* did not start with lo previously */
	pfn = lo;
	if (pfnalign)
		pfn = P2ROUNDUP(pfn, pfnalign);

	/* allow search to go above startpfn */
	while (pfn < startpfn) {

		plist = is_contigpage_free(&pfn, pgcnt, minctg, pfnseg, iolock);
		if (plist != NULL) {

			page_list_concat(&pplist, &plist);
			sgllen--;

			/*
			 * return when contig pages no longer needed
			 */
			if (!*pgcnt || ((*pgcnt <= sgllen) && !pfnalign)) {
				startpfn = pfn;
				CONTIG_UNLOCK();
				check_dma(mattr, pplist, *pgcnt);
				return (pplist);
			}
			minctg = howmany(*pgcnt, sgllen);
		}
		if (pfnalign)
			pfn = P2ROUNDUP(pfn, pfnalign);
	}
	CONTIG_UNLOCK();
	return (NULL);
}
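
/*
 * minctg above is howmany(*pgcnt, sgllen): the request may be split
 * across up to sgllen scatter-gather elements, so each chunk must
 * cover at least ceil(pgcnt / sgllen) contiguous pages.  For example,
 * 8 pages with dma_attr_sgllen == 3 starts with minctg == 3, and as
 * chunks are found the count is recomputed, yielding chunks of 3, 3,
 * and 2 pages.
 */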

/*
 * Combine mem_node_config and memranges memory ranges into one data
 * structure to be used for page list management.
 *
 * mnode_range_cnt() calculates the number of memory ranges for mnode and
 * memranges[]. Used to determine the size of page lists and mnoderanges.
 *
 * mnode_range_setup() initializes mnoderanges.
 */
mnoderange_t	*mnoderanges;
int		mnoderangecnt;
int		mtype4g;

int
mnode_range_cnt()
{
	int	mri;
	int	mnrcnt = 0;
	int	mnode;

	for (mnode = 0; mnode < max_mem_nodes; mnode++) {
		if (mem_node_config[mnode].exists == 0)
			continue;

		mri = nranges - 1;

		/* find the memranges index of the range holding physbase */

		while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
			mri--;

		/*
		 * increment mnode range counter when memranges or mnode
		 * boundary is reached.
		 */
		while (mri >= 0 &&
		    mem_node_config[mnode].physmax >= MEMRANGELO(mri)) {
			mnrcnt++;
			if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
				mri--;
			else
				break;
		}
	}
	return (mnrcnt);
}
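
/*
 * For example, a machine with a single memory node spanning pfn 0 up
 * through 6GB worth of pages intersects all four default memranges
 * (0-16M, 16M-2G, 2G-4G, 4G+), so mnode_range_cnt() returns 4 and
 * mnode_range_setup() below creates four mnoderange_t entries for it,
 * each clipped to the overlap of the mnode and the memrange.
 */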

void
mnode_range_setup(mnoderange_t *mnoderanges)
{
	int	mnode, mri;

	for (mnode = 0; mnode < max_mem_nodes; mnode++) {
		if (mem_node_config[mnode].exists == 0)
			continue;

		mri = nranges - 1;

		while (MEMRANGEHI(mri) < mem_node_config[mnode].physbase)
			mri--;

		while (mri >= 0 && mem_node_config[mnode].physmax >=
		    MEMRANGELO(mri)) {
			mnoderanges->mnr_pfnlo =
			    MAX(MEMRANGELO(mri),
				mem_node_config[mnode].physbase);
			mnoderanges->mnr_pfnhi =
			    MIN(MEMRANGEHI(mri),
				mem_node_config[mnode].physmax);
			mnoderanges->mnr_mnode = mnode;
			mnoderanges->mnr_memrange = mri;
			mnoderanges++;
			if (mem_node_config[mnode].physmax > MEMRANGEHI(mri))
				mri--;
			else
				break;
		}
	}
}

/*
 * Determine if the mnode range specified by mtype contains memory
 * belonging to memory node mnode.  If flags & PGI_MT_RANGE is set, mtype
 * is the upper bound of a range of indices to search, extending down to
 * index 0 (or, with PGI_MT_RANGE4G, down to just above the 4g boundary
 * index mtype4g).
 *
 * Return the first mnode range type index found, otherwise return -1 if
 * none is found.
 */
int
mtype_func(int mnode, int mtype, uint_t flags)
{
	if (flags & PGI_MT_RANGE) {
		int	mtlim = 0;	/* default to PGI_MT_RANGE0 */

		if (flags & PGI_MT_NEXT)
			mtype--;
		if (flags & PGI_MT_RANGE4G)
			mtlim = mtype4g + 1;
		while (mtype >= mtlim) {
			if (mnoderanges[mtype].mnr_mnode == mnode)
				return (mtype);
			mtype--;
		}
	} else {
		if (mnoderanges[mtype].mnr_mnode == mnode)
			return (mtype);
	}
	return (-1);
}
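
/*
 * In the PGI_MT_RANGE case the walk runs from mtype down toward index
 * 0, returning the first mnoderange owned by mnode.  PGI_MT_RANGE4G
 * raises the lower bound to mtype4g + 1 so that only ranges above 4G
 * are considered, and PGI_MT_NEXT restarts the walk just below a
 * previously returned mtype.
 */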
1019*7c478bd9Sstevel@tonic-gate 
1020*7c478bd9Sstevel@tonic-gate /*
1021*7c478bd9Sstevel@tonic-gate  * Initialize page coloring variables based on the l2 cache parameters.
1022*7c478bd9Sstevel@tonic-gate  * Calculate and return memory needed for page coloring data structures.
1023*7c478bd9Sstevel@tonic-gate  */
1024*7c478bd9Sstevel@tonic-gate size_t
1025*7c478bd9Sstevel@tonic-gate page_coloring_init(uint_t l2_sz, int l2_linesz, int l2_assoc)
1026*7c478bd9Sstevel@tonic-gate {
1027*7c478bd9Sstevel@tonic-gate 	size_t	colorsz = 0;
1028*7c478bd9Sstevel@tonic-gate 	int	i;
1029*7c478bd9Sstevel@tonic-gate 	int	colors;
1030*7c478bd9Sstevel@tonic-gate 
1031*7c478bd9Sstevel@tonic-gate 	/*
1032*7c478bd9Sstevel@tonic-gate 	 * Reduce the memory ranges lists if we don't have large amounts
1033*7c478bd9Sstevel@tonic-gate 	 * of memory. This avoids searching known empty free lists.
1034*7c478bd9Sstevel@tonic-gate 	 */
1035*7c478bd9Sstevel@tonic-gate 	i = memrange_num(physmax);
1036*7c478bd9Sstevel@tonic-gate 	memranges += i;
1037*7c478bd9Sstevel@tonic-gate 	nranges -= i;
1038*7c478bd9Sstevel@tonic-gate #if defined(__i386)
1039*7c478bd9Sstevel@tonic-gate 	if (i > 0)
1040*7c478bd9Sstevel@tonic-gate 		restricted_kmemalloc = 0;
1041*7c478bd9Sstevel@tonic-gate #endif
1042*7c478bd9Sstevel@tonic-gate 	/* physmax greater than 4g */
1043*7c478bd9Sstevel@tonic-gate 	if (i == 0)
1044*7c478bd9Sstevel@tonic-gate 		physmax4g = 1;
1045*7c478bd9Sstevel@tonic-gate 
1046*7c478bd9Sstevel@tonic-gate 	/*
1047*7c478bd9Sstevel@tonic-gate 	 * setup pagesize for generic page layer
1048*7c478bd9Sstevel@tonic-gate 	 */
1049*7c478bd9Sstevel@tonic-gate 	for (i = 0; i <= mmu.max_page_level; ++i) {
1050*7c478bd9Sstevel@tonic-gate 		hw_page_array[i].hp_size = LEVEL_SIZE(i);
1051*7c478bd9Sstevel@tonic-gate 		hw_page_array[i].hp_shift = LEVEL_SHIFT(i);
1052*7c478bd9Sstevel@tonic-gate 		hw_page_array[i].hp_pgcnt = LEVEL_SIZE(i) >> LEVEL_SHIFT(0);
1053*7c478bd9Sstevel@tonic-gate 	}
1054*7c478bd9Sstevel@tonic-gate 
1055*7c478bd9Sstevel@tonic-gate 	ASSERT(ISP2(l2_sz));
1056*7c478bd9Sstevel@tonic-gate 	ASSERT(ISP2(l2_linesz));
1057*7c478bd9Sstevel@tonic-gate 	ASSERT(l2_sz > MMU_PAGESIZE);
1058*7c478bd9Sstevel@tonic-gate 
1059*7c478bd9Sstevel@tonic-gate 	/* l2_assoc is 0 for fully associative l2 cache */
1060*7c478bd9Sstevel@tonic-gate 	if (l2_assoc)
1061*7c478bd9Sstevel@tonic-gate 		l2_colors = MAX(1, l2_sz / (l2_assoc * MMU_PAGESIZE));
1062*7c478bd9Sstevel@tonic-gate 	else
1063*7c478bd9Sstevel@tonic-gate 		l2_colors = 1;
1064*7c478bd9Sstevel@tonic-gate 
1065*7c478bd9Sstevel@tonic-gate 	/* for scalability, configure at least PAGE_COLORS_MIN color bins */
1066*7c478bd9Sstevel@tonic-gate 	page_colors = MAX(l2_colors, PAGE_COLORS_MIN);
1067*7c478bd9Sstevel@tonic-gate 
1068*7c478bd9Sstevel@tonic-gate 	/*
1069*7c478bd9Sstevel@tonic-gate 	 * cpu_page_colors is non-zero when a page color may be spread across
1070*7c478bd9Sstevel@tonic-gate 	 * multiple bins.
1071*7c478bd9Sstevel@tonic-gate 	 */
1072*7c478bd9Sstevel@tonic-gate 	if (l2_colors < page_colors)
1073*7c478bd9Sstevel@tonic-gate 		cpu_page_colors = l2_colors;
1074*7c478bd9Sstevel@tonic-gate 
1075*7c478bd9Sstevel@tonic-gate 	ASSERT(ISP2(page_colors));
1076*7c478bd9Sstevel@tonic-gate 
1077*7c478bd9Sstevel@tonic-gate 	page_colors_mask = page_colors - 1;
1078*7c478bd9Sstevel@tonic-gate 
1079*7c478bd9Sstevel@tonic-gate 	ASSERT(ISP2(CPUSETSIZE()));
1080*7c478bd9Sstevel@tonic-gate 	page_coloring_shift = lowbit(CPUSETSIZE());
1081*7c478bd9Sstevel@tonic-gate 
1082*7c478bd9Sstevel@tonic-gate 	/* size for mnoderanges */
1083*7c478bd9Sstevel@tonic-gate 	mnoderangecnt = mnode_range_cnt();
1084*7c478bd9Sstevel@tonic-gate 	colorsz = mnoderangecnt * sizeof (mnoderange_t);
1085*7c478bd9Sstevel@tonic-gate 
1086*7c478bd9Sstevel@tonic-gate 	/* size for fpc_mutex and cpc_mutex */
1087*7c478bd9Sstevel@tonic-gate 	colorsz += (2 * max_mem_nodes * sizeof (kmutex_t) * NPC_MUTEX);
1088*7c478bd9Sstevel@tonic-gate 
1089*7c478bd9Sstevel@tonic-gate 	/* size of page_freelists */
1090*7c478bd9Sstevel@tonic-gate 	colorsz += mnoderangecnt * sizeof (page_t ***);
1091*7c478bd9Sstevel@tonic-gate 	colorsz += mnoderangecnt * mmu_page_sizes * sizeof (page_t **);
1092*7c478bd9Sstevel@tonic-gate 
1093*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < mmu_page_sizes; i++) {
1094*7c478bd9Sstevel@tonic-gate 		colors = page_get_pagecolors(i);
1095*7c478bd9Sstevel@tonic-gate 		colorsz += mnoderangecnt * colors * sizeof (page_t *);
1096*7c478bd9Sstevel@tonic-gate 	}
1097*7c478bd9Sstevel@tonic-gate 
1098*7c478bd9Sstevel@tonic-gate 	/* size of page_cachelists */
1099*7c478bd9Sstevel@tonic-gate 	colorsz += mnoderangecnt * sizeof (page_t **);
1100*7c478bd9Sstevel@tonic-gate 	colorsz += mnoderangecnt * page_colors * sizeof (page_t *);
1101*7c478bd9Sstevel@tonic-gate 
1102*7c478bd9Sstevel@tonic-gate 	return (colorsz);
1103*7c478bd9Sstevel@tonic-gate }
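
/*
 * Editor's note: a worked example of the color computation above, assuming
 * a hypothetical 1MB, 16-way set-associative L2 cache with 4K pages:
 *
 *	l2_colors   = MAX(1, 1MB / (16 * 4K)) = 16
 *	page_colors = MAX(16, PAGE_COLORS_MIN)
 *
 * If PAGE_COLORS_MIN exceeds l2_colors, cpu_page_colors is set to
 * l2_colors, so each hardware color then corresponds to
 * page_colors / l2_colors free-list bins.
 */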
1104*7c478bd9Sstevel@tonic-gate 
1105*7c478bd9Sstevel@tonic-gate /*
1106*7c478bd9Sstevel@tonic-gate  * Called once at startup to configure the page_coloring data structures and
1107*7c478bd9Sstevel@tonic-gate  * do the first page_free()/page_freelist_add().
1108*7c478bd9Sstevel@tonic-gate  */
1109*7c478bd9Sstevel@tonic-gate void
1110*7c478bd9Sstevel@tonic-gate page_coloring_setup(caddr_t pcmemaddr)
1111*7c478bd9Sstevel@tonic-gate {
1112*7c478bd9Sstevel@tonic-gate 	int	i;
1113*7c478bd9Sstevel@tonic-gate 	int	j;
1114*7c478bd9Sstevel@tonic-gate 	int	k;
1115*7c478bd9Sstevel@tonic-gate 	caddr_t	addr;
1116*7c478bd9Sstevel@tonic-gate 	int	colors;
1117*7c478bd9Sstevel@tonic-gate 
1118*7c478bd9Sstevel@tonic-gate 	/*
1119*7c478bd9Sstevel@tonic-gate 	 * do page coloring setup
1120*7c478bd9Sstevel@tonic-gate 	 */
1121*7c478bd9Sstevel@tonic-gate 	addr = pcmemaddr;
1122*7c478bd9Sstevel@tonic-gate 
1123*7c478bd9Sstevel@tonic-gate 	mnoderanges = (mnoderange_t *)addr;
1124*7c478bd9Sstevel@tonic-gate 	addr += (mnoderangecnt * sizeof (mnoderange_t));
1125*7c478bd9Sstevel@tonic-gate 
1126*7c478bd9Sstevel@tonic-gate 	mnode_range_setup(mnoderanges);
1127*7c478bd9Sstevel@tonic-gate 
1128*7c478bd9Sstevel@tonic-gate 	if (physmax4g)
1129*7c478bd9Sstevel@tonic-gate 		mtype4g = pfn_2_mtype(0xfffff);
1130*7c478bd9Sstevel@tonic-gate 
1131*7c478bd9Sstevel@tonic-gate 	for (k = 0; k < NPC_MUTEX; k++) {
1132*7c478bd9Sstevel@tonic-gate 		fpc_mutex[k] = (kmutex_t *)addr;
1133*7c478bd9Sstevel@tonic-gate 		addr += (max_mem_nodes * sizeof (kmutex_t));
1134*7c478bd9Sstevel@tonic-gate 	}
1135*7c478bd9Sstevel@tonic-gate 	for (k = 0; k < NPC_MUTEX; k++) {
1136*7c478bd9Sstevel@tonic-gate 		cpc_mutex[k] = (kmutex_t *)addr;
1137*7c478bd9Sstevel@tonic-gate 		addr += (max_mem_nodes * sizeof (kmutex_t));
1138*7c478bd9Sstevel@tonic-gate 	}
1139*7c478bd9Sstevel@tonic-gate 	page_freelists = (page_t ****)addr;
1140*7c478bd9Sstevel@tonic-gate 	addr += (mnoderangecnt * sizeof (page_t ***));
1141*7c478bd9Sstevel@tonic-gate 
1142*7c478bd9Sstevel@tonic-gate 	page_cachelists = (page_t ***)addr;
1143*7c478bd9Sstevel@tonic-gate 	addr += (mnoderangecnt * sizeof (page_t **));
1144*7c478bd9Sstevel@tonic-gate 
1145*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < mnoderangecnt; i++) {
1146*7c478bd9Sstevel@tonic-gate 		page_freelists[i] = (page_t ***)addr;
1147*7c478bd9Sstevel@tonic-gate 		addr += (mmu_page_sizes * sizeof (page_t **));
1148*7c478bd9Sstevel@tonic-gate 
1149*7c478bd9Sstevel@tonic-gate 		for (j = 0; j < mmu_page_sizes; j++) {
1150*7c478bd9Sstevel@tonic-gate 			colors = page_get_pagecolors(j);
1151*7c478bd9Sstevel@tonic-gate 			page_freelists[i][j] = (page_t **)addr;
1152*7c478bd9Sstevel@tonic-gate 			addr += (colors * sizeof (page_t *));
1153*7c478bd9Sstevel@tonic-gate 		}
1154*7c478bd9Sstevel@tonic-gate 		page_cachelists[i] = (page_t **)addr;
1155*7c478bd9Sstevel@tonic-gate 		addr += (page_colors * sizeof (page_t *));
1156*7c478bd9Sstevel@tonic-gate 	}
1157*7c478bd9Sstevel@tonic-gate }
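
/*
 * Editor's note: page_coloring_setup() carves pcmemaddr with a simple bump
 * allocator, so the sizes taken here must match, piece for piece, the sizes
 * summed into colorsz by page_coloring_init().  A sketch of the expected
 * startup sequence (the caller context is hypothetical):
 *
 *	size_t sz = page_coloring_init(l2_sz, l2_linesz, l2_assoc);
 *	caddr_t base = ...;	// sz bytes of early boot memory
 *	page_coloring_setup(base);
 */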
1158*7c478bd9Sstevel@tonic-gate 
1159*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1160*7c478bd9Sstevel@tonic-gate int
1161*7c478bd9Sstevel@tonic-gate bp_color(struct buf *bp)
1162*7c478bd9Sstevel@tonic-gate {
1163*7c478bd9Sstevel@tonic-gate 	return (0);
1164*7c478bd9Sstevel@tonic-gate }
1165*7c478bd9Sstevel@tonic-gate 
1166*7c478bd9Sstevel@tonic-gate /*
1167*7c478bd9Sstevel@tonic-gate  * get a page from any list with the given mnode
1168*7c478bd9Sstevel@tonic-gate  */
1169*7c478bd9Sstevel@tonic-gate page_t *
1170*7c478bd9Sstevel@tonic-gate page_get_mnode_anylist(ulong_t origbin, uchar_t szc, uint_t flags,
1171*7c478bd9Sstevel@tonic-gate     int mnode, int mtype, ddi_dma_attr_t *dma_attr)
1172*7c478bd9Sstevel@tonic-gate {
1173*7c478bd9Sstevel@tonic-gate 	kmutex_t	*pcm;
1174*7c478bd9Sstevel@tonic-gate 	int		i;
1175*7c478bd9Sstevel@tonic-gate 	page_t		*pp;
1176*7c478bd9Sstevel@tonic-gate 	page_t		*first_pp;
1177*7c478bd9Sstevel@tonic-gate 	uint64_t	pgaddr;
1178*7c478bd9Sstevel@tonic-gate 	ulong_t		bin;
1179*7c478bd9Sstevel@tonic-gate 	int		mtypestart;
1180*7c478bd9Sstevel@tonic-gate 
1181*7c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pgma_alloc);
1182*7c478bd9Sstevel@tonic-gate 
1183*7c478bd9Sstevel@tonic-gate 	ASSERT((flags & PG_MATCH_COLOR) == 0);
1184*7c478bd9Sstevel@tonic-gate 	ASSERT(szc == 0);
1185*7c478bd9Sstevel@tonic-gate 	ASSERT(dma_attr != NULL);
1186*7c478bd9Sstevel@tonic-gate 
1188*7c478bd9Sstevel@tonic-gate 	MTYPE_START(mnode, mtype, flags);
1189*7c478bd9Sstevel@tonic-gate 	if (mtype < 0) {
1190*7c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(pga_vmstats.pgma_allocempty);
1191*7c478bd9Sstevel@tonic-gate 		return (NULL);
1192*7c478bd9Sstevel@tonic-gate 	}
1193*7c478bd9Sstevel@tonic-gate 
1194*7c478bd9Sstevel@tonic-gate 	mtypestart = mtype;
1195*7c478bd9Sstevel@tonic-gate 
1196*7c478bd9Sstevel@tonic-gate 	bin = origbin;
1197*7c478bd9Sstevel@tonic-gate 
1198*7c478bd9Sstevel@tonic-gate 	/*
1199*7c478bd9Sstevel@tonic-gate 	 * Check up to page_colors + 1 bins; origbin may be checked twice
1200*7c478bd9Sstevel@tonic-gate 	 * because of the BIN_STEP skip.
1201*7c478bd9Sstevel@tonic-gate 	 */
1202*7c478bd9Sstevel@tonic-gate 	do {
1203*7c478bd9Sstevel@tonic-gate 		i = 0;
1204*7c478bd9Sstevel@tonic-gate 		while (i <= page_colors) {
1205*7c478bd9Sstevel@tonic-gate 			if (PAGE_FREELISTS(mnode, szc, bin, mtype) == NULL)
1206*7c478bd9Sstevel@tonic-gate 				goto nextfreebin;
1207*7c478bd9Sstevel@tonic-gate 
1208*7c478bd9Sstevel@tonic-gate 			pcm = PC_BIN_MUTEX(mnode, bin, PG_FREE_LIST);
1209*7c478bd9Sstevel@tonic-gate 			mutex_enter(pcm);
1210*7c478bd9Sstevel@tonic-gate 			pp = PAGE_FREELISTS(mnode, szc, bin, mtype);
1211*7c478bd9Sstevel@tonic-gate 			first_pp = pp;
1212*7c478bd9Sstevel@tonic-gate 			while (pp != NULL) {
1213*7c478bd9Sstevel@tonic-gate 				if (page_trylock(pp, SE_EXCL) == 0) {
1214*7c478bd9Sstevel@tonic-gate 					pp = pp->p_next;
1215*7c478bd9Sstevel@tonic-gate 					if (pp == first_pp) {
1216*7c478bd9Sstevel@tonic-gate 						pp = NULL;
1217*7c478bd9Sstevel@tonic-gate 					}
1218*7c478bd9Sstevel@tonic-gate 					continue;
1219*7c478bd9Sstevel@tonic-gate 				}
1220*7c478bd9Sstevel@tonic-gate 
1221*7c478bd9Sstevel@tonic-gate 				ASSERT(PP_ISFREE(pp));
1222*7c478bd9Sstevel@tonic-gate 				ASSERT(PP_ISAGED(pp));
1223*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_vnode == NULL);
1224*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_hash == NULL);
1225*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_offset == (u_offset_t)-1);
1226*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_szc == szc);
1227*7c478bd9Sstevel@tonic-gate 				ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode);
1228*7c478bd9Sstevel@tonic-gate 				/* check if page within DMA attributes */
1229*7c478bd9Sstevel@tonic-gate 				pgaddr = mmu_ptob((uint64_t)(pp->p_pagenum));
1230*7c478bd9Sstevel@tonic-gate 
1231*7c478bd9Sstevel@tonic-gate 				if ((pgaddr >= dma_attr->dma_attr_addr_lo) &&
1232*7c478bd9Sstevel@tonic-gate 				    (pgaddr + MMU_PAGESIZE - 1 <=
1233*7c478bd9Sstevel@tonic-gate 				    dma_attr->dma_attr_addr_hi)) {
1234*7c478bd9Sstevel@tonic-gate 					break;
1235*7c478bd9Sstevel@tonic-gate 				}
1236*7c478bd9Sstevel@tonic-gate 
1237*7c478bd9Sstevel@tonic-gate 				/* continue looking */
1238*7c478bd9Sstevel@tonic-gate 				page_unlock(pp);
1239*7c478bd9Sstevel@tonic-gate 				pp = pp->p_next;
1240*7c478bd9Sstevel@tonic-gate 				if (pp == first_pp)
1241*7c478bd9Sstevel@tonic-gate 					pp = NULL;
1242*7c478bd9Sstevel@tonic-gate 
1243*7c478bd9Sstevel@tonic-gate 			}
1244*7c478bd9Sstevel@tonic-gate 			if (pp != NULL) {
1245*7c478bd9Sstevel@tonic-gate 				ASSERT(mtype == PP_2_MTYPE(pp));
1246*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_szc == 0);
1247*7c478bd9Sstevel@tonic-gate 
1248*7c478bd9Sstevel@tonic-gate 				/* found a page with specified DMA attributes */
1249*7c478bd9Sstevel@tonic-gate 				page_sub(&PAGE_FREELISTS(mnode, szc, bin,
1250*7c478bd9Sstevel@tonic-gate 				    mtype), pp);
1251*7c478bd9Sstevel@tonic-gate 				page_ctr_sub(pp, PG_FREE_LIST);
1252*7c478bd9Sstevel@tonic-gate 
1253*7c478bd9Sstevel@tonic-gate 				if ((PP_ISFREE(pp) == 0) ||
1254*7c478bd9Sstevel@tonic-gate 				    (PP_ISAGED(pp) == 0)) {
1255*7c478bd9Sstevel@tonic-gate 					cmn_err(CE_PANIC, "page %p is not free",
1256*7c478bd9Sstevel@tonic-gate 					    (void *)pp);
1257*7c478bd9Sstevel@tonic-gate 				}
1258*7c478bd9Sstevel@tonic-gate 
1259*7c478bd9Sstevel@tonic-gate 				mutex_exit(pcm);
1260*7c478bd9Sstevel@tonic-gate 				check_dma(dma_attr, pp, 1);
1261*7c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(pga_vmstats.pgma_allocok);
1262*7c478bd9Sstevel@tonic-gate 				return (pp);
1263*7c478bd9Sstevel@tonic-gate 			}
1264*7c478bd9Sstevel@tonic-gate 			mutex_exit(pcm);
1265*7c478bd9Sstevel@tonic-gate nextfreebin:
1266*7c478bd9Sstevel@tonic-gate 			pp = page_freelist_fill(szc, bin, mnode, mtype,
1267*7c478bd9Sstevel@tonic-gate 			    mmu_btop(dma_attr->dma_attr_addr_hi + 1));
1268*7c478bd9Sstevel@tonic-gate 			if (pp)
1269*7c478bd9Sstevel@tonic-gate 				return (pp);
1270*7c478bd9Sstevel@tonic-gate 
1271*7c478bd9Sstevel@tonic-gate 			/* try next bin */
1272*7c478bd9Sstevel@tonic-gate 			bin += (i == 0) ? BIN_STEP : 1;
1273*7c478bd9Sstevel@tonic-gate 			bin &= page_colors_mask;
1274*7c478bd9Sstevel@tonic-gate 			i++;
1275*7c478bd9Sstevel@tonic-gate 		}
1276*7c478bd9Sstevel@tonic-gate 	} while ((flags & PGI_MT_RANGE) &&
1277*7c478bd9Sstevel@tonic-gate 	    (MTYPE_NEXT(mnode, mtype, flags) >= 0));
1278*7c478bd9Sstevel@tonic-gate 
1279*7c478bd9Sstevel@tonic-gate 	/* failed to find a page in the freelist; try it in the cachelist */
1280*7c478bd9Sstevel@tonic-gate 
1281*7c478bd9Sstevel@tonic-gate 	/* reset mtype start for cachelist search */
1282*7c478bd9Sstevel@tonic-gate 	mtype = mtypestart;
1283*7c478bd9Sstevel@tonic-gate 	ASSERT(mtype >= 0);
1284*7c478bd9Sstevel@tonic-gate 
1285*7c478bd9Sstevel@tonic-gate 	/* start with the bin of matching color */
1286*7c478bd9Sstevel@tonic-gate 	bin = origbin;
1287*7c478bd9Sstevel@tonic-gate 
1288*7c478bd9Sstevel@tonic-gate 	do {
1289*7c478bd9Sstevel@tonic-gate 		for (i = 0; i <= page_colors; i++) {
1290*7c478bd9Sstevel@tonic-gate 			if (PAGE_CACHELISTS(mnode, bin, mtype) == NULL)
1291*7c478bd9Sstevel@tonic-gate 				goto nextcachebin;
1292*7c478bd9Sstevel@tonic-gate 			pcm = PC_BIN_MUTEX(mnode, bin, PG_CACHE_LIST);
1293*7c478bd9Sstevel@tonic-gate 			mutex_enter(pcm);
1294*7c478bd9Sstevel@tonic-gate 			pp = PAGE_CACHELISTS(mnode, bin, mtype);
1295*7c478bd9Sstevel@tonic-gate 			first_pp = pp;
1296*7c478bd9Sstevel@tonic-gate 			while (pp != NULL) {
1297*7c478bd9Sstevel@tonic-gate 				if (page_trylock(pp, SE_EXCL) == 0) {
1298*7c478bd9Sstevel@tonic-gate 					pp = pp->p_next;
1299*7c478bd9Sstevel@tonic-gate 					if (pp == first_pp)
1300*7c478bd9Sstevel@tonic-gate 						break;
1301*7c478bd9Sstevel@tonic-gate 					continue;
1302*7c478bd9Sstevel@tonic-gate 				}
1303*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_vnode);
1304*7c478bd9Sstevel@tonic-gate 				ASSERT(PP_ISAGED(pp) == 0);
1305*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_szc == 0);
1306*7c478bd9Sstevel@tonic-gate 				ASSERT(PFN_2_MEM_NODE(pp->p_pagenum) == mnode);
1307*7c478bd9Sstevel@tonic-gate 
1308*7c478bd9Sstevel@tonic-gate 				/* check if page within DMA attributes */
1309*7c478bd9Sstevel@tonic-gate 
1310*7c478bd9Sstevel@tonic-gate 				pgaddr = ptob((uint64_t)(pp->p_pagenum));
1311*7c478bd9Sstevel@tonic-gate 
1312*7c478bd9Sstevel@tonic-gate 				if ((pgaddr >= dma_attr->dma_attr_addr_lo) &&
1313*7c478bd9Sstevel@tonic-gate 				    (pgaddr + MMU_PAGESIZE - 1 <=
1314*7c478bd9Sstevel@tonic-gate 				    dma_attr->dma_attr_addr_hi)) {
1315*7c478bd9Sstevel@tonic-gate 					break;
1316*7c478bd9Sstevel@tonic-gate 				}
1317*7c478bd9Sstevel@tonic-gate 
1318*7c478bd9Sstevel@tonic-gate 				/* continue looking */
1319*7c478bd9Sstevel@tonic-gate 				page_unlock(pp);
1320*7c478bd9Sstevel@tonic-gate 				pp = pp->p_next;
1321*7c478bd9Sstevel@tonic-gate 				if (pp == first_pp)
1322*7c478bd9Sstevel@tonic-gate 					pp = NULL;
1323*7c478bd9Sstevel@tonic-gate 			}
1324*7c478bd9Sstevel@tonic-gate 
1325*7c478bd9Sstevel@tonic-gate 			if (pp != NULL) {
1326*7c478bd9Sstevel@tonic-gate 				ASSERT(mtype == PP_2_MTYPE(pp));
1327*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_szc == 0);
1328*7c478bd9Sstevel@tonic-gate 
1329*7c478bd9Sstevel@tonic-gate 				/* found a page with specified DMA attributes */
1330*7c478bd9Sstevel@tonic-gate 				page_sub(&PAGE_CACHELISTS(mnode, bin,
1331*7c478bd9Sstevel@tonic-gate 				    mtype), pp);
1332*7c478bd9Sstevel@tonic-gate 				page_ctr_sub(pp, PG_CACHE_LIST);
1333*7c478bd9Sstevel@tonic-gate 
1334*7c478bd9Sstevel@tonic-gate 				mutex_exit(pcm);
1335*7c478bd9Sstevel@tonic-gate 				ASSERT(pp->p_vnode);
1336*7c478bd9Sstevel@tonic-gate 				ASSERT(PP_ISAGED(pp) == 0);
1337*7c478bd9Sstevel@tonic-gate 				check_dma(dma_attr, pp, 1);
1338*7c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(pga_vmstats.pgma_allocok);
1339*7c478bd9Sstevel@tonic-gate 				return (pp);
1340*7c478bd9Sstevel@tonic-gate 			}
1341*7c478bd9Sstevel@tonic-gate 			mutex_exit(pcm);
1342*7c478bd9Sstevel@tonic-gate nextcachebin:
1343*7c478bd9Sstevel@tonic-gate 			bin += (i == 0) ? BIN_STEP : 1;
1344*7c478bd9Sstevel@tonic-gate 			bin &= page_colors_mask;
1345*7c478bd9Sstevel@tonic-gate 		}
1346*7c478bd9Sstevel@tonic-gate 	} while ((flags & PGI_MT_RANGE) &&
1347*7c478bd9Sstevel@tonic-gate 	    (MTYPE_NEXT(mnode, mtype, flags) >= 0));
1348*7c478bd9Sstevel@tonic-gate 
1349*7c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pgma_allocfailed);
1350*7c478bd9Sstevel@tonic-gate 	return (NULL);
1351*7c478bd9Sstevel@tonic-gate }
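
/*
 * Editor's note: the DMA window test applied twice above, rewritten as a
 * standalone helper for clarity (a sketch only; not used in this file):
 *
 *	static int
 *	page_in_dma_window(page_t *pp, ddi_dma_attr_t *attr)
 *	{
 *		uint64_t pgaddr = mmu_ptob((uint64_t)pp->p_pagenum);
 *
 *		return (pgaddr >= attr->dma_attr_addr_lo &&
 *		    pgaddr + MMU_PAGESIZE - 1 <= attr->dma_attr_addr_hi);
 *	}
 */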
1352*7c478bd9Sstevel@tonic-gate 
1353*7c478bd9Sstevel@tonic-gate /*
1354*7c478bd9Sstevel@tonic-gate  * This function is similar to page_get_freelist()/page_get_cachelist()
1355*7c478bd9Sstevel@tonic-gate  * but it searches both lists to find a page with the specified
1356*7c478bd9Sstevel@tonic-gate  * color (or no color) and DMA attributes.  The search is done in
1357*7c478bd9Sstevel@tonic-gate  * the freelist first, then in the cachelist, within the highest
1358*7c478bd9Sstevel@tonic-gate  * memory range (based on the DMA attributes) before searching the
1359*7c478bd9Sstevel@tonic-gate  * lower memory ranges.
1360*7c478bd9Sstevel@tonic-gate  *
1361*7c478bd9Sstevel@tonic-gate  * Note: This function is called only by page_create_io().
1362*7c478bd9Sstevel@tonic-gate  */
1363*7c478bd9Sstevel@tonic-gate /*ARGSUSED*/
1364*7c478bd9Sstevel@tonic-gate page_t *
1365*7c478bd9Sstevel@tonic-gate page_get_anylist(struct vnode *vp, u_offset_t off, struct as *as, caddr_t vaddr,
1366*7c478bd9Sstevel@tonic-gate     size_t size, uint_t flags, ddi_dma_attr_t *dma_attr, lgrp_t	*lgrp)
1367*7c478bd9Sstevel@tonic-gate {
1368*7c478bd9Sstevel@tonic-gate 	uint_t		bin;
1369*7c478bd9Sstevel@tonic-gate 	int		mtype;
1370*7c478bd9Sstevel@tonic-gate 	page_t		*pp;
1371*7c478bd9Sstevel@tonic-gate 	int		n;
1372*7c478bd9Sstevel@tonic-gate 	int		m;
1373*7c478bd9Sstevel@tonic-gate 	int		szc;
1374*7c478bd9Sstevel@tonic-gate 	int		fullrange;
1375*7c478bd9Sstevel@tonic-gate 	int		mnode;
1376*7c478bd9Sstevel@tonic-gate 	int		local_failed_stat = 0;
1377*7c478bd9Sstevel@tonic-gate 	lgrp_mnode_cookie_t	lgrp_cookie;
1378*7c478bd9Sstevel@tonic-gate 
1379*7c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pga_alloc);
1380*7c478bd9Sstevel@tonic-gate 
1381*7c478bd9Sstevel@tonic-gate 	/* only base pagesize currently supported */
1382*7c478bd9Sstevel@tonic-gate 	if (size != MMU_PAGESIZE)
1383*7c478bd9Sstevel@tonic-gate 		return (NULL);
1384*7c478bd9Sstevel@tonic-gate 
1385*7c478bd9Sstevel@tonic-gate 	/*
1386*7c478bd9Sstevel@tonic-gate 	 * If we're passed a specific lgroup, we use it.  Otherwise,
1387*7c478bd9Sstevel@tonic-gate 	 * assume first-touch placement is desired.
1388*7c478bd9Sstevel@tonic-gate 	 */
1389*7c478bd9Sstevel@tonic-gate 	if (!LGRP_EXISTS(lgrp))
1390*7c478bd9Sstevel@tonic-gate 		lgrp = lgrp_home_lgrp();
1391*7c478bd9Sstevel@tonic-gate 
1392*7c478bd9Sstevel@tonic-gate 	/* LINTED */
1393*7c478bd9Sstevel@tonic-gate 	AS_2_BIN(as, seg, vp, vaddr, bin);
1394*7c478bd9Sstevel@tonic-gate 
1395*7c478bd9Sstevel@tonic-gate 	/*
1396*7c478bd9Sstevel@tonic-gate 	 * Only hold one freelist or cachelist lock at a time, that way we
1397*7c478bd9Sstevel@tonic-gate 	 * can start anywhere and not have to worry about lock
1398*7c478bd9Sstevel@tonic-gate 	 * ordering.
1399*7c478bd9Sstevel@tonic-gate 	 */
1400*7c478bd9Sstevel@tonic-gate 	if (dma_attr == NULL) {
1401*7c478bd9Sstevel@tonic-gate 		n = 0;
1402*7c478bd9Sstevel@tonic-gate 		m = mnoderangecnt - 1;
1403*7c478bd9Sstevel@tonic-gate 		fullrange = 1;
1404*7c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(pga_vmstats.pga_nulldmaattr);
1405*7c478bd9Sstevel@tonic-gate 	} else {
1406*7c478bd9Sstevel@tonic-gate 		pfn_t pfnlo = mmu_btop(dma_attr->dma_attr_addr_lo);
1407*7c478bd9Sstevel@tonic-gate 		pfn_t pfnhi = mmu_btop(dma_attr->dma_attr_addr_hi);
1408*7c478bd9Sstevel@tonic-gate 
1409*7c478bd9Sstevel@tonic-gate 		/*
1410*7c478bd9Sstevel@tonic-gate 		 * We can only guarantee alignment to a page boundary.
1411*7c478bd9Sstevel@tonic-gate 		 */
1412*7c478bd9Sstevel@tonic-gate 		if (dma_attr->dma_attr_align > MMU_PAGESIZE)
1413*7c478bd9Sstevel@tonic-gate 			return (NULL);
1414*7c478bd9Sstevel@tonic-gate 
1415*7c478bd9Sstevel@tonic-gate 		n = pfn_2_mtype(pfnlo);
1416*7c478bd9Sstevel@tonic-gate 		m = pfn_2_mtype(pfnhi);
1417*7c478bd9Sstevel@tonic-gate 
1418*7c478bd9Sstevel@tonic-gate 		fullrange = ((pfnlo == mnoderanges[n].mnr_pfnlo) &&
1419*7c478bd9Sstevel@tonic-gate 		    (pfnhi >= mnoderanges[m].mnr_pfnhi));
1420*7c478bd9Sstevel@tonic-gate 	}
1421*7c478bd9Sstevel@tonic-gate 	VM_STAT_COND_ADD(fullrange == 0, pga_vmstats.pga_notfullrange);
1422*7c478bd9Sstevel@tonic-gate 
1423*7c478bd9Sstevel@tonic-gate 	if (n > m)
1424*7c478bd9Sstevel@tonic-gate 		return (NULL);
1425*7c478bd9Sstevel@tonic-gate 
1426*7c478bd9Sstevel@tonic-gate 	szc = 0;
1427*7c478bd9Sstevel@tonic-gate 
1428*7c478bd9Sstevel@tonic-gate 	/* cycling through mtypes is handled by PGI_MT_RANGE0 if n == 0 */
1429*7c478bd9Sstevel@tonic-gate 	if (n == 0) {
1430*7c478bd9Sstevel@tonic-gate 		flags |= PGI_MT_RANGE0;
1431*7c478bd9Sstevel@tonic-gate 		n = m;
1432*7c478bd9Sstevel@tonic-gate 	}
1433*7c478bd9Sstevel@tonic-gate 
1434*7c478bd9Sstevel@tonic-gate 	/*
1435*7c478bd9Sstevel@tonic-gate 	 * Try local memory node first, but try remote if we can't
1436*7c478bd9Sstevel@tonic-gate 	 * get a page of the right color.
1437*7c478bd9Sstevel@tonic-gate 	 */
1438*7c478bd9Sstevel@tonic-gate 	LGRP_MNODE_COOKIE_INIT(lgrp_cookie, lgrp, LGRP_SRCH_HIER);
1439*7c478bd9Sstevel@tonic-gate 	while ((mnode = lgrp_memnode_choose(&lgrp_cookie)) >= 0) {
1440*7c478bd9Sstevel@tonic-gate 		/*
1441*7c478bd9Sstevel@tonic-gate 		 * allocate pages from high pfn to low.
1442*7c478bd9Sstevel@tonic-gate 		 */
1443*7c478bd9Sstevel@tonic-gate 		for (mtype = m; mtype >= n; mtype--) {
1444*7c478bd9Sstevel@tonic-gate 			if (fullrange != 0) {
1445*7c478bd9Sstevel@tonic-gate 				pp = page_get_mnode_freelist(mnode,
1446*7c478bd9Sstevel@tonic-gate 				    bin, mtype, szc, flags);
1447*7c478bd9Sstevel@tonic-gate 				if (pp == NULL) {
1448*7c478bd9Sstevel@tonic-gate 					pp = page_get_mnode_cachelist(
1449*7c478bd9Sstevel@tonic-gate 						bin, flags, mnode, mtype);
1450*7c478bd9Sstevel@tonic-gate 				}
1451*7c478bd9Sstevel@tonic-gate 			} else {
1452*7c478bd9Sstevel@tonic-gate 				pp = page_get_mnode_anylist(bin, szc,
1453*7c478bd9Sstevel@tonic-gate 				    flags, mnode, mtype, dma_attr);
1454*7c478bd9Sstevel@tonic-gate 			}
1455*7c478bd9Sstevel@tonic-gate 			if (pp != NULL) {
1456*7c478bd9Sstevel@tonic-gate 				VM_STAT_ADD(pga_vmstats.pga_allocok);
1457*7c478bd9Sstevel@tonic-gate 				check_dma(dma_attr, pp, 1);
1458*7c478bd9Sstevel@tonic-gate 				return (pp);
1459*7c478bd9Sstevel@tonic-gate 			}
1460*7c478bd9Sstevel@tonic-gate 		}
1461*7c478bd9Sstevel@tonic-gate 		if (!local_failed_stat) {
1462*7c478bd9Sstevel@tonic-gate 			lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ALLOC_FAIL, 1);
1463*7c478bd9Sstevel@tonic-gate 			local_failed_stat = 1;
1464*7c478bd9Sstevel@tonic-gate 		}
1465*7c478bd9Sstevel@tonic-gate 	}
1466*7c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(pga_vmstats.pga_allocfailed);
1467*7c478bd9Sstevel@tonic-gate 
1468*7c478bd9Sstevel@tonic-gate 	return (NULL);
1469*7c478bd9Sstevel@tonic-gate }
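
/*
 * Editor's note: a worked example of the mtype range selection above.
 * For a hypothetical 32-bit-limited device on a machine with memory
 * above 4G:
 *
 *	dma_attr_addr_lo = 0, dma_attr_addr_hi = 0xffffffffULL
 *	n = pfn_2_mtype(mmu_btop(0))          -> lowest mtype (0)
 *	m = pfn_2_mtype(mmu_btop(0xffffffff)) -> mtype covering pfns < 4G
 *
 * Since n == 0, PGI_MT_RANGE0 is set and the lower-level list routines
 * walk the mtypes from m down to 0 themselves (via MTYPE_NEXT), taking
 * pages from the highest usable range first.
 */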
1470*7c478bd9Sstevel@tonic-gate 
1471*7c478bd9Sstevel@tonic-gate /*
1472*7c478bd9Sstevel@tonic-gate  * page_create_io()
1473*7c478bd9Sstevel@tonic-gate  *
1474*7c478bd9Sstevel@tonic-gate  * This function is a copy of page_create_va() with an additional
1475*7c478bd9Sstevel@tonic-gate  * argument 'mattr' that specifies DMA memory requirements to
1476*7c478bd9Sstevel@tonic-gate  * the page list functions.  This function is used by the segkmem
1477*7c478bd9Sstevel@tonic-gate  * allocator, so it is only used to create new pages (i.e., PG_EXCL
1478*7c478bd9Sstevel@tonic-gate  * is set).
1479*7c478bd9Sstevel@tonic-gate  *
1480*7c478bd9Sstevel@tonic-gate  * Note: This interface is currently used only by the x86 PSM and is
1481*7c478bd9Sstevel@tonic-gate  *	 not fully specified, so the commitment level is that of a
1482*7c478bd9Sstevel@tonic-gate  *	 private interface specific to x86.  This interface uses the
1483*7c478bd9Sstevel@tonic-gate  *	 PSM-specific page_get_anylist() interface.
1484*7c478bd9Sstevel@tonic-gate  */
1485*7c478bd9Sstevel@tonic-gate 
1486*7c478bd9Sstevel@tonic-gate #define	PAGE_HASH_SEARCH(index, pp, vp, off) { \
1487*7c478bd9Sstevel@tonic-gate 	for ((pp) = page_hash[(index)]; (pp); (pp) = (pp)->p_hash) { \
1488*7c478bd9Sstevel@tonic-gate 		if ((pp)->p_vnode == (vp) && (pp)->p_offset == (off)) \
1489*7c478bd9Sstevel@tonic-gate 			break; \
1490*7c478bd9Sstevel@tonic-gate 	} \
1491*7c478bd9Sstevel@tonic-gate }
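
/*
 * Editor's note: PAGE_HASH_SEARCH scans one page_hash chain for the page
 * identified by (vp, off), leaving pp pointing at the page, or NULL if it
 * is not hashed in.  The caller must hold that chain's hash mutex, as
 * page_create_io() does below.
 */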
1492*7c478bd9Sstevel@tonic-gate 
1493*7c478bd9Sstevel@tonic-gate 
1494*7c478bd9Sstevel@tonic-gate page_t *
1495*7c478bd9Sstevel@tonic-gate page_create_io(
1496*7c478bd9Sstevel@tonic-gate 	struct vnode	*vp,
1497*7c478bd9Sstevel@tonic-gate 	u_offset_t	off,
1498*7c478bd9Sstevel@tonic-gate 	uint_t		bytes,
1499*7c478bd9Sstevel@tonic-gate 	uint_t		flags,
1500*7c478bd9Sstevel@tonic-gate 	struct as	*as,
1501*7c478bd9Sstevel@tonic-gate 	caddr_t		vaddr,
1502*7c478bd9Sstevel@tonic-gate 	ddi_dma_attr_t	*mattr)	/* DMA memory attributes if any */
1503*7c478bd9Sstevel@tonic-gate {
1504*7c478bd9Sstevel@tonic-gate 	page_t		*plist = NULL;
1505*7c478bd9Sstevel@tonic-gate 	uint_t		plist_len = 0;
1506*7c478bd9Sstevel@tonic-gate 	pgcnt_t		npages;
1507*7c478bd9Sstevel@tonic-gate 	page_t		*npp = NULL;
1508*7c478bd9Sstevel@tonic-gate 	uint_t		pages_req;
1509*7c478bd9Sstevel@tonic-gate 	page_t		*pp;
1510*7c478bd9Sstevel@tonic-gate 	kmutex_t	*phm = NULL;
1511*7c478bd9Sstevel@tonic-gate 	uint_t		index;
1512*7c478bd9Sstevel@tonic-gate 
1513*7c478bd9Sstevel@tonic-gate 	TRACE_4(TR_FAC_VM, TR_PAGE_CREATE_START,
1514*7c478bd9Sstevel@tonic-gate 		"page_create_start:vp %p off %llx bytes %u flags %x",
1515*7c478bd9Sstevel@tonic-gate 		vp, off, bytes, flags);
1516*7c478bd9Sstevel@tonic-gate 
1517*7c478bd9Sstevel@tonic-gate 	ASSERT((flags & ~(PG_EXCL | PG_WAIT | PG_PHYSCONTIG)) == 0);
1518*7c478bd9Sstevel@tonic-gate 
1519*7c478bd9Sstevel@tonic-gate 	pages_req = npages = mmu_btopr(bytes);
1520*7c478bd9Sstevel@tonic-gate 
1521*7c478bd9Sstevel@tonic-gate 	/*
1522*7c478bd9Sstevel@tonic-gate 	 * Do the freemem and pcf accounting.
1523*7c478bd9Sstevel@tonic-gate 	 */
1524*7c478bd9Sstevel@tonic-gate 	if (!page_create_wait(npages, flags)) {
1525*7c478bd9Sstevel@tonic-gate 		return (NULL);
1526*7c478bd9Sstevel@tonic-gate 	}
1527*7c478bd9Sstevel@tonic-gate 
1528*7c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_VM, TR_PAGE_CREATE_SUCCESS,
1529*7c478bd9Sstevel@tonic-gate 		"page_create_success:vp %p off %llx",
1530*7c478bd9Sstevel@tonic-gate 		vp, off);
1531*7c478bd9Sstevel@tonic-gate 
1532*7c478bd9Sstevel@tonic-gate 	/*
1533*7c478bd9Sstevel@tonic-gate 	 * If satisfying this request has left us with too little
1534*7c478bd9Sstevel@tonic-gate 	 * memory, start the wheels turning to get some back.  The
1535*7c478bd9Sstevel@tonic-gate 	 * first clause of the test prevents waking up the pageout
1536*7c478bd9Sstevel@tonic-gate 	 * daemon in situations where it would decide that there's
1537*7c478bd9Sstevel@tonic-gate 	 * nothing to do.
1538*7c478bd9Sstevel@tonic-gate 	 */
1539*7c478bd9Sstevel@tonic-gate 	if (nscan < desscan && freemem < minfree) {
1540*7c478bd9Sstevel@tonic-gate 		TRACE_1(TR_FAC_VM, TR_PAGEOUT_CV_SIGNAL,
1541*7c478bd9Sstevel@tonic-gate 			"pageout_cv_signal:freemem %ld", freemem);
1542*7c478bd9Sstevel@tonic-gate 		cv_signal(&proc_pageout->p_cv);
1543*7c478bd9Sstevel@tonic-gate 	}
1544*7c478bd9Sstevel@tonic-gate 
1545*7c478bd9Sstevel@tonic-gate 	if (flags & PG_PHYSCONTIG) {
1546*7c478bd9Sstevel@tonic-gate 
1547*7c478bd9Sstevel@tonic-gate 		plist = page_get_contigpage(&npages, mattr, 1);
1548*7c478bd9Sstevel@tonic-gate 		if (plist == NULL) {
1549*7c478bd9Sstevel@tonic-gate 			page_create_putback(npages);
1550*7c478bd9Sstevel@tonic-gate 			return (NULL);
1551*7c478bd9Sstevel@tonic-gate 		}
1552*7c478bd9Sstevel@tonic-gate 
1553*7c478bd9Sstevel@tonic-gate 		pp = plist;
1554*7c478bd9Sstevel@tonic-gate 
1555*7c478bd9Sstevel@tonic-gate 		do {
1556*7c478bd9Sstevel@tonic-gate 			if (!page_hashin(pp, vp, off, NULL)) {
1557*7c478bd9Sstevel@tonic-gate 				panic("pg_create_io: hashin failed %p %p %llx",
1558*7c478bd9Sstevel@tonic-gate 				    (void *)pp, (void *)vp, off);
1559*7c478bd9Sstevel@tonic-gate 			}
1560*7c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_create_new);
1561*7c478bd9Sstevel@tonic-gate 			off += MMU_PAGESIZE;
1562*7c478bd9Sstevel@tonic-gate 			PP_CLRFREE(pp);
1563*7c478bd9Sstevel@tonic-gate 			PP_CLRAGED(pp);
1564*7c478bd9Sstevel@tonic-gate 			page_set_props(pp, P_REF);
1565*7c478bd9Sstevel@tonic-gate 			pp = pp->p_next;
1566*7c478bd9Sstevel@tonic-gate 		} while (pp != plist);
1567*7c478bd9Sstevel@tonic-gate 
1568*7c478bd9Sstevel@tonic-gate 		if (!npages) {
1569*7c478bd9Sstevel@tonic-gate 			check_dma(mattr, plist, pages_req);
1570*7c478bd9Sstevel@tonic-gate 			return (plist);
1571*7c478bd9Sstevel@tonic-gate 		} else {
1572*7c478bd9Sstevel@tonic-gate 			vaddr += (pages_req - npages) << MMU_PAGESHIFT;
1573*7c478bd9Sstevel@tonic-gate 		}
1574*7c478bd9Sstevel@tonic-gate 
1575*7c478bd9Sstevel@tonic-gate 		/*
1576*7c478bd9Sstevel@tonic-gate 		 * fall-thru:
1577*7c478bd9Sstevel@tonic-gate 		 *
1578*7c478bd9Sstevel@tonic-gate 		 * page_get_contigpage returns when npages <= sgllen.
1579*7c478bd9Sstevel@tonic-gate 		 * Grab the rest of the non-contig pages below from anylist.
1580*7c478bd9Sstevel@tonic-gate 		 */
1581*7c478bd9Sstevel@tonic-gate 	}
1582*7c478bd9Sstevel@tonic-gate 
1583*7c478bd9Sstevel@tonic-gate 	/*
1584*7c478bd9Sstevel@tonic-gate 	 * Loop around collecting the requested number of pages.
1585*7c478bd9Sstevel@tonic-gate 	 * Most of the time, we have to `create' a new page. With
1586*7c478bd9Sstevel@tonic-gate 	 * this in mind, pull the page off the free list before
1587*7c478bd9Sstevel@tonic-gate 	 * getting the hash lock.  This will minimize the hash
1588*7c478bd9Sstevel@tonic-gate 	 * lock hold time, nesting, and the like.  If it turns
1589*7c478bd9Sstevel@tonic-gate 	 * out we don't need the page, we put it back at the end.
1590*7c478bd9Sstevel@tonic-gate 	 */
1591*7c478bd9Sstevel@tonic-gate 	while (npages--) {
1592*7c478bd9Sstevel@tonic-gate 		phm = NULL;
1593*7c478bd9Sstevel@tonic-gate 
1594*7c478bd9Sstevel@tonic-gate 		index = PAGE_HASH_FUNC(vp, off);
1595*7c478bd9Sstevel@tonic-gate top:
1596*7c478bd9Sstevel@tonic-gate 		ASSERT(phm == NULL);
1597*7c478bd9Sstevel@tonic-gate 		ASSERT(index == PAGE_HASH_FUNC(vp, off));
1598*7c478bd9Sstevel@tonic-gate 		ASSERT(MUTEX_NOT_HELD(page_vnode_mutex(vp)));
1599*7c478bd9Sstevel@tonic-gate 
1600*7c478bd9Sstevel@tonic-gate 		if (npp == NULL) {
1601*7c478bd9Sstevel@tonic-gate 			/*
1602*7c478bd9Sstevel@tonic-gate 			 * Try to get a page of any color either from
1603*7c478bd9Sstevel@tonic-gate 			 * the freelist or from the cache list.
1604*7c478bd9Sstevel@tonic-gate 			 */
1605*7c478bd9Sstevel@tonic-gate 			npp = page_get_anylist(vp, off, as, vaddr, MMU_PAGESIZE,
1606*7c478bd9Sstevel@tonic-gate 			    flags & ~PG_MATCH_COLOR, mattr, NULL);
1607*7c478bd9Sstevel@tonic-gate 			if (npp == NULL) {
1608*7c478bd9Sstevel@tonic-gate 				if (mattr == NULL) {
1609*7c478bd9Sstevel@tonic-gate 					/*
1610*7c478bd9Sstevel@tonic-gate 					 * Not looking for a special page;
1611*7c478bd9Sstevel@tonic-gate 					 * panic!
1612*7c478bd9Sstevel@tonic-gate 					 */
1613*7c478bd9Sstevel@tonic-gate 					panic("no page found %d", (int)npages);
1614*7c478bd9Sstevel@tonic-gate 				}
1615*7c478bd9Sstevel@tonic-gate 				/*
1616*7c478bd9Sstevel@tonic-gate 				 * No page found! This can happen
1617*7c478bd9Sstevel@tonic-gate 				 * if we are looking for a page
1618*7c478bd9Sstevel@tonic-gate 				 * within a specific memory range
1619*7c478bd9Sstevel@tonic-gate 				 * for DMA purposes. If PG_WAIT is
1620*7c478bd9Sstevel@tonic-gate 				 * specified then we wait for a
1621*7c478bd9Sstevel@tonic-gate 				 * while and then try again. The
1622*7c478bd9Sstevel@tonic-gate 				 * wait could be forever if we
1623*7c478bd9Sstevel@tonic-gate 				 * don't get the page(s) we need.
1624*7c478bd9Sstevel@tonic-gate 				 *
1625*7c478bd9Sstevel@tonic-gate 				 * Note: XXX We really need a mechanism
1626*7c478bd9Sstevel@tonic-gate 				 * to wait for pages in the desired
1627*7c478bd9Sstevel@tonic-gate 				 * range. For now, we wait for any
1628*7c478bd9Sstevel@tonic-gate 				 * pages and see if we can use them.
1629*7c478bd9Sstevel@tonic-gate 				 */
1630*7c478bd9Sstevel@tonic-gate 
1631*7c478bd9Sstevel@tonic-gate 				if ((mattr != NULL) && (flags & PG_WAIT)) {
1632*7c478bd9Sstevel@tonic-gate 					delay(10);
1633*7c478bd9Sstevel@tonic-gate 					goto top;
1634*7c478bd9Sstevel@tonic-gate 				}
1635*7c478bd9Sstevel@tonic-gate 
1636*7c478bd9Sstevel@tonic-gate 				goto fail; /* undo accounting stuff */
1637*7c478bd9Sstevel@tonic-gate 			}
1638*7c478bd9Sstevel@tonic-gate 
1639*7c478bd9Sstevel@tonic-gate 			if (PP_ISAGED(npp) == 0) {
1640*7c478bd9Sstevel@tonic-gate 				/*
1641*7c478bd9Sstevel@tonic-gate 				 * Since this page came from the
1642*7c478bd9Sstevel@tonic-gate 				 * cachelist, we must destroy the
1643*7c478bd9Sstevel@tonic-gate 				 * old vnode association.
1644*7c478bd9Sstevel@tonic-gate 				 */
1645*7c478bd9Sstevel@tonic-gate 				page_hashout(npp, (kmutex_t *)NULL);
1646*7c478bd9Sstevel@tonic-gate 			}
1647*7c478bd9Sstevel@tonic-gate 		}
1648*7c478bd9Sstevel@tonic-gate 
1649*7c478bd9Sstevel@tonic-gate 		/*
1650*7c478bd9Sstevel@tonic-gate 		 * We own this page!
1651*7c478bd9Sstevel@tonic-gate 		 */
1652*7c478bd9Sstevel@tonic-gate 		ASSERT(PAGE_EXCL(npp));
1653*7c478bd9Sstevel@tonic-gate 		ASSERT(npp->p_vnode == NULL);
1654*7c478bd9Sstevel@tonic-gate 		ASSERT(!hat_page_is_mapped(npp));
1655*7c478bd9Sstevel@tonic-gate 		PP_CLRFREE(npp);
1656*7c478bd9Sstevel@tonic-gate 		PP_CLRAGED(npp);
1657*7c478bd9Sstevel@tonic-gate 
1658*7c478bd9Sstevel@tonic-gate 		/*
1659*7c478bd9Sstevel@tonic-gate 		 * Here we have a page in our hot little mitts and are
1660*7c478bd9Sstevel@tonic-gate 		 * just waiting to stuff it on the appropriate lists.
1661*7c478bd9Sstevel@tonic-gate 		 * Get the mutex and check to see if it really does
1662*7c478bd9Sstevel@tonic-gate 		 * not exist.
1663*7c478bd9Sstevel@tonic-gate 		 */
1664*7c478bd9Sstevel@tonic-gate 		phm = PAGE_HASH_MUTEX(index);
1665*7c478bd9Sstevel@tonic-gate 		mutex_enter(phm);
1666*7c478bd9Sstevel@tonic-gate 		PAGE_HASH_SEARCH(index, pp, vp, off);
1667*7c478bd9Sstevel@tonic-gate 		if (pp == NULL) {
1668*7c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_create_new);
1669*7c478bd9Sstevel@tonic-gate 			pp = npp;
1670*7c478bd9Sstevel@tonic-gate 			npp = NULL;
1671*7c478bd9Sstevel@tonic-gate 			if (!page_hashin(pp, vp, off, phm)) {
1672*7c478bd9Sstevel@tonic-gate 				/*
1673*7c478bd9Sstevel@tonic-gate 				 * Since we hold the page hash mutex and
1674*7c478bd9Sstevel@tonic-gate 				 * just searched for this page, page_hashin
1675*7c478bd9Sstevel@tonic-gate 				 * had better not fail.  If it does, that
1676*7c478bd9Sstevel@tonic-gate 				 * means some thread did not follow the
1677*7c478bd9Sstevel@tonic-gate 				 * page hash mutex rules.  Panic now and
1678*7c478bd9Sstevel@tonic-gate 				 * get it over with.  As usual, go down
1679*7c478bd9Sstevel@tonic-gate 				 * holding all the locks.
1680*7c478bd9Sstevel@tonic-gate 				 */
1681*7c478bd9Sstevel@tonic-gate 				ASSERT(MUTEX_HELD(phm));
1682*7c478bd9Sstevel@tonic-gate 				panic("page_create: hashin fail %p %p %llx %p",
1683*7c478bd9Sstevel@tonic-gate 				    (void *)pp, (void *)vp, off, (void *)phm);
1684*7c478bd9Sstevel@tonic-gate 
1685*7c478bd9Sstevel@tonic-gate 			}
1686*7c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(phm));
1687*7c478bd9Sstevel@tonic-gate 			mutex_exit(phm);
1688*7c478bd9Sstevel@tonic-gate 			phm = NULL;
1689*7c478bd9Sstevel@tonic-gate 
1690*7c478bd9Sstevel@tonic-gate 			/*
1691*7c478bd9Sstevel@tonic-gate 			 * Hat layer locking need not be done to set
1692*7c478bd9Sstevel@tonic-gate 			 * the following bits since the page is not hashed
1693*7c478bd9Sstevel@tonic-gate 			 * and was on the free list (i.e., had no mappings).
1694*7c478bd9Sstevel@tonic-gate 			 *
1695*7c478bd9Sstevel@tonic-gate 			 * Set the reference bit to protect
1696*7c478bd9Sstevel@tonic-gate 			 * against immediate pageout
1697*7c478bd9Sstevel@tonic-gate 			 *
1698*7c478bd9Sstevel@tonic-gate 			 * XXXmh modify freelist code to set reference
1699*7c478bd9Sstevel@tonic-gate 			 * bit so we don't have to do it here.
1700*7c478bd9Sstevel@tonic-gate 			 */
1701*7c478bd9Sstevel@tonic-gate 			page_set_props(pp, P_REF);
1702*7c478bd9Sstevel@tonic-gate 		} else {
1703*7c478bd9Sstevel@tonic-gate 			ASSERT(MUTEX_HELD(phm));
1704*7c478bd9Sstevel@tonic-gate 			mutex_exit(phm);
1705*7c478bd9Sstevel@tonic-gate 			phm = NULL;
1706*7c478bd9Sstevel@tonic-gate 			/*
1707*7c478bd9Sstevel@tonic-gate 			 * NOTE: This should not happen for pages associated
1708*7c478bd9Sstevel@tonic-gate 			 *	 with kernel vnode 'kvp'.
1709*7c478bd9Sstevel@tonic-gate 			 */
1710*7c478bd9Sstevel@tonic-gate 			/* XX64 - to debug why this happens! */
1711*7c478bd9Sstevel@tonic-gate 			ASSERT(vp != &kvp);
1712*7c478bd9Sstevel@tonic-gate 			if (vp == &kvp)
1713*7c478bd9Sstevel@tonic-gate 				cmn_err(CE_NOTE,
1714*7c478bd9Sstevel@tonic-gate 				    "page_create: page not expected "
1715*7c478bd9Sstevel@tonic-gate 				    "in hash list for kernel vnode - pp 0x%p",
1716*7c478bd9Sstevel@tonic-gate 				    (void *)pp);
1717*7c478bd9Sstevel@tonic-gate 			VM_STAT_ADD(page_create_exists);
1718*7c478bd9Sstevel@tonic-gate 			goto fail;
1719*7c478bd9Sstevel@tonic-gate 		}
1720*7c478bd9Sstevel@tonic-gate 
1721*7c478bd9Sstevel@tonic-gate 		/*
1722*7c478bd9Sstevel@tonic-gate 		 * Got a page!  It is locked.  Acquire the i/o
1723*7c478bd9Sstevel@tonic-gate 		 * lock since we are going to use the p_next and
1724*7c478bd9Sstevel@tonic-gate 		 * p_prev fields to link the requested pages together.
1725*7c478bd9Sstevel@tonic-gate 		 */
1726*7c478bd9Sstevel@tonic-gate 		page_io_lock(pp);
1727*7c478bd9Sstevel@tonic-gate 		page_add(&plist, pp);
1728*7c478bd9Sstevel@tonic-gate 		plist = plist->p_next;
1729*7c478bd9Sstevel@tonic-gate 		off += MMU_PAGESIZE;
1730*7c478bd9Sstevel@tonic-gate 		vaddr += MMU_PAGESIZE;
1731*7c478bd9Sstevel@tonic-gate 	}
1732*7c478bd9Sstevel@tonic-gate 
1733*7c478bd9Sstevel@tonic-gate 	check_dma(mattr, plist, pages_req);
1734*7c478bd9Sstevel@tonic-gate 	return (plist);
1735*7c478bd9Sstevel@tonic-gate 
1736*7c478bd9Sstevel@tonic-gate fail:
1737*7c478bd9Sstevel@tonic-gate 	if (npp != NULL) {
1738*7c478bd9Sstevel@tonic-gate 		/*
1739*7c478bd9Sstevel@tonic-gate 		 * Did not need this page after all.
1740*7c478bd9Sstevel@tonic-gate 		 * Put it back on the free list.
1741*7c478bd9Sstevel@tonic-gate 		 */
1742*7c478bd9Sstevel@tonic-gate 		VM_STAT_ADD(page_create_putbacks);
1743*7c478bd9Sstevel@tonic-gate 		PP_SETFREE(npp);
1744*7c478bd9Sstevel@tonic-gate 		PP_SETAGED(npp);
1745*7c478bd9Sstevel@tonic-gate 		npp->p_offset = (u_offset_t)-1;
1746*7c478bd9Sstevel@tonic-gate 		page_list_add(npp, PG_FREE_LIST | PG_LIST_TAIL);
1747*7c478bd9Sstevel@tonic-gate 		page_unlock(npp);
1748*7c478bd9Sstevel@tonic-gate 	}
1749*7c478bd9Sstevel@tonic-gate 
1750*7c478bd9Sstevel@tonic-gate 	/*
1751*7c478bd9Sstevel@tonic-gate 	 * Give up the pages we already got.
1752*7c478bd9Sstevel@tonic-gate 	 */
1753*7c478bd9Sstevel@tonic-gate 	while (plist != NULL) {
1754*7c478bd9Sstevel@tonic-gate 		pp = plist;
1755*7c478bd9Sstevel@tonic-gate 		page_sub(&plist, pp);
1756*7c478bd9Sstevel@tonic-gate 		page_io_unlock(pp);
1757*7c478bd9Sstevel@tonic-gate 		plist_len++;
1758*7c478bd9Sstevel@tonic-gate 		/*LINTED: constant in conditional ctx*/
1759*7c478bd9Sstevel@tonic-gate 		VN_DISPOSE(pp, B_INVAL, 0, kcred);
1760*7c478bd9Sstevel@tonic-gate 	}
1761*7c478bd9Sstevel@tonic-gate 
1762*7c478bd9Sstevel@tonic-gate 	/*
1763*7c478bd9Sstevel@tonic-gate 	 * VN_DISPOSE does freemem accounting for the pages in plist
1764*7c478bd9Sstevel@tonic-gate 	 * by calling page_free. So, we need to undo the pcf accounting
1765*7c478bd9Sstevel@tonic-gate 	 * for only the remaining pages.
1766*7c478bd9Sstevel@tonic-gate 	 */
1767*7c478bd9Sstevel@tonic-gate 	VM_STAT_ADD(page_create_putbacks);
1768*7c478bd9Sstevel@tonic-gate 	page_create_putback(pages_req - plist_len);
1769*7c478bd9Sstevel@tonic-gate 
1770*7c478bd9Sstevel@tonic-gate 	return (NULL);
1771*7c478bd9Sstevel@tonic-gate }
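
/*
 * Editor's note: a minimal sketch of a page_create_io() call for a device
 * restricted to the low 16MB (the attribute values and caller context are
 * hypothetical, illustration only):
 *
 *	ddi_dma_attr_t attr = { 0 };
 *
 *	attr.dma_attr_addr_lo = 0;
 *	attr.dma_attr_addr_hi = 0xffffff;	// below 16MB
 *	attr.dma_attr_align = MMU_PAGESIZE;
 *	pp = page_create_io(vp, off, mmu_ptob(4), PG_EXCL | PG_WAIT,
 *	    &kas, vaddr, &attr);
 */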
1772*7c478bd9Sstevel@tonic-gate 
1773*7c478bd9Sstevel@tonic-gate 
1774*7c478bd9Sstevel@tonic-gate /*
1775*7c478bd9Sstevel@tonic-gate  * Copy the data from the physical page represented by "frompp" to
1776*7c478bd9Sstevel@tonic-gate  * that represented by "topp". ppcopy uses CPU->cpu_caddr1 and
1777*7c478bd9Sstevel@tonic-gate  * CPU->cpu_caddr2.  It assumes that no one uses either map at interrupt
1778*7c478bd9Sstevel@tonic-gate  * level and no one sleeps with an active mapping there.
1779*7c478bd9Sstevel@tonic-gate  *
1780*7c478bd9Sstevel@tonic-gate  * Note that the ref/mod bits in the page_t's are not affected by
1781*7c478bd9Sstevel@tonic-gate  * this operation, hence it is up to the caller to update them appropriately.
1782*7c478bd9Sstevel@tonic-gate  */
1783*7c478bd9Sstevel@tonic-gate void
1784*7c478bd9Sstevel@tonic-gate ppcopy(page_t *frompp, page_t *topp)
1785*7c478bd9Sstevel@tonic-gate {
1786*7c478bd9Sstevel@tonic-gate 	caddr_t		pp_addr1;
1787*7c478bd9Sstevel@tonic-gate 	caddr_t		pp_addr2;
1788*7c478bd9Sstevel@tonic-gate 	void		*pte1;
1789*7c478bd9Sstevel@tonic-gate 	void		*pte2;
1790*7c478bd9Sstevel@tonic-gate 	kmutex_t	*ppaddr_mutex;
1791*7c478bd9Sstevel@tonic-gate 
1792*7c478bd9Sstevel@tonic-gate 	ASSERT_STACK_ALIGNED();
1793*7c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(frompp));
1794*7c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(topp));
1795*7c478bd9Sstevel@tonic-gate 
1796*7c478bd9Sstevel@tonic-gate 	if (kpm_enable) {
1797*7c478bd9Sstevel@tonic-gate 		pp_addr1 = hat_kpm_page2va(frompp, 0);
1798*7c478bd9Sstevel@tonic-gate 		pp_addr2 = hat_kpm_page2va(topp, 0);
1799*7c478bd9Sstevel@tonic-gate 		kpreempt_disable();
1800*7c478bd9Sstevel@tonic-gate 	} else {
1801*7c478bd9Sstevel@tonic-gate 		/*
1802*7c478bd9Sstevel@tonic-gate 		 * disable preemption so that the current CPU can't change
1803*7c478bd9Sstevel@tonic-gate 		 */
1804*7c478bd9Sstevel@tonic-gate 		kpreempt_disable();
1805*7c478bd9Sstevel@tonic-gate 
1806*7c478bd9Sstevel@tonic-gate 		pp_addr1 = CPU->cpu_caddr1;
1807*7c478bd9Sstevel@tonic-gate 		pp_addr2 = CPU->cpu_caddr2;
1808*7c478bd9Sstevel@tonic-gate 		pte1 = (void *)CPU->cpu_caddr1pte;
1809*7c478bd9Sstevel@tonic-gate 		pte2 = (void *)CPU->cpu_caddr2pte;
1810*7c478bd9Sstevel@tonic-gate 
1811*7c478bd9Sstevel@tonic-gate 		ppaddr_mutex = &CPU->cpu_ppaddr_mutex;
1812*7c478bd9Sstevel@tonic-gate 		mutex_enter(ppaddr_mutex);
1813*7c478bd9Sstevel@tonic-gate 
1814*7c478bd9Sstevel@tonic-gate 		hat_mempte_remap(page_pptonum(frompp), pp_addr1, pte1,
1815*7c478bd9Sstevel@tonic-gate 		    PROT_READ | HAT_STORECACHING_OK, HAT_LOAD_NOCONSIST);
1816*7c478bd9Sstevel@tonic-gate 		hat_mempte_remap(page_pptonum(topp), pp_addr2, pte2,
1817*7c478bd9Sstevel@tonic-gate 		    PROT_READ | PROT_WRITE | HAT_STORECACHING_OK,
1818*7c478bd9Sstevel@tonic-gate 		    HAT_LOAD_NOCONSIST);
1819*7c478bd9Sstevel@tonic-gate 	}
1820*7c478bd9Sstevel@tonic-gate 
1821*7c478bd9Sstevel@tonic-gate 	if (use_sse_pagecopy)
1822*7c478bd9Sstevel@tonic-gate 		hwblkpagecopy(pp_addr1, pp_addr2);
1823*7c478bd9Sstevel@tonic-gate 	else
1824*7c478bd9Sstevel@tonic-gate 		bcopy(pp_addr1, pp_addr2, PAGESIZE);
1825*7c478bd9Sstevel@tonic-gate 
1826*7c478bd9Sstevel@tonic-gate 	if (!kpm_enable)
1827*7c478bd9Sstevel@tonic-gate 		mutex_exit(ppaddr_mutex);
1828*7c478bd9Sstevel@tonic-gate 	kpreempt_enable();
1829*7c478bd9Sstevel@tonic-gate }
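
/*
 * Editor's note: a minimal ppcopy() caller sketch (illustrative only);
 * both pages must be locked, per the block comment above:
 *
 *	if (page_trylock(frompp, SE_SHARED)) {
 *		if (page_trylock(topp, SE_EXCL)) {
 *			ppcopy(frompp, topp);
 *			page_unlock(topp);
 *		}
 *		page_unlock(frompp);
 *	}
 */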
1830*7c478bd9Sstevel@tonic-gate 
1831*7c478bd9Sstevel@tonic-gate /*
1832*7c478bd9Sstevel@tonic-gate  * Zero the physical page from off to off + len given by `pp'
1833*7c478bd9Sstevel@tonic-gate  * without changing the reference and modified bits of the page.
1834*7c478bd9Sstevel@tonic-gate  *
1835*7c478bd9Sstevel@tonic-gate  * We do this using the CPU-private page address #2; see ppcopy() for more info.
1836*7c478bd9Sstevel@tonic-gate  * pagezero() must not be called at interrupt level.
1837*7c478bd9Sstevel@tonic-gate  */
1838*7c478bd9Sstevel@tonic-gate void
1839*7c478bd9Sstevel@tonic-gate pagezero(page_t *pp, uint_t off, uint_t len)
1840*7c478bd9Sstevel@tonic-gate {
1841*7c478bd9Sstevel@tonic-gate 	caddr_t		pp_addr2;
1842*7c478bd9Sstevel@tonic-gate 	void		*pte2;
1843*7c478bd9Sstevel@tonic-gate 	kmutex_t	*ppaddr_mutex;
1844*7c478bd9Sstevel@tonic-gate 
1845*7c478bd9Sstevel@tonic-gate 	ASSERT_STACK_ALIGNED();
1846*7c478bd9Sstevel@tonic-gate 	ASSERT(len <= MMU_PAGESIZE);
1847*7c478bd9Sstevel@tonic-gate 	ASSERT(off <= MMU_PAGESIZE);
1848*7c478bd9Sstevel@tonic-gate 	ASSERT(off + len <= MMU_PAGESIZE);
1849*7c478bd9Sstevel@tonic-gate 	ASSERT(PAGE_LOCKED(pp));
1850*7c478bd9Sstevel@tonic-gate 
1851*7c478bd9Sstevel@tonic-gate 	if (kpm_enable) {
1852*7c478bd9Sstevel@tonic-gate 		pp_addr2 = hat_kpm_page2va(pp, 0);
1853*7c478bd9Sstevel@tonic-gate 		kpreempt_disable();
1854*7c478bd9Sstevel@tonic-gate 	} else {
1855*7c478bd9Sstevel@tonic-gate 		kpreempt_disable();
1856*7c478bd9Sstevel@tonic-gate 
1857*7c478bd9Sstevel@tonic-gate 		pp_addr2 = CPU->cpu_caddr2;
1858*7c478bd9Sstevel@tonic-gate 		pte2 = (void *)CPU->cpu_caddr2pte;
1859*7c478bd9Sstevel@tonic-gate 
1860*7c478bd9Sstevel@tonic-gate 		ppaddr_mutex = &CPU->cpu_ppaddr_mutex;
1861*7c478bd9Sstevel@tonic-gate 		mutex_enter(ppaddr_mutex);
1862*7c478bd9Sstevel@tonic-gate 
1863*7c478bd9Sstevel@tonic-gate 		hat_mempte_remap(page_pptonum(pp), pp_addr2, pte2,
1864*7c478bd9Sstevel@tonic-gate 		    PROT_READ | PROT_WRITE | HAT_STORECACHING_OK,
1865*7c478bd9Sstevel@tonic-gate 		    HAT_LOAD_NOCONSIST);
1866*7c478bd9Sstevel@tonic-gate 	}
1867*7c478bd9Sstevel@tonic-gate 
1868*7c478bd9Sstevel@tonic-gate 	if (use_sse_pagezero)
1869*7c478bd9Sstevel@tonic-gate 		hwblkclr(pp_addr2 + off, len);
1870*7c478bd9Sstevel@tonic-gate 	else
1871*7c478bd9Sstevel@tonic-gate 		bzero(pp_addr2 + off, len);
1872*7c478bd9Sstevel@tonic-gate 
1873*7c478bd9Sstevel@tonic-gate 	if (!kpm_enable)
1874*7c478bd9Sstevel@tonic-gate 		mutex_exit(ppaddr_mutex);
1875*7c478bd9Sstevel@tonic-gate 	kpreempt_enable();
1876*7c478bd9Sstevel@tonic-gate }
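
/*
 * Editor's note: a typical whole-page use of pagezero() (illustrative
 * only); the off/len ASSERTs above bound the request to a single page:
 *
 *	pagezero(pp, 0, MMU_PAGESIZE);	// pp must be locked
 */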
1877*7c478bd9Sstevel@tonic-gate 
1878*7c478bd9Sstevel@tonic-gate /*
1879*7c478bd9Sstevel@tonic-gate  * Platform-dependent page scrub call.
1880*7c478bd9Sstevel@tonic-gate  */
1881*7c478bd9Sstevel@tonic-gate void
1882*7c478bd9Sstevel@tonic-gate pagescrub(page_t *pp, uint_t off, uint_t len)
1883*7c478bd9Sstevel@tonic-gate {
1884*7c478bd9Sstevel@tonic-gate 	/*
1885*7c478bd9Sstevel@tonic-gate 	 * For now, we rely on the fact that pagezero() will
1886*7c478bd9Sstevel@tonic-gate 	 * always clear UEs.
1887*7c478bd9Sstevel@tonic-gate 	 */
1888*7c478bd9Sstevel@tonic-gate 	pagezero(pp, off, len);
1889*7c478bd9Sstevel@tonic-gate }
1890*7c478bd9Sstevel@tonic-gate 
1891*7c478bd9Sstevel@tonic-gate /*
1892*7c478bd9Sstevel@tonic-gate  * set up two private virtual addresses on a given CPU for use by ppcopy()
1893*7c478bd9Sstevel@tonic-gate  */
1894*7c478bd9Sstevel@tonic-gate void
1895*7c478bd9Sstevel@tonic-gate setup_vaddr_for_ppcopy(struct cpu *cpup)
1896*7c478bd9Sstevel@tonic-gate {
1897*7c478bd9Sstevel@tonic-gate 	void *addr;
1898*7c478bd9Sstevel@tonic-gate 	void *pte;
1899*7c478bd9Sstevel@tonic-gate 
1900*7c478bd9Sstevel@tonic-gate 	addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP);
1901*7c478bd9Sstevel@tonic-gate 	pte = hat_mempte_setup(addr);
1902*7c478bd9Sstevel@tonic-gate 	cpup->cpu_caddr1 = addr;
1903*7c478bd9Sstevel@tonic-gate 	cpup->cpu_caddr1pte = (pteptr_t)pte;
1904*7c478bd9Sstevel@tonic-gate 
1905*7c478bd9Sstevel@tonic-gate 	addr = vmem_alloc(heap_arena, mmu_ptob(1), VM_SLEEP);
1906*7c478bd9Sstevel@tonic-gate 	pte = hat_mempte_setup(addr);
1907*7c478bd9Sstevel@tonic-gate 	cpup->cpu_caddr2 = addr;
1908*7c478bd9Sstevel@tonic-gate 	cpup->cpu_caddr2pte = (pteptr_t)pte;
1909*7c478bd9Sstevel@tonic-gate 
1910*7c478bd9Sstevel@tonic-gate 	mutex_init(&cpup->cpu_ppaddr_mutex, NULL, MUTEX_DEFAULT, NULL);
1911*7c478bd9Sstevel@tonic-gate }
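
/*
 * Editor's note: the two VA/PTE pairs established here are the CPU-private
 * mappings that ppcopy() and pagezero() above remap when kpm is disabled;
 * cpu_ppaddr_mutex serializes their use on each CPU.
 */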
1912*7c478bd9Sstevel@tonic-gate 
1913*7c478bd9Sstevel@tonic-gate 
1914*7c478bd9Sstevel@tonic-gate /*
1915*7c478bd9Sstevel@tonic-gate  * Create the pageout scanner thread.  The thread starts at the
1916*7c478bd9Sstevel@tonic-gate  * given procedure, in process pp, with priority pri.
1917*7c478bd9Sstevel@tonic-gate  */
1918*7c478bd9Sstevel@tonic-gate void
1919*7c478bd9Sstevel@tonic-gate pageout_init(void (*procedure)(), proc_t *pp, pri_t pri)
1920*7c478bd9Sstevel@tonic-gate {
1921*7c478bd9Sstevel@tonic-gate 	(void) thread_create(NULL, 0, procedure, NULL, 0, pp, TS_RUN, pri);
1922*7c478bd9Sstevel@tonic-gate }
1923*7c478bd9Sstevel@tonic-gate 
1924*7c478bd9Sstevel@tonic-gate /*
1925*7c478bd9Sstevel@tonic-gate  * Currently a no-op on x86; retained as a post-startup MMU hook.
1926*7c478bd9Sstevel@tonic-gate  */
1927*7c478bd9Sstevel@tonic-gate void
1928*7c478bd9Sstevel@tonic-gate post_startup_mmu_initialization(void)
1929*7c478bd9Sstevel@tonic-gate {}
1930*7c478bd9Sstevel@tonic-gate 
1931*7c478bd9Sstevel@tonic-gate /*
1932*7c478bd9Sstevel@tonic-gate  * Function for flushing D-cache when performing module relocations
1933*7c478bd9Sstevel@tonic-gate  * to an alternate mapping.  Unnecessary on Intel / AMD platforms.
1934*7c478bd9Sstevel@tonic-gate  */
1935*7c478bd9Sstevel@tonic-gate void
1936*7c478bd9Sstevel@tonic-gate dcache_flushall()
1937*7c478bd9Sstevel@tonic-gate {}
1938