1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/* Copyright (c) 1983, 1984, 1985, 1986, 1987, 1988, 1989 AT&T */
27/* All Rights Reserved */
28
29/*
30 * Portions of this source code were derived from Berkeley 4.3 BSD
31 * under license from the Regents of the University of California.
32 */
33
34/*
35 * VM - segment for non-faulting loads.
36 */
37
38#include <sys/types.h>
39#include <sys/t_lock.h>
40#include <sys/param.h>
41#include <sys/mman.h>
42#include <sys/errno.h>
43#include <sys/kmem.h>
44#include <sys/cmn_err.h>
45#include <sys/vnode.h>
46#include <sys/proc.h>
47#include <sys/conf.h>
48#include <sys/debug.h>
49#include <sys/archsystm.h>
50#include <sys/lgrp.h>
51
52#include <vm/page.h>
53#include <vm/hat.h>
54#include <vm/as.h>
55#include <vm/seg.h>
56#include <vm/vpage.h>
57
58/*
59 * Private seg op routines.
60 */
61static int	segnf_dup(struct seg *seg, struct seg *newseg);
62static int	segnf_unmap(struct seg *seg, caddr_t addr, size_t len);
63static void	segnf_free(struct seg *seg);
64static faultcode_t segnf_nomap(void);
65static int	segnf_setprot(struct seg *seg, caddr_t addr,
66		    size_t len, uint_t prot);
67static int	segnf_checkprot(struct seg *seg, caddr_t addr,
68		    size_t len, uint_t prot);
69static void	segnf_badop(void);
70static int	segnf_nop(void);
71static int	segnf_getprot(struct seg *seg, caddr_t addr,
72		    size_t len, uint_t *protv);
73static u_offset_t segnf_getoffset(struct seg *seg, caddr_t addr);
74static int	segnf_gettype(struct seg *seg, caddr_t addr);
75static int	segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp);
76static void	segnf_dump(struct seg *seg);
77static int	segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
78		    struct page ***ppp, enum lock_type type, enum seg_rw rw);
79static int	segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
80		    uint_t szc);
81static int	segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp);
82static lgrp_mem_policy_info_t	*segnf_getpolicy(struct seg *seg,
83    caddr_t addr);
84
85
86struct seg_ops segnf_ops = {
87	segnf_dup,
88	segnf_unmap,
89	segnf_free,
90	(faultcode_t (*)(struct hat *, struct seg *, caddr_t, size_t,
91	    enum fault_type, enum seg_rw))
92		segnf_nomap,		/* fault */
93	(faultcode_t (*)(struct seg *, caddr_t))
94		segnf_nomap,		/* faulta */
95	segnf_setprot,
96	segnf_checkprot,
97	(int (*)())segnf_badop,		/* kluster */
98	(size_t (*)(struct seg *))NULL,	/* swapout */
99	(int (*)(struct seg *, caddr_t, size_t, int, uint_t))
100		segnf_nop,		/* sync */
101	(size_t (*)(struct seg *, caddr_t, size_t, char *))
102		segnf_nop,		/* incore */
103	(int (*)(struct seg *, caddr_t, size_t, int, int, ulong_t *, size_t))
104		segnf_nop,		/* lockop */
105	segnf_getprot,
106	segnf_getoffset,
107	segnf_gettype,
108	segnf_getvp,
109	(int (*)(struct seg *, caddr_t, size_t, uint_t))
110		segnf_nop,		/* advise */
111	segnf_dump,
112	segnf_pagelock,
113	segnf_setpagesize,
114	segnf_getmemid,
115	segnf_getpolicy,
116};
117
118/*
119 * vnode and page for the page of zeros we use for the nf mappings.
120 */
121static kmutex_t segnf_lock;
122static struct vnode nfvp;
123static struct page **nfpp;
124
/*
 * Virtual color of addr: the page index of addr inside an
 * shm_alignment-sized window, or 0 when shm_alignment is 0.
 *
 * The whole expansion is parenthesized so that operators next to the macro
 * invocation cannot be absorbed into the conditional expression.
 */
#define	addr_to_vcolor(addr)						\
	((shm_alignment) ?						\
	((int)(((uintptr_t)(addr) & (shm_alignment - 1)) >> PAGESHIFT)) : 0)
128
/*
 * Limits on the creation of no-fault segments.
 *
 * No-fault segments exist to speed up SPARC V9 code that uses the
 * non-faulting load ASI (ASI_PRIMARY_NOFAULT).  Letting their number grow
 * without bound causes trouble for two reasons:
 *
 *	1. Each one consumes kernel resources: a seg structure plus the
 *	   HAT data needed to map the zero pages.
 *
 *	2. Each one occupies user virtual address space, which is then
 *	   unavailable to later mmap(0, ...) calls that rely on as_gap()
 *	   to find empty va regions.  With enough no-fault segments the
 *	   runtime linker may be unable to load a shared object.
 *
 * MAXSEGFORNF is the segment count of the address space above which
 * segnf_create() tries to remove another no-fault segment; MAXNFSEARCH
 * bounds the neighbor search it performs to find one.
 */
#define	MAXSEGFORNF	10000
#define	MAXNFSEARCH	5
148
149
150/*
151 * Must be called from startup()
152 */
153void
154segnf_init()
155{
156	mutex_init(&segnf_lock, NULL, MUTEX_DEFAULT, NULL);
157}
158
159
160/*
161 * Create a no-fault segment.
162 *
163 * The no-fault segment is not technically necessary, as the code in
164 * nfload() in trap.c will emulate the SPARC instruction and load
165 * a value of zero in the destination register.
166 *
167 * However, this code tries to put a page of zero's at the nofault address
168 * so that subsequent non-faulting loads to the same page will not
169 * trap with a tlb miss.
170 *
171 * In order to help limit the number of segments we merge adjacent nofault
172 * segments into a single segment.  If we get a large number of segments
173 * we'll also try to delete a random other nf segment.
174 */
175/* ARGSUSED */
176int
177segnf_create(struct seg *seg, void *argsp)
178{
179	uint_t prot;
180	pgcnt_t	vacpgs;
181	u_offset_t off = 0;
182	caddr_t	vaddr = NULL;
183	int i, color;
184	struct seg *s1;
185	struct seg *s2;
186	size_t size;
187	struct as *as = seg->s_as;
188
189	ASSERT(as && AS_WRITE_HELD(as));
190
191	/*
192	 * Need a page per virtual color or just 1 if no vac.
193	 */
194	mutex_enter(&segnf_lock);
195	if (nfpp == NULL) {
196		struct seg kseg;
197
198		vacpgs = 1;
199		if (shm_alignment > PAGESIZE) {
200			vacpgs = shm_alignment >> PAGESHIFT;
201		}
202
203		nfpp = kmem_alloc(sizeof (*nfpp) * vacpgs, KM_SLEEP);
204
205		kseg.s_as = &kas;
206		for (i = 0; i < vacpgs; i++, off += PAGESIZE,
207		    vaddr += PAGESIZE) {
208			nfpp[i] = page_create_va(&nfvp, off, PAGESIZE,
209			    PG_WAIT | PG_NORELOC, &kseg, vaddr);
210			page_io_unlock(nfpp[i]);
211			page_downgrade(nfpp[i]);
212			pagezero(nfpp[i], 0, PAGESIZE);
213		}
214	}
215	mutex_exit(&segnf_lock);
216
217	hat_map(as->a_hat, seg->s_base, seg->s_size, HAT_MAP);
218
219	/*
220	 * s_data can't be NULL because of ASSERTS in the common vm code.
221	 */
222	seg->s_ops = &segnf_ops;
223	seg->s_data = seg;
224	seg->s_flags |= S_PURGE;
225
226	mutex_enter(&as->a_contents);
227	as->a_flags |= AS_NEEDSPURGE;
228	mutex_exit(&as->a_contents);
229
230	prot = PROT_READ;
231	color = addr_to_vcolor(seg->s_base);
232	if (as != &kas)
233		prot |= PROT_USER;
234	hat_memload(as->a_hat, seg->s_base, nfpp[color],
235	    prot | HAT_NOFAULT, HAT_LOAD);
236
237	/*
238	 * At this point see if we can concatenate a segment to
239	 * a non-fault segment immediately before and/or after it.
240	 */
241	if ((s1 = AS_SEGPREV(as, seg)) != NULL &&
242	    s1->s_ops == &segnf_ops &&
243	    s1->s_base + s1->s_size == seg->s_base) {
244		size = s1->s_size;
245		seg_free(s1);
246		seg->s_base -= size;
247		seg->s_size += size;
248	}
249
250	if ((s2 = AS_SEGNEXT(as, seg)) != NULL &&
251	    s2->s_ops == &segnf_ops &&
252	    seg->s_base + seg->s_size == s2->s_base) {
253		size = s2->s_size;
254		seg_free(s2);
255		seg->s_size += size;
256	}
257
258	/*
259	 * if we already have a lot of segments, try to delete some other
260	 * nofault segment to reduce the probability of uncontrolled segment
261	 * creation.
262	 *
263	 * the code looks around quickly (no more than MAXNFSEARCH segments
264	 * each way) for another NF segment and then deletes it.
265	 */
266	if (avl_numnodes(&as->a_segtree) > MAXSEGFORNF) {
267		size = 0;
268		s2 = NULL;
269		s1 = AS_SEGPREV(as, seg);
270		while (size++ < MAXNFSEARCH && s1 != NULL) {
271			if (s1->s_ops == &segnf_ops)
272				s2 = s1;
273			s1 = AS_SEGPREV(s1->s_as, seg);
274		}
275		if (s2 == NULL) {
276			s1 = AS_SEGNEXT(as, seg);
277			while (size-- > 0 && s1 != NULL) {
278				if (s1->s_ops == &segnf_ops)
279					s2 = s1;
280				s1 = AS_SEGNEXT(as, seg);
281			}
282		}
283		if (s2 != NULL)
284			seg_unmap(s2);
285	}
286
287	return (0);
288}
289
/*
 * "No fault" segments are never really needed, so they are not dup'd;
 * this entry point panics if it is ever reached.
 */
/* ARGSUSED */
static int
segnf_dup(struct seg *seg, struct seg *newseg)
{
	panic("segnf_dup");
	/*NOTREACHED*/
	return (0);
}
300
301/*
302 * Split a segment at addr for length len.
303 */
304static int
305segnf_unmap(struct seg *seg, caddr_t addr, size_t len)
306{
307	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
308
309	/*
310	 * Check for bad sizes.
311	 */
312	if (addr < seg->s_base || addr + len > seg->s_base + seg->s_size ||
313	    (len & PAGEOFFSET) || ((uintptr_t)addr & PAGEOFFSET)) {
314		cmn_err(CE_PANIC, "segnf_unmap: bad unmap size");
315	}
316
317	/*
318	 * Unload any hardware translations in the range to be taken out.
319	 */
320	hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP);
321
322	if (addr == seg->s_base && len == seg->s_size) {
323		/*
324		 * Freeing entire segment.
325		 */
326		seg_free(seg);
327	} else if (addr == seg->s_base) {
328		/*
329		 * Freeing the beginning of the segment.
330		 */
331		seg->s_base += len;
332		seg->s_size -= len;
333	} else if (addr + len == seg->s_base + seg->s_size) {
334		/*
335		 * Freeing the end of the segment.
336		 */
337		seg->s_size -= len;
338	} else {
339		/*
340		 * The section to go is in the middle of the segment, so we
341		 * have to cut it into two segments.  We shrink the existing
342		 * "seg" at the low end, and create "nseg" for the high end.
343		 */
344		caddr_t nbase = addr + len;
345		size_t nsize = (seg->s_base + seg->s_size) - nbase;
346		struct seg *nseg;
347
348		/*
349		 * Trim down "seg" before trying to stick "nseg" into the as.
350		 */
351		seg->s_size = addr - seg->s_base;
352		nseg = seg_alloc(seg->s_as, nbase, nsize);
353		if (nseg == NULL)
354			cmn_err(CE_PANIC, "segnf_unmap: seg_alloc failed");
355
356		/*
357		 * s_data can't be NULL because of ASSERTs in common VM code.
358		 */
359		nseg->s_ops = seg->s_ops;
360		nseg->s_data = nseg;
361		nseg->s_flags |= S_PURGE;
362		mutex_enter(&seg->s_as->a_contents);
363		seg->s_as->a_flags |= AS_NEEDSPURGE;
364		mutex_exit(&seg->s_as->a_contents);
365	}
366
367	return (0);
368}
369
370/*
371 * Free a segment.
372 */
373static void
374segnf_free(struct seg *seg)
375{
376	ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as));
377}
378
379/*
380 * No faults allowed on segnf.
381 */
382static faultcode_t
383segnf_nomap(void)
384{
385	return (FC_NOMAP);
386}
387
388/* ARGSUSED */
389static int
390segnf_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
391{
392	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
393	return (EACCES);
394}
395
396/* ARGSUSED */
397static int
398segnf_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot)
399{
400	uint_t sprot;
401	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
402
403	sprot = seg->s_as == &kas ?  PROT_READ : PROT_READ|PROT_USER;
404	return ((prot & sprot) == prot ? 0 : EACCES);
405}
406
/*
 * Stub for an operation that must never be invoked on a no-fault segment
 * (it fills the kluster slot of segnf_ops); panics when called.
 */
static void
segnf_badop(void)
{
	panic("segnf_badop");
	/*NOTREACHED*/
}
413
/*
 * Stub for operations that have nothing to do on a no-fault segment (the
 * sync, incore, lockop and advise slots of segnf_ops).  Always returns 0.
 */
static int
segnf_nop(void)
{
	return (0);
}
419
420static int
421segnf_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv)
422{
423	size_t pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1;
424	size_t p;
425	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
426
427	for (p = 0; p < pgno; ++p)
428		protv[p] = PROT_READ;
429	return (0);
430}
431
432/* ARGSUSED */
433static u_offset_t
434segnf_getoffset(struct seg *seg, caddr_t addr)
435{
436	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
437
438	return ((u_offset_t)0);
439}
440
441/* ARGSUSED */
442static int
443segnf_gettype(struct seg *seg, caddr_t addr)
444{
445	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
446
447	return (MAP_SHARED);
448}
449
450/* ARGSUSED */
451static int
452segnf_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp)
453{
454	ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as));
455
456	*vpp = &nfvp;
457	return (0);
458}
459
/*
 * segnf pages are not dumped, so there is nothing to do here.
 */
/* ARGSUSED */
static void
segnf_dump(struct seg *seg)
{
}
467
468/*ARGSUSED*/
469static int
470segnf_pagelock(struct seg *seg, caddr_t addr, size_t len,
471    struct page ***ppp, enum lock_type type, enum seg_rw rw)
472{
473	return (ENOTSUP);
474}
475
476/*ARGSUSED*/
477static int
478segnf_setpagesize(struct seg *seg, caddr_t addr, size_t len,
479    uint_t szc)
480{
481	return (ENOTSUP);
482}
483
484/*ARGSUSED*/
485static int
486segnf_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp)
487{
488	return (ENODEV);
489}
490
491/*ARGSUSED*/
492static lgrp_mem_policy_info_t *
493segnf_getpolicy(struct seg *seg, caddr_t addr)
494{
495	return (NULL);
496}
497