1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/archsystm.h>
29 #include <sys/machsystm.h>
30 #include <sys/t_lock.h>
31 #include <sys/vmem.h>
32 #include <sys/mman.h>
33 #include <sys/vm.h>
34 #include <sys/cpu.h>
35 #include <sys/cmn_err.h>
36 #include <sys/cpuvar.h>
37 #include <sys/atomic.h>
38 #include <vm/as.h>
39 #include <vm/hat.h>
40 #include <vm/as.h>
41 #include <vm/page.h>
42 #include <vm/seg.h>
43 #include <vm/seg_kmem.h>
44 #include <vm/seg_kpm.h>
45 #include <vm/hat_sfmmu.h>
46 #include <sys/debug.h>
47 #include <sys/cpu_module.h>
48
/*
 * A quick way to generate a cache consistent address to map in a page.
 * users: ppcopy, pagezero, /proc, dev/mem
 *
 * The ppmapin/ppmapout routines provide a quick way of generating a cache
 * consistent address by reserving a given amount of kernel address space.
 * The base is PPMAPBASE and its size is PPMAPSIZE. This memory is divided
 * into x number of sets, where x is the number of colors for the virtual
 * cache. The number of colors is how many times a page can be mapped
 * simultaneously in the cache. For direct map caches this translates to
 * the number of pages in the cache.
 * Each set will be assigned a group of virtual pages from the reserved memory
 * depending on its virtual color.
 * When trying to assign a virtual address we will find out the color for the
 * physical page in question (if applicable). Then we will try to find an
 * available virtual page from the set of the appropriate color.
 */
66
67 int pp_slots = 4; /* small default, tuned by cpu module */
68
69 /* tuned by cpu module, default is "safe" */
70 int pp_consistent_coloring = PPAGE_STORES_POLLUTE | PPAGE_LOADS_POLLUTE;
71
72 static caddr_t ppmap_vaddrs[PPMAPSIZE / MMU_PAGESIZE];
73 static int nsets; /* number of sets */
74 static int ppmap_shift; /* set selector */
75
76 #ifdef PPDEBUG
77 #define MAXCOLORS 16 /* for debug only */
78 static int ppalloc_noslot = 0; /* # of allocations from kernelmap */
79 static int align_hits;
80 static int pp_allocs; /* # of ppmapin requests */
81 #endif /* PPDEBUG */
82
83 /*
84 * There are only 64 TLB entries on spitfire, 16 on cheetah
85 * (fully-associative TLB) so we allow the cpu module to tune the
86 * number to use here via pp_slots.
87 */
88 static struct ppmap_va {
89 caddr_t ppmap_slots[MAXPP_SLOTS];
90 } ppmap_va[NCPU];
91
92 /* prevent compilation with VAC defined */
93 #ifdef VAC
94 #error "sun4v ppmapin and ppmapout do not support VAC"
95 #endif
96
97 void
ppmapinit(void)98 ppmapinit(void)
99 {
100 int nset;
101 caddr_t va;
102
103 ASSERT(pp_slots <= MAXPP_SLOTS);
104
105 va = (caddr_t)PPMAPBASE;
106
107 /*
108 * sun4v does not have a virtual indexed cache and simply
109 * has only one set containing all pages.
110 */
111 nsets = mmu_btop(PPMAPSIZE);
112 ppmap_shift = MMU_PAGESHIFT;
113
114 for (nset = 0; nset < nsets; nset++) {
115 ppmap_vaddrs[nset] =
116 (caddr_t)((uintptr_t)va + (nset * MMU_PAGESIZE));
117 }
118 }
119
/*
 * Allocate a cache consistent virtual address to map a page, pp,
 * with protection, vprot; and map it in the MMU, using the most
 * efficient means possible. The argument avoid is a virtual address
 * hint which when masked yields an offset into a virtual cache
 * that should be avoided when allocating an address to map in a
 * page. An avoid arg of -1 means you don't care, for instance pagezero.
 *
 * machine dependent, depends on virtual address space layout,
 * understands that all kernel addresses have bit 31 set.
 *
 * NOTE: For sun4 platforms the meaning of the hint argument is opposite from
 * that found in other architectures. In other architectures the hint
 * (called avoid) was used to ask ppmapin to NOT use the specified cache color.
 * This was used to avoid virtual cache thrashing in the bcopy. Unfortunately
 * in the case of a COW, this later on caused a cache aliasing conflict. In
 * sun4, the bcopy routine uses the block ld/st instructions so we don't have
 * to worry about virtual cache thrashing. Actually, by using the hint to
 * choose the right color we can almost guarantee a cache conflict will
 * not occur.
 */
140
141 /*ARGSUSED2*/
142 caddr_t
ppmapin(page_t * pp,uint_t vprot,caddr_t hint)143 ppmapin(page_t *pp, uint_t vprot, caddr_t hint)
144 {
145 int nset;
146 caddr_t va;
147
148 #ifdef PPDEBUG
149 pp_allocs++;
150 #endif /* PPDEBUG */
151
152 /*
153 * For sun4v caches are physical caches, we can pick any address
154 * we want.
155 */
156 for (nset = 0; nset < nsets; nset++) {
157 va = ppmap_vaddrs[nset];
158 if (va != NULL) {
159 #ifdef PPDEBUG
160 align_hits++;
161 #endif /* PPDEBUG */
162 if (atomic_cas_ptr(&ppmap_vaddrs[nset], va, NULL) ==
163 va) {
164 hat_memload(kas.a_hat, va, pp,
165 vprot | HAT_NOSYNC,
166 HAT_LOAD_LOCK);
167 return (va);
168 }
169 }
170 }
171
172 #ifdef PPDEBUG
173 ppalloc_noslot++;
174 #endif /* PPDEBUG */
175
176 /*
177 * No free slots; get a random one from the kernel heap area.
178 */
179 va = vmem_alloc(heap_arena, PAGESIZE, VM_SLEEP);
180
181 hat_memload(kas.a_hat, va, pp, vprot | HAT_NOSYNC, HAT_LOAD_LOCK);
182
183 return (va);
184
185 }
186
187 void
ppmapout(caddr_t va)188 ppmapout(caddr_t va)
189 {
190 int nset;
191
192 if (va >= kernelheap && va < ekernelheap) {
193 /*
194 * Space came from kernelmap, flush the page and
195 * return the space.
196 */
197 hat_unload(kas.a_hat, va, PAGESIZE,
198 (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
199 vmem_free(heap_arena, va, PAGESIZE);
200 } else {
201 /*
202 * Space came from ppmap_vaddrs[], give it back.
203 */
204 nset = ((uintptr_t)va >> ppmap_shift) & (nsets - 1);
205 hat_unload(kas.a_hat, va, PAGESIZE,
206 (HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK));
207
208 ASSERT(ppmap_vaddrs[nset] == NULL);
209 ppmap_vaddrs[nset] = va;
210 }
211 }
212
213 #ifdef DEBUG
214 #define PP_STAT_ADD(stat) (stat)++
215 uint_t pload, ploadfail;
216 uint_t ppzero, ppzero_short;
217 #else
218 #define PP_STAT_ADD(stat)
219 #endif /* DEBUG */
220
221 static void
pp_unload_tlb(caddr_t * pslot,caddr_t va)222 pp_unload_tlb(caddr_t *pslot, caddr_t va)
223 {
224 ASSERT(*pslot == va);
225
226 vtag_flushpage(va, (uint64_t)ksfmmup);
227 *pslot = NULL; /* release the slot */
228 }
229
230 /*
231 * Routine to copy kernel pages during relocation. It will copy one
232 * PAGESIZE page to another PAGESIZE page. This function may be called
233 * above LOCK_LEVEL so it should not grab any locks.
234 */
235 void
ppcopy_kernel__relocatable(page_t * fm_pp,page_t * to_pp)236 ppcopy_kernel__relocatable(page_t *fm_pp, page_t *to_pp)
237 {
238 uint64_t fm_pa, to_pa;
239 size_t nbytes;
240
241 fm_pa = (uint64_t)(fm_pp->p_pagenum) << MMU_PAGESHIFT;
242 to_pa = (uint64_t)(to_pp->p_pagenum) << MMU_PAGESHIFT;
243
244 nbytes = MMU_PAGESIZE;
245
246 for (; nbytes > 0; fm_pa += 32, to_pa += 32, nbytes -= 32)
247 hw_pa_bcopy32(fm_pa, to_pa);
248 }
249
/*
 * Copy the data from the physical page represented by "frompp" to
 * that represented by "topp".
 *
 * Try a KPM mapping first; if that fails then call ppmapin
 * to load it.
 * Returns one on success or zero on some sort of fault while doing the copy.
 */
258 int
ppcopy(page_t * fm_pp,page_t * to_pp)259 ppcopy(page_t *fm_pp, page_t *to_pp)
260 {
261 caddr_t fm_va = NULL;
262 caddr_t to_va;
263 boolean_t fast;
264 label_t ljb;
265 int ret = 1;
266
267 ASSERT(PAGE_LOCKED(fm_pp));
268 ASSERT(PAGE_LOCKED(to_pp));
269
270 /*
271 * Try to map using KPM if enabled. If it fails, fall
272 * back to ppmapin/ppmapout.
273 */
274 if ((kpm_enable == 0) ||
275 (fm_va = hat_kpm_mapin(fm_pp, NULL)) == NULL ||
276 (to_va = hat_kpm_mapin(to_pp, NULL)) == NULL) {
277 if (fm_va != NULL)
278 hat_kpm_mapout(fm_pp, NULL, fm_va);
279 fm_va = ppmapin(fm_pp, PROT_READ, (caddr_t)-1);
280 to_va = ppmapin(to_pp, PROT_READ | PROT_WRITE, fm_va);
281 fast = B_FALSE;
282 } else
283 fast = B_TRUE;
284
285 if (on_fault(&ljb)) {
286 ret = 0;
287 goto faulted;
288 }
289 bcopy(fm_va, to_va, PAGESIZE);
290 no_fault();
291 faulted:
292
293 /* Unmap */
294 if (fast) {
295 hat_kpm_mapout(fm_pp, NULL, fm_va);
296 hat_kpm_mapout(to_pp, NULL, to_va);
297 } else {
298 ppmapout(fm_va);
299 ppmapout(to_va);
300 }
301 return (ret);
302 }
303
304 /*
305 * Zero the physical page from off to off + len given by `pp'
306 * without changing the reference and modified bits of page.
307 *
308 * Again, we'll try per cpu mapping first.
309 */
310
311 void
pagezero(page_t * pp,uint_t off,uint_t len)312 pagezero(page_t *pp, uint_t off, uint_t len)
313 {
314 caddr_t va;
315 extern int hwblkclr(void *, size_t);
316 extern int use_hw_bzero;
317 boolean_t fast;
318
319 ASSERT((int)len > 0 && (int)off >= 0 && off + len <= PAGESIZE);
320 ASSERT(PAGE_LOCKED(pp));
321
322 PP_STAT_ADD(ppzero);
323
324 if (len != MMU_PAGESIZE || !use_hw_bzero) {
325 PP_STAT_ADD(ppzero_short);
326 }
327
328 kpreempt_disable();
329
330 /*
331 * Try to use KPM if enabled. If that fails, fall back to
332 * ppmapin/ppmapout.
333 */
334
335 if (kpm_enable != 0) {
336 fast = B_TRUE;
337 va = hat_kpm_mapin(pp, NULL);
338 } else
339 va = NULL;
340
341 if (va == NULL) {
342 fast = B_FALSE;
343 va = ppmapin(pp, PROT_READ | PROT_WRITE, (caddr_t)-1);
344 }
345
346 if (!use_hw_bzero) {
347 bzero(va + off, len);
348 sync_icache(va + off, len);
349 } else if (hwblkclr(va + off, len)) {
350 /*
351 * We may not have used block commit asi.
352 * So flush the I-$ manually
353 */
354 sync_icache(va + off, len);
355 } else {
356 /*
357 * We have used blk commit, and flushed the I-$.
358 * However we still may have an instruction in the
359 * pipeline. Only a flush will invalidate that.
360 */
361 doflush(va);
362 }
363
364 if (fast) {
365 hat_kpm_mapout(pp, NULL, va);
366 } else {
367 ppmapout(va);
368 }
369 kpreempt_enable();
370 }
371