1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/types.h>
27#include <sys/sysmacros.h>
28#include <sys/systm.h>
29#include <sys/mman.h>
30#include <sys/buf.h>
31#include <sys/vmem.h>
32#include <sys/cmn_err.h>
33#include <sys/debug.h>
34#include <sys/machparam.h>
35#include <vm/page.h>
36#include <vm/seg_kmem.h>
37#include <vm/seg_kpm.h>
38
/*
 * BP_FLUSH(addr, size): on sparc, call flush_instr_mem() on the given range
 * (bp_mapout() uses it just before unloading a mapping); on every other
 * platform it does nothing.
 *
 * Both variants expand to a single expression with no trailing semicolon,
 * so "BP_FLUSH(a, s);" is exactly one statement and is safe in an unbraced
 * if/else.  The arguments are parenthesized so expressions may be passed.
 */
#ifdef __sparc
#include <sys/cpu_module.h>
#define	BP_FLUSH(addr, size)	flush_instr_mem((void *)(addr), (size))
#else
#define	BP_FLUSH(addr, size)	((void)0)
#endif
45
/*
 * When nonzero, bp_copy_common() always uses the bp_mapin()/bcopy()/
 * bp_mapout() path, even if kpm is enabled.
 */
int bp_force_copy = 0;
/* Direction of a bp_copy_common() transfer (see the BP_COPY macro). */
typedef enum {
	BP_COPYIN	= 0,	/* buf_t data -> driverbuf */
	BP_COPYOUT	= 1	/* driverbuf -> buf_t data */
} bp_copydir_t;
/* Shared worker behind bp_copyin() and bp_copyout(); defined below. */
static int bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
    offset_t offset, size_t size);

/* Arena created by bp_init(); left NULL when bp_align > bp_max_cache. */
static vmem_t *bp_map_arena;
/* Mapping alignment: MAX(align, PAGESIZE), set by bp_init(). */
static size_t bp_align;
/* Flags passed to hat_devload(); bp_init() ORs in the caller's flags. */
static uint_t bp_devload_flags = PROT_READ | PROT_WRITE | HAT_NOSYNC;
/*
 * bp_init() creates bp_map_arena only if bp_align does not exceed
 * bp_max_cache, and passes MIN(8 * bp_align, bp_max_cache) as the
 * next-to-last argument of vmem_create().
 */
int	bp_max_cache = 1 << 17;		/* 128K default; tunable */
/*
 * Gates (together with kpm_enable) the single-page kpm fast path in
 * bp_mapin_common() and bp_mapout().
 */
int	bp_mapin_kpm_enable = 1;	/* enable default; tunable */
59
60static void *
61bp_vmem_alloc(vmem_t *vmp, size_t size, int vmflag)
62{
63	return (vmem_xalloc(vmp, size, bp_align, 0, 0, NULL, NULL, vmflag));
64}
65
66void
67bp_init(size_t align, uint_t devload_flags)
68{
69	bp_align = MAX(align, PAGESIZE);
70	bp_devload_flags |= devload_flags;
71
72	if (bp_align <= bp_max_cache)
73		bp_map_arena = vmem_create("bp_map", NULL, 0, bp_align,
74		    bp_vmem_alloc, vmem_free, heap_arena,
75		    MIN(8 * bp_align, bp_max_cache), VM_SLEEP);
76}
77
78/*
79 * common routine so can be called with/without VM_SLEEP
80 */
81void *
82bp_mapin_common(struct buf *bp, int flag)
83{
84	struct as	*as;
85	pfn_t		pfnum;
86	page_t		*pp;
87	page_t		**pplist;
88	caddr_t		kaddr;
89	caddr_t		addr;
90	uintptr_t	off;
91	size_t		size;
92	pgcnt_t		npages;
93	int		color;
94
95	as = NULL;
96	/* return if already mapped in, no pageio/physio, or physio to kas */
97	if ((bp->b_flags & B_REMAPPED) ||
98	    !(bp->b_flags & (B_PAGEIO | B_PHYS)) ||
99	    (((bp->b_flags & (B_PAGEIO | B_PHYS)) == B_PHYS) &&
100	    ((bp->b_proc == NULL) || (bp->b_proc->p_as == &kas))))
101		return (bp->b_un.b_addr);
102
103	ASSERT((bp->b_flags & (B_PAGEIO | B_PHYS)) != (B_PAGEIO | B_PHYS));
104
105	addr = (caddr_t)bp->b_un.b_addr;
106	off = (uintptr_t)addr & PAGEOFFSET;
107	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
108	npages = btop(size);
109
110	/* Fastpath single page IO to locked memory by using kpm. */
111	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
112	    kpm_enable && bp_mapin_kpm_enable) {
113		if (bp->b_flags & B_SHADOW)
114			pp = *bp->b_shadow;
115		else
116			pp = bp->b_pages;
117		kaddr = hat_kpm_mapin(pp, NULL);
118		bp->b_un.b_addr = kaddr + off;
119		bp->b_flags |= B_REMAPPED;
120		return (bp->b_un.b_addr);
121	}
122
123	/*
124	 * Allocate kernel virtual space for remapping.
125	 */
126	color = bp_color(bp);
127	ASSERT(color < bp_align);
128
129	if (bp_map_arena != NULL) {
130		kaddr = (caddr_t)vmem_alloc(bp_map_arena,
131		    P2ROUNDUP(color + size, bp_align), flag);
132		if (kaddr == NULL)
133			return (NULL);
134		kaddr += color;
135	} else {
136		kaddr = vmem_xalloc(heap_arena, size, bp_align, color,
137		    0, NULL, NULL, flag);
138		if (kaddr == NULL)
139			return (NULL);
140	}
141
142	ASSERT(P2PHASE((uintptr_t)kaddr, bp_align) == color);
143
144	/*
145	 * Map bp into the virtual space we just allocated.
146	 */
147	if (bp->b_flags & B_PAGEIO) {
148		pp = bp->b_pages;
149		pplist = NULL;
150	} else if (bp->b_flags & B_SHADOW) {
151		pp = NULL;
152		pplist = bp->b_shadow;
153	} else {
154		pp = NULL;
155		pplist = NULL;
156		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL)
157			as = &kas;
158	}
159
160	bp->b_flags |= B_REMAPPED;
161	bp->b_un.b_addr = kaddr + off;
162
163	while (npages-- != 0) {
164		if (pp) {
165			pfnum = pp->p_pagenum;
166			pp = pp->p_next;
167		} else if (pplist == NULL) {
168			pfnum = hat_getpfnum(as->a_hat,
169			    (caddr_t)((uintptr_t)addr & MMU_PAGEMASK));
170			if (pfnum == PFN_INVALID)
171				panic("bp_mapin_common: hat_getpfnum for"
172				    " addr %p failed\n", (void *)addr);
173			addr += PAGESIZE;
174		} else {
175			pfnum = (*pplist)->p_pagenum;
176			pplist++;
177		}
178
179		hat_devload(kas.a_hat, kaddr, PAGESIZE, pfnum,
180		    bp_devload_flags, HAT_LOAD_LOCK);
181
182		kaddr += PAGESIZE;
183	}
184	return (bp->b_un.b_addr);
185}
186
/*
 * Convert bp for pageio/physio to a kernel addressable location.
 *
 * Thin wrapper around bp_mapin_common() that always passes VM_SLEEP.  The
 * returned address is discarded; callers read the mapped address from
 * bp->b_un.b_addr.  (Presumably a VM_SLEEP allocation waits instead of
 * failing, which is why the NULL return is not checked here -- confirm
 * against the vmem documentation.)
 */
void
bp_mapin(struct buf *bp)
{
	(void) bp_mapin_common(bp, VM_SLEEP);
}
195
196/*
197 * Release all the resources associated with a previous bp_mapin() call.
198 */
199void
200bp_mapout(struct buf *bp)
201{
202	caddr_t		addr;
203	uintptr_t	off;
204	uintptr_t	base;
205	uintptr_t	color;
206	size_t		size;
207	pgcnt_t		npages;
208	page_t		*pp;
209
210	if ((bp->b_flags & B_REMAPPED) == 0)
211		return;
212
213	addr = bp->b_un.b_addr;
214	off = (uintptr_t)addr & PAGEOFFSET;
215	size = P2ROUNDUP(bp->b_bcount + off, PAGESIZE);
216	npages = btop(size);
217
218	bp->b_un.b_addr = (caddr_t)off;		/* debugging aid */
219
220	if ((bp->b_flags & (B_SHADOW | B_PAGEIO)) && (npages == 1) &&
221	    kpm_enable && bp_mapin_kpm_enable) {
222		if (bp->b_flags & B_SHADOW)
223			pp = *bp->b_shadow;
224		else
225			pp = bp->b_pages;
226		addr = (caddr_t)((uintptr_t)addr & MMU_PAGEMASK);
227		hat_kpm_mapout(pp, NULL, addr);
228		bp->b_flags &= ~B_REMAPPED;
229		return;
230	}
231
232	base = (uintptr_t)addr & MMU_PAGEMASK;
233	BP_FLUSH(base, size);
234	hat_unload(kas.a_hat, (void *)base, size,
235	    HAT_UNLOAD_NOSYNC | HAT_UNLOAD_UNLOCK);
236	if (bp_map_arena != NULL) {
237		color = P2PHASE(base, bp_align);
238		vmem_free(bp_map_arena, (void *)(base - color),
239		    P2ROUNDUP(color + size, bp_align));
240	} else
241		vmem_free(heap_arena, (void *)base, size);
242	bp->b_flags &= ~B_REMAPPED;
243}
244
245/*
246 * copy data from a KVA into a buf_t which may not be mapped in. offset
247 * is relative to the buf_t only.
248 */
249int
250bp_copyout(void *driverbuf, struct buf *bp, offset_t offset, size_t size)
251{
252	return (bp_copy_common(BP_COPYOUT, bp, driverbuf, offset, size));
253}
254
255/*
256 * copy data from a buf_t which may not be mapped in, into a KVA.. offset
257 * is relative to the buf_t only.
258 */
259int
260bp_copyin(struct buf *bp, void *driverbuf, offset_t offset, size_t size)
261{
262	return (bp_copy_common(BP_COPYIN, bp, driverbuf, offset, size));
263}
264
265
/*
 * BP_COPY(dir, driverbuf, baddr, sz): bcopy() 'sz' bytes between the
 * driver's buffer and the buf address 'baddr'.  BP_COPYIN copies
 * baddr -> driverbuf; any other direction copies driverbuf -> baddr.
 *
 * Every argument and the whole expansion are parenthesized so that
 * expression arguments (e.g. a conditional for 'dir') keep their meaning.
 */
#define	BP_COPY(dir, driverbuf, baddr, sz)	\
	(((dir) == BP_COPYIN) ? \
	bcopy((baddr), (driverbuf), (sz)) : bcopy((driverbuf), (baddr), (sz)))
269
270static int
271bp_copy_common(bp_copydir_t dir, struct buf *bp, void *driverbuf,
272    offset_t offset, size_t size)
273{
274	page_t **pplist;
275	uintptr_t poff;
276	uintptr_t voff;
277	struct as *as;
278	caddr_t kaddr;
279	caddr_t addr;
280	page_t *page;
281	size_t psize;
282	page_t *pp;
283	pfn_t pfn;
284
285	ASSERT((offset + size) <= bp->b_bcount);
286	as = NULL;
287
288	/* if the buf_t already has a KVA, just do a bcopy */
289	if (!(bp->b_flags & (B_PHYS | B_PAGEIO))) {
290		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
291		return (0);
292	}
293
294	/* if we don't have kpm enabled, we need to do the slow path */
295	if (!kpm_enable || bp_force_copy) {
296		bp_mapin(bp);
297		BP_COPY(dir, driverbuf, bp->b_un.b_addr + offset, size);
298		bp_mapout(bp);
299		return (0);
300	}
301
302	/*
303	 * kpm is enabled, and we need to map in the buf_t for the copy
304	 */
305
306	/* setup pp, plist, and make sure 'as' is right */
307	if (bp->b_flags & B_PAGEIO) {
308		pp = bp->b_pages;
309		pplist = NULL;
310	} else if (bp->b_flags & B_SHADOW) {
311		pp = NULL;
312		pplist = bp->b_shadow;
313	} else {
314		pp = NULL;
315		pplist = NULL;
316		if (bp->b_proc == NULL || (as = bp->b_proc->p_as) == NULL) {
317			as = &kas;
318		}
319	}
320
321	/*
322	 * locals for the address, the offset into the first page, and the
323	 * size of the first page we are going to copy.
324	 */
325	addr = (caddr_t)bp->b_un.b_addr;
326	poff = (uintptr_t)addr & PAGEOFFSET;
327	psize = MIN(PAGESIZE - poff, size);
328
329	/*
330	 * we always start with a 0 offset into the driverbuf provided. The
331	 * offset passed in only applies to the buf_t.
332	 */
333	voff = 0;
334
335	/* Loop until we've copied al the data */
336	while (size > 0) {
337
338		/*
339		 * for a pp or pplist, get the pfn, then go to the next page_t
340		 * for the next time around the loop.
341		 */
342		if (pp) {
343			page = pp;
344			pp = pp->p_next;
345		} else if (pplist != NULL) {
346			page = (*pplist);
347			pplist++;
348
349		/*
350		 * We have a user VA. If we are going to copy this page, (e.g.
351		 * the offset into the buf_t where we start to copy is
352		 * within this page), get the pfn. Don't waste the cycles
353		 * getting the pfn if we're not copying this page.
354		 */
355		} else if (offset < psize) {
356			pfn = hat_getpfnum(as->a_hat,
357			    (caddr_t)((uintptr_t)addr & PAGEMASK));
358			if (pfn == PFN_INVALID) {
359				return (-1);
360			}
361			page = page_numtopp_nolock(pfn);
362			addr += psize - offset;
363		} else {
364			addr += psize;
365		}
366
367		/*
368		 * if we have an initial offset into the buf_t passed in,
369		 * and it falls within the current page, account for it in
370		 * the page size (how much we will copy) and the offset into the
371		 * page (where we'll start copying from).
372		 */
373		if ((offset > 0) && (offset < psize)) {
374			psize -= offset;
375			poff += offset;
376			offset = 0;
377
378		/*
379		 * if we have an initial offset into the buf_t passed in,
380		 * and it's not within the current page, skip this page.
381		 * We don't have to worry about the first page offset and size
382		 * anymore. psize will normally be PAGESIZE now unless we are
383		 * on the last page.
384		 */
385		} else if (offset >= psize) {
386			offset -= psize;
387			psize = MIN(PAGESIZE, size);
388			poff = 0;
389			continue;
390		}
391
392		/*
393		 * get a kpm mapping to the page, them copy in/out of the
394		 * page. update size left and offset into the driverbuf passed
395		 * in for the next time around the loop.
396		 */
397		kaddr = hat_kpm_mapin(page, NULL) + poff;
398		BP_COPY(dir, (void *)((uintptr_t)driverbuf + voff), kaddr,
399		    psize);
400		hat_kpm_mapout(page, NULL, kaddr - poff);
401
402		size -= psize;
403		voff += psize;
404
405		poff = 0;
406		psize = MIN(PAGESIZE, size);
407	}
408
409	return (0);
410}
411