xref: /illumos-gate/usr/src/uts/sun4u/io/pci/pci_reloc.c (revision f47a9c508408507a404eaf38dd597e6ac41f92e6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * PCI nexus DVMA relocation routines.
31  *
32  * These routines handle the interactions with the HAT layer to
33  * implement page relocation for page(s) which have active DMA handle
34  * bindings when DVMA is being used for those handles.
35  *
36  * The current modus operandi is as follows:
37  *
38  *   Object binding: register the appropriate callback for each page
39  *     of the kernel object while obtaining the PFN for the DVMA page.
40  *
41  *   Object unbinding: unregister the callback for each page of the
42  *     kernel object.
43  *
44  *   Relocation request:
45  *     1) Suspend the bus and sync the caches.
46  *     2) Remap the DVMA object using the new provided PFN.
47  *     3) Unsuspend the bus.
48  *
49  *  The relocation code runs with CPUs captured (idling in xc_loop())
50  *  so we can only acquire spinlocks at PIL >= 13 for synchronization
51  *  within those codepaths.
52  */
53 #include <sys/types.h>
54 #include <sys/kmem.h>
55 #include <sys/async.h>
56 #include <sys/sysmacros.h>
57 #include <sys/sunddi.h>
58 #include <sys/machsystm.h>
59 #include <sys/ddi_impldefs.h>
60 #include <sys/dvma.h>
61 #include <vm/hat.h>
62 #include <sys/pci/pci_obj.h>
63 
64 /*LINTLIBRARY*/
65 
66 void
67 pci_dvma_unregister_callbacks(pci_t *pci_p, ddi_dma_impl_t *mp)
68 {
69 	ddi_dma_obj_t *dobj_p = &mp->dmai_object;
70 	struct as *as_p = dobj_p->dmao_obj.virt_obj.v_as;
71 	page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
72 	caddr_t vaddr = dobj_p->dmao_obj.virt_obj.v_addr;
73 	struct hat *hat_p;
74 	uint32_t offset;
75 	int i;
76 
77 	if (!PCI_DMA_CANRELOC(mp))
78 		return;
79 
80 	hat_p = (as_p == NULL)? kas.a_hat : as_p->a_hat;
81 	ASSERT(hat_p == kas.a_hat);
82 	ASSERT(pplist == NULL);
83 
84 	offset = mp->dmai_roffset;
85 	hat_delete_callback(vaddr, IOMMU_PAGE_SIZE - offset, mp, HAC_PAGELOCK);
86 	vaddr = (caddr_t)(((uintptr_t)vaddr + IOMMU_PAGE_SIZE) &
87 	    IOMMU_PAGE_MASK);
88 	for (i = 1; i < mp->dmai_ndvmapages; i++) {
89 		hat_delete_callback(vaddr, IOMMU_PAGE_SIZE, mp, HAC_PAGELOCK);
90 		vaddr += IOMMU_PAGE_SIZE;
91 	}
92 	mp->dmai_flags &= ~DMAI_FLAGS_RELOC;
93 }
94 
95 static int
96 pci_dvma_postrelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid,
97 	pfn_t newpfn)
98 {
99 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
100 	dev_info_t *rdip = mp->dmai_rdip;
101 	ddi_dma_obj_t *dobj_p = &mp->dmai_object;
102 	page_t **pplist = dobj_p->dmao_obj.virt_obj.v_priv;
103 	caddr_t baseva = dobj_p->dmao_obj.virt_obj.v_addr;
104 	int index;
105 	size_t length = IOMMU_PTOB(1);
106 	off_t offset;
107 
108 	DEBUG0(DBG_RELOC, rdip, "postrelocator called\n");
109 
110 	if (flags == HAT_POSTUNSUSPEND) {
111 		mutex_enter(&pci_reloc_mutex);
112 		ASSERT(pci_reloc_thread == curthread);
113 		ASSERT(pci_reloc_presuspend > 0);
114 		if (--pci_reloc_presuspend == 0) {
115 			pci_reloc_thread = NULL;
116 			cv_broadcast(&pci_reloc_cv);
117 		}
118 		mutex_exit(&pci_reloc_mutex);
119 		return (0);
120 	}
121 
122 	ASSERT(flags == HAT_UNSUSPEND);
123 	ASSERT(pci_reloc_suspend > 0);
124 	pci_reloc_suspend--;
125 
126 	ASSERT(len <= length);
127 	ASSERT(pplist == NULL);	/* addr bind handle only */
128 	ASSERT(dobj_p->dmao_obj.virt_obj.v_as == &kas ||
129 	    dobj_p->dmao_obj.virt_obj.v_as == NULL);
130 	ASSERT(PCI_DMA_ISDVMA(mp));
131 	ASSERT(pci_reloc_thread == curthread);
132 
133 	offset = va - baseva;
134 	index = IOMMU_BTOPR(offset);
135 	ASSERT(index < mp->dmai_ndvmapages);
136 
137 	DEBUG3(DBG_RELOC, rdip, "index 0x%x, vaddr 0x%llx, baseva 0x%llx\n",
138 	    index, (int64_t)va, (int64_t)baseva);
139 
140 	if ((mp)->dmai_ndvmapages == 1) {
141 		DEBUG2(DBG_RELOC, rdip, "pfn remap (1) 0x%x -> 0x%x\n",
142 		    mp->dmai_pfnlst, newpfn);
143 		    mp->dmai_pfnlst = (void *)newpfn;
144 	} else {
145 		DEBUG3(DBG_RELOC, rdip, "pfn remap (%d) 0x%x -> 0x%x\n",
146 		    index, ((iopfn_t *)mp->dmai_pfnlst)[index], newpfn);
147 		((iopfn_t *)mp->dmai_pfnlst)[index] = (iopfn_t)newpfn;
148 	}
149 
150 	if (ddi_dma_mctl(rdip, rdip, (ddi_dma_handle_t)mp, DDI_DMA_REMAP,
151 	    &offset, &length, NULL, 0) != DDI_SUCCESS)
152 		return (EIO);
153 	if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_UNQUIESCE, NULL, NULL) !=
154 	    DDI_SUCCESS)
155 		return (EIO);
156 
157 	return (0);
158 }
159 
160 /*
161  * Log a warning message if a callback is still registered on
162  * a page which is being freed.  This is indicative of a driver
163  * bug -- DMA handles are bound, and the memory is being freed by
164  * the VM subsystem without an unbind call on the handle first.
165  */
166 static int
167 pci_dma_relocerr(caddr_t va, uint_t len, uint_t errorcode, void *mpvoid)
168 {
169 	int errlevel = pci_dma_panic_on_leak? CE_PANIC : CE_WARN;
170 	if (errorcode == HAT_CB_ERR_LEAKED) {
171 		cmn_err(errlevel, "object 0x%p has a bound DMA handle 0x%p\n",
172 			va, mpvoid);
173 		return (0);
174 	}
175 
176 	/* unknown error code, unhandled so panic */
177 	return (EINVAL);
178 }
179 
180 /*
181  * pci DVMA remap entry points
182  *
183  * Called in response to a DDI_DMA_REMAP DMA ctlops command.
184  * Remaps the region specified in the underlying IOMMU. Safe
185  * to assume that the bus was quiesced and ddi_dma_sync() was
186  * invoked by the caller before we got to this point.
187  */
188 int
189 pci_dvma_remap(dev_info_t *dip, dev_info_t *rdip, ddi_dma_impl_t *mp,
190 	off_t offset, size_t length)
191 {
192 	pci_t *pci_p = get_pci_soft_state(ddi_get_instance(dip));
193 	iommu_t *iommu_p = pci_p->pci_iommu_p;
194 	dvma_addr_t dvma_pg;
195 	size_t npgs;
196 	int idx;
197 
198 	dvma_pg = IOMMU_BTOP(mp->dmai_mapping);
199 	idx = IOMMU_BTOPR(offset);
200 	dvma_pg += idx;
201 	npgs = IOMMU_BTOPR(length);
202 
203 	DEBUG3(DBG_RELOC, mp->dmai_rdip,
204 	    "pci_dvma_remap: dvma_pg 0x%llx len 0x%llx idx 0x%x\n",
205 	    dvma_pg, length, idx);
206 
207 	ASSERT(pci_p->pci_pbm_p->pbm_quiesce_count > 0);
208 	iommu_remap_pages(iommu_p, mp, dvma_pg, npgs, idx);
209 
210 	return (DDI_SUCCESS);
211 }
212 
213 void
214 pci_fdvma_remap(ddi_dma_impl_t *mp, caddr_t kvaddr, dvma_addr_t dvma_pg,
215 	size_t npages, size_t index, pfn_t newpfn)
216 {
217 	fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
218 	pci_t *pci_p = (pci_t *)fdvma_p->softsp;
219 	iommu_t *iommu_p = pci_p->pci_iommu_p;
220 	dev_info_t *dip = pci_p->pci_dip;
221 	iopfn_t pfn = (iopfn_t)newpfn;
222 	dvma_addr_t pg_index = dvma_pg - iommu_p->dvma_base_pg;
223 	int i;
224 	uint64_t tte;
225 
226 	/* make sure we don't exceed reserved boundary */
227 	DEBUG3(DBG_FAST_DVMA, dip, "fast remap index=%x: %p, npgs=%x", index,
228 	    kvaddr, npages);
229 	if (index + npages > mp->dmai_ndvmapages) {
230 		cmn_err(pci_panic_on_fatal_errors ? CE_PANIC : CE_WARN,
231 			"%s%d: fdvma remap index(%lx)+pgs(%lx) exceeds limit\n",
232 			ddi_driver_name(dip), ddi_get_instance(dip),
233 			index, npages);
234 		return;
235 	}
236 
237 	for (i = 0; i < npages; i++, kvaddr += IOMMU_PAGE_SIZE) {
238 		DEBUG3(DBG_FAST_DVMA, dip, "remap dvma_pg %x -> pfn %x,"
239 		    " old tte 0x%llx\n", dvma_pg + i, pfn,
240 		    iommu_p->iommu_tsb_vaddr[pg_index + i]);
241 
242 		if (pfn == PFN_INVALID)
243 			goto bad_pfn;
244 
245 		if (i == 0)
246 			tte = MAKE_TTE_TEMPLATE(pfn, mp);
247 
248 		/* XXX assumes iommu and mmu has same page size */
249 		iommu_p->iommu_tsb_vaddr[pg_index + i] = tte | IOMMU_PTOB(pfn);
250 		IOMMU_PAGE_FLUSH(iommu_p, (dvma_pg + i));
251 	}
252 	return;
253 bad_pfn:
254 	cmn_err(CE_WARN, "%s%d: fdvma remap can't get page frame for vaddr %p",
255 		ddi_driver_name(dip), ddi_get_instance(dip), kvaddr);
256 }
257 
258 static int
259 pci_fdvma_prerelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid)
260 {
261 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
262 	fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
263 	caddr_t baseva, endva;
264 	int i;
265 
266 	/*
267 	 * It isn't safe to do relocation if all of the IOMMU
268 	 * mappings haven't yet been established at this index.
269 	 */
270 	for (i = 0; i < mp->dmai_ndvmapages; i++) {
271 		baseva = fdvma_p->kvbase[i];
272 		endva = baseva + IOMMU_PTOB(fdvma_p->pagecnt[i]);
273 		if (va >= baseva && va < endva)
274 			return (0);	/* found a valid index */
275 	}
276 	return (EAGAIN);
277 }
278 
279 static int
280 pci_fdvma_postrelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid,
281 	pfn_t pfn)
282 {
283 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
284 	dev_info_t *rdip = mp->dmai_rdip;
285 	fdvma_t *fdvma_p = (fdvma_t *)mp->dmai_fdvma;
286 	caddr_t baseva;
287 	dvma_addr_t dvma_pg;
288 	size_t length = PAGESIZE;
289 	int i;
290 
291 	DEBUG0(DBG_RELOC, rdip, "fdvma postrelocator called\n");
292 
293 	if (flags == HAT_POSTUNSUSPEND) {
294 		mutex_enter(&pci_reloc_mutex);
295 		ASSERT(pci_reloc_thread == curthread);
296 		if (--pci_reloc_presuspend == 0) {
297 			pci_reloc_thread = NULL;
298 			cv_broadcast(&pci_reloc_cv);
299 		}
300 		mutex_exit(&pci_reloc_mutex);
301 		return (0);
302 	}
303 
304 	pci_reloc_suspend--;
305 
306 	ASSERT(flags == HAT_UNSUSPEND);
307 	ASSERT(len <= length);
308 	ASSERT((mp->dmai_rflags & DMP_BYPASSNEXUS) != 0);
309 
310 	/*
311 	 * This virtual page can have multiple cookies that refer
312 	 * to it within the same handle. We must walk the whole
313 	 * table for this DMA handle finding all the cookies, and
314 	 * update all of them. Sigh.
315 	 */
316 	for (i = 0; i < mp->dmai_ndvmapages; i++) {
317 		caddr_t endva;
318 		int index;
319 
320 		baseva = fdvma_p->kvbase[i];
321 		endva = baseva + IOMMU_PTOB(fdvma_p->pagecnt[i]);
322 
323 		if (va >= baseva && va < endva) {
324 			index = i + IOMMU_BTOP(va - baseva);
325 			ASSERT(index < mp->dmai_ndvmapages);
326 
327 			DEBUG4(DBG_RELOC, rdip, "mp %p: index 0x%x, "
328 			    " vaddr 0x%llx, baseva 0x%llx\n", mp, index,
329 			    (int64_t)va, (int64_t)baseva);
330 
331 			dvma_pg = IOMMU_BTOP(mp->dmai_mapping) + index;
332 			pci_fdvma_remap(mp, va, dvma_pg, IOMMU_BTOP(length),
333 			    index, pfn);
334 		}
335 	}
336 
337 	if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_UNQUIESCE, NULL, NULL) !=
338 	    DDI_SUCCESS)
339 		return (EIO);
340 
341 	return (0);
342 }
343 
344 void
345 pci_fdvma_unregister_callbacks(pci_t *pci_p, fdvma_t *fdvma_p,
346 	ddi_dma_impl_t *mp, uint_t index)
347 {
348 	size_t npgs = fdvma_p->pagecnt[index];
349 	caddr_t kva = fdvma_p->kvbase[index];
350 	int i;
351 
352 	ASSERT(index + npgs <= mp->dmai_ndvmapages);
353 	ASSERT(kva != NULL);
354 
355 	for (i = 0; i < npgs && pci_dvma_remap_enabled;
356 	    i++, kva += IOMMU_PAGE_SIZE)
357 		hat_delete_callback(kva, IOMMU_PAGE_SIZE, mp, HAC_PAGELOCK);
358 }
359 
360 static int
361 pci_common_prerelocator(caddr_t va, uint_t len, uint_t flags, void *mpvoid)
362 {
363 	ddi_dma_impl_t *mp = (ddi_dma_impl_t *)mpvoid;
364 	ddi_dma_handle_t h = (ddi_dma_handle_t)mpvoid;
365 	dev_info_t *rdip = mp->dmai_rdip;
366 	int ret;
367 
368 	DEBUG0(DBG_RELOC, rdip, "prerelocator called\n");
369 
370 	if (flags == HAT_PRESUSPEND) {
371 		if (!ddi_prop_exists(DDI_DEV_T_ANY, rdip, DDI_PROP_NOTPROM,
372 		    "dvma-remap-supported"))
373 			return (ENOTSUP);
374 		if (!PCI_DMA_ISMAPPED(mp))
375 			return (EAGAIN);
376 
377 		if (mp->dmai_rflags & DMP_BYPASSNEXUS) {
378 			ret = pci_fdvma_prerelocator(va, len, flags, mpvoid);
379 			if (ret != 0)
380 				return (ret);
381 		} else if (!PCI_DMA_ISDVMA(mp))
382 			return (EINVAL);
383 
384 		/*
385 		 * Acquire the exclusive right to relocate a PCI DMA page,
386 		 * since we later have to pause CPUs which could otherwise
387 		 * lead to all sorts of synchronization headaches.
388 		 */
389 		mutex_enter(&pci_reloc_mutex);
390 		if (pci_reloc_thread != curthread) {
391 			while (pci_reloc_thread != NULL) {
392 				cv_wait(&pci_reloc_cv, &pci_reloc_mutex);
393 			}
394 			pci_reloc_thread = curthread;
395 			ASSERT(pci_reloc_suspend == 0);
396 		}
397 		mutex_exit(&pci_reloc_mutex);
398 
399 		ASSERT(pci_reloc_thread == curthread);
400 		pci_reloc_presuspend++;
401 
402 		return (0);
403 	}
404 
405 	ASSERT(flags == HAT_SUSPEND);
406 	ASSERT(PCI_DMA_CANRELOC(mp));
407 	ASSERT(pci_reloc_thread == curthread);
408 	pci_reloc_suspend++;
409 
410 	if (ddi_ctlops(rdip, rdip, DDI_CTLOPS_QUIESCE, NULL, NULL) !=
411 	    DDI_SUCCESS)
412 		return (EIO);
413 	if (ddi_dma_sync(h, 0, 0, DDI_DMA_SYNC_FORKERNEL) != DDI_SUCCESS)
414 		return (EIO);
415 
416 	return (0);
417 }
418 
419 /*
420  * Register two callback types: one for normal DVMA and the
421  * other for fast DVMA, since each method has a different way
422  * of tracking the PFNs behind a handle.
423  */
424 void
425 pci_reloc_init(void)
426 {
427 	mutex_init(&pci_reloc_mutex, NULL, MUTEX_DEFAULT, NULL);
428 	cv_init(&pci_reloc_cv, NULL, CV_DEFAULT, NULL);
429 	pci_dvma_cbid = hat_register_callback('D'<<24 | 'V'<<16 | 'M'<<8 | 'A',
430 		pci_common_prerelocator, pci_dvma_postrelocator,
431 		pci_dma_relocerr, 1);
432 	pci_fast_dvma_cbid = hat_register_callback(
433 		'F'<<24 | 'D'<<16 | 'M'<<8 | 'A', pci_common_prerelocator,
434 		pci_fdvma_postrelocator, pci_dma_relocerr, 1);
435 }
436 
437 void
438 pci_reloc_fini(void)
439 {
440 	cv_destroy(&pci_reloc_cv);
441 	mutex_destroy(&pci_reloc_mutex);
442 }
443