xref: /illumos-gate/usr/src/uts/i86xpv/os/xen_mmu.c (revision 349b53dd)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
26843e1988Sjohnlev 
27843e1988Sjohnlev 
28843e1988Sjohnlev #include <sys/mach_mmu.h>
29843e1988Sjohnlev #include <sys/machsystm.h>
30843e1988Sjohnlev #include <sys/cmn_err.h>
31843e1988Sjohnlev #include <sys/promif.h>
32843e1988Sjohnlev #include <sys/hypervisor.h>
33843e1988Sjohnlev #include <sys/bootconf.h>
34843e1988Sjohnlev #include <sys/ontrap.h>
35843e1988Sjohnlev #include <sys/rwlock.h>
36843e1988Sjohnlev #include <sys/sysmacros.h>
37843e1988Sjohnlev #include <vm/seg_kmem.h>
38843e1988Sjohnlev #include <vm/kboot_mmu.h>
39843e1988Sjohnlev #include <vm/hat_pte.h>
40843e1988Sjohnlev #include <vm/hat.h>
41843e1988Sjohnlev #include <vm/htable.h>
42843e1988Sjohnlev #include <vm/hat_i86.h>
43843e1988Sjohnlev 
44843e1988Sjohnlev start_info_t *xen_info;
45843e1988Sjohnlev ulong_t mfn_count;
46843e1988Sjohnlev mfn_t *mfn_list;
47843e1988Sjohnlev mfn_t *mfn_list_pages;		/* pages that make a table of mfn's */
48843e1988Sjohnlev 				/* that make up the pa_to_ma table */
49843e1988Sjohnlev mfn_t *mfn_list_pages_page;	/* page of mfn's for mfn_list_pages */
50843e1988Sjohnlev mfn_t cached_max_mfn;
51843e1988Sjohnlev uintptr_t xen_virt_start;
52843e1988Sjohnlev pfn_t *mfn_to_pfn_mapping;
53843e1988Sjohnlev caddr_t xb_addr;		/* virtual addr for the store_mfn page */
54843e1988Sjohnlev 
55843e1988Sjohnlev 
/*
 * We need to prevent migration or suspension of a domU while it's
 * manipulating MFN values, as the MFN values will spontaneously
 * change. The next 4 routines provide a mechanism for that.
 * The basic idea is to use reader/writer locks: readers are any threads
 * that are manipulating MFNs. Only the thread which is going to actually
 * call HYPERVISOR_suspend() will become a writer.
 *
 * Since various places need to manipulate MFNs and also call the HAT,
 * we track if a thread acquires reader status and allow it to recursively
 * do so again. This prevents deadlocks if a migration request
 * is started and waits for some reader, but then the previous reader needs
 * to call into the HAT.
 */
70843e1988Sjohnlev #define	NUM_M2P_LOCKS 128
71843e1988Sjohnlev static struct {
72843e1988Sjohnlev 	krwlock_t m2p_rwlock;
73843e1988Sjohnlev 	char m2p_pad[64 - sizeof (krwlock_t)];	/* 64 byte cache line size */
74843e1988Sjohnlev } m2p_lock[NUM_M2P_LOCKS];
75843e1988Sjohnlev 
76843e1988Sjohnlev #define	XM2P_HASH	((uintptr_t)curthread->t_tid & (NUM_M2P_LOCKS - 1))
77843e1988Sjohnlev 
78843e1988Sjohnlev void
xen_block_migrate(void)79843e1988Sjohnlev xen_block_migrate(void)
80843e1988Sjohnlev {
81843e1988Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
82551bc2a6Smrj 	    ++curthread->t_xpvcntr == 1)
83843e1988Sjohnlev 		rw_enter(&m2p_lock[XM2P_HASH].m2p_rwlock, RW_READER);
84843e1988Sjohnlev }
85843e1988Sjohnlev 
86843e1988Sjohnlev void
xen_allow_migrate(void)87843e1988Sjohnlev xen_allow_migrate(void)
88843e1988Sjohnlev {
89843e1988Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info) &&
90551bc2a6Smrj 	    --curthread->t_xpvcntr == 0)
91843e1988Sjohnlev 		rw_exit(&m2p_lock[XM2P_HASH].m2p_rwlock);
92843e1988Sjohnlev }
93843e1988Sjohnlev 
94843e1988Sjohnlev void
xen_start_migrate(void)95843e1988Sjohnlev xen_start_migrate(void)
96843e1988Sjohnlev {
97843e1988Sjohnlev 	int i;
98843e1988Sjohnlev 
99551bc2a6Smrj 	ASSERT(curthread->t_xpvcntr == 0);
100551bc2a6Smrj 	++curthread->t_xpvcntr; /* this allows calls into HAT */
101843e1988Sjohnlev 	for (i = 0; i < NUM_M2P_LOCKS; ++i)
102843e1988Sjohnlev 		rw_enter(&m2p_lock[i].m2p_rwlock, RW_WRITER);
103843e1988Sjohnlev }
104843e1988Sjohnlev 
105843e1988Sjohnlev void
xen_end_migrate(void)106843e1988Sjohnlev xen_end_migrate(void)
107843e1988Sjohnlev {
108843e1988Sjohnlev 	int i;
109843e1988Sjohnlev 
110843e1988Sjohnlev 	for (i = 0; i < NUM_M2P_LOCKS; ++i)
111843e1988Sjohnlev 		rw_exit(&m2p_lock[i].m2p_rwlock);
112551bc2a6Smrj 	ASSERT(curthread->t_xpvcntr == 1);
113551bc2a6Smrj 	--curthread->t_xpvcntr;
114843e1988Sjohnlev }
115843e1988Sjohnlev 
116843e1988Sjohnlev /*ARGSUSED*/
117843e1988Sjohnlev void
set_pteval(paddr_t table,uint_t index,uint_t level,x86pte_t pteval)118843e1988Sjohnlev set_pteval(paddr_t table, uint_t index, uint_t level, x86pte_t pteval)
119843e1988Sjohnlev {
120843e1988Sjohnlev 	mmu_update_t t;
121843e1988Sjohnlev 	maddr_t mtable = pa_to_ma(table);
122843e1988Sjohnlev 	int retcnt;
123843e1988Sjohnlev 
124843e1988Sjohnlev 	t.ptr = (mtable + index * pte_size) | MMU_NORMAL_PT_UPDATE;
125843e1988Sjohnlev 	t.val = pteval;
126843e1988Sjohnlev 	if (HYPERVISOR_mmu_update(&t, 1, &retcnt, DOMID_SELF) || retcnt != 1)
127843e1988Sjohnlev 		bop_panic("HYPERVISOR_mmu_update() failed");
128843e1988Sjohnlev }
129843e1988Sjohnlev 
130843e1988Sjohnlev /*
131843e1988Sjohnlev  * The start_info_t and mfn_list are initially mapped in low "boot" memory.
132843e1988Sjohnlev  * Each has a page aligned address and size. We relocate them up into the
133843e1988Sjohnlev  * kernel's normal address space at this point in time. We also create
134843e1988Sjohnlev  * the arrays that let the hypervisor suspend/resume a domain.
135843e1988Sjohnlev  */
136843e1988Sjohnlev void
xen_relocate_start_info(void)137843e1988Sjohnlev xen_relocate_start_info(void)
138843e1988Sjohnlev {
139843e1988Sjohnlev 	maddr_t mach_addr;
140843e1988Sjohnlev 	size_t sz;
141843e1988Sjohnlev 	size_t sz2;
142843e1988Sjohnlev 	offset_t off;
143843e1988Sjohnlev 	uintptr_t addr;
144843e1988Sjohnlev 	uintptr_t old;
145843e1988Sjohnlev 	int i, j;
146843e1988Sjohnlev 
147843e1988Sjohnlev 	/*
148843e1988Sjohnlev 	 * In dom0, we have to account for the console_info structure
149843e1988Sjohnlev 	 * which might immediately follow the start_info in memory.
150843e1988Sjohnlev 	 */
151843e1988Sjohnlev 	sz = sizeof (start_info_t);
152843e1988Sjohnlev 	if (DOMAIN_IS_INITDOMAIN(xen_info) &&
153843e1988Sjohnlev 	    xen_info->console.dom0.info_off >= sizeof (start_info_t)) {
154843e1988Sjohnlev 		sz += xen_info->console.dom0.info_off - sizeof (start_info_t) +
155843e1988Sjohnlev 		    xen_info->console.dom0.info_size;
156843e1988Sjohnlev 	}
157843e1988Sjohnlev 	sz = P2ROUNDUP(sz, MMU_PAGESIZE);
158843e1988Sjohnlev 	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
159843e1988Sjohnlev 	for (off = 0; off < sz; off += MMU_PAGESIZE) {
160843e1988Sjohnlev 		mach_addr = pa_to_ma(pfn_to_pa(va_to_pfn(
161843e1988Sjohnlev 		    (caddr_t)xen_info + off)));
162843e1988Sjohnlev 		kbm_map_ma(mach_addr + off, addr + off, 0);
163843e1988Sjohnlev 	}
164843e1988Sjohnlev 	boot_mapin((caddr_t)addr, sz);
165843e1988Sjohnlev 	old = (uintptr_t)xen_info;
166843e1988Sjohnlev 	xen_info = (start_info_t *)addr;
167843e1988Sjohnlev 	for (off = 0; off < sz; off += MMU_PAGESIZE)
168843e1988Sjohnlev 		kbm_unmap(old + off);
169843e1988Sjohnlev 
170843e1988Sjohnlev 	/*
171843e1988Sjohnlev 	 * Relocate the mfn_list, any number of pages.
172843e1988Sjohnlev 	 */
173843e1988Sjohnlev 	sz = P2ROUNDUP(mfn_count * sizeof (mfn_t), MMU_PAGESIZE);
174843e1988Sjohnlev 	addr = (uintptr_t)vmem_xalloc(heap_arena, sz, MMU_PAGESIZE, 0,
175843e1988Sjohnlev 	    0, 0, 0, VM_SLEEP);
176843e1988Sjohnlev 	for (off = 0; off < sz; off += MMU_PAGESIZE) {
177843e1988Sjohnlev 		mach_addr =
178843e1988Sjohnlev 		    pa_to_ma(pfn_to_pa(va_to_pfn((caddr_t)mfn_list + off)));
179843e1988Sjohnlev 		kbm_map_ma(mach_addr, addr + off, 0);
180843e1988Sjohnlev 	}
181843e1988Sjohnlev 	boot_mapin((caddr_t)addr, sz);
182843e1988Sjohnlev 	old = (uintptr_t)mfn_list;
183843e1988Sjohnlev 	mfn_list = (mfn_t *)addr;
184843e1988Sjohnlev 	xen_info->mfn_list = (mfn_t)addr;
185843e1988Sjohnlev 	for (off = 0; off < sz; off += MMU_PAGESIZE)
186843e1988Sjohnlev 		kbm_unmap(old + off);
187843e1988Sjohnlev 
188843e1988Sjohnlev 	/*
189843e1988Sjohnlev 	 * Create the lists of mfn_list pages needed by suspend/resume.
190843e1988Sjohnlev 	 * Note we skip this for domain 0 as it can't suspend/resume.
191843e1988Sjohnlev 	 */
192843e1988Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
193843e1988Sjohnlev 		sz2 = P2ROUNDUP(mmu_btop(sz) * sizeof (mfn_t), MMU_PAGESIZE);
194843e1988Sjohnlev 		mfn_list_pages = kmem_zalloc(sz2, VM_SLEEP);
195843e1988Sjohnlev 		mfn_list_pages_page = kmem_zalloc(MMU_PAGESIZE, VM_SLEEP);
196843e1988Sjohnlev 		i = 0;
197843e1988Sjohnlev 		for (off = 0; off < sz; off += MMU_PAGESIZE) {
198843e1988Sjohnlev 			j = mmu_btop(off);
199843e1988Sjohnlev 			if (((j * sizeof (mfn_t)) & MMU_PAGEOFFSET) == 0) {
200843e1988Sjohnlev 				mfn_list_pages_page[i++] =
201843e1988Sjohnlev 				    pfn_to_mfn(va_to_pfn(&mfn_list_pages[j]));
202843e1988Sjohnlev 			}
203843e1988Sjohnlev 			mfn_list_pages[j] =
204843e1988Sjohnlev 			    pfn_to_mfn(va_to_pfn((caddr_t)mfn_list + off));
205843e1988Sjohnlev 		}
206843e1988Sjohnlev 		HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list_list =
207843e1988Sjohnlev 		    pfn_to_mfn(va_to_pfn(mfn_list_pages_page));
208843e1988Sjohnlev 		HYPERVISOR_shared_info->arch.max_pfn = xen_info->nr_pages;
209843e1988Sjohnlev 	}
210843e1988Sjohnlev 
211843e1988Sjohnlev 	/*
212843e1988Sjohnlev 	 * Remap the shared info (for I/O) into high memory, too.
213843e1988Sjohnlev 	 */
214843e1988Sjohnlev 	sz = MMU_PAGESIZE;
215843e1988Sjohnlev 	addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
216843e1988Sjohnlev 	kbm_map_ma(xen_info->shared_info, addr, 0);
217843e1988Sjohnlev 	/* shared info has no PFN so don't do: boot_mapin((caddr_t)addr, sz) */
218843e1988Sjohnlev 	old = (uintptr_t)HYPERVISOR_shared_info;
219843e1988Sjohnlev 	HYPERVISOR_shared_info = (void *)addr;
220843e1988Sjohnlev 	kbm_unmap(old);
221843e1988Sjohnlev 
222843e1988Sjohnlev 	/*
223843e1988Sjohnlev 	 * Remap the console info into high memory, too.
224843e1988Sjohnlev 	 */
225843e1988Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
226843e1988Sjohnlev 		sz = MMU_PAGESIZE;
227843e1988Sjohnlev 		addr = (uintptr_t)vmem_alloc(heap_arena, sz, VM_SLEEP);
228843e1988Sjohnlev 		kbm_map_ma(pfn_to_pa(xen_info->console.domU.mfn), addr, 0);
229843e1988Sjohnlev 		boot_mapin((caddr_t)addr, sz);
230843e1988Sjohnlev 		old = (uintptr_t)HYPERVISOR_console_page;
231843e1988Sjohnlev 		HYPERVISOR_console_page = (void *)addr;
232843e1988Sjohnlev 		kbm_unmap(old);
233843e1988Sjohnlev 	} else {
234843e1988Sjohnlev 		HYPERVISOR_console_page = NULL;
235843e1988Sjohnlev 	}
236843e1988Sjohnlev 
237843e1988Sjohnlev 	/*
238843e1988Sjohnlev 	 * On domUs we need to have the xenbus page (store_mfn) mapped into
239843e1988Sjohnlev 	 * the kernel. This is referenced as xb_addr.
240843e1988Sjohnlev 	 */
241843e1988Sjohnlev 	if (!DOMAIN_IS_INITDOMAIN(xen_info)) {
242843e1988Sjohnlev 		xb_addr = vmem_alloc(heap_arena, MMU_PAGESIZE, VM_SLEEP);
243843e1988Sjohnlev 		kbm_map_ma(mfn_to_ma(xen_info->store_mfn),
244843e1988Sjohnlev 		    (uintptr_t)xb_addr, 0);
245843e1988Sjohnlev 		boot_mapin(xb_addr, MMU_PAGESIZE);
246843e1988Sjohnlev 	}
247843e1988Sjohnlev }
248843e1988Sjohnlev 
249843e1988Sjohnlev /*
250843e1988Sjohnlev  * Generate the pfn value to use for a foreign mfn.
251843e1988Sjohnlev  */
252843e1988Sjohnlev pfn_t
xen_assign_pfn(mfn_t mfn)253843e1988Sjohnlev xen_assign_pfn(mfn_t mfn)
254843e1988Sjohnlev {
255843e1988Sjohnlev 	pfn_t pfn;
256843e1988Sjohnlev 
257843e1988Sjohnlev #ifdef DEBUG
258843e1988Sjohnlev 	/*
259843e1988Sjohnlev 	 * make sure this MFN isn't in our list of MFNs
260843e1988Sjohnlev 	 */
261843e1988Sjohnlev 	on_trap_data_t otd;
262843e1988Sjohnlev 	uint_t	on_trap_ready = (t0.t_stk != NULL);
263843e1988Sjohnlev 
264843e1988Sjohnlev 	if (on_trap_ready) {
265843e1988Sjohnlev 		if (on_trap(&otd, OT_DATA_ACCESS) == 0) {
266843e1988Sjohnlev 			pfn = mfn_to_pfn_mapping[mfn];
267843e1988Sjohnlev 			if (pfn < mfn_count && mfn_list[pfn] == mfn)
268843e1988Sjohnlev 				panic("xen_assign_pfn() mfn belongs to us");
269843e1988Sjohnlev 		}
270843e1988Sjohnlev 		no_trap();
271843e1988Sjohnlev 	}
272843e1988Sjohnlev #endif /* DEBUG */
273843e1988Sjohnlev 
274843e1988Sjohnlev 	if (mfn == MFN_INVALID)
275843e1988Sjohnlev 		panic("xen_assign_pfn(MFN_INVALID) not allowed");
276843e1988Sjohnlev 	pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
277843e1988Sjohnlev 	if (pfn == mfn)
278843e1988Sjohnlev 		panic("xen_assign_pfn(mfn) PFN_IS_FOREIGN_MFN bit already set");
279843e1988Sjohnlev 	return (pfn);
280843e1988Sjohnlev }
281843e1988Sjohnlev 
282843e1988Sjohnlev void
xen_release_pfn(pfn_t pfn)283843e1988Sjohnlev xen_release_pfn(pfn_t pfn)
284843e1988Sjohnlev {
285843e1988Sjohnlev 	if (pfn == PFN_INVALID)
286843e1988Sjohnlev 		panic("xen_release_pfn(PFN_INVALID) not allowed");
287843e1988Sjohnlev 	if ((pfn & PFN_IS_FOREIGN_MFN) == 0)
288843e1988Sjohnlev 		panic("mfn high bit not set");
289843e1988Sjohnlev }
290843e1988Sjohnlev 
291843e1988Sjohnlev uint_t
pfn_is_foreign(pfn_t pfn)292843e1988Sjohnlev pfn_is_foreign(pfn_t pfn)
293843e1988Sjohnlev {
294843e1988Sjohnlev 	if (pfn == PFN_INVALID)
295843e1988Sjohnlev 		return (0);
296843e1988Sjohnlev 	return ((pfn & PFN_IS_FOREIGN_MFN) != 0);
297843e1988Sjohnlev }
298843e1988Sjohnlev 
299843e1988Sjohnlev pfn_t
pte2pfn(x86pte_t pte,level_t l)300843e1988Sjohnlev pte2pfn(x86pte_t pte, level_t l)
301843e1988Sjohnlev {
302843e1988Sjohnlev 	mfn_t mfn = PTE2MFN(pte, l);
303843e1988Sjohnlev 
304843e1988Sjohnlev 	if ((pte & PT_SOFTWARE) >= PT_FOREIGN)
305843e1988Sjohnlev 		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
306843e1988Sjohnlev 	return (mfn_to_pfn(mfn));
307843e1988Sjohnlev }
308843e1988Sjohnlev 
309843e1988Sjohnlev mfn_t
pfn_to_mfn(pfn_t pfn)310843e1988Sjohnlev pfn_to_mfn(pfn_t pfn)
311843e1988Sjohnlev {
312843e1988Sjohnlev 	if (pfn == PFN_INVALID)
313843e1988Sjohnlev 		panic("pfn_to_mfn(PFN_INVALID) not allowed");
314843e1988Sjohnlev 
315843e1988Sjohnlev 	if (pfn & PFN_IS_FOREIGN_MFN)
316843e1988Sjohnlev 		return (pfn & ~PFN_IS_FOREIGN_MFN);
317843e1988Sjohnlev 
318843e1988Sjohnlev 	if (pfn >= mfn_count)
319843e1988Sjohnlev 		panic("pfn_to_mfn(): illegal PFN 0x%lx", pfn);
320843e1988Sjohnlev 
321843e1988Sjohnlev 	return (mfn_list[pfn]);
322843e1988Sjohnlev }
323843e1988Sjohnlev 
324843e1988Sjohnlev /*
325843e1988Sjohnlev  * This routine translates an MFN back into the corresponding PFN value.
326843e1988Sjohnlev  * It has to be careful since the mfn_to_pfn_mapping[] might fault
327843e1988Sjohnlev  * as that table is sparse. It also has to check for non-faulting, but out of
328843e1988Sjohnlev  * range that exceed the table.
329843e1988Sjohnlev  */
330843e1988Sjohnlev pfn_t
mfn_to_pfn(mfn_t mfn)331843e1988Sjohnlev mfn_to_pfn(mfn_t mfn)
332843e1988Sjohnlev {
333843e1988Sjohnlev 	pfn_t pfn;
334843e1988Sjohnlev 	on_trap_data_t otd;
335843e1988Sjohnlev 	uint_t	on_trap_ready = (t0.t_stk != NULL);
336843e1988Sjohnlev 
337843e1988Sjohnlev 	/*
338843e1988Sjohnlev 	 * Cleared at a suspend or migrate
339843e1988Sjohnlev 	 */
340843e1988Sjohnlev 	if (cached_max_mfn == 0)
341843e1988Sjohnlev 		cached_max_mfn =
342843e1988Sjohnlev 		    HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
343843e1988Sjohnlev 
344843e1988Sjohnlev 	if (cached_max_mfn < mfn)
345843e1988Sjohnlev 		return ((pfn_t)mfn | PFN_IS_FOREIGN_MFN);
346843e1988Sjohnlev 
347843e1988Sjohnlev 	if (on_trap_ready && on_trap(&otd, OT_DATA_ACCESS)) {
348843e1988Sjohnlev 		pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
349843e1988Sjohnlev 	} else {
350843e1988Sjohnlev 		pfn = mfn_to_pfn_mapping[mfn];
351843e1988Sjohnlev 
352843e1988Sjohnlev 		if (pfn == PFN_INVALID || pfn >= mfn_count ||
353843e1988Sjohnlev 		    pfn_to_mfn(pfn) != mfn)
354843e1988Sjohnlev 			pfn = (pfn_t)mfn | PFN_IS_FOREIGN_MFN;
355843e1988Sjohnlev 	}
356843e1988Sjohnlev 
357843e1988Sjohnlev 	if (on_trap_ready)
358843e1988Sjohnlev 		no_trap();
359843e1988Sjohnlev 
360843e1988Sjohnlev 	/*
361843e1988Sjohnlev 	 * If khat_running is set then we should be checking
362843e1988Sjohnlev 	 * in domUs that migration is blocked while using the
363843e1988Sjohnlev 	 * mfn_to_pfn_mapping[] table.
364843e1988Sjohnlev 	 */
365843e1988Sjohnlev 	ASSERT(!khat_running || DOMAIN_IS_INITDOMAIN(xen_info) ||
366843e1988Sjohnlev 	    rw_read_held(&m2p_lock[XM2P_HASH].m2p_rwlock));
367843e1988Sjohnlev 
368843e1988Sjohnlev 	return (pfn);
369843e1988Sjohnlev }
370843e1988Sjohnlev 
371843e1988Sjohnlev /*
372843e1988Sjohnlev  * From a pseudo-physical address, find the corresponding machine address.
373843e1988Sjohnlev  */
374843e1988Sjohnlev maddr_t
pa_to_ma(paddr_t pa)375843e1988Sjohnlev pa_to_ma(paddr_t pa)
376843e1988Sjohnlev {
377843e1988Sjohnlev 	mfn_t mfn = pfn_to_mfn(mmu_btop(pa));
378843e1988Sjohnlev 
379843e1988Sjohnlev 	if (mfn == MFN_INVALID)
380843e1988Sjohnlev 		panic("pa_to_ma() got MFN_INVALID");
381843e1988Sjohnlev 	return (mfn_to_ma(mfn) + (pa & MMU_PAGEOFFSET));
382843e1988Sjohnlev }
383843e1988Sjohnlev 
384843e1988Sjohnlev /*
385843e1988Sjohnlev  * From a machine address, find the corresponding pseudo-physical address.
386843e1988Sjohnlev  */
387843e1988Sjohnlev paddr_t
ma_to_pa(maddr_t ma)388843e1988Sjohnlev ma_to_pa(maddr_t ma)
389843e1988Sjohnlev {
390843e1988Sjohnlev 	pfn_t pfn = mfn_to_pfn(mmu_btop(ma));
391843e1988Sjohnlev 
392843e1988Sjohnlev 	if (pfn == PFN_INVALID)
393843e1988Sjohnlev 		panic("ma_to_pa() got PFN_INVALID");
394843e1988Sjohnlev 	return (pfn_to_pa(pfn) + (ma & MMU_PAGEOFFSET));
395843e1988Sjohnlev }
396843e1988Sjohnlev 
/*
 * When calling reassign_pfn(), the page must be (at least) read locked
 * to make sure swrand does not try to grab it.
 *
 * On DEBUG kernels CHECK_PAGE_LOCK(pfn) panics if the pfn has a page_t
 * and that page is not locked; on non-DEBUG kernels it does nothing.
 * Both forms expand to a single statement, so the macro is safe to use
 * as the body of an unbraced if/else.
 */
#ifdef DEBUG
#define	CHECK_PAGE_LOCK(pfn)	do {					\
	page_t *pp = page_numtopp_nolock(pfn);				\
	if ((pp != NULL) && (!PAGE_LOCKED(pp))) {			\
		panic("reassign_pfn() called with unlocked page (pfn 0x%lx)", \
		    (pfn));						\
	}								\
} while (0)
#else	/* DEBUG */
#define	CHECK_PAGE_LOCK(pfn)	((void)0)
#endif	/* DEBUG */
412843e1988Sjohnlev 
413843e1988Sjohnlev /*
414843e1988Sjohnlev  * Reassign a new machine page to back a physical address.
415843e1988Sjohnlev  */
416843e1988Sjohnlev void
reassign_pfn(pfn_t pfn,mfn_t mfn)417843e1988Sjohnlev reassign_pfn(pfn_t pfn, mfn_t mfn)
418843e1988Sjohnlev {
419843e1988Sjohnlev 	int mmu_update_return;
420843e1988Sjohnlev 	mmu_update_t t;
421843e1988Sjohnlev 	extern void update_contig_pfnlist(pfn_t, mfn_t, mfn_t);
422843e1988Sjohnlev 
423843e1988Sjohnlev 	ASSERT(pfn != PFN_INVALID);
424843e1988Sjohnlev 	ASSERT(!pfn_is_foreign(pfn));
425843e1988Sjohnlev 
426843e1988Sjohnlev 	ASSERT(pfn < mfn_count);
427843e1988Sjohnlev 	update_contig_pfnlist(pfn, mfn_list[pfn], mfn);
428843e1988Sjohnlev 	if (mfn == MFN_INVALID) {
429843e1988Sjohnlev 		CHECK_PAGE_LOCK(pfn);
430843e1988Sjohnlev 		if (kpm_vbase != NULL && xen_kpm_page(pfn, 0) < 0)
431843e1988Sjohnlev 			panic("reassign_pfn(): failed to remove kpm mapping");
432843e1988Sjohnlev 		mfn_list[pfn] = mfn;
433843e1988Sjohnlev 		return;
434843e1988Sjohnlev 	}
435843e1988Sjohnlev 
436843e1988Sjohnlev 	/*
437843e1988Sjohnlev 	 * Verify that previously given away pages are still page locked.
438843e1988Sjohnlev 	 */
439843e1988Sjohnlev 	if (mfn_list[pfn] == MFN_INVALID) {
440843e1988Sjohnlev 		CHECK_PAGE_LOCK(pfn);
441843e1988Sjohnlev 	}
442843e1988Sjohnlev 	mfn_list[pfn] = mfn;
443843e1988Sjohnlev 
444843e1988Sjohnlev 	t.ptr = mfn_to_ma(mfn) | MMU_MACHPHYS_UPDATE;
445843e1988Sjohnlev 	t.val = pfn;
446843e1988Sjohnlev 
447843e1988Sjohnlev 	if (HYPERVISOR_mmu_update(&t, 1, &mmu_update_return, DOMID_SELF))
448843e1988Sjohnlev 		panic("HYPERVISOR_mmu_update() failed");
449843e1988Sjohnlev 	ASSERT(mmu_update_return == 1);
450843e1988Sjohnlev 
451843e1988Sjohnlev 	if (kpm_vbase != NULL && xen_kpm_page(pfn, PT_VALID | PT_WRITABLE) < 0)
452843e1988Sjohnlev 		panic("reassign_pfn(): failed to enable kpm mapping");
453843e1988Sjohnlev }
454