1843e198johnlev/*
2843e198johnlev * CDDL HEADER START
3843e198johnlev *
4843e198johnlev * The contents of this file are subject to the terms of the
5843e198johnlev * Common Development and Distribution License (the "License").
6843e198johnlev * You may not use this file except in compliance with the License.
7843e198johnlev *
8843e198johnlev * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9843e198johnlev * or http://www.opensolaris.org/os/licensing.
10843e198johnlev * See the License for the specific language governing permissions
11843e198johnlev * and limitations under the License.
12843e198johnlev *
13843e198johnlev * When distributing Covered Code, include this CDDL HEADER in each
14843e198johnlev * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15843e198johnlev * If applicable, add the following below this CDDL HEADER, with the
16843e198johnlev * fields enclosed by brackets "[]" replaced with your own identifying
17843e198johnlev * information: Portions Copyright [yyyy] [name of copyright owner]
18843e198johnlev *
19843e198johnlev * CDDL HEADER END
20843e198johnlev */
21843e198johnlev/*
22349b53dStuart Maybee * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23843e198johnlev * Use is subject to license terms.
24843e198johnlev */
25843e198johnlev
26843e198johnlev/*
27a576ab5rab * KVM backend for hypervisor domain dumps.  We don't use libkvm for
28a576ab5rab * such dumps, since they do not have a namelist file or the typical
29a576ab5rab * dump structures we expect to aid bootstrapping.  Instead, we
30a576ab5rab * bootstrap based upon a debug_info structure at a known VA, using the
31a576ab5rab * guest's own page tables to resolve to physical addresses, and
32a576ab5rab * construct the namelist in a manner similar to ksyms_snapshot().
33a576ab5rab *
34a576ab5rab * Note that there are two formats understood by this module: the older,
35a576ab5rab * ad hoc format, which we call 'core' within this file, and an
36a576ab5rab * ELF-based format, known as 'elf'.
37a576ab5rab *
38a576ab5rab * We only support the older format generated on Solaris dom0: before we
39a576ab5rab * fixed it, core dump files were broken whenever a PFN didn't map a
40a576ab5rab * real MFN (!).
41843e198johnlev */
42843e198johnlev
43843e198johnlev#include <strings.h>
44843e198johnlev#include <stdio.h>
45843e198johnlev#include <stdlib.h>
46843e198johnlev#include <stddef.h>
47843e198johnlev#include <stdarg.h>
48843e198johnlev#include <unistd.h>
49843e198johnlev#include <fcntl.h>
50843e198johnlev#include <gelf.h>
51843e198johnlev#include <errno.h>
52843e198johnlev
53843e198johnlev#include <sys/mman.h>
54843e198johnlev#include <sys/stat.h>
55843e198johnlev#include <sys/debug_info.h>
56843e198johnlev#include <sys/xen_mmu.h>
57843e198johnlev#include <sys/elf.h>
58843e198johnlev#include <sys/machelf.h>
59843e198johnlev#include <sys/modctl.h>
60843e198johnlev#include <sys/kobj.h>
61843e198johnlev#include <sys/kobj_impl.h>
62843e198johnlev#include <sys/sysmacros.h>
63843e198johnlev#include <sys/privmregs.h>
64843e198johnlev#include <vm/as.h>
65843e198johnlev
66843e198johnlev#include <mdb/mdb_io.h>
67843e198johnlev#include <mdb/mdb_kb.h>
68843e198johnlev#include <mdb/mdb_target_impl.h>
69843e198johnlev
70843e198johnlev#include <xen/public/xen.h>
71a576ab5rab#include <xen/public/version.h>
72a576ab5rab#include <xen/public/elfnote.h>
73843e198johnlev
/*
 * Section header slots of the synthesized namelist: these index the
 * kh_shdr[] array in xkb_namelist_t, and XKB_SHDR_NUM is its length.
 */
#define	XKB_SHDR_NULL		0
#define	XKB_SHDR_SYMTAB		1
#define	XKB_SHDR_STRTAB		2
#define	XKB_SHDR_SHSTRTAB	3
#define	XKB_SHDR_NUM		4

/*
 * Symbol walk flag bits; XKB_WALK_ALL is the union of the three.
 * NOTE(review): the consumers of these flags are not visible in this
 * part of the file.
 */
#define	XKB_WALK_LOCAL		0x1
#define	XKB_WALK_GLOBAL		0x2
#define	XKB_WALK_STR		0x4
#define	XKB_WALK_ALL	(XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)
84843e198johnlev
/*
 * Virtual address of the guest's debug_info structure, which is what we
 * bootstrap from.  There is one address for the default case and one
 * for HVM images, per supported word size.
 */
#if defined(__i386)
#define	DEBUG_INFO	0xf4bff000
#define	DEBUG_INFO_HVM	0xfe7ff000
#elif defined(__amd64)
#define	DEBUG_INFO	0xfffffffffb7ff000
#define	DEBUG_INFO_HVM	0xfffffffffb7ff000
#endif
92a576ab5rab
/*
 * Page geometry (4K pages) and helpers:
 *   PAGE_OFFSET(a)  - byte offset of 'a' within its page
 *   PAGE_MASK(a)    - 'a' rounded down to a page boundary
 *   PAGE_ALIGNED(a) - non-zero iff 'a' sits on a page boundary
 */
#define	PAGE_SIZE	0x1000
#define	PAGE_SHIFT	12
#define	PAGE_OFFSET(a)	((a) & (PAGE_SIZE - 1))
#define	PAGE_MASK(a)	((a) & ~(PAGE_SIZE - 1))
#define	PAGE_ALIGNED(a)	(((a) & (PAGE_SIZE - 1)) == 0)

/*
 * Page table entry bits.  PT_PADDR and PT_PADDR_LGPG are masks applied
 * to an entry (the large-page variant additionally excludes bit 12).
 * PTE_IS_LGPG(p, l) is true when entry 'p' at level 'l' has
 * PT_PAGESIZE set and 'l' is above the lowest level (0).
 */
#define	PT_PADDR_LGPG	0x000fffffffffe000ull
#define	PT_PADDR	0x000ffffffffff000ull
#define	PT_VALID	0x1
#define	PT_PAGESIZE	0x080
#define	PTE_IS_LGPG(p, l)	((l) > 0 && ((p) & PT_PAGESIZE))
103843e198johnlev
/* Magic numbers found in the xch_magic field of an old-style core. */
#define	XC_CORE_MAGIC		0xF00FEBED
#define	XC_CORE_MAGIC_HVM	0xF00FEBEE

/* NOTE(review): presumably a vcpu_guest_context flag bit; use not visible. */
#define	VGCF_HVM_GUEST		(1 << 1)
108843e198johnlev
/*
 * On-disk header at offset 0 of an old-style ('core') dump.
 */
typedef struct xc_core_header {
	unsigned int xch_magic;		/* XC_CORE_MAGIC[_HVM] */
	unsigned int xch_nr_vcpus;	/* count of vcpu contexts */
	unsigned int xch_nr_pages;
	unsigned int xch_ctxt_offset;	/* start of the vcpu contexts */
	unsigned int xch_index_offset;	/* start of the p2m table */
	unsigned int xch_pages_offset;
} xc_core_header_t;
117843e198johnlev
/*
 * Header record of an ELF-format dump; all fields are 64 bits wide.
 */
struct xc_elf_header {
	uint64_t xeh_magic;
	uint64_t xeh_nr_vcpus;
	uint64_t xeh_nr_pages;
	uint64_t xeh_page_size;
};
124a576ab5rab
125a576ab5rabstruct xc_elf_version {
126a576ab5rab	uint64_t xev_major;
127a576ab5rab	uint64_t xev_minor;
128a576ab5rab	xen_extraversion_t xev_extra;
129a576ab5rab	xen_compile_info_t xev_compile_info;
130a576ab5rab	xen_capabilities_info_t xev_capabilities;
131a576ab5rab	xen_changeset_info_t xev_changeset;
132a576ab5rab	xen_platform_parameters_t xev_platform_parameters;
133a576ab5rab	uint64_t xev_pagesize;
134a576ab5rab};
135a576ab5rab
/*
 * Dump file flavour: the old-style (3.0.4) core format, or the ELF
 * format.  XKB_FORMAT_UNKNOWN is deliberately zero.
 */
typedef enum {
	XKB_FORMAT_UNKNOWN = 0,
	XKB_FORMAT_CORE = 1,
	XKB_FORMAT_ELF = 2
} xkb_type_t;
144a576ab5rab
145843e198johnlevtypedef struct mfn_map {
146843e198johnlev	mfn_t mm_mfn;
147843e198johnlev	char *mm_map;
148843e198johnlev} mfn_map_t;
149843e198johnlev
/*
 * Parameters describing the guest's page table layout.
 * NOTE(review): field meanings below are inferred from the names; the
 * code that fills them in is not visible in this part of the file.
 */
typedef struct mmu_info {
	size_t mi_max;		/* presumably the top page table level */
	size_t mi_shift[4];	/* presumably the VA shift for each level */
	size_t mi_ptes;		/* presumably entries per table */
	size_t mi_ptesize;	/* presumably bytes per entry */
} mmu_info_t;
156843e198johnlev
157a576ab5rabtypedef struct xkb_core {
158a576ab5rab	xc_core_header_t xc_hdr;
159a576ab5rab	void *xc_p2m_buf;
160a576ab5rab} xkb_core_t;
161a576ab5rab
162a576ab5rabtypedef struct xkb_elf {
163a576ab5rab	mdb_gelf_file_t *xe_gelf;
164a576ab5rab	size_t *xe_off;
165a576ab5rab	struct xc_elf_header xe_hdr;
166a576ab5rab	struct xc_elf_version xe_version;
167a576ab5rab} xkb_elf_t;
168a576ab5rab
169843e198johnlevtypedef struct xkb {
170843e198johnlev	char *xkb_path;
171843e198johnlev	int xkb_fd;
1725d2eda9John Levon	int xkb_is_hvm;
173a576ab5rab
174a576ab5rab	xkb_type_t xkb_type;
175a576ab5rab	xkb_core_t xkb_core;
176a576ab5rab	xkb_elf_t xkb_elf;
177a576ab5rab
178a576ab5rab	size_t xkb_nr_vcpus;
179a576ab5rab	size_t xkb_nr_pages;
180a576ab5rab	size_t xkb_pages_off;
181a576ab5rab	xen_pfn_t xkb_max_pfn;
182843e198johnlev	mfn_t xkb_max_mfn;
183a576ab5rab	int xkb_is_pae;
184a576ab5rab
185843e198johnlev	mmu_info_t xkb_mmu;
186a576ab5rab	debug_info_t xkb_info;
187a576ab5rab
188349b53dStuart Maybee	void *xkb_vcpu_data;
189349b53dStuart Maybee	size_t xkb_vcpu_data_sz;
190349b53dStuart Maybee	struct vcpu_guest_context **xkb_vcpus;
191a576ab5rab
192843e198johnlev	char *xkb_pages;
193843e198johnlev	mfn_t *xkb_p2m;
194843e198johnlev	xen_pfn_t *xkb_m2p;
195843e198johnlev	mfn_map_t xkb_pt_map[4];
196843e198johnlev	mfn_map_t xkb_map;
197a576ab5rab
198a576ab5rab	char *xkb_namelist;
199a576ab5rab	size_t xkb_namesize;
200843e198johnlev} xkb_t;
201843e198johnlev
202843e198johnlevstatic const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";
203843e198johnlev
204843e198johnlevtypedef struct xkb_namelist {
205843e198johnlev	Ehdr	kh_elf_hdr;
206843e198johnlev	Phdr	kh_text_phdr;
207843e198johnlev	Phdr	kh_data_phdr;
208843e198johnlev	Shdr	kh_shdr[XKB_SHDR_NUM];
209843e198johnlev	char	shstrings[sizeof (xkb_shstrtab)];
210843e198johnlev} xkb_namelist_t;
211843e198johnlev
212843e198johnlevstatic int xkb_build_ksyms(xkb_t *);
213843e198johnlevstatic offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
214843e198johnlevstatic mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
215843e198johnlevstatic ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
216843e198johnlevstatic int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
217843e198johnlevstatic char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
218843e198johnlevstatic int xkb_close(xkb_t *);
219843e198johnlev
220a576ab5rab/*
221a576ab5rab * Jump through the hoops we need to to correctly identify a core file
222a576ab5rab * of either the old or new format.
223a576ab5rab */
224843e198johnlevint
225843e198johnlevxkb_identify(const char *file, int *longmode)
226843e198johnlev{
227843e198johnlev	xc_core_header_t header;
228a576ab5rab	mdb_gelf_file_t *gf = NULL;
229a576ab5rab	mdb_gelf_sect_t *sect = NULL;
230a576ab5rab	mdb_io_t *io = NULL;
231a576ab5rab	char *notes = NULL;
232a576ab5rab	char *pos;
233a576ab5rab	int ret = 0;
234843e198johnlev	size_t sz;
235843e198johnlev	int fd;
236843e198johnlev
237843e198johnlev	if ((fd = open64(file, O_RDONLY)) == -1)
238843e198johnlev		return (-1);
239843e198johnlev
240843e198johnlev	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
241843e198johnlev		(void) close(fd);
242843e198johnlev		return (0);
243843e198johnlev	}
244843e198johnlev
245843e198johnlev	(void) close(fd);
246843e198johnlev
247a576ab5rab	if (header.xch_magic == XC_CORE_MAGIC) {
248a576ab5rab		*longmode = 0;
249843e198johnlev
250a576ab5rab		/*
251a576ab5rab		 * Indeed.
252a576ab5rab		 */
253a576ab5rab		sz = header.xch_index_offset - header.xch_ctxt_offset;
254843e198johnlev#ifdef _LP64
255a576ab5rab		if (sizeof (struct vcpu_guest_context) *
256a576ab5rab		    header.xch_nr_vcpus == sz)
257a576ab5rab			*longmode = 1;
258843e198johnlev#else
259a576ab5rab		if (sizeof (struct vcpu_guest_context) *
260a576ab5rab		    header.xch_nr_vcpus != sz)
261a576ab5rab			*longmode = 1;
262843e198johnlev#endif /* _LP64 */
263843e198johnlev
264a576ab5rab		return (1);
265a576ab5rab	}
266a576ab5rab
267a576ab5rab	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
268a576ab5rab		return (-1);
269a576ab5rab
270a576ab5rab	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
271a576ab5rab		goto out;
272a576ab5rab
273a576ab5rab	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
274a576ab5rab		goto out;
275a576ab5rab
276a576ab5rab	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
277a576ab5rab		goto out;
278a576ab5rab
279a576ab5rab	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
280a576ab5rab		struct xc_elf_version *vers;
281a576ab5rab		/* LINTED - alignment */
282a576ab5rab		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
283a576ab5rab		char *desc;
284a576ab5rab		char *name;
285a576ab5rab
286a576ab5rab		name = pos + sizeof (*nhdr);
287a576ab5rab		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
288a576ab5rab
289a576ab5rab		pos = desc + nhdr->n_descsz;
290a576ab5rab
291a576ab5rab		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
292a576ab5rab			continue;
293a576ab5rab
294a576ab5rab		/*
295a576ab5rab		 * The contents of this struct differ between 32 and 64
296a576ab5rab		 * bit; however, not until past the 'xev_capabilities'
297a576ab5rab		 * member, so we can just about get away with this.
298a576ab5rab		 */
299a576ab5rab
300a576ab5rab		/* LINTED - alignment */
301a576ab5rab		vers = (struct xc_elf_version *)desc;
302a576ab5rab
303a576ab5rab		if (strstr(vers->xev_capabilities, "x86_64")) {
304349b53dStuart Maybee			/*
305349b53dStuart Maybee			 * 64-bit hypervisor, but it can still be
306349b53dStuart Maybee			 * a 32-bit domain core. 32-bit domain cores
307349b53dStuart Maybee			 * are also dumped in Elf64 format, but they
308349b53dStuart Maybee			 * have e_machine set to EM_386, not EM_AMD64.
309349b53dStuart Maybee			 */
310349b53dStuart Maybee			if (gf->gf_ehdr.e_machine == EM_386)
311349b53dStuart Maybee				*longmode = 0;
312349b53dStuart Maybee			else
313349b53dStuart Maybee				*longmode = 1;
314a576ab5rab		} else if (strstr(vers->xev_capabilities, "x86_32") ||
315a576ab5rab		    strstr(vers->xev_capabilities, "x86_32p")) {
316349b53dStuart Maybee			/*
317349b53dStuart Maybee			 * 32-bit hypervisor, can only be a 32-bit core.
318349b53dStuart Maybee			 */
319a576ab5rab			*longmode = 0;
320a576ab5rab		} else {
321a576ab5rab			mdb_warn("couldn't derive word size of dump; "
322a576ab5rab			    "assuming 64-bit");
323a576ab5rab			*longmode = 1;
324a576ab5rab		}
325a576ab5rab	}
326a576ab5rab
327a576ab5rab	ret = 1;
328a576ab5rab
329a576ab5rabout:
330a576ab5rab	if (gf != NULL)
331a576ab5rab		mdb_gelf_destroy(gf);
332a576ab5rab	else if (io != NULL)
333a576ab5rab		mdb_io_destroy(io);
334a576ab5rab	return (ret);
335843e198johnlev}
336843e198johnlev
337843e198johnlevstatic void *
338843e198johnlevxkb_fail(xkb_t *xkb, const char *msg, ...)
339843e198johnlev{
340843e198johnlev	va_list args;
341843e198johnlev
342843e198johnlev	va_start(args, msg);
343843e198johnlev	if (xkb != NULL)
344843e198johnlev		(void) fprintf(stderr, "%s: ", xkb->xkb_path);
345843e198johnlev	(void) vfprintf(stderr, msg, args);
346843e198johnlev	(void) fprintf(stderr, "\n");
347843e198johnlev	va_end(args);
348843e198johnlev	if (xkb != NULL)
349843e198johnlev		(void) xkb_close(xkb);
350a576ab5rab
351a576ab5rab	errno = ENOEXEC;
352a576ab5rab
353843e198johnlev	return (NULL);
354843e198johnlev}
355843e198johnlev
356843e198johnlevstatic int
357843e198johnlevxkb_build_m2p(xkb_t *xkb)
358843e198johnlev{
359843e198johnlev	size_t i;
360843e198johnlev
361a576ab5rab	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
362843e198johnlev		if (xkb->xkb_p2m[i] != MFN_INVALID &&
363843e198johnlev		    xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
364843e198johnlev			xkb->xkb_max_mfn = xkb->xkb_p2m[i];
365843e198johnlev	}
366843e198johnlev
367843e198johnlev	xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
368843e198johnlev	    UM_SLEEP);
369843e198johnlev
370843e198johnlev	for (i = 0; i <= xkb->xkb_max_mfn; i++)
371843e198johnlev		xkb->xkb_m2p[i] = PFN_INVALID;
372843e198johnlev
373a576ab5rab	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
374843e198johnlev		if (xkb->xkb_p2m[i] != MFN_INVALID)
375843e198johnlev			xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
376843e198johnlev	}
377843e198johnlev
378843e198johnlev	return (1);
379843e198johnlev}
380843e198johnlev
381843e198johnlev/*
382a576ab5rab * With FORMAT_CORE, we can use the table in the dump file directly.
383a576ab5rab * Just to make things fun, they've not page-aligned the p2m table.
384843e198johnlev */
385843e198johnlevstatic int
386843e198johnlevxkb_map_p2m(xkb_t *xkb)
387843e198johnlev{
388843e198johnlev	offset_t off;
389843e198johnlev	size_t size;
390a576ab5rab	xkb_core_t *xc = &xkb->xkb_core;
391a576ab5rab	size_t count = xkb->xkb_nr_pages;
392a576ab5rab	size_t boff = xc->xc_hdr.xch_index_offset;
393843e198johnlev
394a576ab5rab	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
395843e198johnlev	size = PAGE_MASK(size);
396843e198johnlev	off = PAGE_MASK(boff);
397843e198johnlev
398843e198johnlev	/* LINTED - alignment */
399a576ab5rab	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
400843e198johnlev	    MAP_SHARED, xkb->xkb_fd, off);
401843e198johnlev
402a576ab5rab	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
403843e198johnlev		(void) xkb_fail(xkb, "cannot map p2m table");
404843e198johnlev		return (0);
405843e198johnlev	}
406843e198johnlev
407843e198johnlev	/* LINTED - alignment */
408a576ab5rab	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
409843e198johnlev	    PAGE_OFFSET(boff));
410843e198johnlev
411843e198johnlev	return (1);
412843e198johnlev}
413843e198johnlev
414843e198johnlev/*
415a576ab5rab * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
416a576ab5rab * into a linear array indexed by pfn for convenience.  We also need to
417a576ab5rab * track the mapping between mfn and the offset in the file: a pfn with
418a576ab5rab * no mfn will not appear in the core file.
419a576ab5rab */
420a576ab5rabstatic int
421a576ab5rabxkb_build_p2m(xkb_t *xkb)
422a576ab5rab{
423a576ab5rab	xkb_elf_t *xe = &xkb->xkb_elf;
424a576ab5rab	mdb_gelf_sect_t *sect;
425a576ab5rab	size_t size;
426a576ab5rab	size_t i;
427a576ab5rab
428a576ab5rab	struct elf_p2m {
429a576ab5rab		uint64_t pfn;
430a576ab5rab		uint64_t gmfn;
431a576ab5rab	} *p2m;
432a576ab5rab
433a576ab5rab	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
434a576ab5rab
435a576ab5rab	if (sect == NULL) {
436a576ab5rab		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
437a576ab5rab		return (0);
438a576ab5rab	}
439a576ab5rab
440a576ab5rab	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
441a576ab5rab		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
442a576ab5rab		return (0);
443a576ab5rab	}
444a576ab5rab
445a576ab5rab	for (i = 0; i < xkb->xkb_nr_pages; i++) {
446a576ab5rab		if (p2m[i].pfn > xkb->xkb_max_pfn)
447a576ab5rab			xkb->xkb_max_pfn = p2m[i].pfn;
448a576ab5rab	}
449a576ab5rab
450a576ab5rab	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
451a576ab5rab	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
452a576ab5rab	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
453a576ab5rab	xe->xe_off = mdb_alloc(size, UM_SLEEP);
454a576ab5rab
455a576ab5rab	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
456a576ab5rab		xkb->xkb_p2m[i] = PFN_INVALID;
457a576ab5rab		xe->xe_off[i] = (size_t)-1;
458a576ab5rab	}
459a576ab5rab
460a576ab5rab	for (i = 0; i < xkb->xkb_nr_pages; i++) {
461a576ab5rab		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
462a576ab5rab		xe->xe_off[p2m[i].pfn] = i;
463a576ab5rab	}
464a576ab5rab
465a576ab5rab	return (1);
466a576ab5rab}
467a576ab5rab
468a576ab5rab/*
4695d2eda9John Levon * For HVM images, we don't have the corresponding MFN list; the table
4705d2eda9John Levon * is just a mapping from page index in the dump to the corresponding
4715d2eda9John Levon * PFN.  To simplify the other code, we'll pretend that these PFNs are
4725d2eda9John Levon * really MFNs as well, by populating xkb_p2m.
4735d2eda9John Levon */
4745d2eda9John Levonstatic int
4755d2eda9John Levonxkb_build_fake_p2m(xkb_t *xkb)
4765d2eda9John Levon{
4775d2eda9John Levon	xkb_elf_t *xe = &xkb->xkb_elf;
4785d2eda9John Levon	mdb_gelf_sect_t *sect;
4795d2eda9John Levon	size_t size;
4805d2eda9John Levon	size_t i;
4815d2eda9John Levon
4825d2eda9John Levon	uint64_t *p2pfn;
4835d2eda9John Levon
4845d2eda9John Levon	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
4855d2eda9John Levon
4865d2eda9John Levon	if (sect == NULL) {
4875d2eda9John Levon		(void) xkb_fail(xkb, "cannot find section .xen_pfn");
4885d2eda9John Levon		return (0);
4895d2eda9John Levon	}
4905d2eda9John Levon
4915d2eda9John Levon	if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
4925d2eda9John Levon		(void) xkb_fail(xkb, "couldn't read .xen_pfn");
4935d2eda9John Levon		return (0);
4945d2eda9John Levon	}
4955d2eda9John Levon
4965d2eda9John Levon	for (i = 0; i < xkb->xkb_nr_pages; i++) {
497349b53dStuart Maybee		if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn)
4985d2eda9John Levon			xkb->xkb_max_pfn = p2pfn[i];
4995d2eda9John Levon	}
5005d2eda9John Levon
5015d2eda9John Levon	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
5025d2eda9John Levon	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
503349b53dStuart Maybee
5045d2eda9John Levon	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
5055d2eda9John Levon	xe->xe_off = mdb_alloc(size, UM_SLEEP);
5065d2eda9John Levon
5075d2eda9John Levon	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
5085d2eda9John Levon		xkb->xkb_p2m[i] = PFN_INVALID;
5095d2eda9John Levon		xe->xe_off[i] = (size_t)-1;
5105d2eda9John Levon	}
5115d2eda9John Levon
5125d2eda9John Levon	for (i = 0; i < xkb->xkb_nr_pages; i++) {
513349b53dStuart Maybee		if (p2pfn[i] == PFN_INVALID)
514349b53dStuart Maybee			continue;
5155d2eda9John Levon		xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
5165d2eda9John Levon		xe->xe_off[p2pfn[i]] = i;
5175d2eda9John Levon	}
5185d2eda9John Levon
5195d2eda9John Levon	return (1);
5205d2eda9John Levon}
5215d2eda9John Levon
5225d2eda9John Levon/*
523843e198johnlev * Return the MFN of the top-level page table for the given as.
524843e198johnlev */
525843e198johnlevstatic mfn_t
526843e198johnlevxkb_as_to_mfn(xkb_t *xkb, struct as *as)
527843e198johnlev{
528843e198johnlev	uintptr_t asp = (uintptr_t)as;
529843e198johnlev	uintptr_t hatp;
530843e198johnlev	uintptr_t htablep;
531843e198johnlev	uintptr_t pfn;
532843e198johnlev
533843e198johnlev	if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
534843e198johnlev		return (MFN_INVALID);
535843e198johnlev	if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
536843e198johnlev	    &htablep))
537843e198johnlev		return (MFN_INVALID);
538843e198johnlev	if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
539843e198johnlev	    &pfn))
540843e198johnlev		return (MFN_INVALID);
541843e198johnlev
542a576ab5rab	if (pfn > xkb->xkb_max_pfn)
543843e198johnlev		return (MFN_INVALID);
544843e198johnlev
545843e198johnlev	return (xkb->xkb_p2m[pfn]);
546843e198johnlev}
547843e198johnlev
5485d2eda9John Levonstatic mfn_t
5495d2eda9John Levonxkb_cr3_to_pfn(xkb_t *xkb)
5505d2eda9John Levon{
551349b53dStuart Maybee	uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3];
5525d2eda9John Levon	if (xkb->xkb_is_hvm)
5535d2eda9John Levon		return (cr3 >> PAGE_SHIFT);
5545d2eda9John Levon	return (xen_cr3_to_pfn(cr3));
5555d2eda9John Levon}
5565d2eda9John Levon
557843e198johnlevstatic ssize_t
558843e198johnlevxkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
559843e198johnlev    void *buf, size_t size)
560843e198johnlev{
561843e198johnlev	size_t left = size;
562a576ab5rab	int windowed = (xkb->xkb_pages == NULL);
5635d2eda9John Levon	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
564