1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26/*
27 * KVM backend for hypervisor domain dumps.  We don't use libkvm for
28 * such dumps, since they do not have a namelist file or the typical
29 * dump structures we expect to aid bootstrapping.  Instead, we
30 * bootstrap based upon a debug_info structure at a known VA, using the
31 * guest's own page tables to resolve to physical addresses, and
32 * construct the namelist in a manner similar to ksyms_snapshot().
33 *
34 * Note that there are two formats understood by this module: the older,
35 * ad hoc format, which we call 'core' within this file, and an
36 * ELF-based format, known as 'elf'.
37 *
38 * We only support the older format generated on Solaris dom0: before we
39 * fixed it, core dump files were broken whenever a PFN didn't map a
40 * real MFN (!).
41 */
42
43#include <strings.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <stddef.h>
47#include <stdarg.h>
48#include <unistd.h>
49#include <fcntl.h>
50#include <gelf.h>
51#include <errno.h>
52
53#include <sys/mman.h>
54#include <sys/stat.h>
55#include <sys/debug_info.h>
56#include <sys/xen_mmu.h>
57#include <sys/elf.h>
58#include <sys/machelf.h>
59#include <sys/modctl.h>
60#include <sys/kobj.h>
61#include <sys/kobj_impl.h>
62#include <sys/sysmacros.h>
63#include <sys/privmregs.h>
64#include <vm/as.h>
65
66#include <mdb/mdb_io.h>
67#include <mdb/mdb_kb.h>
68#include <mdb/mdb_target_impl.h>
69
70#include <xen/public/xen.h>
71#include <xen/public/version.h>
72#include <xen/public/elfnote.h>
73
/* Section indices within the fabricated namelist (see xkb_build_ksyms()) */
#define	XKB_SHDR_NULL 0
#define	XKB_SHDR_SYMTAB 1
#define	XKB_SHDR_STRTAB 2
#define	XKB_SHDR_SHSTRTAB 3
#define	XKB_SHDR_NUM 4

/* Selectors for the kinds of symbol data xkb_walk_syms() collects */
#define	XKB_WALK_LOCAL 0x1
#define	XKB_WALK_GLOBAL 0x2
#define	XKB_WALK_STR 0x4
#define	XKB_WALK_ALL (XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR)

/* Known VA of the guest's debug_info structure (see file comment above) */
#if defined(__i386)
#define	DEBUG_INFO 0xf4bff000
#define	DEBUG_INFO_HVM 0xfe7ff000
#elif defined(__amd64)
#define	DEBUG_INFO 0xfffffffffb7ff000
#define	DEBUG_INFO_HVM 0xfffffffffb7ff000
#endif

/* x86 page and page-table-entry constants */
#define	PAGE_SIZE 0x1000
#define	PAGE_SHIFT 12
#define	PAGE_OFFSET(a) ((a) & (PAGE_SIZE - 1))
#define	PAGE_MASK(a) ((a) & ~(PAGE_SIZE - 1))
#define	PAGE_ALIGNED(a) (((a) & (PAGE_SIZE -1)) == 0)
#define	PT_PADDR_LGPG 0x000fffffffffe000ull	/* addr mask, large page */
#define	PT_PADDR 0x000ffffffffff000ull		/* addr mask, 4K page */
#define	PT_VALID 0x1
#define	PT_PAGESIZE 0x080
#define	PTE_IS_LGPG(p, l) ((l) > 0 && ((p) & PT_PAGESIZE))

/* Magic numbers identifying dumps of PV and HVM domains respectively */
#define	XC_CORE_MAGIC 0xF00FEBED
#define	XC_CORE_MAGIC_HVM 0xF00FEBEE

#define	VGCF_HVM_GUEST (1<<1)
108
/*
 * Header of the old-style ('core') dump format.  The xch_*_offset
 * fields are byte offsets into the dump file.
 */
typedef struct xc_core_header {
	unsigned int xch_magic;		/* XC_CORE_MAGIC(_HVM) */
	unsigned int xch_nr_vcpus;
	unsigned int xch_nr_pages;
	unsigned int xch_ctxt_offset;	/* VCPU contexts */
	unsigned int xch_index_offset;	/* P2M table */
	unsigned int xch_pages_offset;	/* page data */
} xc_core_header_t;

/* Contents of the XEN_ELFNOTE_DUMPCORE_HEADER note in 'elf' dumps */
struct xc_elf_header {
	uint64_t xeh_magic;
	uint64_t xeh_nr_vcpus;
	uint64_t xeh_nr_pages;
	uint64_t xeh_page_size;
};

/* Contents of the XEN_ELFNOTE_DUMPCORE_XEN_VERSION note */
struct xc_elf_version {
	uint64_t xev_major;
	uint64_t xev_minor;
	xen_extraversion_t xev_extra;
	xen_compile_info_t xev_compile_info;
	xen_capabilities_info_t xev_capabilities;
	xen_changeset_info_t xev_changeset;
	xen_platform_parameters_t xev_platform_parameters;
	uint64_t xev_pagesize;
};

/*
 * Either an old-style (3.0.4) core format, or the ELF format.
 */
typedef enum {
	XKB_FORMAT_UNKNOWN = 0,
	XKB_FORMAT_CORE = 1,
	XKB_FORMAT_ELF = 2
} xkb_type_t;

/* Cache of the most recently mapped page for a given MFN */
typedef struct mfn_map {
	mfn_t mm_mfn;
	char *mm_map;
} mfn_map_t;

/* Geometry of the guest's page tables (see xkb_va_to_mfn()) */
typedef struct mmu_info {
	size_t mi_max;		/* top page-table level */
	size_t mi_shift[4];	/* VA shift at each level */
	size_t mi_ptes;		/* PTEs per page table */
	size_t mi_ptesize;	/* bytes per PTE (4 or 8) */
} mmu_info_t;

/* State specific to the old 'core' format */
typedef struct xkb_core {
	xc_core_header_t xc_hdr;
	void *xc_p2m_buf;	/* mmap()ed region containing the P2M table */
} xkb_core_t;

/* State specific to the ELF format */
typedef struct xkb_elf {
	mdb_gelf_file_t *xe_gelf;
	size_t *xe_off;		/* pfn -> page index within .xen_pages */
	struct xc_elf_header xe_hdr;
	struct xc_elf_version xe_version;
} xkb_elf_t;

/* Per-dump state for this backend */
typedef struct xkb {
	char *xkb_path;
	int xkb_fd;
	int xkb_is_hvm;

	xkb_type_t xkb_type;
	xkb_core_t xkb_core;
	xkb_elf_t xkb_elf;

	size_t xkb_nr_vcpus;
	size_t xkb_nr_pages;
	size_t xkb_pages_off;	/* file offset of the page data */
	xen_pfn_t xkb_max_pfn;
	mfn_t xkb_max_mfn;
	int xkb_is_pae;

	mmu_info_t xkb_mmu;
	debug_info_t xkb_info;

	void *xkb_vcpu_data;
	size_t xkb_vcpu_data_sz;
	struct vcpu_guest_context **xkb_vcpus;

	char *xkb_pages;	/* non-NULL iff all page data is mmap()ed */
	mfn_t *xkb_p2m;		/* pfn -> mfn */
	xen_pfn_t *xkb_m2p;	/* mfn -> pfn */
	mfn_map_t xkb_pt_map[4];	/* per-level page-table page cache */
	mfn_map_t xkb_map;	/* general data page cache */

	char *xkb_namelist;	/* fabricated ksyms image */
	size_t xkb_namesize;
} xkb_t;
201
/* Section name strings for the fabricated namelist's .shstrtab */
static const char xkb_shstrtab[] = "\0.symtab\0.strtab\0.shstrtab\0";

/*
 * Fixed header portion of the fabricated namelist; the symbol table
 * and string table data are appended directly after this structure
 * (see xkb_build_ksyms()).
 */
typedef struct xkb_namelist {
	Ehdr	kh_elf_hdr;
	Phdr	kh_text_phdr;
	Phdr	kh_data_phdr;
	Shdr	kh_shdr[XKB_SHDR_NUM];
	char	shstrings[sizeof (xkb_shstrtab)];
} xkb_namelist_t;

/* Forward declarations */
static int xkb_build_ksyms(xkb_t *);
static offset_t xkb_mfn_to_offset(xkb_t *, mfn_t);
static mfn_t xkb_va_to_mfn(xkb_t *, uintptr_t, mfn_t);
static ssize_t xkb_read(xkb_t *, uintptr_t, void *, size_t);
static int xkb_read_word(xkb_t *, uintptr_t, uintptr_t *);
static char *xkb_map_mfn(xkb_t *, mfn_t, mfn_map_t *);
static int xkb_close(xkb_t *);
219
220/*
221 * Jump through the hoops we need to to correctly identify a core file
222 * of either the old or new format.
223 */
224int
225xkb_identify(const char *file, int *longmode)
226{
227	xc_core_header_t header;
228	mdb_gelf_file_t *gf = NULL;
229	mdb_gelf_sect_t *sect = NULL;
230	mdb_io_t *io = NULL;
231	char *notes = NULL;
232	char *pos;
233	int ret = 0;
234	size_t sz;
235	int fd;
236
237	if ((fd = open64(file, O_RDONLY)) == -1)
238		return (-1);
239
240	if (pread64(fd, &header, sizeof (header), 0) != sizeof (header)) {
241		(void) close(fd);
242		return (0);
243	}
244
245	(void) close(fd);
246
247	if (header.xch_magic == XC_CORE_MAGIC) {
248		*longmode = 0;
249
250		/*
251		 * Indeed.
252		 */
253		sz = header.xch_index_offset - header.xch_ctxt_offset;
254#ifdef _LP64
255		if (sizeof (struct vcpu_guest_context) *
256		    header.xch_nr_vcpus == sz)
257			*longmode = 1;
258#else
259		if (sizeof (struct vcpu_guest_context) *
260		    header.xch_nr_vcpus != sz)
261			*longmode = 1;
262#endif /* _LP64 */
263
264		return (1);
265	}
266
267	if ((io = mdb_fdio_create_path(NULL, file, O_RDONLY, 0)) == NULL)
268		return (-1);
269
270	if ((gf = mdb_gelf_create(io, ET_NONE, GF_FILE)) == NULL)
271		goto out;
272
273	if ((sect = mdb_gelf_sect_by_name(gf, ".note.Xen")) == NULL)
274		goto out;
275
276	if ((notes = mdb_gelf_sect_load(gf, sect)) == NULL)
277		goto out;
278
279	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
280		struct xc_elf_version *vers;
281		/* LINTED - alignment */
282		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
283		char *desc;
284		char *name;
285
286		name = pos + sizeof (*nhdr);
287		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
288
289		pos = desc + nhdr->n_descsz;
290
291		if (nhdr->n_type != XEN_ELFNOTE_DUMPCORE_XEN_VERSION)
292			continue;
293
294		/*
295		 * The contents of this struct differ between 32 and 64
296		 * bit; however, not until past the 'xev_capabilities'
297		 * member, so we can just about get away with this.
298		 */
299
300		/* LINTED - alignment */
301		vers = (struct xc_elf_version *)desc;
302
303		if (strstr(vers->xev_capabilities, "x86_64")) {
304			/*
305			 * 64-bit hypervisor, but it can still be
306			 * a 32-bit domain core. 32-bit domain cores
307			 * are also dumped in Elf64 format, but they
308			 * have e_machine set to EM_386, not EM_AMD64.
309			 */
310			if (gf->gf_ehdr.e_machine == EM_386)
311				*longmode = 0;
312			else
313				*longmode = 1;
314		} else if (strstr(vers->xev_capabilities, "x86_32") ||
315		    strstr(vers->xev_capabilities, "x86_32p")) {
316			/*
317			 * 32-bit hypervisor, can only be a 32-bit core.
318			 */
319			*longmode = 0;
320		} else {
321			mdb_warn("couldn't derive word size of dump; "
322			    "assuming 64-bit");
323			*longmode = 1;
324		}
325	}
326
327	ret = 1;
328
329out:
330	if (gf != NULL)
331		mdb_gelf_destroy(gf);
332	else if (io != NULL)
333		mdb_io_destroy(io);
334	return (ret);
335}
336
337static void *
338xkb_fail(xkb_t *xkb, const char *msg, ...)
339{
340	va_list args;
341
342	va_start(args, msg);
343	if (xkb != NULL)
344		(void) fprintf(stderr, "%s: ", xkb->xkb_path);
345	(void) vfprintf(stderr, msg, args);
346	(void) fprintf(stderr, "\n");
347	va_end(args);
348	if (xkb != NULL)
349		(void) xkb_close(xkb);
350
351	errno = ENOEXEC;
352
353	return (NULL);
354}
355
356static int
357xkb_build_m2p(xkb_t *xkb)
358{
359	size_t i;
360
361	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
362		if (xkb->xkb_p2m[i] != MFN_INVALID &&
363		    xkb->xkb_p2m[i] > xkb->xkb_max_mfn)
364			xkb->xkb_max_mfn = xkb->xkb_p2m[i];
365	}
366
367	xkb->xkb_m2p = mdb_alloc((xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t),
368	    UM_SLEEP);
369
370	for (i = 0; i <= xkb->xkb_max_mfn; i++)
371		xkb->xkb_m2p[i] = PFN_INVALID;
372
373	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
374		if (xkb->xkb_p2m[i] != MFN_INVALID)
375			xkb->xkb_m2p[xkb->xkb_p2m[i]] = i;
376	}
377
378	return (1);
379}
380
381/*
382 * With FORMAT_CORE, we can use the table in the dump file directly.
383 * Just to make things fun, they've not page-aligned the p2m table.
384 */
385static int
386xkb_map_p2m(xkb_t *xkb)
387{
388	offset_t off;
389	size_t size;
390	xkb_core_t *xc = &xkb->xkb_core;
391	size_t count = xkb->xkb_nr_pages;
392	size_t boff = xc->xc_hdr.xch_index_offset;
393
394	size = (sizeof (mfn_t) * count) + (PAGE_SIZE * 2);
395	size = PAGE_MASK(size);
396	off = PAGE_MASK(boff);
397
398	/* LINTED - alignment */
399	xc->xc_p2m_buf = (mfn_t *)mmap(NULL, size, PROT_READ,
400	    MAP_SHARED, xkb->xkb_fd, off);
401
402	if (xc->xc_p2m_buf == (xen_pfn_t *)MAP_FAILED) {
403		(void) xkb_fail(xkb, "cannot map p2m table");
404		return (0);
405	}
406
407	/* LINTED - alignment */
408	xkb->xkb_p2m = (mfn_t *)((char *)xc->xc_p2m_buf +
409	    PAGE_OFFSET(boff));
410
411	return (1);
412}
413
414/*
415 * With FORMAT_ELF, we have a set of <pfn,mfn> pairs, which we convert
416 * into a linear array indexed by pfn for convenience.  We also need to
417 * track the mapping between mfn and the offset in the file: a pfn with
418 * no mfn will not appear in the core file.
419 */
420static int
421xkb_build_p2m(xkb_t *xkb)
422{
423	xkb_elf_t *xe = &xkb->xkb_elf;
424	mdb_gelf_sect_t *sect;
425	size_t size;
426	size_t i;
427
428	struct elf_p2m {
429		uint64_t pfn;
430		uint64_t gmfn;
431	} *p2m;
432
433	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_p2m");
434
435	if (sect == NULL) {
436		(void) xkb_fail(xkb, "cannot find section .xen_p2m");
437		return (0);
438	}
439
440	if ((p2m = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
441		(void) xkb_fail(xkb, "couldn't read .xen_p2m");
442		return (0);
443	}
444
445	for (i = 0; i < xkb->xkb_nr_pages; i++) {
446		if (p2m[i].pfn > xkb->xkb_max_pfn)
447			xkb->xkb_max_pfn = p2m[i].pfn;
448	}
449
450	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
451	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
452	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
453	xe->xe_off = mdb_alloc(size, UM_SLEEP);
454
455	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
456		xkb->xkb_p2m[i] = PFN_INVALID;
457		xe->xe_off[i] = (size_t)-1;
458	}
459
460	for (i = 0; i < xkb->xkb_nr_pages; i++) {
461		xkb->xkb_p2m[p2m[i].pfn] = p2m[i].gmfn;
462		xe->xe_off[p2m[i].pfn] = i;
463	}
464
465	return (1);
466}
467
468/*
469 * For HVM images, we don't have the corresponding MFN list; the table
470 * is just a mapping from page index in the dump to the corresponding
471 * PFN.  To simplify the other code, we'll pretend that these PFNs are
472 * really MFNs as well, by populating xkb_p2m.
473 */
474static int
475xkb_build_fake_p2m(xkb_t *xkb)
476{
477	xkb_elf_t *xe = &xkb->xkb_elf;
478	mdb_gelf_sect_t *sect;
479	size_t size;
480	size_t i;
481
482	uint64_t *p2pfn;
483
484	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pfn");
485
486	if (sect == NULL) {
487		(void) xkb_fail(xkb, "cannot find section .xen_pfn");
488		return (0);
489	}
490
491	if ((p2pfn = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL) {
492		(void) xkb_fail(xkb, "couldn't read .xen_pfn");
493		return (0);
494	}
495
496	for (i = 0; i < xkb->xkb_nr_pages; i++) {
497		if (p2pfn[i] != PFN_INVALID && p2pfn[i] > xkb->xkb_max_pfn)
498			xkb->xkb_max_pfn = p2pfn[i];
499	}
500
501	size = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);
502	xkb->xkb_p2m = mdb_alloc(size, UM_SLEEP);
503
504	size = sizeof (size_t) * (xkb->xkb_max_pfn + 1);
505	xe->xe_off = mdb_alloc(size, UM_SLEEP);
506
507	for (i = 0; i <= xkb->xkb_max_pfn; i++) {
508		xkb->xkb_p2m[i] = PFN_INVALID;
509		xe->xe_off[i] = (size_t)-1;
510	}
511
512	for (i = 0; i < xkb->xkb_nr_pages; i++) {
513		if (p2pfn[i] == PFN_INVALID)
514			continue;
515		xkb->xkb_p2m[p2pfn[i]] = p2pfn[i];
516		xe->xe_off[p2pfn[i]] = i;
517	}
518
519	return (1);
520}
521
522/*
523 * Return the MFN of the top-level page table for the given as.
524 */
525static mfn_t
526xkb_as_to_mfn(xkb_t *xkb, struct as *as)
527{
528	uintptr_t asp = (uintptr_t)as;
529	uintptr_t hatp;
530	uintptr_t htablep;
531	uintptr_t pfn;
532
533	if (!xkb_read_word(xkb, asp + offsetof(struct as, a_hat), &hatp))
534		return (MFN_INVALID);
535	if (!xkb_read_word(xkb, hatp + xkb->xkb_info.di_hat_htable_off,
536	    &htablep))
537		return (MFN_INVALID);
538	if (!xkb_read_word(xkb, htablep + xkb->xkb_info.di_ht_pfn_off,
539	    &pfn))
540		return (MFN_INVALID);
541
542	if (pfn > xkb->xkb_max_pfn)
543		return (MFN_INVALID);
544
545	return (xkb->xkb_p2m[pfn]);
546}
547
548static mfn_t
549xkb_cr3_to_pfn(xkb_t *xkb)
550{
551	uint64_t cr3 = xkb->xkb_vcpus[0]->ctrlreg[3];
552	if (xkb->xkb_is_hvm)
553		return (cr3 >> PAGE_SHIFT);
554	return (xen_cr3_to_pfn(cr3));
555}
556
557static ssize_t
558xkb_read_helper(xkb_t *xkb, struct as *as, int phys, uint64_t addr,
559    void *buf, size_t size)
560{
561	size_t left = size;
562	int windowed = (xkb->xkb_pages == NULL);
563	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
564
565	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
566		return (-1);
567
568	while (left) {
569		uint64_t pos = addr + (size - left);
570		char *outpos = (char *)buf + (size - left);
571		size_t pageoff = PAGE_OFFSET(pos);
572		size_t sz = MIN(left, PAGE_SIZE - pageoff);
573		mfn_t mfn;
574
575		if (!phys) {
576			mfn = xkb_va_to_mfn(xkb, pos, tlmfn);
577			if (mfn == MFN_INVALID)
578				return (-1);
579		} else {
580			xen_pfn_t pfn = pos >> PAGE_SHIFT;
581			if (pfn > xkb->xkb_max_pfn)
582				return (-1);
583			mfn = xkb->xkb_p2m[pfn];
584			if (mfn == MFN_INVALID)
585				return (-1);
586		}
587
588		/*
589		 * If we're windowed then pread() is much faster.
590		 */
591		if (windowed) {
592			offset_t off = xkb_mfn_to_offset(xkb, mfn);
593			int ret;
594
595			if (off == ~1ULL)
596				return (-1);
597
598			off += pageoff;
599
600			ret = pread64(xkb->xkb_fd, outpos, sz, off);
601			if (ret == -1)
602				return (-1);
603			if (ret != sz)
604				return ((size - left) + ret);
605
606			left -= ret;
607		} else {
608			if (xkb_map_mfn(xkb, mfn, &xkb->xkb_map) == NULL)
609				return (-1);
610
611			bcopy(xkb->xkb_map.mm_map + pageoff, outpos, sz);
612
613			left -= sz;
614		}
615	}
616
617	return (size);
618}
619
/* Read from a guest physical address. */
static ssize_t
xkb_pread(xkb_t *xkb, uint64_t addr, void *buf, size_t size)
{
	return (xkb_read_helper(xkb, NULL, 1, addr, buf, size));
}
625
/* Read from a virtual address in the given address space. */
static ssize_t
xkb_aread(xkb_t *xkb, uintptr_t addr, void *buf, size_t size, struct as *as)
{
	return (xkb_read_helper(xkb, as, 0, addr, buf, size));
}
631
/* Read from a kernel virtual address (VCPU 0's current context). */
static ssize_t
xkb_read(xkb_t *xkb, uintptr_t addr, void *buf, size_t size)
{
	return (xkb_aread(xkb, addr, buf, size, NULL));
}
637
638static int
639xkb_read_word(xkb_t *xkb, uintptr_t addr, uintptr_t *buf)
640{
641	if (xkb_read(xkb, addr, buf, sizeof (uintptr_t)) !=
642	    sizeof (uintptr_t))
643		return (0);
644	return (1);
645}
646
647static char *
648xkb_readstr(xkb_t *xkb, uintptr_t addr)
649{
650	char *str = mdb_alloc(1024, UM_SLEEP);
651	size_t i;
652
653	for (i = 0; i < 1024; i++) {
654		if (xkb_read(xkb, addr + i, &str[i], 1) != 1) {
655			mdb_free(str, 1024);
656			return (NULL);
657		}
658
659		if (str[i] == '\0')
660			break;
661	}
662
663	if (i == 1024) {
664		mdb_free(str, 1024);
665		return (NULL);
666	}
667
668	return (str);
669}
670
/*
 * Map a PFN to its byte offset within the page data area of the dump.
 * Returns -1ULL for an invalid or out-of-range PFN.
 */
static offset_t
xkb_pfn_to_off(xkb_t *xkb, xen_pfn_t pfn)
{
	if (pfn == PFN_INVALID || pfn > xkb->xkb_max_pfn)
		return (-1ULL);

	/* FORMAT_CORE dumps every page, so the offset is direct. */
	if (xkb->xkb_type == XKB_FORMAT_CORE)
		return (PAGE_SIZE * pfn);

	/* FORMAT_ELF only dumps present pages; use the recorded index. */
	return (PAGE_SIZE * (xkb->xkb_elf.xe_off[pfn]));
}
682
683static offset_t
684xkb_mfn_to_offset(xkb_t *xkb, mfn_t mfn)
685{
686	xen_pfn_t pfn;
687
688	if (mfn > xkb->xkb_max_mfn)
689		return (-1ULL);
690
691	pfn = xkb->xkb_m2p[mfn];
692
693	if (pfn == PFN_INVALID)
694		return (-1ULL);
695
696	return (xkb->xkb_pages_off + xkb_pfn_to_off(xkb, pfn));
697}
698
699static char *
700xkb_map_mfn(xkb_t *xkb, mfn_t mfn, mfn_map_t *mm)
701{
702	int windowed = (xkb->xkb_pages == NULL);
703	offset_t off;
704
705	if (mm->mm_mfn == mfn)
706		return (mm->mm_map);
707
708	mm->mm_mfn = mfn;
709
710	if (windowed) {
711		if (mm->mm_map != (char *)MAP_FAILED) {
712			(void) munmap(mm->mm_map, PAGE_SIZE);
713			mm->mm_map = (void *)MAP_FAILED;
714		}
715
716		if ((off = xkb_mfn_to_offset(xkb, mfn)) == (-1ULL))
717			return (NULL);
718
719		mm->mm_map = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED,
720		    xkb->xkb_fd, off);
721
722		if (mm->mm_map == (char *)MAP_FAILED)
723			return (NULL);
724	} else {
725		xen_pfn_t pfn;
726
727		mm->mm_map = NULL;
728
729		if (mfn > xkb->xkb_max_mfn)
730			return (NULL);
731
732		pfn = xkb->xkb_m2p[mfn];
733
734		if (pfn == PFN_INVALID)
735			return (NULL);
736
737		mm->mm_map = xkb->xkb_pages + xkb_pfn_to_off(xkb, pfn);
738	}
739
740	return (mm->mm_map);
741}
742
743static uint64_t
744xkb_get_pte(mmu_info_t *mmu, char *ptep)
745{
746	uint64_t pte = 0;
747
748	if (mmu->mi_ptesize == 8) {
749		/* LINTED - alignment */
750		pte = *((uint64_t *)ptep);
751	} else {
752		/* LINTED - alignment */
753		pte = *((uint32_t *)ptep);
754	}
755
756	return (pte);
757}
758
759static mfn_t
760xkb_pte_to_base_mfn(uint64_t pte, size_t level)
761{
762	if (PTE_IS_LGPG(pte, level)) {
763		pte &= PT_PADDR_LGPG;
764	} else {
765		pte &= PT_PADDR;
766	}
767
768	return (pte >> PAGE_SHIFT);
769}
770
771/*
772 * Resolve the given VA into an MFN, using the provided mfn as a top-level page
773 * table.
774 */
775static mfn_t
776xkb_va_to_mfn(xkb_t *xkb, uintptr_t va, mfn_t mfn)
777{
778	mmu_info_t *mmu = &xkb->xkb_mmu;
779	uint64_t pte;
780	size_t level;
781
782	for (level = mmu->mi_max; ; --level) {
783		size_t entry;
784
785		if (xkb_map_mfn(xkb, mfn, &xkb->xkb_pt_map[level]) == NULL)
786			return (MFN_INVALID);
787
788		entry = (va >> mmu->mi_shift[level]) & (mmu->mi_ptes - 1);
789
790		pte = xkb_get_pte(mmu, (char *)xkb->xkb_pt_map[level].mm_map +
791		    entry * mmu->mi_ptesize);
792
793		if ((mfn = xkb_pte_to_base_mfn(pte, level)) == MFN_INVALID)
794			return (MFN_INVALID);
795
796		if (level == 0)
797			break;
798
799		/*
800		 * Currently 'mfn' refers to the base MFN of the
801		 * large-page mapping.  Add on the 4K-sized index into
802		 * the large-page mapping to get the right MFN within
803		 * the mapping.
804		 */
805		if (PTE_IS_LGPG(pte, level)) {
806			mfn += (va & ((1 << mmu->mi_shift[level]) - 1)) >>
807			    PAGE_SHIFT;
808			break;
809		}
810	}
811
812	return (mfn);
813}
814
/*
 * Read the struct module at 'modulep' from the dump, along with the
 * kernel VAs of its symbol table ('sym_addr') and string table
 * ('str_addr'), and the number of symbols ('sym_count', derived from
 * the symtab section size).  Returns 1 on success, 0 on any read
 * failure.
 */
static int
xkb_read_module(xkb_t *xkb, uintptr_t modulep, struct module *module,
    uintptr_t *sym_addr, uintptr_t *sym_count, uintptr_t *str_addr)
{
	if (xkb_read(xkb, modulep, module, sizeof (struct module)) !=
	    sizeof (struct module))
		return (0);

	/* module->symhdr->sh_addr: base of the symbol table */
	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
	    offsetof(Shdr, sh_addr), sym_addr))
		return (0);

	/* module->strhdr->sh_addr: base of the string table */
	if (!xkb_read_word(xkb, (uintptr_t)module->strhdr +
	    offsetof(Shdr, sh_addr), str_addr))
		return (0);

	/* module->symhdr->sh_size, converted to a symbol count */
	if (!xkb_read_word(xkb, (uintptr_t)module->symhdr +
	    offsetof(Shdr, sh_size), sym_count))
		return (0);
	*sym_count /= sizeof (Sym);

	return (1);
}
838
839static int
840xkb_read_modsyms(xkb_t *xkb, char **buf, size_t *sizes, int types,
841    uintptr_t sym_addr, uintptr_t str_addr, uintptr_t sym_count)
842{
843	size_t i;
844
845	for (i = 0; i < sym_count; i++) {
846		Sym sym;
847		char *name;
848		size_t sz;
849		int type = XKB_WALK_GLOBAL;
850
851		if (xkb_read(xkb, sym_addr + i * sizeof (sym), &sym,
852		    sizeof (sym)) != sizeof (sym))
853			return (0);
854
855		if (GELF_ST_BIND(sym.st_info) == STB_LOCAL)
856			type = XKB_WALK_LOCAL;
857
858		name = xkb_readstr(xkb, str_addr + sym.st_name);
859
860		sym.st_shndx = SHN_ABS;
861		sym.st_name = sizes[XKB_WALK_STR];
862
863		sizes[type] += sizeof (sym);
864		sz = strlen(name) + 1;
865		sizes[XKB_WALK_STR] += sz;
866
867		if (buf != NULL) {
868			if (types & type) {
869				bcopy(&sym, *buf, sizeof (sym));
870				*buf += sizeof (sym);
871			}
872			if (types & XKB_WALK_STR) {
873				bcopy(name, *buf, sz);
874				*buf += sz;
875			}
876		}
877
878		mdb_free(name, 1024);
879	}
880
881	return (1);
882}
883
/*
 * Walk every module on the (circular) modctl list headed at 'modhead',
 * invoking xkb_read_modsyms() on each to total up — and, if 'buf' is
 * non-NULL, copy out — symbol and string data of the kinds selected by
 * 'types'.  'sizes' is always recomputed from scratch.  Returns 1 on
 * success, 0 on a read failure.
 */
static int
xkb_walk_syms(xkb_t *xkb, uintptr_t modhead, char **buf,
    size_t *sizes, int types)
{
	uintptr_t modctl = modhead;
	uintptr_t modulep;
	struct module module;
	uintptr_t sym_count;
	uintptr_t sym_addr;
	uintptr_t str_addr;
	size_t max_iter = 500;

	bzero(sizes, sizeof (*sizes) * (XKB_WALK_STR + 1));

	/*
	 * empty first symbol
	 */
	sizes[XKB_WALK_LOCAL] += sizeof (Sym);
	sizes[XKB_WALK_STR] += 1;

	if (buf != NULL) {
		if (types & XKB_WALK_LOCAL) {
			Sym tmp;
			bzero(&tmp, sizeof (tmp));
			bcopy(&tmp, *buf, sizeof (tmp));
			*buf += sizeof (tmp);
		}
		if (types & XKB_WALK_STR) {
			**buf = '\0';
			(*buf)++;
		}
	}

	for (;;) {
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_mp), &modulep))
			return (0);

		/* Skip modctls with no loaded module. */
		if (modulep == 0)
			goto next;

		if (!xkb_read_module(xkb, modulep, &module, &sym_addr,
		    &sym_count, &str_addr))
			return (0);

		/* Modules loaded with no symbols contribute nothing. */
		if ((module.flags & KOBJ_NOKSYMS))
			goto next;

		if (!xkb_read_modsyms(xkb, buf, sizes, types, sym_addr,
		    str_addr, sym_count))
			return (0);

next:
		if (!xkb_read_word(xkb,
		    modctl + offsetof(struct modctl, mod_next), &modctl))
			return (0);

		/* The list is circular; stop once we wrap around. */
		if (modctl == modhead)
			break;
		/*
		 * Try and prevent us looping forever if we have a broken list.
		 */
		if (--max_iter == 0)
			break;
	}

	return (1);
}
952
953/*
954 * Userspace equivalent of ksyms_snapshot().  Since we don't have a namelist
955 * file for hypervisor images, we fabricate one here using code similar
956 * to that of /dev/ksyms.
957 */
958static int
959xkb_build_ksyms(xkb_t *xkb)
960{
961	debug_info_t *info = &xkb->xkb_info;
962	size_t sizes[XKB_WALK_STR + 1];
963	xkb_namelist_t *hdr;
964	char *buf;
965	struct modctl modules;
966	uintptr_t module;
967	Shdr *shp;
968
969	if (xkb_read(xkb, info->di_modules, &modules,
970	    sizeof (struct modctl)) != sizeof (struct modctl))
971		return (0);
972
973	module = (uintptr_t)modules.mod_mp;
974
975	if (!xkb_walk_syms(xkb, info->di_modules, NULL, sizes,
976	    XKB_WALK_LOCAL | XKB_WALK_GLOBAL | XKB_WALK_STR))
977		return (0);
978
979	xkb->xkb_namesize = sizeof (xkb_namelist_t);
980	xkb->xkb_namesize += sizes[XKB_WALK_LOCAL];
981	xkb->xkb_namesize += sizes[XKB_WALK_GLOBAL];
982	xkb->xkb_namesize += sizes[XKB_WALK_STR];
983
984	if ((xkb->xkb_namelist = mdb_zalloc(xkb->xkb_namesize, UM_SLEEP))
985	    == NULL)
986		return (0);
987
988	/* LINTED - alignment */
989	hdr = (xkb_namelist_t *)xkb->xkb_namelist;
990
991	if (xkb_read(xkb, module + offsetof(struct module, hdr),
992	    &hdr->kh_elf_hdr, sizeof (Ehdr)) != sizeof (Ehdr))
993		return (0);
994
995	hdr->kh_elf_hdr.e_phoff = offsetof(xkb_namelist_t, kh_text_phdr);
996	hdr->kh_elf_hdr.e_shoff = offsetof(xkb_namelist_t, kh_shdr);
997	hdr->kh_elf_hdr.e_phnum = 2;
998	hdr->kh_elf_hdr.e_shnum = XKB_SHDR_NUM;
999	hdr->kh_elf_hdr.e_shstrndx = XKB_SHDR_SHSTRTAB;
1000
1001	hdr->kh_text_phdr.p_type = PT_LOAD;
1002	hdr->kh_text_phdr.p_vaddr = (Addr)info->di_s_text;
1003	hdr->kh_text_phdr.p_memsz = (Word)(info->di_e_text - info->di_s_text);
1004	hdr->kh_text_phdr.p_flags = PF_R | PF_X;
1005
1006	hdr->kh_data_phdr.p_type = PT_LOAD;
1007	hdr->kh_data_phdr.p_vaddr = (Addr)info->di_s_data;
1008	hdr->kh_data_phdr.p_memsz = (Word)(info->di_e_data - info->di_s_data);
1009	hdr->kh_data_phdr.p_flags = PF_R | PF_W | PF_X;
1010
1011	shp = &hdr->kh_shdr[XKB_SHDR_SYMTAB];
1012	shp->sh_name = 1;	/* xkb_shstrtab[1] = ".symtab" */
1013	shp->sh_type = SHT_SYMTAB;
1014	shp->sh_offset = sizeof (xkb_namelist_t);
1015	shp->sh_size = sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
1016	shp->sh_link = XKB_SHDR_STRTAB;
1017	shp->sh_info = sizes[XKB_WALK_LOCAL] / sizeof (Sym);
1018	shp->sh_addralign = sizeof (Addr);
1019	shp->sh_entsize = sizeof (Sym);
1020	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1021
1022
1023	shp = &hdr->kh_shdr[XKB_SHDR_STRTAB];
1024	shp->sh_name = 9;	/* xkb_shstrtab[9] = ".strtab" */
1025	shp->sh_type = SHT_STRTAB;
1026	shp->sh_offset = sizeof (xkb_namelist_t) +
1027	    sizes[XKB_WALK_LOCAL] + sizes[XKB_WALK_GLOBAL];
1028	shp->sh_size = sizes[XKB_WALK_STR];
1029	shp->sh_addralign = 1;
1030	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1031
1032
1033	shp = &hdr->kh_shdr[XKB_SHDR_SHSTRTAB];
1034	shp->sh_name = 17;	/* xkb_shstrtab[17] = ".shstrtab" */
1035	shp->sh_type = SHT_STRTAB;
1036	shp->sh_offset = offsetof(xkb_namelist_t, shstrings);
1037	shp->sh_size = sizeof (xkb_shstrtab);
1038	shp->sh_addralign = 1;
1039	shp->sh_addr = (Addr)(xkb->xkb_namelist + shp->sh_offset);
1040
1041	bcopy(xkb_shstrtab, hdr->shstrings, sizeof (xkb_shstrtab));
1042
1043	buf = xkb->xkb_namelist + sizeof (xkb_namelist_t);
1044
1045	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1046	    XKB_WALK_LOCAL))
1047		return (0);
1048	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1049	    XKB_WALK_GLOBAL))
1050		return (0);
1051	if (!xkb_walk_syms(xkb, info->di_modules, &buf, sizes,
1052	    XKB_WALK_STR))
1053		return (0);
1054
1055	return (1);
1056}
1057
/*
 * Open and bootstrap an old-style ('core') dump: read and validate the
 * header, load the VCPU contexts, map the page data (or fall back to
 * windowed reads), and hook up the P2M table.  Returns xkb on success,
 * NULL (after tearing down via xkb_fail()) on failure.
 */
static xkb_t *
xkb_open_core(xkb_t *xkb)
{
	xkb_core_t *xc = &xkb->xkb_core;
	size_t sz;
	int i;
	struct vcpu_guest_context *vcp;

	xkb->xkb_type = XKB_FORMAT_CORE;

	if ((xkb->xkb_fd = open64(xkb->xkb_path, O_RDONLY)) == -1)
		return (xkb_fail(xkb, "cannot open %s", xkb->xkb_path));

	if (pread64(xkb->xkb_fd, &xc->xc_hdr, sizeof (xc->xc_hdr), 0) !=
	    sizeof (xc->xc_hdr))
		return (xkb_fail(xkb, "invalid dump file"));

	if (xc->xc_hdr.xch_magic == XC_CORE_MAGIC_HVM)
		return (xkb_fail(xkb, "cannot process HVM images"));

	if (xc->xc_hdr.xch_magic != XC_CORE_MAGIC) {
		return (xkb_fail(xkb, "invalid magic %d",
		    xc->xc_hdr.xch_magic));
	}

	/*
	 * With FORMAT_CORE, all pages are in the dump (non-existing
	 * ones are zeroed out).
	 */
	xkb->xkb_nr_pages = xc->xc_hdr.xch_nr_pages;
	xkb->xkb_pages_off = xc->xc_hdr.xch_pages_offset;
	xkb->xkb_max_pfn = xc->xc_hdr.xch_nr_pages - 1;
	xkb->xkb_nr_vcpus = xc->xc_hdr.xch_nr_vcpus;

	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context);
	xkb->xkb_vcpu_data_sz = sz;
	xkb->xkb_vcpu_data = mdb_alloc(sz, UM_SLEEP);

	if (pread64(xkb->xkb_fd, xkb->xkb_vcpu_data, sz,
	    xc->xc_hdr.xch_ctxt_offset) != sz)
		return (xkb_fail(xkb, "cannot read VCPU contexts"));

	/* The contexts are contiguous here; build the pointer array. */
	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);

	vcp = xkb->xkb_vcpu_data;
	for (i = 0; i < xkb->xkb_nr_vcpus; i++)
		xkb->xkb_vcpus[i] = &vcp[i];

	/*
	 * Try to map all the data pages. If we can't, fall back to the
	 * window/pread() approach, which is significantly slower.
	 */
	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xc->xc_hdr.xch_pages_offset);

	if (xkb->xkb_pages == (char *)MAP_FAILED)
		xkb->xkb_pages = NULL;

	/*
	 * We'd like to adapt for correctness' sake, but we have no way of
	 * detecting a PAE guest, since cr4 writes are disallowed.
	 */
	xkb->xkb_is_pae = 1;

	if (!xkb_map_p2m(xkb))
		return (NULL);

	return (xkb);
}
1128
1129static xkb_t *
1130xkb_open_elf(xkb_t *xkb)
1131{
1132	xkb_elf_t *xe = &xkb->xkb_elf;
1133	mdb_gelf_sect_t *sect;
1134	char *notes;
1135	char *pos;
1136	mdb_io_t *io;
1137	size_t sz;
1138	int i;
1139	void *dp;
1140
1141	if ((io = mdb_fdio_create_path(NULL, xkb->xkb_path,
1142	    O_RDONLY, 0)) == NULL)
1143		return (xkb_fail(xkb, "failed to open"));
1144
1145	xe->xe_gelf = mdb_gelf_create(io, ET_NONE, GF_FILE);
1146
1147	if (xe->xe_gelf == NULL) {
1148		mdb_io_destroy(io);
1149		return (xkb);
1150	}
1151
1152	xkb->xkb_fd = mdb_fdio_fileno(io);
1153
1154	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".note.Xen");
1155
1156	if (sect == NULL)
1157		return (xkb);
1158
1159	if ((notes = mdb_gelf_sect_load(xe->xe_gelf, sect)) == NULL)
1160		return (xkb);
1161
1162	/*
1163	 * Now we know this is indeed a hypervisor core dump, even if
1164	 * it's corrupted.
1165	 */
1166	xkb->xkb_type = XKB_FORMAT_ELF;
1167
1168	for (pos = notes; pos < notes + sect->gs_shdr.sh_size; ) {
1169		/* LINTED - alignment */
1170		Elf64_Nhdr *nhdr = (Elf64_Nhdr *)pos;
1171		uint64_t vers;
1172		char *desc;
1173		char *name;
1174
1175		name = pos + sizeof (*nhdr);
1176		desc = (char *)P2ROUNDUP((uintptr_t)name + nhdr->n_namesz, 4);
1177
1178		pos = desc + nhdr->n_descsz;
1179
1180		switch (nhdr->n_type) {
1181		case XEN_ELFNOTE_DUMPCORE_NONE:
1182			break;
1183
1184		case XEN_ELFNOTE_DUMPCORE_HEADER:
1185			if (nhdr->n_descsz != sizeof (struct xc_elf_header)) {
1186				return (xkb_fail(xkb, "invalid ELF note "
1187				    "XEN_ELFNOTE_DUMPCORE_HEADER\n"));
1188			}
1189
1190			bcopy(desc, &xe->xe_hdr,
1191			    sizeof (struct xc_elf_header));
1192			break;
1193
1194		case XEN_ELFNOTE_DUMPCORE_XEN_VERSION:
1195			if (nhdr->n_descsz < sizeof (struct xc_elf_version)) {
1196				return (xkb_fail(xkb, "invalid ELF note "
1197				    "XEN_ELFNOTE_DUMPCORE_XEN_VERSION\n"));
1198			}
1199
1200			bcopy(desc, &xe->xe_version,
1201			    sizeof (struct xc_elf_version));
1202			break;
1203
1204		case XEN_ELFNOTE_DUMPCORE_FORMAT_VERSION:
1205			/* LINTED - alignment */
1206			vers = *((uint64_t *)desc);
1207			if ((vers >> 32) != 0) {
1208				return (xkb_fail(xkb, "unknown major "
1209				    "version %d (expected 0)\n",
1210				    (int)(vers >> 32)));
1211			}
1212
1213			if ((vers & 0xffffffff) != 1) {
1214				mdb_warn("unexpected dump minor number "
1215				    "version %d (expected 1)\n",
1216				    (int)(vers & 0xffffffff));
1217			}
1218			break;
1219
1220		default:
1221			mdb_warn("unknown ELF note %d(%s)\n",
1222			    nhdr->n_type, name);
1223			break;
1224		}
1225	}
1226
1227	xkb->xkb_is_hvm = xe->xe_hdr.xeh_magic == XC_CORE_MAGIC_HVM;
1228
1229	if (xe->xe_hdr.xeh_magic != XC_CORE_MAGIC &&
1230	    xe->xe_hdr.xeh_magic != XC_CORE_MAGIC_HVM) {
1231		return (xkb_fail(xkb, "invalid magic %d",
1232		    xe->xe_hdr.xeh_magic));
1233	}
1234
1235	xkb->xkb_nr_pages = xe->xe_hdr.xeh_nr_pages;
1236	xkb->xkb_is_pae = (strstr(xe->xe_version.xev_capabilities,
1237	    "x86_32p") != NULL);
1238
1239	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_prstatus");
1240
1241	if (sect == NULL)
1242		return (xkb_fail(xkb, "cannot find section .xen_prstatus"));
1243
1244	if (sect->gs_shdr.sh_entsize < sizeof (vcpu_guest_context_t))
1245		return (xkb_fail(xkb, "invalid section .xen_prstatus"));
1246
1247	xkb->xkb_nr_vcpus = sect->gs_shdr.sh_size / sect->gs_shdr.sh_entsize;
1248
1249	xkb->xkb_vcpu_data = mdb_gelf_sect_load(xe->xe_gelf, sect);
1250	if (xkb->xkb_vcpu_data == NULL)
1251		return (xkb_fail(xkb, "cannot load section .xen_prstatus"));
1252	xkb->xkb_vcpu_data_sz = sect->gs_shdr.sh_size;
1253
1254	/*
1255	 * The vcpu_guest_context structures saved in the core file
1256	 * are actually unions of the 64-bit and 32-bit versions.
1257	 * Don't rely on the entry size to match the size of
1258	 * the structure, but set up an array of pointers.
1259	 */
1260	sz = xkb->xkb_nr_vcpus * sizeof (struct vcpu_guest_context *);
1261	xkb->xkb_vcpus = mdb_alloc(sz, UM_SLEEP);
1262	for (i = 0; i < xkb->xkb_nr_vcpus; i++) {
1263		dp = ((char *)xkb->xkb_vcpu_data +
1264		    i * sect->gs_shdr.sh_entsize);
1265		xkb->xkb_vcpus[i] = dp;
1266	}
1267
1268	sect = mdb_gelf_sect_by_name(xe->xe_gelf, ".xen_pages");
1269
1270	if (sect == NULL)
1271		return (xkb_fail(xkb, "cannot find section .xen_pages"));
1272
1273	if (!PAGE_ALIGNED(sect->gs_shdr.sh_offset))
1274		return (xkb_fail(xkb, ".xen_pages is not page aligned"));
1275
1276	if (sect->gs_shdr.sh_entsize != PAGE_SIZE)
1277		return (xkb_fail(xkb, "invalid section .xen_pages"));
1278
1279	xkb->xkb_pages_off = sect->gs_shdr.sh_offset;
1280
1281	/*
1282	 * Try to map all the data pages. If we can't, fall back to the
1283	 * window/pread() approach, which is significantly slower.
1284	 */
1285	xkb->xkb_pages = mmap(NULL, PAGE_SIZE * xkb->xkb_nr_pages,
1286	    PROT_READ, MAP_SHARED, xkb->xkb_fd, xkb->xkb_pages_off);
1287
1288	if (xkb->xkb_pages == (char *)MAP_FAILED)
1289		xkb->xkb_pages = NULL;
1290
1291	if (xkb->xkb_is_hvm) {
1292		if (!xkb_build_fake_p2m(xkb))
1293			return (NULL);
1294	} else {
1295		if (!xkb_build_p2m(xkb))
1296			return (NULL);
1297	}
1298
1299	return (xkb);
1300}
1301
1302static void
1303xkb_init_mmu(xkb_t *xkb)
1304{
1305#if defined(__amd64)
1306	xkb->xkb_mmu.mi_max = 3;
1307	xkb->xkb_mmu.mi_shift[0] = 12;
1308	xkb->xkb_mmu.mi_shift[1] = 21;
1309	xkb->xkb_mmu.mi_shift[2] = 30;
1310	xkb->xkb_mmu.mi_shift[3] = 39;
1311	xkb->xkb_mmu.mi_ptes = 512;
1312	xkb->xkb_mmu.mi_ptesize = 8;
1313#elif defined(__i386)
1314	if (xkb->xkb_is_pae) {
1315		xkb->xkb_mmu.mi_max = 2;
1316		xkb->xkb_mmu.mi_shift[0] = 12;
1317		xkb->xkb_mmu.mi_shift[1] = 21;
1318		xkb->xkb_mmu.mi_shift[2] = 30;
1319		xkb->xkb_mmu.mi_ptes = 512;
1320		xkb->xkb_mmu.mi_ptesize = 8;
1321	} else {
1322		xkb->xkb_mmu.mi_max = 1;
1323		xkb->xkb_mmu.mi_shift[0] = 12;
1324		xkb->xkb_mmu.mi_shift[1] = 22;
1325		xkb->xkb_mmu.mi_ptes = 1024;
1326		xkb->xkb_mmu.mi_ptesize = 4;
1327	}
1328#endif
1329}
1330
1331/*ARGSUSED*/
1332xkb_t *
1333xkb_open(const char *namelist, const char *corefile, const char *swapfile,
1334    int flag, const char *err)
1335{
1336	uintptr_t debug_info = DEBUG_INFO;
1337	struct stat64 corestat;
1338	xkb_t *xkb = NULL;
1339	size_t i;
1340
1341	if (stat64(corefile, &corestat) == -1)
1342		return (xkb_fail(xkb, "cannot stat %s", corefile));
1343
1344	if (flag != O_RDONLY)
1345		return (xkb_fail(xkb, "invalid open flags"));
1346
1347	xkb = mdb_zalloc(sizeof (*xkb), UM_SLEEP);
1348
1349	for (i = 0; i < 4; i++) {
1350		xkb->xkb_pt_map[i].mm_mfn = MFN_INVALID;
1351		xkb->xkb_pt_map[i].mm_map = (char *)MAP_FAILED;
1352	}
1353
1354	xkb->xkb_type = XKB_FORMAT_UNKNOWN;
1355	xkb->xkb_map.mm_mfn = MFN_INVALID;
1356	xkb->xkb_map.mm_map = (char *)MAP_FAILED;
1357	xkb->xkb_core.xc_p2m_buf = (char *)MAP_FAILED;
1358	xkb->xkb_fd = -1;
1359
1360	xkb->xkb_path = strdup(corefile);
1361
1362	if ((xkb = xkb_open_elf(xkb)) == NULL)
1363		return (NULL);
1364
1365	if (xkb->xkb_type == XKB_FORMAT_UNKNOWN) {
1366		if (!xkb_open_core(xkb))
1367			return (NULL);
1368	}
1369
1370	xkb_init_mmu(xkb);
1371
1372	if (!xkb_build_m2p(xkb))
1373		return (NULL);
1374
1375	if (xkb->xkb_is_hvm)
1376		debug_info = DEBUG_INFO_HVM;
1377
1378	if (xkb_read(xkb, debug_info, &xkb->xkb_info,
1379	    sizeof (xkb->xkb_info)) != sizeof (xkb->xkb_info))
1380		return (xkb_fail(xkb, "cannot read debug_info"));
1381
1382	if (xkb->xkb_info.di_magic != DEBUG_INFO_MAGIC) {
1383		return (xkb_fail(xkb, "invalid debug info magic %d",
1384		    xkb->xkb_info.di_magic));
1385	}
1386
1387	if (xkb->xkb_info.di_version != DEBUG_INFO_VERSION) {
1388		return (xkb_fail(xkb, "unknown debug info version %d",
1389		    xkb->xkb_info.di_version));
1390	}
1391
1392	if (!xkb_build_ksyms(xkb))
1393		return (xkb_fail(xkb, "cannot construct namelist"));
1394
1395	return (xkb);
1396}
1397
/*
 * Tear down an xkb_t, releasing every resource acquired during open.
 * Safe to call on a partially-constructed handle (this is how open
 * failures are cleaned up) or with NULL.  Always returns 0.
 */
int
xkb_close(xkb_t *xkb)
{
	size_t i, sz;

	if (xkb == NULL)
		return (0);

	if (xkb->xkb_m2p != NULL) {
		mdb_free(xkb->xkb_m2p,
		    (xkb->xkb_max_mfn + 1) * sizeof (xen_pfn_t));
	}

	/*
	 * Either all data pages were mapped with a single mmap()
	 * (xkb_pages non-NULL), or we fell back to per-page windows for
	 * the page-table and data mappings; unmap whichever was used.
	 */
	if (xkb->xkb_pages != NULL) {
		(void) munmap((void *)xkb->xkb_pages,
		    PAGE_SIZE * xkb->xkb_nr_pages);
	} else {
		for (i = 0; i < 4; i++) {
			char *addr = xkb->xkb_pt_map[i].mm_map;
			if (addr != (char *)MAP_FAILED)
				(void) munmap((void *)addr, PAGE_SIZE);
		}
		if (xkb->xkb_map.mm_map != (char *)MAP_FAILED) {
			(void) munmap((void *)xkb->xkb_map.mm_map,
			    PAGE_SIZE);
		}
	}

	if (xkb->xkb_namelist != NULL)
		mdb_free(xkb->xkb_namelist, xkb->xkb_namesize);

	if (xkb->xkb_type == XKB_FORMAT_ELF) {
		xkb_elf_t *xe = &xkb->xkb_elf;

		/*
		 * NOTE(review): xkb_fd is not closed on the ELF path —
		 * presumably the gelf handle owns the underlying fdio
		 * and mdb_gelf_destroy() releases it; confirm against
		 * mdb_gelf_create()/mdb_fdio semantics.
		 */
		if (xe->xe_gelf != NULL)
			mdb_gelf_destroy(xe->xe_gelf);

		sz = sizeof (xen_pfn_t) * (xkb->xkb_max_pfn + 1);

		if (xkb->xkb_p2m != NULL)
			mdb_free(xkb->xkb_p2m, sz);

		sz = sizeof (size_t) * (xkb->xkb_max_pfn + 1);

		if (xe->xe_off != NULL)
			mdb_free(xe->xe_off, sz);

	} else if (xkb->xkb_type == XKB_FORMAT_CORE) {
		xkb_core_t *xc = &xkb->xkb_core;

		if (xkb->xkb_fd != -1)
			(void) close(xkb->xkb_fd);

		/*
		 * This size computation must mirror the mapping of the
		 * p2m buffer done during core-format open (not visible
		 * here) — keep the two in sync.
		 */
		sz = (xkb->xkb_nr_pages * sizeof (mfn_t)) + (PAGE_SIZE * 2);
		sz = PAGE_MASK(sz);

		if (xc->xc_p2m_buf != (xen_pfn_t *)MAP_FAILED)
			(void) munmap(xc->xc_p2m_buf, sz);

		if (xkb->xkb_vcpu_data != NULL)
			mdb_free(xkb->xkb_vcpu_data, xkb->xkb_vcpu_data_sz);
	}

	/* Frees the pointer array only; entries point into xkb_vcpu_data. */
	if (xkb->xkb_vcpus != NULL) {
		sz = sizeof (struct vcpu_guest_context *) *
		    xkb->xkb_nr_vcpus;
		mdb_free(xkb->xkb_vcpus, sz);
	}

	/* xkb_path came from strdup(), so use free(), not mdb_free(). */
	free(xkb->xkb_path);

	mdb_free(xkb, sizeof (*xkb));
	return (0);
}
1472
1473/*ARGSUSED*/
1474static mdb_io_t *
1475xkb_sym_io(xkb_t *xkb, const char *symfile)
1476{
1477	mdb_io_t *io = mdb_memio_create(xkb->xkb_namelist, xkb->xkb_namesize);
1478
1479	if (io == NULL)
1480		mdb_warn("failed to create namelist from %s", xkb->xkb_path);
1481
1482	return (io);
1483}
1484
1485uint64_t
1486xkb_vtop(xkb_t *xkb, struct as *as, uintptr_t addr)
1487{
1488	mfn_t tlmfn = xkb_cr3_to_pfn(xkb);
1489	mfn_t mfn;
1490
1491	if (as != NULL && (tlmfn = xkb_as_to_mfn(xkb, as)) == MFN_INVALID)
1492		return (-1ULL);
1493
1494	mfn = xkb_va_to_mfn(xkb, addr, tlmfn);
1495
1496	if (mfn == MFN_INVALID || mfn > xkb->xkb_max_mfn)
1497		return (-1ULL);
1498
1499	return (((uint64_t)xkb->xkb_m2p[mfn] << PAGE_SHIFT)
1500	    | PAGE_OFFSET(addr));
1501}
1502
/*
 * Fill 'mregs' with the saved machine registers for virtual CPU 'cpu',
 * copied from the vcpu_guest_context captured in the dump.  Returns 0
 * on success, or -1 with errno set to EINVAL for an out-of-range CPU.
 */
static int
xkb_getmregs(xkb_t *xkb, uint_t cpu, struct privmregs *mregs)
{
	struct vcpu_guest_context *vcpu;
	struct cpu_user_regs *ur;
	struct regs *regs;

	if (cpu >= xkb->xkb_nr_vcpus) {
		errno = EINVAL;
		return (-1);
	}

	bzero(mregs, sizeof (*mregs));

	vcpu = xkb->xkb_vcpus[cpu];
	ur = &vcpu->user_regs;
	regs = &mregs->pm_gregs;

	/* Segment and trap state common to both ISAs. */
	regs->r_ss = ur->ss;
	regs->r_cs = ur->cs;
	regs->r_ds = ur->ds;
	regs->r_es = ur->es;
	regs->r_fs = ur->fs;
	regs->r_gs = ur->gs;
	regs->r_trapno = ur->entry_vector;
	regs->r_err = ur->error_code;
#ifdef __amd64
	regs->r_savfp = ur->rbp;
	regs->r_savpc = ur->rip;
	regs->r_rdi = ur->rdi;
	regs->r_rsi = ur->rsi;
	regs->r_rdx = ur->rdx;
	regs->r_rcx = ur->rcx;
	regs->r_r8 = ur->r8;
	regs->r_r9 = ur->r9;
	regs->r_rax = ur->rax;
	regs->r_rbx = ur->rbx;
	regs->r_rbp = ur->rbp;
	regs->r_r10 = ur->r10;
	regs->r_r11 = ur->r11;
	regs->r_r12 = ur->r12;
	regs->r_r13 = ur->r13;
	regs->r_r14 = ur->r14;
	regs->r_r15 = ur->r15;
	regs->r_rip = ur->rip;
	regs->r_rfl = ur->rflags;
	regs->r_rsp = ur->rsp;
#else
	regs->r_savfp = ur->ebp;
	regs->r_savpc = ur->eip;
	regs->r_edi = ur->edi;
	regs->r_esi = ur->esi;
	regs->r_ebp = ur->ebp;
	regs->r_esp = ur->esp;
	regs->r_ebx = ur->ebx;
	regs->r_edx = ur->edx;
	regs->r_ecx = ur->ecx;
	regs->r_eax = ur->eax;
	regs->r_eip = ur->eip;
	regs->r_efl = ur->eflags;
	regs->r_uesp = 0;
#endif

	/* Copy all eight control and debug registers wholesale. */
	bcopy(&vcpu->ctrlreg, &mregs->pm_cr, 8 * sizeof (ulong_t));
	bcopy(&vcpu->debugreg, &mregs->pm_dr, 8 * sizeof (ulong_t));

	mregs->pm_flags = PM_GREGS | PM_CRREGS | PM_DRREGS;

	return (0);
}
1573
/*
 * KVM backend operations vector handed back to mdb via mdb_kb_ops().
 * Dumps are read-only, so every write entry point maps to
 * mdb_tgt_notsup.
 */
static mdb_kb_ops_t xpv_kb_ops = {
	.kb_open = (void *(*)())xkb_open,
	.kb_close = (int (*)())xkb_close,
	.kb_sym_io = (mdb_io_t *(*)())xkb_sym_io,
	.kb_kread = (ssize_t (*)())xkb_read,
	.kb_kwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_aread = (ssize_t (*)())xkb_aread,
	.kb_awrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_pread = (ssize_t (*)())xkb_pread,
	.kb_pwrite = (ssize_t (*)())mdb_tgt_notsup,
	.kb_vtop = (uint64_t (*)())xkb_vtop,
	.kb_getmregs = (int (*)())xkb_getmregs
};
1587
/*
 * Entry point by which mdb obtains this module's backend ops vector.
 */
mdb_kb_ops_t *
mdb_kb_ops(void)
{
	return (&xpv_kb_ops);
}
1593
/* This module provides no dcmds or walkers; only the KVM backend ops. */
static const mdb_dcmd_t dcmds[] = { NULL, };
static const mdb_walker_t walkers[] = { NULL, };
static const mdb_modinfo_t modinfo = { MDB_API_VERSION, dcmds, walkers };
1597
/*
 * Standard mdb module initialization hook.
 */
const mdb_modinfo_t *
_mdb_init(void)
{
	return (&modinfo);
}
1603
/*
 * Standard mdb module teardown hook; nothing to release here, since
 * per-dump state is owned by the xkb_t and freed in xkb_close().
 */
void
_mdb_fini(void)
{
}
1608