1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
29 */
30
31#include <kvm.h>
32#include <stdio.h>
33#include <stdlib.h>
34#include <stdarg.h>
35#include <unistd.h>
36#include <limits.h>
37#include <fcntl.h>
38#include <strings.h>
39#include <errno.h>
40#include <sys/mem.h>
41#include <sys/stat.h>
42#include <sys/mman.h>
43#include <sys/dumphdr.h>
44#include <sys/sysmacros.h>
45
/*
 * Per-handle state behind the opaque kvm_t.  A handle is either "live"
 * (kvm_core == NULL; reads and writes go through the /dev/mem and kmem
 * descriptors or /proc) or backed by a crash dump (kvm_corefd/kvm_core set).
 */
struct _kvmd {
	struct dumphdr	kvm_dump;	/* dump header read from offset 0 */
	char		*kvm_debug;	/* getenv("KVM_DEBUG"); non-NULL = trace */
	int		kvm_openflag;	/* O_RDONLY or O_RDWR from kvm_open() */
	int		kvm_corefd;	/* crash dump file, or -1 */
	int		kvm_kmemfd;	/* /dev/kmem or /dev/allkmem, or -1 */
	int		kvm_memfd;	/* /dev/mem, or -1 */
	size_t		kvm_coremapsize; /* length of the kvm_core mapping */
	char		*kvm_core;	/* mmap of the dump; NULL when live */
	dump_map_t	*kvm_map;	/* kvm_core + dump_map */
	pfn_t		*kvm_pfn;	/* kvm_core + dump_pfn */
	struct as	*kvm_kas;	/* value of the "kas" symbol */
	proc_t		*kvm_practive;	/* value of the "practive" symbol */
	pid_t		kvm_pid;	/* pid of kvm_proc; -1 after kvm_setproc */
	char		kvm_namelist[MAXNAMELEN + 1]; /* path given to nlist() */
	boolean_t	kvm_namelist_core; /* namelist is a temp file to unlink */
	proc_t		kvm_proc;	/* local copy of the current process */
};
64
/*
 * pread64/pwrite64 cast to the single function-pointer type taken by
 * kvm_rw()'s prw argument.  kvm_rw() also compares prw against PREAD to
 * decide whether a dump access is a read that can be served from the mmap.
 */
#define	PREAD	(ssize_t (*)(int, void *, size_t, offset_t))pread64
#define	PWRITE	(ssize_t (*)(int, void *, size_t, offset_t))pwrite64

/* Defined after kvm_open(), which uses it as a fallback symbol source. */
static int kvm_nlist_core(kvm_t *kd, struct nlist nl[], const char *err);
69
70static kvm_t *
71fail(kvm_t *kd, const char *err, const char *message, ...)
72{
73	va_list args;
74
75	va_start(args, message);
76	if (err || (kd && kd->kvm_debug)) {
77		(void) fprintf(stderr, "%s: ", err ? err : "KVM_DEBUG");
78		(void) vfprintf(stderr, message, args);
79		(void) fprintf(stderr, "\n");
80	}
81	va_end(args);
82	if (kd != NULL)
83		(void) kvm_close(kd);
84	return (NULL);
85}
86
87/*ARGSUSED*/
88kvm_t *
89kvm_open(const char *namelist, const char *corefile, const char *swapfile,
90	int flag, const char *err)
91{
92	kvm_t *kd;
93	struct stat64 memstat, kmemstat, allkmemstat, corestat;
94	struct nlist nl[3] = { { "kas" }, { "practive" }, { "" } };
95
96	if ((kd = calloc(1, sizeof (kvm_t))) == NULL)
97		return (fail(NULL, err, "cannot allocate space for kvm_t"));
98
99	kd->kvm_corefd = kd->kvm_kmemfd = kd->kvm_memfd = -1;
100	kd->kvm_debug = getenv("KVM_DEBUG");
101
102	if ((kd->kvm_openflag = flag) != O_RDONLY && flag != O_RDWR)
103		return (fail(kd, err, "illegal flag 0x%x to kvm_open()", flag));
104
105	if (corefile == NULL)
106		corefile = "/dev/kmem";
107
108	if (stat64(corefile, &corestat) == -1)
109		return (fail(kd, err, "cannot stat %s", corefile));
110
111	if (S_ISCHR(corestat.st_mode)) {
112		if (stat64("/dev/mem", &memstat) == -1)
113			return (fail(kd, err, "cannot stat /dev/mem"));
114
115		if (stat64("/dev/kmem", &kmemstat) == -1)
116			return (fail(kd, err, "cannot stat /dev/kmem"));
117
118		if (stat64("/dev/allkmem", &allkmemstat) == -1)
119			return (fail(kd, err, "cannot stat /dev/allkmem"));
120		if (corestat.st_rdev == memstat.st_rdev ||
121		    corestat.st_rdev == kmemstat.st_rdev ||
122		    corestat.st_rdev == allkmemstat.st_rdev) {
123			char *kmem = (corestat.st_rdev == allkmemstat.st_rdev ?
124			    "/dev/allkmem" : "/dev/kmem");
125
126			if ((kd->kvm_kmemfd = open64(kmem, flag)) == -1)
127				return (fail(kd, err, "cannot open %s", kmem));
128			if ((kd->kvm_memfd = open64("/dev/mem", flag)) == -1)
129				return (fail(kd, err, "cannot open /dev/mem"));
130		}
131	} else {
132		if ((kd->kvm_corefd = open64(corefile, flag)) == -1)
133			return (fail(kd, err, "cannot open %s", corefile));
134		if (pread64(kd->kvm_corefd, &kd->kvm_dump,
135		    sizeof (kd->kvm_dump), 0) != sizeof (kd->kvm_dump))
136			return (fail(kd, err, "cannot read dump header"));
137		if (kd->kvm_dump.dump_magic != DUMP_MAGIC)
138			return (fail(kd, err, "%s is not a kernel core file "
139			    "(bad magic number %x)", corefile,
140			    kd->kvm_dump.dump_magic));
141		if (kd->kvm_dump.dump_version != DUMP_VERSION)
142			return (fail(kd, err,
143			    "libkvm version (%u) != corefile version (%u)",
144			    DUMP_VERSION, kd->kvm_dump.dump_version));
145		if (kd->kvm_dump.dump_wordsize != DUMP_WORDSIZE)
146			return (fail(kd, err, "%s is a %d-bit core file - "
147			    "cannot examine with %d-bit libkvm", corefile,
148			    kd->kvm_dump.dump_wordsize, DUMP_WORDSIZE));
149		/*
150		 * We try to mmap(2) the entire corefile for performance
151		 * (so we can use bcopy(3C) rather than pread(2)).  Failing
152		 * that, we insist on at least mmap(2)ing the dump map.
153		 */
154		kd->kvm_coremapsize = (size_t)corestat.st_size;
155		if (corestat.st_size > LONG_MAX ||
156		    (kd->kvm_core = mmap64(0, kd->kvm_coremapsize,
157		    PROT_READ, MAP_SHARED, kd->kvm_corefd, 0)) == MAP_FAILED) {
158			kd->kvm_coremapsize = kd->kvm_dump.dump_data;
159			if ((kd->kvm_core = mmap64(0, kd->kvm_coremapsize,
160			    PROT_READ, MAP_SHARED, kd->kvm_corefd, 0)) ==
161			    MAP_FAILED)
162				return (fail(kd, err, "cannot mmap corefile"));
163		}
164		kd->kvm_map = (void *)(kd->kvm_core + kd->kvm_dump.dump_map);
165		kd->kvm_pfn = (void *)(kd->kvm_core + kd->kvm_dump.dump_pfn);
166	}
167
168	if (namelist == NULL)
169		namelist = "/dev/ksyms";
170
171	(void) strncpy(kd->kvm_namelist, namelist, MAXNAMELEN);
172
173	if (kvm_nlist(kd, nl) == -1) {
174		if (kd->kvm_corefd == -1) {
175			return (fail(kd, err, "%s is not a %d-bit "
176			    "kernel namelist", namelist, DUMP_WORDSIZE));
177		}
178
179		if (kvm_nlist_core(kd, nl, err) == -1)
180			return (NULL);		/* fail() already called */
181	}
182
183	kd->kvm_kas = (struct as *)nl[0].n_value;
184	kd->kvm_practive = (proc_t *)nl[1].n_value;
185
186	(void) kvm_setproc(kd);
187	return (kd);
188}
189
190int
191kvm_close(kvm_t *kd)
192{
193	if (kd->kvm_core != NULL && kd->kvm_core != MAP_FAILED)
194		(void) munmap(kd->kvm_core, kd->kvm_coremapsize);
195	if (kd->kvm_corefd != -1)
196		(void) close(kd->kvm_corefd);
197	if (kd->kvm_kmemfd != -1)
198		(void) close(kd->kvm_kmemfd);
199	if (kd->kvm_memfd != -1)
200		(void) close(kd->kvm_memfd);
201	if (kd->kvm_namelist_core)
202		(void) unlink(kd->kvm_namelist);
203	free(kd);
204	return (0);
205}
206
/*
 * Return the path of the namelist in use for this handle.  The string lives
 * inside kd, so it is only valid until kd is closed.
 */
const char *
kvm_namelist(kvm_t *kd)
{
	return (kd->kvm_namelist);
}
212
/*
 * Look up the symbols in nl[] by running nlist() against this handle's
 * namelist file; the return value is nlist()'s.
 */
int
kvm_nlist(kvm_t *kd, struct nlist nl[])
{
	return (nlist(kd->kvm_namelist, nl));
}
218
219/*
220 * If we don't have a name list, try to dig it out of the kernel crash dump.
221 * (The symbols have been present in the dump, uncompressed, for nearly a
222 * decade as of this writing -- and it is frankly surprising that the archaic
223 * notion of a disjoint symbol table managed to survive that change.)
224 */
225static int
226kvm_nlist_core(kvm_t *kd, struct nlist nl[], const char *err)
227{
228	dumphdr_t *dump = &kd->kvm_dump;
229	char *msg = "couldn't extract symbols from dump";
230	char *template = "/tmp/.libkvm.kvm_nlist_core.pid%d.XXXXXX";
231	int fd, rval;
232
233	if (dump->dump_ksyms_size != dump->dump_ksyms_csize) {
234		(void) fail(kd, err, "%s: kernel symbols are compressed", msg);
235		return (-1);
236	}
237
238	if (dump->dump_ksyms + dump->dump_ksyms_size > kd->kvm_coremapsize) {
239		(void) fail(kd, err, "%s: kernel symbols not mapped", msg);
240		return (-1);
241	}
242
243	/*
244	 * Beause this temporary file may be left as a turd if the caller
245	 * does not properly call kvm_close(), we make sure that it clearly
246	 * indicates its origins.
247	 */
248	(void) snprintf(kd->kvm_namelist, MAXNAMELEN, template, getpid());
249
250	if ((fd = mkstemp(kd->kvm_namelist)) == -1) {
251		(void) fail(kd, err, "%s: couldn't create temporary "
252		    "symbols file: %s", msg, strerror(errno));
253		return (-1);
254	}
255
256	kd->kvm_namelist_core = B_TRUE;
257
258	do {
259		rval = write(fd, (caddr_t)((uintptr_t)kd->kvm_core +
260		    (uintptr_t)dump->dump_ksyms), dump->dump_ksyms_size);
261	} while (rval < dump->dump_ksyms_size && errno == EINTR);
262
263	if (rval < dump->dump_ksyms_size) {
264		(void) fail(kd, err, "%s: couldn't write to temporary "
265		    "symbols file: %s", msg, strerror(errno));
266		(void) close(fd);
267		return (-1);
268	}
269
270	(void) close(fd);
271
272	if (kvm_nlist(kd, nl) == -1) {
273		(void) fail(kd, err, "%s: symbols not valid", msg);
274		return (-1);
275	}
276
277	return (0);
278}
279
280static offset_t
281kvm_lookup(kvm_t *kd, struct as *as, uint64_t addr)
282{
283	uintptr_t pageoff = addr & (kd->kvm_dump.dump_pagesize - 1);
284	uint64_t page = addr - pageoff;
285	offset_t off = 0;
286
287	if (kd->kvm_debug)
288		fprintf(stderr, "kvm_lookup(%p, %llx):", (void *)as, addr);
289
290	if (as == NULL) {		/* physical addressing mode */
291		long first = 0;
292		long last = kd->kvm_dump.dump_npages - 1;
293		pfn_t target = (pfn_t)(page >> kd->kvm_dump.dump_pageshift);
294		while (last >= first) {
295			long middle = (first + last) / 2;
296			pfn_t pfn = kd->kvm_pfn[middle];
297			if (kd->kvm_debug)
298				fprintf(stderr, " %ld ->", middle);
299			if (pfn == target) {
300				off = kd->kvm_dump.dump_data + pageoff +
301				    ((uint64_t)middle <<
302				    kd->kvm_dump.dump_pageshift);
303				break;
304			}
305			if (pfn < target)
306				first = middle + 1;
307			else
308				last = middle - 1;
309		}
310	} else {
311		long hash = DUMP_HASH(&kd->kvm_dump, as, page);
312		off = kd->kvm_map[hash].dm_first;
313		while (off != 0) {
314			dump_map_t *dmp = (void *)(kd->kvm_core + off);
315			if (kd->kvm_debug)
316				fprintf(stderr, " %llx ->", off);
317			if (dmp < kd->kvm_map ||
318			    dmp > kd->kvm_map + kd->kvm_dump.dump_hashmask ||
319			    (off & (sizeof (offset_t) - 1)) != 0 ||
320			    DUMP_HASH(&kd->kvm_dump, dmp->dm_as, dmp->dm_va) !=
321			    hash) {
322				if (kd->kvm_debug)
323					fprintf(stderr, " dump map corrupt\n");
324				return (0);
325			}
326			if (dmp->dm_va == page && dmp->dm_as == as) {
327				off = dmp->dm_data + pageoff;
328				break;
329			}
330			off = dmp->dm_next;
331		}
332	}
333	if (kd->kvm_debug)
334		fprintf(stderr, "%s found: %llx\n", off ? "" : " not", off);
335	return (off);
336}
337
338static ssize_t
339kvm_rw(kvm_t *kd, uint64_t addr, void *buf, size_t size,
340	struct as *as, ssize_t (*prw)(int, void *, size_t, offset_t))
341{
342	offset_t off;
343	size_t resid = size;
344
345	/*
346	 * read/write of zero bytes always succeeds
347	 */
348	if (size == 0)
349		return (0);
350
351	if (kd->kvm_core == NULL) {
352		char procbuf[100];
353		int procfd;
354		ssize_t rval;
355
356		if (as == kd->kvm_kas)
357			return (prw(kd->kvm_kmemfd, buf, size, addr));
358		if (as == NULL)
359			return (prw(kd->kvm_memfd, buf, size, addr));
360
361		(void) sprintf(procbuf, "/proc/%ld/as", kd->kvm_pid);
362		if ((procfd = open64(procbuf, kd->kvm_openflag)) == -1)
363			return (-1);
364		rval = prw(procfd, buf, size, addr);
365		(void) close(procfd);
366		return (rval);
367	}
368
369	while (resid != 0) {
370		uintptr_t pageoff = addr & (kd->kvm_dump.dump_pagesize - 1);
371		ssize_t len = MIN(resid, kd->kvm_dump.dump_pagesize - pageoff);
372
373		if ((off = kvm_lookup(kd, as, addr)) == 0)
374			break;
375
376		if (prw == PREAD && off < kd->kvm_coremapsize)
377			bcopy(kd->kvm_core + off, buf, len);
378		else if ((len = prw(kd->kvm_corefd, buf, len, off)) <= 0)
379			break;
380		resid -= len;
381		addr += len;
382		buf = (char *)buf + len;
383	}
384	return (resid < size ? size - resid : -1);
385}
386
/*
 * Read size bytes at kernel virtual address addr into buf.  Implemented
 * exactly like kvm_kread(): a PREAD through kvm_rw() in the kernel address
 * space (kd->kvm_kas).  Returns kvm_rw()'s result.
 */
ssize_t
kvm_read(kvm_t *kd, uintptr_t addr, void *buf, size_t size)
{
	return (kvm_rw(kd, addr, buf, size, kd->kvm_kas, PREAD));
}
392
/*
 * Read size bytes at kernel virtual address addr into buf, using the kernel
 * address space (kd->kvm_kas).  Returns kvm_rw()'s result.
 */
ssize_t
kvm_kread(kvm_t *kd, uintptr_t addr, void *buf, size_t size)
{
	return (kvm_rw(kd, addr, buf, size, kd->kvm_kas, PREAD));
}
398
/*
 * Read size bytes at user virtual address addr into buf.  The address space
 * is the p_as of the process currently held in kd->kvm_proc, i.e. the one
 * most recently loaded by kvm_nextproc()/kvm_getproc().  Returns kvm_rw()'s
 * result.
 */
ssize_t
kvm_uread(kvm_t *kd, uintptr_t addr, void *buf, size_t size)
{
	return (kvm_rw(kd, addr, buf, size, kd->kvm_proc.p_as, PREAD));
}
404
/*
 * Read size bytes at virtual address addr in the caller-supplied address
 * space 'as' into buf.  Returns kvm_rw()'s result.
 */
ssize_t
kvm_aread(kvm_t *kd, uintptr_t addr, void *buf, size_t size, struct as *as)
{
	return (kvm_rw(kd, addr, buf, size, as, PREAD));
}
410
/*
 * Read size bytes at physical address addr into buf.  Passing a NULL
 * address space selects kvm_rw()'s physical addressing mode.  Returns
 * kvm_rw()'s result.
 */
ssize_t
kvm_pread(kvm_t *kd, uint64_t addr, void *buf, size_t size)
{
	return (kvm_rw(kd, addr, buf, size, NULL, PREAD));
}
416
/*
 * Write size bytes from buf to kernel virtual address addr.  Implemented
 * exactly like kvm_kwrite(): a PWRITE through kvm_rw() in the kernel
 * address space (kd->kvm_kas).  The const is cast away only to fit
 * kvm_rw()'s shared read/write signature.  Returns kvm_rw()'s result.
 */
ssize_t
kvm_write(kvm_t *kd, uintptr_t addr, const void *buf, size_t size)
{
	return (kvm_rw(kd, addr, (void *)buf, size, kd->kvm_kas, PWRITE));
}
422
/*
 * Write size bytes from buf to kernel virtual address addr, using the
 * kernel address space (kd->kvm_kas).  The const is cast away only to fit
 * kvm_rw()'s shared read/write signature.  Returns kvm_rw()'s result.
 */
ssize_t
kvm_kwrite(kvm_t *kd, uintptr_t addr, const void *buf, size_t size)
{
	return (kvm_rw(kd, addr, (void *)buf, size, kd->kvm_kas, PWRITE));
}
428
/*
 * Write size bytes from buf to user virtual address addr in the address
 * space (p_as) of the process currently held in kd->kvm_proc.  The const is
 * cast away only to fit kvm_rw()'s shared read/write signature.  Returns
 * kvm_rw()'s result.
 */
ssize_t
kvm_uwrite(kvm_t *kd, uintptr_t addr, const void *buf, size_t size)
{
	return (kvm_rw(kd, addr, (void *)buf, size, kd->kvm_proc.p_as, PWRITE));
}
434
/*
 * Write size bytes from buf to virtual address addr in the caller-supplied
 * address space 'as'.  The const is cast away only to fit kvm_rw()'s shared
 * read/write signature.  Returns kvm_rw()'s result.
 */
ssize_t
kvm_awrite(kvm_t *kd, uintptr_t addr, const void *buf, size_t size,
    struct as *as)
{
	return (kvm_rw(kd, addr, (void *)buf, size, as, PWRITE));
}
441
/*
 * Write size bytes from buf to physical address addr.  Passing a NULL
 * address space selects kvm_rw()'s physical addressing mode.  The const is
 * cast away only to fit kvm_rw()'s shared read/write signature.  Returns
 * kvm_rw()'s result.
 */
ssize_t
kvm_pwrite(kvm_t *kd, uint64_t addr, const void *buf, size_t size)
{
	return (kvm_rw(kd, addr, (void *)buf, size, NULL, PWRITE));
}
447
448uint64_t
449kvm_physaddr(kvm_t *kd, struct as *as, uintptr_t addr)
450{
451	mem_vtop_t mem_vtop;
452	offset_t off;
453
454	if (kd->kvm_core == NULL) {
455		mem_vtop.m_as = as;
456		mem_vtop.m_va = (void *)addr;
457		if (ioctl(kd->kvm_kmemfd, MEM_VTOP, &mem_vtop) == 0)
458			return ((uint64_t)mem_vtop.m_pfn * getpagesize() +
459			    (addr & (getpagesize() - 1)));
460	} else {
461		if ((off = kvm_lookup(kd, as, addr)) != 0) {
462			long pfn_index =
463			    (u_offset_t)(off - kd->kvm_dump.dump_data) >>
464			    kd->kvm_dump.dump_pageshift;
465			return (((uint64_t)kd->kvm_pfn[pfn_index] <<
466			    kd->kvm_dump.dump_pageshift) +
467			    (addr & (kd->kvm_dump.dump_pagesize - 1)));
468		}
469	}
470	return (-1ULL);
471}
472
473struct proc *
474kvm_getproc(kvm_t *kd, pid_t pid)
475{
476	(void) kvm_setproc(kd);
477	while (kvm_nextproc(kd) != NULL)
478		if (kd->kvm_pid == pid)
479			return (&kd->kvm_proc);
480	return (NULL);
481}
482
483struct proc *
484kvm_nextproc(kvm_t *kd)
485{
486	if (kd->kvm_proc.p_next == NULL ||
487	    kvm_kread(kd, (uintptr_t)kd->kvm_proc.p_next,
488	    &kd->kvm_proc, sizeof (proc_t)) != sizeof (proc_t) ||
489	    kvm_kread(kd, (uintptr_t)&kd->kvm_proc.p_pidp->pid_id,
490	    &kd->kvm_pid, sizeof (pid_t)) != sizeof (pid_t))
491		return (NULL);
492
493	return (&kd->kvm_proc);
494}
495
496int
497kvm_setproc(kvm_t *kd)
498{
499	(void) kvm_kread(kd, (uintptr_t)kd->kvm_practive,
500	    &kd->kvm_proc.p_next, sizeof (proc_t *));
501	kd->kvm_pid = -1;
502	return (0);
503}
504
/*
 * Return the user structure of process p.  It is embedded in the proc_t as
 * p_user, so this is just the address of that member within the caller's
 * copy; nothing is read and kd is not used.
 */
/*ARGSUSED*/
struct user *
kvm_getu(kvm_t *kd, struct proc *p)
{
	return (&p->p_user);
}
511