xref: /illumos-gate/usr/src/uts/intel/dtrace/sdt.c (revision 86ef0a63)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*
27  * Copyright (c) 2012, Joyent, Inc. All rights reserved.
28  * Copyright (c) 2013, 2014 by Delphix. All rights reserved.
29  */
30 
31 #include <sys/modctl.h>
32 #include <sys/sunddi.h>
33 #include <sys/dtrace.h>
34 #include <sys/kobj.h>
35 #include <sys/stat.h>
36 #include <sys/conf.h>
37 #include <vm/seg_kmem.h>
38 #include <sys/stack.h>
39 #include <sys/frame.h>
40 #include <sys/dtrace_impl.h>
41 #include <sys/cmn_err.h>
42 #include <sys/sysmacros.h>
43 #include <sys/privregs.h>
44 #include <sys/sdt_impl.h>
45 
46 #define	SDT_PATCHVAL	0xf0
47 #define	SDT_ADDR2NDX(addr)	((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask)
48 #define	SDT_PROBETAB_SIZE	0x1000		/* 4k entries -- 16K total */
49 
50 static dev_info_t		*sdt_devi;
51 static int			sdt_verbose = 0;
52 static sdt_probe_t		**sdt_probetab;
53 static int			sdt_probetab_size;
54 static int			sdt_probetab_mask;
55 
56 /*ARGSUSED*/
57 static int
sdt_invop(uintptr_t addr,uintptr_t * stack,uintptr_t eax)58 sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax)
59 {
60 	uintptr_t stack0, stack1, stack2, stack3, stack4;
61 	int i = 0;
62 	sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)];
63 
64 	/*
65 	 * On amd64, stack[0] contains the dereferenced stack pointer,
66 	 * stack[1] contains savfp, stack[2] contains savpc.  We want
67 	 * to step over these entries.
68 	 */
69 	i += 3;
70 
71 	for (; sdt != NULL; sdt = sdt->sdp_hashnext) {
72 		if ((uintptr_t)sdt->sdp_patchpoint == addr) {
73 			/*
74 			 * When accessing the arguments on the stack, we must
75 			 * protect against accessing beyond the stack.  We can
76 			 * safely set NOFAULT here -- we know that interrupts
77 			 * are already disabled.
78 			 */
79 			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
80 			stack0 = stack[i++];
81 			stack1 = stack[i++];
82 			stack2 = stack[i++];
83 			stack3 = stack[i++];
84 			stack4 = stack[i++];
85 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT |
86 			    CPU_DTRACE_BADADDR);
87 
88 			dtrace_probe(sdt->sdp_id, stack0, stack1,
89 			    stack2, stack3, stack4);
90 
91 			return (DTRACE_INVOP_NOP);
92 		}
93 	}
94 
95 	return (0);
96 }
97 
98 /*ARGSUSED*/
99 static void
sdt_provide_module(void * arg,struct modctl * ctl)100 sdt_provide_module(void *arg, struct modctl *ctl)
101 {
102 	struct module *mp = ctl->mod_mp;
103 	char *modname = ctl->mod_modname;
104 	sdt_probedesc_t *sdpd;
105 	sdt_probe_t *sdp, *old;
106 	sdt_provider_t *prov;
107 	int len;
108 
109 	/*
110 	 * One for all, and all for one:  if we haven't yet registered all of
111 	 * our providers, we'll refuse to provide anything.
112 	 */
113 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
114 		if (prov->sdtp_id == DTRACE_PROVNONE)
115 			return;
116 	}
117 
118 	if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL)
119 		return;
120 
121 	for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) {
122 		char *name = sdpd->sdpd_name, *func, *nname;
123 		int i, j;
124 		sdt_provider_t *prov;
125 		ulong_t offs;
126 		dtrace_id_t id;
127 
128 		for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) {
129 			char *prefix = prov->sdtp_prefix;
130 
131 			if (strncmp(name, prefix, strlen(prefix)) == 0) {
132 				name += strlen(prefix);
133 				break;
134 			}
135 		}
136 
137 		nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP);
138 
139 		for (i = 0, j = 0; name[j] != '\0'; i++) {
140 			if (name[j] == '_' && name[j + 1] == '_') {
141 				nname[i] = '-';
142 				j += 2;
143 			} else {
144 				nname[i] = name[j++];
145 			}
146 		}
147 
148 		nname[i] = '\0';
149 
150 		sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP);
151 		sdp->sdp_loadcnt = ctl->mod_loadcnt;
152 		sdp->sdp_ctl = ctl;
153 		sdp->sdp_name = nname;
154 		sdp->sdp_namelen = len;
155 		sdp->sdp_provider = prov;
156 
157 		func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs);
158 
159 		if (func == NULL)
160 			func = "<unknown>";
161 
162 		/*
163 		 * We have our provider.  Now create the probe.
164 		 */
165 		if ((id = dtrace_probe_lookup(prov->sdtp_id, modname,
166 		    func, nname)) != DTRACE_IDNONE) {
167 			old = dtrace_probe_arg(prov->sdtp_id, id);
168 			ASSERT(old != NULL);
169 
170 			sdp->sdp_next = old->sdp_next;
171 			sdp->sdp_id = id;
172 			old->sdp_next = sdp;
173 		} else {
174 			sdp->sdp_id = dtrace_probe_create(prov->sdtp_id,
175 			    modname, func, nname, 3, sdp);
176 
177 			mp->sdt_nprobes++;
178 		}
179 
180 		sdp->sdp_hashnext =
181 		    sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)];
182 		sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp;
183 
184 		sdp->sdp_patchval = SDT_PATCHVAL;
185 		sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset;
186 		sdp->sdp_savedval = *sdp->sdp_patchpoint;
187 	}
188 }
189 
190 /*ARGSUSED*/
191 static void
sdt_destroy(void * arg,dtrace_id_t id,void * parg)192 sdt_destroy(void *arg, dtrace_id_t id, void *parg)
193 {
194 	sdt_probe_t *sdp = parg, *old, *last, *hash;
195 	struct modctl *ctl = sdp->sdp_ctl;
196 	int ndx;
197 
198 	if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) {
199 		if ((ctl->mod_loadcnt == sdp->sdp_loadcnt &&
200 		    ctl->mod_loaded)) {
201 			((struct module *)(ctl->mod_mp))->sdt_nprobes--;
202 		}
203 	}
204 
205 	while (sdp != NULL) {
206 		old = sdp;
207 
208 		/*
209 		 * Now we need to remove this probe from the sdt_probetab.
210 		 */
211 		ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint);
212 		last = NULL;
213 		hash = sdt_probetab[ndx];
214 
215 		while (hash != sdp) {
216 			ASSERT(hash != NULL);
217 			last = hash;
218 			hash = hash->sdp_hashnext;
219 		}
220 
221 		if (last != NULL) {
222 			last->sdp_hashnext = sdp->sdp_hashnext;
223 		} else {
224 			sdt_probetab[ndx] = sdp->sdp_hashnext;
225 		}
226 
227 		kmem_free(sdp->sdp_name, sdp->sdp_namelen);
228 		sdp = sdp->sdp_next;
229 		kmem_free(old, sizeof (sdt_probe_t));
230 	}
231 }
232 
233 /*ARGSUSED*/
234 static int
sdt_enable(void * arg,dtrace_id_t id,void * parg)235 sdt_enable(void *arg, dtrace_id_t id, void *parg)
236 {
237 	sdt_probe_t *sdp = parg;
238 	struct modctl *ctl = sdp->sdp_ctl;
239 
240 	ctl->mod_nenabled++;
241 
242 	/*
243 	 * If this module has disappeared since we discovered its probes,
244 	 * refuse to enable it.
245 	 */
246 	if (!ctl->mod_loaded) {
247 		if (sdt_verbose) {
248 			cmn_err(CE_NOTE, "sdt is failing for probe %s "
249 			    "(module %s unloaded)",
250 			    sdp->sdp_name, ctl->mod_modname);
251 		}
252 		goto err;
253 	}
254 
255 	/*
256 	 * Now check that our modctl has the expected load count.  If it
257 	 * doesn't, this module must have been unloaded and reloaded -- and
258 	 * we're not going to touch it.
259 	 */
260 	if (ctl->mod_loadcnt != sdp->sdp_loadcnt) {
261 		if (sdt_verbose) {
262 			cmn_err(CE_NOTE, "sdt is failing for probe %s "
263 			    "(module %s reloaded)",
264 			    sdp->sdp_name, ctl->mod_modname);
265 		}
266 		goto err;
267 	}
268 
269 	while (sdp != NULL) {
270 		*sdp->sdp_patchpoint = sdp->sdp_patchval;
271 		sdp = sdp->sdp_next;
272 	}
273 err:
274 	return (0);
275 }
276 
277 /*ARGSUSED*/
278 static void
sdt_disable(void * arg,dtrace_id_t id,void * parg)279 sdt_disable(void *arg, dtrace_id_t id, void *parg)
280 {
281 	sdt_probe_t *sdp = parg;
282 	struct modctl *ctl = sdp->sdp_ctl;
283 
284 	ctl->mod_nenabled--;
285 
286 	if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt)
287 		goto err;
288 
289 	while (sdp != NULL) {
290 		*sdp->sdp_patchpoint = sdp->sdp_savedval;
291 		sdp = sdp->sdp_next;
292 	}
293 
294 err:
295 	;
296 }
297 
298 /*ARGSUSED*/
299 uint64_t
sdt_getarg(void * arg,dtrace_id_t id,void * parg,int argno,int aframes)300 sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes)
301 {
302 	uintptr_t val;
303 	struct frame *fp = (struct frame *)dtrace_getfp();
304 	uintptr_t *stack;
305 	int i;
306 	/*
307 	 * A total of 6 arguments are passed via registers; any argument with
308 	 * index of 5 or lower is therefore in a register.
309 	 */
310 	int inreg = 5;
311 
312 	for (i = 1; i <= aframes; i++) {
313 		fp = (struct frame *)(fp->fr_savfp);
314 
315 		if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) {
316 			/*
317 			 * In the case of amd64, we will use the pointer to the
318 			 * regs structure that was pushed when we took the
319 			 * trap.  To get this structure, we must increment
320 			 * beyond the frame structure, the calling RIP, and
321 			 * padding stored in dtrace_invop().  If the argument
322 			 * that we're seeking is passed on the stack, we'll
323 			 * pull the true stack pointer out of the saved
324 			 * registers and decrement our argument by the number
325 			 * of arguments passed in registers; if the argument
326 			 * we're seeking is passed in regsiters, we can just
327 			 * load it directly.
328 			 */
329 			struct regs *rp = (struct regs *)((uintptr_t)&fp[1] +
330 			    sizeof (uintptr_t) * 2);
331 
332 			if (argno <= inreg) {
333 				stack = (uintptr_t *)&rp->r_rdi;
334 			} else {
335 				stack = (uintptr_t *)(rp->r_rsp);
336 				argno -= (inreg + 1);
337 			}
338 			goto load;
339 		}
340 	}
341 
342 	/*
343 	 * We know that we did not come through a trap to get into
344 	 * dtrace_probe() -- the provider simply called dtrace_probe()
345 	 * directly.  As this is the case, we need to shift the argument
346 	 * that we're looking for:  the probe ID is the first argument to
347 	 * dtrace_probe(), so the argument n will actually be found where
348 	 * one would expect to find argument (n + 1).
349 	 */
350 	argno++;
351 
352 	if (argno <= inreg) {
353 		/*
354 		 * This shouldn't happen.  If the argument is passed in a
355 		 * register then it should have been, well, passed in a
356 		 * register...
357 		 */
358 		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
359 		return (0);
360 	}
361 
362 	argno -= (inreg + 1);
363 	stack = (uintptr_t *)&fp[1];
364 
365 load:
366 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
367 	val = stack[argno];
368 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
369 
370 	return (val);
371 }
372 
373 static dtrace_pops_t sdt_pops = {
374 	NULL,
375 	sdt_provide_module,
376 	sdt_enable,
377 	sdt_disable,
378 	NULL,
379 	NULL,
380 	sdt_getargdesc,
381 	sdt_getarg,
382 	NULL,
383 	sdt_destroy
384 };
385 
386 /*ARGSUSED*/
387 static int
sdt_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)388 sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
389 {
390 	sdt_provider_t *prov;
391 
392 	if (ddi_create_minor_node(devi, "sdt", S_IFCHR,
393 	    0, DDI_PSEUDO, 0) == DDI_FAILURE) {
394 		cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node");
395 		ddi_remove_minor_node(devi, NULL);
396 		return (DDI_FAILURE);
397 	}
398 
399 	ddi_report_dev(devi);
400 	sdt_devi = devi;
401 
402 	if (sdt_probetab_size == 0)
403 		sdt_probetab_size = SDT_PROBETAB_SIZE;
404 
405 	sdt_probetab_mask = sdt_probetab_size - 1;
406 	sdt_probetab =
407 	    kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP);
408 	dtrace_invop_add(sdt_invop);
409 
410 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
411 		uint32_t priv;
412 
413 		if (prov->sdtp_priv == DTRACE_PRIV_NONE) {
414 			priv = DTRACE_PRIV_KERNEL;
415 			sdt_pops.dtps_mode = NULL;
416 		} else {
417 			priv = prov->sdtp_priv;
418 			ASSERT(priv == DTRACE_PRIV_USER);
419 			sdt_pops.dtps_mode = sdt_mode;
420 		}
421 
422 		if (dtrace_register(prov->sdtp_name, prov->sdtp_attr,
423 		    priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) {
424 			cmn_err(CE_WARN, "failed to register sdt provider %s",
425 			    prov->sdtp_name);
426 		}
427 	}
428 
429 	return (DDI_SUCCESS);
430 }
431 
432 /*ARGSUSED*/
433 static int
sdt_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)434 sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
435 {
436 	sdt_provider_t *prov;
437 
438 	switch (cmd) {
439 	case DDI_DETACH:
440 		break;
441 
442 	case DDI_SUSPEND:
443 		return (DDI_SUCCESS);
444 
445 	default:
446 		return (DDI_FAILURE);
447 	}
448 
449 	for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) {
450 		if (prov->sdtp_id != DTRACE_PROVNONE) {
451 			if (dtrace_unregister(prov->sdtp_id) != 0)
452 				return (DDI_FAILURE);
453 
454 			prov->sdtp_id = DTRACE_PROVNONE;
455 		}
456 	}
457 
458 	dtrace_invop_remove(sdt_invop);
459 	kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *));
460 
461 	return (DDI_SUCCESS);
462 }
463 
464 /*ARGSUSED*/
465 static int
sdt_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)466 sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
467 {
468 	int error;
469 
470 	switch (infocmd) {
471 	case DDI_INFO_DEVT2DEVINFO:
472 		*result = (void *)sdt_devi;
473 		error = DDI_SUCCESS;
474 		break;
475 	case DDI_INFO_DEVT2INSTANCE:
476 		*result = (void *)0;
477 		error = DDI_SUCCESS;
478 		break;
479 	default:
480 		error = DDI_FAILURE;
481 	}
482 	return (error);
483 }
484 
485 /*ARGSUSED*/
486 static int
sdt_open(dev_t * devp,int flag,int otyp,cred_t * cred_p)487 sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
488 {
489 	return (0);
490 }
491 
492 static struct cb_ops sdt_cb_ops = {
493 	sdt_open,		/* open */
494 	nodev,			/* close */
495 	nulldev,		/* strategy */
496 	nulldev,		/* print */
497 	nodev,			/* dump */
498 	nodev,			/* read */
499 	nodev,			/* write */
500 	nodev,			/* ioctl */
501 	nodev,			/* devmap */
502 	nodev,			/* mmap */
503 	nodev,			/* segmap */
504 	nochpoll,		/* poll */
505 	ddi_prop_op,		/* cb_prop_op */
506 	0,			/* streamtab  */
507 	D_NEW | D_MP		/* Driver compatibility flag */
508 };
509 
510 static struct dev_ops sdt_ops = {
511 	DEVO_REV,		/* devo_rev, */
512 	0,			/* refcnt  */
513 	sdt_info,		/* get_dev_info */
514 	nulldev,		/* identify */
515 	nulldev,		/* probe */
516 	sdt_attach,		/* attach */
517 	sdt_detach,		/* detach */
518 	nodev,			/* reset */
519 	&sdt_cb_ops,		/* driver operations */
520 	NULL,			/* bus operations */
521 	nodev,			/* dev power */
522 	ddi_quiesce_not_needed,		/* quiesce */
523 };
524 
525 /*
526  * Module linkage information for the kernel.
527  */
528 static struct modldrv modldrv = {
529 	&mod_driverops,		/* module type (this is a pseudo driver) */
530 	"Statically Defined Tracing",	/* name of module */
531 	&sdt_ops,		/* driver ops */
532 };
533 
534 static struct modlinkage modlinkage = {
535 	MODREV_1,
536 	(void *)&modldrv,
537 	NULL
538 };
539 
540 int
_init(void)541 _init(void)
542 {
543 	return (mod_install(&modlinkage));
544 }
545 
546 int
_info(struct modinfo * modinfop)547 _info(struct modinfo *modinfop)
548 {
549 	return (mod_info(&modlinkage, modinfop));
550 }
551 
552 int
_fini(void)553 _fini(void)
554 {
555 	return (mod_remove(&modlinkage));
556 }
557