/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2012, Joyent, Inc. All rights reserved. * Copyright (c) 2013, 2014 by Delphix. All rights reserved. * Copyright 2024 Oxide Computer Company */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define SDT_PATCHVAL 0xf0 #define SDT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & sdt_probetab_mask) #define SDT_PROBETAB_SIZE 0x1000 /* 4k entries -- 16K total */ static dev_info_t *sdt_devi; static int sdt_verbose = 0; static sdt_probe_t **sdt_probetab; static int sdt_probetab_size; static int sdt_probetab_mask; /*ARGSUSED*/ static int sdt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t eax) { uintptr_t stack0, stack1, stack2, stack3, stack4; int i = 0; sdt_probe_t *sdt = sdt_probetab[SDT_ADDR2NDX(addr)]; /* * On amd64, stack[0] contains the dereferenced stack pointer, * stack[1] contains savfp, stack[2] contains savpc. We want * to step over these entries. */ i += 3; for (; sdt != NULL; sdt = sdt->sdp_hashnext) { if ((uintptr_t)sdt->sdp_patchpoint == addr) { /* * When accessing the arguments on the stack, we must * protect against accessing beyond the stack. We can * safely set NOFAULT here -- we know that interrupts * are already disabled. */ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); stack0 = stack[i++]; stack1 = stack[i++]; stack2 = stack[i++]; stack3 = stack[i++]; stack4 = stack[i++]; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR); dtrace_probe(sdt->sdp_id, stack0, stack1, stack2, stack3, stack4); return (DTRACE_INVOP_NOP); } } return (0); } /*ARGSUSED*/ static void sdt_provide_module(void *arg, struct modctl *ctl) { struct module *mp = ctl->mod_mp; char *modname = ctl->mod_modname; sdt_probedesc_t *sdpd; sdt_probe_t *sdp, *old; sdt_provider_t *prov; int len; /* * One for all, and all for one: if we haven't yet registered all of * our providers, we'll refuse to provide anything. */ for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { if (prov->sdtp_id == DTRACE_PROVNONE) return; } if (mp->sdt_nprobes != 0 || (sdpd = mp->sdt_probes) == NULL) return; for (sdpd = mp->sdt_probes; sdpd != NULL; sdpd = sdpd->sdpd_next) { char *name = sdpd->sdpd_name, *func, *nname; int i, j; sdt_provider_t *prov; ulong_t offs; dtrace_id_t id; for (prov = sdt_providers; prov->sdtp_prefix != NULL; prov++) { char *prefix = prov->sdtp_prefix; if (strncmp(name, prefix, strlen(prefix)) == 0) { name += strlen(prefix); break; } } nname = kmem_alloc(len = strlen(name) + 1, KM_SLEEP); for (i = 0, j = 0; name[j] != '\0'; i++) { if (name[j] == '_' && name[j + 1] == '_') { nname[i] = '-'; j += 2; } else { nname[i] = name[j++]; } } nname[i] = '\0'; sdp = kmem_zalloc(sizeof (sdt_probe_t), KM_SLEEP); sdp->sdp_loadcnt = ctl->mod_loadcnt; sdp->sdp_ctl = ctl; sdp->sdp_name = nname; sdp->sdp_namelen = len; sdp->sdp_provider = prov; func = kobj_searchsym(mp, sdpd->sdpd_offset, &offs); if (func == NULL) func = ""; /* * We have our provider. Now create the probe. */ if ((id = dtrace_probe_lookup(prov->sdtp_id, modname, func, nname)) != DTRACE_IDNONE) { old = dtrace_probe_arg(prov->sdtp_id, id); ASSERT(old != NULL); sdp->sdp_next = old->sdp_next; sdp->sdp_id = id; old->sdp_next = sdp; } else { sdp->sdp_id = dtrace_probe_create(prov->sdtp_id, modname, func, nname, 3, sdp); mp->sdt_nprobes++; } sdp->sdp_hashnext = sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)]; sdt_probetab[SDT_ADDR2NDX(sdpd->sdpd_offset)] = sdp; sdp->sdp_patchval = SDT_PATCHVAL; sdp->sdp_patchpoint = (uint8_t *)sdpd->sdpd_offset; sdp->sdp_savedval = *sdp->sdp_patchpoint; sdp->sdp_is_tailcall = sdp->sdp_patchpoint[SDT_OFF_RET_IDX] == SDT_RET; } } /*ARGSUSED*/ static void sdt_destroy(void *arg, dtrace_id_t id, void *parg) { sdt_probe_t *sdp = parg, *old, *last, *hash; struct modctl *ctl = sdp->sdp_ctl; int ndx; if (ctl != NULL && ctl->mod_loadcnt == sdp->sdp_loadcnt) { if ((ctl->mod_loadcnt == sdp->sdp_loadcnt && ctl->mod_loaded)) { ((struct module *)(ctl->mod_mp))->sdt_nprobes--; } } while (sdp != NULL) { old = sdp; /* * Now we need to remove this probe from the sdt_probetab. */ ndx = SDT_ADDR2NDX(sdp->sdp_patchpoint); last = NULL; hash = sdt_probetab[ndx]; while (hash != sdp) { ASSERT(hash != NULL); last = hash; hash = hash->sdp_hashnext; } if (last != NULL) { last->sdp_hashnext = sdp->sdp_hashnext; } else { sdt_probetab[ndx] = sdp->sdp_hashnext; } kmem_free(sdp->sdp_name, sdp->sdp_namelen); sdp = sdp->sdp_next; kmem_free(old, sizeof (sdt_probe_t)); } } /*ARGSUSED*/ static int sdt_enable(void *arg, dtrace_id_t id, void *parg) { sdt_probe_t *sdp = parg; struct modctl *ctl = sdp->sdp_ctl; ctl->mod_nenabled++; /* * If this module has disappeared since we discovered its probes, * refuse to enable it. */ if (!ctl->mod_loaded) { if (sdt_verbose) { cmn_err(CE_NOTE, "sdt is failing for probe %s " "(module %s unloaded)", sdp->sdp_name, ctl->mod_modname); } goto err; } /* * Now check that our modctl has the expected load count. If it * doesn't, this module must have been unloaded and reloaded -- and * we're not going to touch it. */ if (ctl->mod_loadcnt != sdp->sdp_loadcnt) { if (sdt_verbose) { cmn_err(CE_NOTE, "sdt is failing for probe %s " "(module %s reloaded)", sdp->sdp_name, ctl->mod_modname); } goto err; } while (sdp != NULL) { *sdp->sdp_patchpoint = sdp->sdp_patchval; sdp = sdp->sdp_next; } err: return (0); } /*ARGSUSED*/ static void sdt_disable(void *arg, dtrace_id_t id, void *parg) { sdt_probe_t *sdp = parg; struct modctl *ctl = sdp->sdp_ctl; ctl->mod_nenabled--; if (!ctl->mod_loaded || ctl->mod_loadcnt != sdp->sdp_loadcnt) goto err; while (sdp != NULL) { *sdp->sdp_patchpoint = sdp->sdp_savedval; sdp = sdp->sdp_next; } err: ; } /*ARGSUSED*/ uint64_t sdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno, int aframes) { sdt_probe_t *sdp = parg; uintptr_t val; struct frame *fp = (struct frame *)dtrace_getfp(); uintptr_t *stack; int i; /* * A total of 6 arguments are passed via registers; any argument with * index of 5 or lower is therefore in a register. */ int inreg = 5; for (i = 1; i <= aframes; i++) { fp = (struct frame *)(fp->fr_savfp); if (fp->fr_savpc == (pc_t)dtrace_invop_callsite) { /* * In the case of amd64, we will use the pointer to the * regs structure that was pushed when we took the * trap. To get this structure, we must increment * beyond the frame structure, the calling RIP, and * padding stored in dtrace_invop(). If the argument * that we're seeking is passed on the stack, we'll * pull the true stack pointer out of the saved * registers and decrement our argument by the number * of arguments passed in registers; if the argument * we're seeking is passed in regsiters, we can just * load it directly. */ struct regs *rp = (struct regs *)((uintptr_t)&fp[1] + sizeof (uintptr_t) * 2); if (argno <= inreg) { stack = (uintptr_t *)&rp->r_rdi; } else { stack = (uintptr_t *)(rp->r_rsp); argno -= (inreg + 1); /* * If the probe was invoked as a tail call, the * compiler leaves the stack as if we had just * entered the fictitious __dtrace_probe_[name] * function, meaning we need to skip over the * saved return address to get to the stack * arguments. */ if (sdp->sdp_is_tailcall) argno++; } goto load; } } /* * We know that we did not come through a trap to get into * dtrace_probe() -- the provider simply called dtrace_probe() * directly. As this is the case, we need to shift the argument * that we're looking for: the probe ID is the first argument to * dtrace_probe(), so the argument n will actually be found where * one would expect to find argument (n + 1). */ argno++; if (argno <= inreg) { /* * This shouldn't happen. If the argument is passed in a * register then it should have been, well, passed in a * register... */ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); return (0); } argno -= (inreg + 1); stack = (uintptr_t *)&fp[1]; load: DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); val = stack[argno]; DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); return (val); } static dtrace_pops_t sdt_pops = { NULL, sdt_provide_module, sdt_enable, sdt_disable, NULL, NULL, sdt_getargdesc, sdt_getarg, NULL, sdt_destroy }; /*ARGSUSED*/ static int sdt_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { sdt_provider_t *prov; if (ddi_create_minor_node(devi, "sdt", S_IFCHR, 0, DDI_PSEUDO, 0) == DDI_FAILURE) { cmn_err(CE_NOTE, "/dev/sdt couldn't create minor node"); ddi_remove_minor_node(devi, NULL); return (DDI_FAILURE); } ddi_report_dev(devi); sdt_devi = devi; if (sdt_probetab_size == 0) sdt_probetab_size = SDT_PROBETAB_SIZE; sdt_probetab_mask = sdt_probetab_size - 1; sdt_probetab = kmem_zalloc(sdt_probetab_size * sizeof (sdt_probe_t *), KM_SLEEP); dtrace_invop_add(sdt_invop); for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { uint32_t priv; if (prov->sdtp_priv == DTRACE_PRIV_NONE) { priv = DTRACE_PRIV_KERNEL; sdt_pops.dtps_mode = NULL; } else { priv = prov->sdtp_priv; ASSERT(priv == DTRACE_PRIV_USER); sdt_pops.dtps_mode = sdt_mode; } if (dtrace_register(prov->sdtp_name, prov->sdtp_attr, priv, NULL, &sdt_pops, prov, &prov->sdtp_id) != 0) { cmn_err(CE_WARN, "failed to register sdt provider %s", prov->sdtp_name); } } return (DDI_SUCCESS); } /*ARGSUSED*/ static int sdt_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) { sdt_provider_t *prov; switch (cmd) { case DDI_DETACH: break; case DDI_SUSPEND: return (DDI_SUCCESS); default: return (DDI_FAILURE); } for (prov = sdt_providers; prov->sdtp_name != NULL; prov++) { if (prov->sdtp_id != DTRACE_PROVNONE) { if (dtrace_unregister(prov->sdtp_id) != 0) return (DDI_FAILURE); prov->sdtp_id = DTRACE_PROVNONE; } } dtrace_invop_remove(sdt_invop); kmem_free(sdt_probetab, sdt_probetab_size * sizeof (sdt_probe_t *)); return (DDI_SUCCESS); } /*ARGSUSED*/ static int sdt_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) { int error; switch (infocmd) { case DDI_INFO_DEVT2DEVINFO: *result = (void *)sdt_devi; error = DDI_SUCCESS; break; case DDI_INFO_DEVT2INSTANCE: *result = (void *)0; error = DDI_SUCCESS; break; default: error = DDI_FAILURE; } return (error); } /*ARGSUSED*/ static int sdt_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) { return (0); } static struct cb_ops sdt_cb_ops = { sdt_open, /* open */ nodev, /* close */ nulldev, /* strategy */ nulldev, /* print */ nodev, /* dump */ nodev, /* read */ nodev, /* write */ nodev, /* ioctl */ nodev, /* devmap */ nodev, /* mmap */ nodev, /* segmap */ nochpoll, /* poll */ ddi_prop_op, /* cb_prop_op */ 0, /* streamtab */ D_NEW | D_MP /* Driver compatibility flag */ }; static struct dev_ops sdt_ops = { DEVO_REV, /* devo_rev, */ 0, /* refcnt */ sdt_info, /* get_dev_info */ nulldev, /* identify */ nulldev, /* probe */ sdt_attach, /* attach */ sdt_detach, /* detach */ nodev, /* reset */ &sdt_cb_ops, /* driver operations */ NULL, /* bus operations */ nodev, /* dev power */ ddi_quiesce_not_needed, /* quiesce */ }; /* * Module linkage information for the kernel. */ static struct modldrv modldrv = { &mod_driverops, /* module type (this is a pseudo driver) */ "Statically Defined Tracing", /* name of module */ &sdt_ops, /* driver ops */ }; static struct modlinkage modlinkage = { MODREV_1, (void *)&modldrv, NULL }; int _init(void) { return (mod_install(&modlinkage)); } int _info(struct modinfo *modinfop) { return (mod_info(&modlinkage, modinfop)); } int _fini(void) { return (mod_remove(&modlinkage)); }