/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21ac44896ahl
/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 * Copyright (c) 2012, 2014 by Delphix. All rights reserved.
 */
277c478bdstevel@tonic-gate
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
677c478bdstevel@tonic-gate#include <sys/errno.h>
687c478bdstevel@tonic-gate#include <sys/stat.h>
697c478bdstevel@tonic-gate#include <sys/modctl.h>
707c478bdstevel@tonic-gate#include <sys/conf.h>
717c478bdstevel@tonic-gate#include <sys/systm.h>
727c478bdstevel@tonic-gate#include <sys/ddi.h>
737c478bdstevel@tonic-gate#include <sys/sunddi.h>
747c478bdstevel@tonic-gate#include <sys/cpuvar.h>
757c478bdstevel@tonic-gate#include <sys/kmem.h>
767c478bdstevel@tonic-gate#include <sys/strsubr.h>
777c478bdstevel@tonic-gate#include <sys/sysmacros.h>
787c478bdstevel@tonic-gate#include <sys/dtrace_impl.h>
797c478bdstevel@tonic-gate#include <sys/atomic.h>
807c478bdstevel@tonic-gate#include <sys/cmn_err.h>
817c478bdstevel@tonic-gate#include <sys/mutex_impl.h>
827c478bdstevel@tonic-gate#include <sys/rwlock_impl.h>
837c478bdstevel@tonic-gate#include <sys/ctf_api.h>
847c478bdstevel@tonic-gate#include <sys/panic.h>
857c478bdstevel@tonic-gate#include <sys/priv_impl.h>
867c478bdstevel@tonic-gate#include <sys/policy.h>
877c478bdstevel@tonic-gate#include <sys/cred_impl.h>
887c478bdstevel@tonic-gate#include <sys/procfs_isa.h>
897c478bdstevel@tonic-gate#include <sys/taskq.h>
907c478bdstevel@tonic-gate#include <sys/mkdev.h>
917c478bdstevel@tonic-gate#include <sys/kdi.h>
927c478bdstevel@tonic-gate#include <sys/zone.h>
934edabffbrendan#include <sys/socket.h>
944edabffbrendan#include <netinet/in.h>
95f497f9fJoshua M. Clulow#include "strtolctype.h"
967c478bdstevel@tonic-gate
977c478bdstevel@tonic-gate/*
987c478bdstevel@tonic-gate * DTrace Tunable Variables
997c478bdstevel@tonic-gate *
1007c478bdstevel@tonic-gate * The following variables may be tuned by adding a line to /etc/system that
1017c478bdstevel@tonic-gate * includes both the name of the DTrace module ("dtrace") and the name of the
1027c478bdstevel@tonic-gate * variable.  For example:
1037c478bdstevel@tonic-gate *
1047c478bdstevel@tonic-gate *   set dtrace:dtrace_destructive_disallow = 1
1057c478bdstevel@tonic-gate *
1067c478bdstevel@tonic-gate * In general, the only variables that one should be tuning this way are those
1077c478bdstevel@tonic-gate * that affect system-wide DTrace behavior, and for which the default behavior
1087c478bdstevel@tonic-gate * is undesirable.  Most of these variables are tunable on a per-consumer
1097c478bdstevel@tonic-gate * basis using DTrace options, and need not be tuned on a system-wide basis.
1107c478bdstevel@tonic-gate * When tuning these variables, avoid pathological values; while some attempt
1117c478bdstevel@tonic-gate * is made to verify the integrity of these variables, they are not considered
1127c478bdstevel@tonic-gate * part of the supported interface to DTrace, and they are therefore not
1137c478bdstevel@tonic-gate * checked comprehensively.  Further, these variables should not be tuned
1147c478bdstevel@tonic-gate * dynamically via "mdb -kw" or other means; they should only be tuned via
1157c478bdstevel@tonic-gate * /etc/system.
1167c478bdstevel@tonic-gate */
1177c478bdstevel@tonic-gateint		dtrace_destructive_disallow = 0;
1187c478bdstevel@tonic-gatedtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
1197c478bdstevel@tonic-gatesize_t		dtrace_difo_maxsize = (256 * 1024);
120d339a29Bryan Cantrilldtrace_optval_t	dtrace_dof_maxsize = (8 * 1024 * 1024);
121395c7a3Bryan Cantrillsize_t		dtrace_statvar_maxsize = (16 * 1024);
1227c478bdstevel@tonic-gatesize_t		dtrace_actions_max = (16 * 1024);
1237c478bdstevel@tonic-gatesize_t		dtrace_retain_max = 1024;
12436d0f9dDave Pachecodtrace_optval_t	dtrace_helper_actions_max = 1024;
1257c478bdstevel@tonic-gatedtrace_optval_t	dtrace_helper_providers_max = 32;
1267c478bdstevel@tonic-gatedtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
1277c478bdstevel@tonic-gatesize_t		dtrace_strsize_default = 256;
1287c478bdstevel@tonic-gatedtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
1297c478bdstevel@tonic-gatedtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
1307c478bdstevel@tonic-gatedtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
1317c478bdstevel@tonic-gatedtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
1327c478bdstevel@tonic-gatedtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
1337c478bdstevel@tonic-gatedtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	 /* 6/minute */
1347c478bdstevel@tonic-gatedtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
1357c478bdstevel@tonic-gatedtrace_optval_t	dtrace_nspec_default = 1;
1367c478bdstevel@tonic-gatedtrace_optval_t	dtrace_specsize_default = 32 * 1024;
1377c478bdstevel@tonic-gatedtrace_optval_t dtrace_stackframes_default = 20;
1387c478bdstevel@tonic-gatedtrace_optval_t dtrace_ustackframes_default = 20;
1397c478bdstevel@tonic-gatedtrace_optval_t dtrace_jstackframes_default = 50;
1407c478bdstevel@tonic-gatedtrace_optval_t dtrace_jstackstrsize_default = 512;
1417c478bdstevel@tonic-gateint		dtrace_msgdsize_max = 128;
1421944925Josef 'Jeff' Sipekhrtime_t	dtrace_chill_max = MSEC2NSEC(500);		/* 500 ms */
1437c478bdstevel@tonic-gatehrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
1447c478bdstevel@tonic-gateint		dtrace_devdepth_max = 32;
1457c478bdstevel@tonic-gateint		dtrace_err_verbose;
1467c478bdstevel@tonic-gatehrtime_t	dtrace_deadman_interval = NANOSEC;
1477c478bdstevel@tonic-gatehrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
1487c478bdstevel@tonic-gatehrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
149f484800Bryan Cantrillhrtime_t	dtrace_unregister_defunct_reap = (hrtime_t)60 * NANOSEC;
1507c478bdstevel@tonic-gate
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so:  it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
1617c478bdstevel@tonic-gate
1627c478bdstevel@tonic-gate/*
1637c478bdstevel@tonic-gate * DTrace Internal Variables
1647c478bdstevel@tonic-gate */
1657c478bdstevel@tonic-gatestatic dev_info_t	*dtrace_devi;		/* device info */
1667c478bdstevel@tonic-gatestatic vmem_t		*dtrace_arena;		/* probe ID arena */
1677c478bdstevel@tonic-gatestatic vmem_t		*dtrace_minor;		/* minor number arena */
1687c478bdstevel@tonic-gatestatic taskq_t		*dtrace_taskq;		/* task queue */
1697c478bdstevel@tonic-gatestatic dtrace_probe_t	**dtrace_probes;	/* array of all probes */
1707c478bdstevel@tonic-gatestatic int		dtrace_nprobes;		/* number of probes */
1717c478bdstevel@tonic-gatestatic dtrace_provider_t *dtrace_provider;	/* provider list */
1727c478bdstevel@tonic-gatestatic dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
1737c478bdstevel@tonic-gatestatic int		dtrace_opens;		/* number of opens */
174a1b5e53bmcstatic int		dtrace_helpers;		/* number of helpers */
175b0f673cBryan Cantrillstatic int		dtrace_getf;		/* number of unpriv getf()s */
1767c478bdstevel@tonic-gatestatic void		*dtrace_softstate;	/* softstate pointer */
1777c478bdstevel@tonic-gatestatic dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
1787c478bdstevel@tonic-gatestatic dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
1797c478bdstevel@tonic-gatestatic dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
1807c478bdstevel@tonic-gatestatic dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
1817c478bdstevel@tonic-gatestatic int		dtrace_toxranges;	/* number of toxic ranges */
1827c478bdstevel@tonic-gatestatic int		dtrace_toxranges_max;	/* size of toxic range array */
1837c478bdstevel@tonic-gatestatic dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
1847c478bdstevel@tonic-gatestatic kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
1857c478bdstevel@tonic-gatestatic uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
1867c478bdstevel@tonic-gatestatic kthread_t	*dtrace_panicked;	/* panicking thread */
1877c478bdstevel@tonic-gatestatic dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
1887c478bdstevel@tonic-gatestatic dtrace_genid_t	dtrace_probegen;	/* current probe generation */
1897c478bdstevel@tonic-gatestatic dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
1907c478bdstevel@tonic-gatestatic dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
191ef5bb02Jonathan Haslamstatic dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
192586d07dbmcstatic dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
193c9a6ea2Bryan Cantrillstatic int		dtrace_dynvar_failclean; /* dynvars failed to clean */
1947c478bdstevel@tonic-gate
1957c478bdstevel@tonic-gate/*
1967c478bdstevel@tonic-gate * DTrace Locking
1977c478bdstevel@tonic-gate * DTrace is protected by three (relatively coarse-grained) locks:
1987c478bdstevel@tonic-gate *
1997c478bdstevel@tonic-gate * (1) dtrace_lock is required to manipulate essentially any DTrace state,
2007c478bdstevel@tonic-gate *     including enabling state, probes, ECBs, consumer state, helper state,
2017c478bdstevel@tonic-gate *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
2027c478bdstevel@tonic-gate *     probe context is lock-free -- synchronization is handled via the
2037c478bdstevel@tonic-gate *     dtrace_sync() cross call mechanism.
2047c478bdstevel@tonic-gate *
2057c478bdstevel@tonic-gate * (2) dtrace_provider_lock is required when manipulating provider state, or
2067c478bdstevel@tonic-gate *     when provider state must be held constant.
2077c478bdstevel@tonic-gate *
2087c478bdstevel@tonic-gate * (3) dtrace_meta_lock is required when manipulating meta provider state, or
2097c478bdstevel@tonic-gate *     when meta provider state must be held constant.
2107c478bdstevel@tonic-gate *
2117c478bdstevel@tonic-gate * The lock ordering between these three locks is dtrace_meta_lock before
2127c478bdstevel@tonic-gate * dtrace_provider_lock before dtrace_lock.  (In particular, there are
2137c478bdstevel@tonic-gate * several places where dtrace_provider_lock is held by the framework as it
2147c478bdstevel@tonic-gate * calls into the providers -- which then call back into the framework,
2157c478bdstevel@tonic-gate * grabbing dtrace_lock.)
2167c478bdstevel@tonic-gate *
217a1b5e53bmc * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
218a1b5e53bmc * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
219a1b5e53bmc * role as a coarse-grained lock; it is acquired before both of these locks.
220a1b5e53bmc * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
221a1b5e53bmc * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
222a1b5e53bmc * mod_lock is similar with respect to dtrace_provider_lock in that it must be
223a1b5e53bmc * acquired _between_ dtrace_provider_lock and dtrace_lock.
2247c478bdstevel@tonic-gate */
2257c478bdstevel@tonic-gatestatic kmutex_t		dtrace_lock;		/* probe state lock */
2267c478bdstevel@tonic-gatestatic kmutex_t		dtrace_provider_lock;	/* provider state lock */
2277c478bdstevel@tonic-gatestatic kmutex_t		dtrace_meta_lock;	/* meta-provider state lock */
2287c478bdstevel@tonic-gate
2297c478bdstevel@tonic-gate/*
2307c478bdstevel@tonic-gate * DTrace Provider Variables
2317c478bdstevel@tonic-gate *
2327c478bdstevel@tonic-gate * These are the variables relating to DTrace as a provider (that is, the
2337c478bdstevel@tonic-gate * provider of the BEGIN, END, and ERROR probes).
2347c478bdstevel@tonic-gate */
2357c478bdstevel@tonic-gatestatic dtrace_pattr_t	dtrace_provider_attr = {
2367c478bdstevel@tonic-gate{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
2377c478bdstevel@tonic-gate{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
2387c478bdstevel@tonic-gate{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
2397c478bdstevel@tonic-gate{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
2407c478bdstevel@tonic-gate{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
2417c478bdstevel@tonic-gate};
2427c478bdstevel@tonic-gate
2437c478bdstevel@tonic-gatestatic void
244ed1faacToomas Soomedtrace_nullop_provide(void *arg __unused,
245ed1faacToomas Soome    const dtrace_probedesc_t *spec __unused)
246ed1faacToomas Soome{
247ed1faacToomas Soome}
248ed1faacToomas Soome
249ed1faacToomas Soomestatic void
250ed1faacToomas Soomedtrace_nullop_module(void *arg __unused, struct modctl *mp __unused)
251ed1faacToomas Soome{
252ed1faacToomas Soome}
253ed1faacToomas Soome
254ed1faacToomas Soomestatic void
255ed1faacToomas Soomedtrace_nullop(void *arg __unused, dtrace_id_t id __unused, void *parg __unused)
256ed1faacToomas Soome{
257ed1faacToomas Soome}
2587c478bdstevel@tonic-gate
259b9e93c1Jonathan Haslamstatic int
260ed1faacToomas Soomedtrace_enable_nullop(void *arg __unused, dtrace_id_t id __unused,
261ed1faacToomas Soome    void *parg __unused)
262b9e93c1Jonathan Haslam{
263b9e93c1Jonathan Haslam	return (0);
264b9e93c1Jonathan Haslam}
265b9e93c1Jonathan Haslam
2667c478bdstevel@tonic-gatestatic dtrace_pops_t	dtrace_provider_ops = {
267ed1faacToomas Soome	.dtps_provide = dtrace_nullop_provide,
268ed1faacToomas Soome	.dtps_provide_module = dtrace_nullop_module,
269ed1faacToomas Soome	.dtps_enable = dtrace_enable_nullop,
270ed1faacToomas Soome	.dtps_disable = dtrace_nullop,
271ed1faacToomas Soome	.dtps_suspend = dtrace_nullop,
272ed1faacToomas Soome	.dtps_resume = dtrace_nullop,
273ed1faacToomas Soome	.dtps_getargdesc = NULL,
274ed1faacToomas Soome	.dtps_getargval = NULL,
275ed1faacToomas Soome	.dtps_mode = NULL,
276ed1faacToomas Soome	.dtps_destroy = dtrace_nullop
2777c478bdstevel@tonic-gate};
2787c478bdstevel@tonic-gate
2797c478bdstevel@tonic-gatestatic dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
2807c478bdstevel@tonic-gatestatic dtrace_id_t	dtrace_probeid_end;	/* special END probe */
2817c478bdstevel@tonic-gatedtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
2827c478bdstevel@tonic-gate
2837c478bdstevel@tonic-gate/*
2847c478bdstevel@tonic-gate * DTrace Helper Tracing Variables
285902686dBryan Cantrill *
286902686dBryan Cantrill * These variables should be set dynamically to enable helper tracing.  The
287902686dBryan Cantrill * only variables that should be set are dtrace_helptrace_enable (which should
288902686dBryan Cantrill * be set to a non-zero value to allocate helper tracing buffers on the next
289902686dBryan Cantrill * open of /dev/dtrace) and dtrace_helptrace_disable (which should be set to a
290902686dBryan Cantrill * non-zero value to deallocate helper tracing buffers on the next close of
291902686dBryan Cantrill * /dev/dtrace).  When (and only when) helper tracing is disabled, the
292902686dBryan Cantrill * buffer size may also be set via dtrace_helptrace_bufsize.
2937c478bdstevel@tonic-gate */
294902686dBryan Cantrillint			dtrace_helptrace_enable = 0;
295902686dBryan Cantrillint			dtrace_helptrace_disable = 0;
296902686dBryan Cantrillint			dtrace_helptrace_bufsize = 16 * 1024 * 1024;
297902686dBryan Cantrilluint32_t		dtrace_helptrace_nlocals;
298902686dBryan Cantrillstatic dtrace_helptrace_t *dtrace_helptrace_buffer;
299902686dBryan Cantrillstatic uint32_t		dtrace_helptrace_next = 0;
300902686dBryan Cantrillstatic int		dtrace_helptrace_wrapped = 0;
3017c478bdstevel@tonic-gate
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static kmutex_t dtrace_errlock;
#endif
3177c478bdstevel@tonic-gate
/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */

/*
 * Hash the string whose (char *) pointer is stored dth_stroffs bytes into
 * the probe.
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

/*
 * Address of the probe's "next" link for this hash, located dth_nextoffs
 * bytes into the probe.  The result is a (dtrace_probe_t **).
 */
#define	DTRACE_HASHNEXT(hash, probe)	\
	((dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs))

/*
 * Address of the probe's "prev" link for this hash, located dth_prevoffs
 * bytes into the probe.  The result is a (dtrace_probe_t **).
 */
#define	DTRACE_HASHPREV(hash, probe)	\
	((dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs))

/*
 * Non-zero when the two probes' hashed strings (the (char *) found at
 * dth_stroffs in each) compare equal under strcmp().
 */
#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)
3424edabffbrendan
/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 *
 * Note that this macro expands to a brace-enclosed block (not a
 * do { } while (0) statement) that declares locals named "intr" and "actv";
 * the (where) argument is assigned the computed key.
 */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
3677c478bdstevel@tonic-gate
/*
 * Byte-swapping helpers.  DT_BSWAP_8 simply masks off the low byte; each
 * wider macro is composed from two applications of the next narrower one.
 * The argument is evaluated more than once, so it must have no side effects.
 */
#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))

/* Mask selecting the low 32 bits of a 64-bit value. */
#define	DT_MASK_LO 0x00000000FFFFFFFFULL
3746e0bee7jhaslam
/*
 * Store (what), converted to (type), at byte offset (offset) from (tomax).
 * Note that the expansion already ends in a semicolon; that is retained so
 * that existing uses are unaffected.
 */
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)(offset))) = (type)(what);
3777c478bdstevel@tonic-gate
/*
 * When __x86 is not defined, check (addr) for alignment to (size), which is
 * assumed to be a power of two since the test is a mask against (size - 1).
 * A misaligned address sets CPU_DTRACE_BADALIGN in *flags, records the
 * address in the current CPU's cpuc_dtrace_illval, and returns 0 from the
 * enclosing function.  When __x86 is defined the check expands to nothing.
 */
#ifndef __x86
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if ((addr) & ((size) - 1)) {					\
		*(flags) |= CPU_DTRACE_BADALIGN;			\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (addr);	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif
3887c478bdstevel@tonic-gate
/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by baseaddr, basesz.  We take care to
 * avoid problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed, provided that
 * testaddr itself lies strictly below the end of the base range.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (uintptr_t)(baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (uintptr_t)(baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))
3999de6b71dp
/*
 * If remp is non-NULL, store through it the number of bytes from addr to the
 * end of the range described by baseaddr, basesz.  No check is made here that
 * addr actually lies within that range.
 */
#define	DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz)		\
do {									\
	if ((remp) != NULL) {						\
		*(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr);	\
	}								\
_NOTE(CONSTCOND) } while (0)
406771e39cPatrick Mooney
407771e39cPatrick Mooney
/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))
418e0aad1edp
/*
 * Generate the definition of dtrace_load<bits>(), a load of a <bits>-wide
 * unsigned value from an arbitrary address.  The generated function:
 *
 *   - applies DTRACE_ALIGNCHECK to the address;
 *   - returns 0, after setting CPU_DTRACE_BADADDR and recording the address
 *     in cpuc_dtrace_illval, if [addr, addr + size) overlaps any entry in
 *     the dtrace_toxrange array;
 *   - otherwise performs the load with CPU_DTRACE_NOFAULT set in the current
 *     CPU's cpuc_dtrace_flags, clearing it again afterwards;
 *   - returns the loaded value, or 0 if CPU_DTRACE_FAULT is set in the flags
 *     after the load.
 */
#define	DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval;						\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	/*CSTYLED*/							\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0);		\
}
4557c478bdstevel@tonic-gate
/*
 * Pointer-sized load:  the 64-bit loader when _LP64 is defined, and the
 * 32-bit loader otherwise.
 */
#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif
4617c478bdstevel@tonic-gate
#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_FAIL	(-1)
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
/* Non-zero when the probe's function name (dtpr_func) is non-empty. */
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64
4717c478bdstevel@tonic-gate
/*
 * Map a set of CPU_DTRACE_* flag bits to a single DTRACEFLT_* fault code.
 * The tests are made in the order listed, so when several fault bits are set
 * the first match wins; if none of the listed bits is set, the result is
 * DTRACEFLT_UNKNOWN.
 */
#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ?  DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ?  DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ?  DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ?  DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)
483187eccfbmc
/*
 * Non-zero when the action is a DIF expression (DTRACEACT_DIFEXPR) whose DIF
 * object has a return type of kind DIF_TYPE_STRING.
 */
#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
48730ef842bmc
/*
 * Forward declarations of file-local (static) functions, so that they may be
 * called ahead of their definitions.  For example, dtrace_strlen() and
 * dtrace_canstore_remains() are both called in this file before the point at
 * which they are defined.
 */
static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static void dtrace_enabling_reap(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static int dtrace_buffer_consumed(dtrace_buffer_t *, hrtime_t when);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
static int dtrace_priv_proc(dtrace_state_t *, dtrace_mstate_t *);
static void dtrace_getf_barrier(void);
/* The *_remains() variants additionally take a size_t out-parameter. */
static int dtrace_canload_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);
static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);
5127c478bdstevel@tonic-gate
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
/*
 * Variadic front end to dtrace_vpanic():  the arguments following 'format'
 * are gathered into a va_list and handed, together with the format string
 * itself, to dtrace_vpanic().
 */
void
dtrace_panic(const char *format, ...)
{
	va_list args;

	va_start(args, format);
	dtrace_vpanic(format, args);
	va_end(args);
}
5397c478bdstevel@tonic-gate
/*
 * Report a failed assertion by way of dtrace_panic().  The assertion text
 * 'a', the file name 'f' and the line number 'l' are formatted into the
 * panic message.
 */
int
dtrace_assfail(const char *a, const char *f, int l)
{
	uintptr_t offs = (uintptr_t)f;

	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * The return value carries no meaning.  It is deliberately computed
	 * from both pointer arguments so that even an aggressive compiler
	 * cannot optimize the expression away.
	 */
	return (a[offs]);
}
5517c478bdstevel@tonic-gate
/*
 * Atomically bump the error counter at 'counter' from probe context.
 *
 * The update is a compare-and-swap retry loop built on dtrace_cas32().  Most
 * counters that are stored to in probe context are per-CPU, but the error
 * conditions counted here are obscure enough that they do not merit per-CPU
 * storage.  Concurrent increments from different CPUs will therefore contend
 * with one another; that is acceptable because these counters are not
 * expected to be hot in a correctly constructed enabling.
 */
static void
dtrace_error(uint32_t *counter)
{
	uint32_t seen, next;

	for (;;) {
		seen = *counter;
		next = seen + 1;

		/*
		 * Should the increment wrap to zero, store 1 instead so that
		 * a counter that has recorded errors never reads as zero.
		 * (The counter is 32 bits wide because a 64-bit compare&swap
		 * is not guaranteed to be available.)  Wrapping here is
		 * about as improbable as the overflow of predicate cache IDs
		 * discussed in dtrace_predicate_create().
		 */
		if (next == 0)
			next = 1;

		/* Done once the swap observes the value that we read. */
		if (dtrace_cas32(counter, seen, next) == seen)
			break;
	}
}
590a1b5e53bmc
/*
 * Instantiate the DTRACE_LOADFUNC macro once for each supported load width
 * (8, 16, 32 and 64 bits), defining the functions that load a uint8_t, a
 * uint16_t, a uint32_t and a uint64_t respectively.  The invocations carry
 * no trailing semicolon, hence the CSTYLED markers around them.
 */
/* BEGIN CSTYLED */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
/* END CSTYLED */
6017c478bdstevel@tonic-gate
6027c478bdstevel@tonic-gatestatic int
6037c478bdstevel@tonic-gatedtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
6047c478bdstevel@tonic-gate{
6057c478bdstevel@tonic-gate	if (dest < mstate->dtms_scratch_base)
6067c478bdstevel@tonic-gate		return (0);
6077c478bdstevel@tonic-gate
6087c478bdstevel@tonic-gate	if (dest + size < dest)
6097c478bdstevel@tonic-gate		return (0);
6107c478bdstevel@tonic-gate
6117c478bdstevel@tonic-gate	if (dest + size > mstate->dtms_scratch_ptr)
6127c478bdstevel@tonic-gate		return (0);
6137c478bdstevel@tonic-gate
6147c478bdstevel@tonic-gate	return (1);
6157c478bdstevel@tonic-gate}
6167c478bdstevel@tonic-gate
6177c478bdstevel@tonic-gatestatic int
618771e39cPatrick Mooneydtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain,
6197c478bdstevel@tonic-gate    dtrace_statvar_t **svars, int nsvars)
6207c478bdstevel@tonic-gate{
6217c478bdstevel@tonic-gate	int i;
622395c7a3Bryan Cantrill	size_t maxglobalsize, maxlocalsize;
623395c7a3Bryan Cantrill
624395c7a3Bryan Cantrill	if (nsvars == 0)
625395c7a3Bryan Cantrill		return (0);
626395c7a3Bryan Cantrill
627d65f2bbBryan Cantrill	maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t);
628d65f2bbBryan Cantrill	maxlocalsize = maxglobalsize * NCPU;
6297c478bdstevel@tonic-gate
6307c478bdstevel@tonic-gate	for (i = 0; i < nsvars; i++) {
6317c478bdstevel@tonic-gate		dtrace_statvar_t *svar = svars[i];
632395c7a3Bryan Cantrill		uint8_t scope;
633395c7a3Bryan Cantrill		size_t size;
6347c478bdstevel@tonic-gate
635395c7a3Bryan Cantrill		if (svar == NULL || (size = svar->dtsv_size) == 0)
6367c478bdstevel@tonic-gate			continue;
6377c478bdstevel@tonic-gate
638395c7a3Bryan Cantrill		scope = svar->dtsv_var.dtdv_scope;
639395c7a3Bryan Cantrill
640395c7a3Bryan Cantrill		/*
641395c7a3Bryan Cantrill		 * We verify that our size is valid in the spirit of providing
642395c7a3Bryan Cantrill		 * defense in depth:  we want to prevent attackers from using
643395c7a3Bryan Cantrill		 * DTrace to escalate an orthogonal kernel heap corruption bug
644395c7a3Bryan Cantrill		 * into the ability to store to arbitrary locations in memory.
645395c7a3Bryan Cantrill		 */
646d65f2bbBryan Cantrill		VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) ||
647d65f2bbBryan Cantrill		    (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize));
648395c7a3Bryan Cantrill
649771e39cPatrick Mooney		if (DTRACE_INRANGE(addr, sz, svar->dtsv_data,
650771e39cPatrick Mooney		    svar->dtsv_size)) {
651771e39cPatrick Mooney			DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data,
652771e39cPatrick Mooney			    svar->dtsv_size);
6537c478bdstevel@tonic-gate			return (1);
654771e39cPatrick Mooney		}
6557c478bdstevel@tonic-gate	}
6567c478bdstevel@tonic-gate
6577c478bdstevel@tonic-gate	return (0);
6587c478bdstevel@tonic-gate}
6597c478bdstevel@tonic-gate
6607c478bdstevel@tonic-gate/*
6617c478bdstevel@tonic-gate * Check to see if the address is within a memory region to which a store may
6627c478bdstevel@tonic-gate * be issued.  This includes the DTrace scratch areas, and any DTrace variable
6637c478bdstevel@tonic-gate * region.  The caller of dtrace_canstore() is responsible for performing any
6647c478bdstevel@tonic-gate * alignment checks that are needed before stores are actually executed.
6657c478bdstevel@tonic-gate */
6667c478bdstevel@tonic-gatestatic int
6677c478bdstevel@tonic-gatedtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
6687c478bdstevel@tonic-gate    dtrace_vstate_t *vstate)
6697c478bdstevel@tonic-gate{
670771e39cPatrick Mooney	return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate));
671771e39cPatrick Mooney}
672771e39cPatrick Mooney
673771e39cPatrick Mooney/*
674771e39cPatrick Mooney * Implementation of dtrace_canstore which communicates the upper bound of the
675771e39cPatrick Mooney * allowed memory region.
676771e39cPatrick Mooney */
677771e39cPatrick Mooneystatic int
678771e39cPatrick Mooneydtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain,
679771e39cPatrick Mooney    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
680771e39cPatrick Mooney{
6817c478bdstevel@tonic-gate	/*
6827c478bdstevel@tonic-gate	 * First, check to see if the address is in scratch space...
6837c478bdstevel@tonic-gate	 */
684e0aad1edp	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
685771e39cPatrick Mooney	    mstate->dtms_scratch_size)) {
686771e39cPatrick Mooney		DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base,
687771e39cPatrick Mooney		    mstate->dtms_scratch_size);
6887c478bdstevel@tonic-gate		return (1);
689771e39cPatrick Mooney	}
6907c478bdstevel@tonic-gate
6917c478bdstevel@tonic-gate	/*
6927c478bdstevel@tonic-gate	 * Now check to see if it's a dynamic variable.  This check will pick
6937c478bdstevel@tonic-gate	 * up both thread-local variables and any global dynamically-allocated
6947c478bdstevel@tonic-gate	 * variables.
6957c478bdstevel@tonic-gate	 */
696b0f673cBryan Cantrill	if (DTRACE_INRANGE(addr, sz, vstate->dtvs_dynvars.dtds_base,
697fbcb7dbjhaslam	    vstate->dtvs_dynvars.dtds_size)) {
698fbcb7dbjhaslam		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
699fbcb7dbjhaslam		uintptr_t base = (uintptr_t)dstate->dtds_base +
700fbcb7dbjhaslam		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
701fbcb7dbjhaslam		uintptr_t chunkoffs;
7021c0cef6Bryan Cantrill		dtrace_dynvar_t *dvar;
703fbcb7dbjhaslam
704fbcb7dbjhaslam		/*
705fbcb7dbjhaslam		 * Before we assume that we can store here, we need to make
706fbcb7dbjhaslam		 * sure that it isn't in our metadata -- storing to our
707fbcb7dbjhaslam		 * dynamic variable metadata would corrupt our state.  For
708fbcb7dbjhaslam		 * the range to not include any dynamic variable metadata,
709fbcb7dbjhaslam		 * it must:
710fbcb7dbjhaslam		 *
711fbcb7dbjhaslam		 *	(1) Start above the hash table that is at the base of
712fbcb7dbjhaslam		 *	the dynamic variable space
713fbcb7dbjhaslam		 *
714fbcb7dbjhaslam		 *	(2) Have a starting chunk offset that is beyond the
715fbcb7dbjhaslam		 *	dtrace_dynvar_t that is at the base of every chunk
716fbcb7dbjhaslam		 *
717fbcb7dbjhaslam		 *	(3) Not span a chunk boundary
718fbcb7dbjhaslam		 *
7191c0cef6Bryan Cantrill		 *	(4) Not be in the tuple space of a dynamic variable
7201c0cef6Bryan Cantrill		 *
721fbcb7dbjhaslam		 */
722fbcb7dbjhaslam		if (addr < base)
723fbcb7dbjhaslam			return (0);
724fbcb7dbjhaslam
725fbcb7dbjhaslam		chunkoffs = (addr - base) % dstate->dtds_chunksize;
726fbcb7dbjhaslam
727fbcb7dbjhaslam		if (chunkoffs < sizeof (dtrace_dynvar_t))
728fbcb7dbjhaslam			return (0);
729fbcb7dbjhaslam
730fbcb7dbjhaslam		if (chunkoffs + sz > dstate->dtds_chunksize)
731fbcb7dbjhaslam			return (0);
732fbcb7dbjhaslam
7331c0cef6Bryan Cantrill		dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs);
7341c0cef6Bryan Cantrill
7351c0cef6Bryan Cantrill		if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE)
7361c0cef6Bryan Cantrill			return (0);
7371c0cef6Bryan Cantrill
7381c0cef6Bryan Cantrill		if (chunkoffs < sizeof (dtrace_dynvar_t) +
7391c0cef6Bryan Cantrill		    ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t)))
7401c0cef6Bryan Cantrill			return (0);
7411c0cef6Bryan Cantrill
742771e39cPatrick Mooney		DTRACE_RANGE_REMAIN(remain, addr, dvar, dstate->dtds_chunksize);
7437c478bdstevel@tonic-gate		return (1);
744fbcb7dbjhaslam	}
7457c478bdstevel@tonic-gate
7467c478bdstevel@tonic-gate	/*
7477c478bdstevel@tonic-gate	 * Finally, check the static local and global variables.  These checks
7487c478bdstevel@tonic-gate	 * take the longest, so we perform them last.
7497c478bdstevel@tonic-gate	 */
750771e39cPatrick Mooney	if (dtrace_canstore_statvar(addr, sz, remain,
7517c478bdstevel@tonic-gate	    vstate->dtvs_locals, vstate->dtvs_nlocals))
7527c478bdstevel@tonic-gate		return (1);
7537c478bdstevel@tonic-gate
754771e39cPatrick Mooney	if (dtrace_canstore_statvar(addr, sz, remain,
7557c478bdstevel@tonic-gate	    vstate->dtvs_globals, vstate->dtvs_nglobals))
7567c478bdstevel@tonic-gate		return (1);
7577c478bdstevel@tonic-gate
7587c478bdstevel@tonic-gate	return (0);
7597c478bdstevel@tonic-gate}
7607c478bdstevel@tonic-gate
761e0aad1edp
762e0aad1edp/*
763e0aad1edp * Convenience routine to check to see if the address is within a memory
764e0aad1edp * region in which a load may be issued given the user's privilege level;
765e0aad1edp * if not, it sets the appropriate error flags and loads 'addr' into the
766e0aad1edp * illegal value slot.
767e0aad1edp *
768e0aad1edp * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
769e0aad1edp * appropriate memory access protection.
770e0aad1edp */
771e0aad1edpstatic int
772e0aad1edpdtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
773e0aad1edp    dtrace_vstate_t *vstate)
774e0aad1edp{
775771e39cPatrick Mooney	return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate));
776771e39cPatrick Mooney}
777771e39cPatrick Mooney
778771e39cPatrick Mooney/*
779771e39cPatrick Mooney * Implementation of dtrace_canload which communicates the upper bound of the
780771e39cPatrick Mooney * allowed memory region.
781771e39cPatrick Mooney */
782771e39cPatrick Mooneystatic int
783771e39cPatrick Mooneydtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain,
784771e39cPatrick Mooney    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
785771e39cPatrick Mooney{
786e0aad1edp	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
787b0f673cBryan Cantrill	file_t *fp;
788e0aad1edp
789e0aad1edp	/*
790e0aad1edp	 * If we hold the privilege to read from kernel memory, then
791e0aad1edp	 * everything is readable.
792e0aad1edp	 */
793771e39cPatrick Mooney	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
794771e39cPatrick Mooney		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
795e0aad1edp		return (1);
796771e39cPatrick Mooney	}
797e0aad1edp
798e0aad1edp	/*
799e0aad1edp	 * You can obviously read that which you can store.
800e0aad1edp	 */
801771e39cPatrick Mooney	if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate))
802e0aad1edp		return (1);
803e0aad1edp
804e0aad1edp	/*
805e0aad1edp	 * We're allowed to read from our own string table.
806e0aad1edp	 */
807b0f673cBryan Cantrill	if (DTRACE_INRANGE(addr, sz, mstate->dtms_difo->dtdo_strtab,
808771e39cPatrick Mooney	    mstate->dtms_difo->dtdo_strlen)) {
809771e39cPatrick Mooney		DTRACE_RANGE_REMAIN(remain, addr,
810771e39cPatrick Mooney		    mstate->dtms_difo->dtdo_strtab,
811771e39cPatrick Mooney		    mstate->dtms_difo->dtdo_strlen);
812e0aad1edp		return (1);
813771e39cPatrick Mooney	}
814e0aad1edp
815b0f673cBryan Cantrill	if (vstate->dtvs_state != NULL &&
816b0f673cBryan Cantrill	    dtrace_priv_proc(vstate->dtvs_state, mstate)) {
817b0f673cBryan Cantrill		proc_t *p;
818b0f673cBryan Cantrill
819b0f673cBryan Cantrill		/*
820b0f673cBryan Cantrill		 * When we have privileges to the current process, there are
821b0f673cBryan Cantrill		 * several context-related kernel structures that are safe to
822b0f673cBryan Cantrill		 * read, even absent the privilege to read from kernel memory.
823b0f673cBryan Cantrill		 * These reads are safe because these structures contain only
824b0f673cBryan Cantrill		 * state that (1) we're permitted to read, (2) is harmless or
825b0f673cBryan Cantrill		 * (3) contains pointers to additional kernel state that we're
826b0f673cBryan Cantrill		 * not permitted to read (and as such, do not present an
827b0f673cBryan Cantrill		 * opportunity for privilege escalation).  Finally (and
828b0f673cBryan Cantrill		 * critically), because of the nature of their relation with
829b0f673cBryan Cantrill		 * the current thread context, the memory associated with these
830b0f673cBryan Cantrill		 * structures cannot change over the duration of probe context,
831b0f673cBryan Cantrill		 * and it is therefore impossible for this memory to be
832b0f673cBryan Cantrill		 * deallocated and reallocated as something else while it's
833b0f673cBryan Cantrill		 * being operated upon.
834b0f673cBryan Cantrill		 */
835771e39cPatrick Mooney		if (DTRACE_INRANGE(addr, sz, curthread, sizeof (kthread_t))) {
836771e39cPatrick Mooney			DTRACE_RANGE_REMAIN(remain, addr, curthread,
837771e39cPatrick Mooney			    sizeof (kthread_t));
838b0f673cBryan Cantrill			return (1);
839771e39cPatrick Mooney		}
840b0f673cBryan Cantrill
841b0f673cBryan Cantrill		if ((p = curthread->t_procp) != NULL && DTRACE_INRANGE(addr,
842b0f673cBryan Cantrill		    sz, curthread->t_procp, sizeof (proc_t))) {
843771e39cPatrick Mooney			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_procp,
844771e39cPatrick Mooney			    sizeof (proc_t));
845b0f673cBryan Cantrill			return (1);
846b0f673cBryan Cantrill		}
847b0f673cBryan Cantrill
848b0f673cBryan Cantrill		if (curthread->t_cred != NULL && DTRACE_INRANGE(addr, sz,
849b0f673cBryan Cantrill		    curthread->t_cred, sizeof (cred_t))) {
850771e39cPatrick Mooney			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cred,
851771e39cPatrick Mooney			    sizeof (cred_t));
852b0f673cBryan Cantrill			return (1);
853b0f673cBryan Cantrill		}
854b0f673cBryan Cantrill
855b0f673cBryan Cantrill		if (p != NULL && p->p_pidp != NULL && DTRACE_INRANGE(addr, sz,
856b0f673cBryan Cantrill		    &(p->p_pidp->pid_id), sizeof (pid_t))) {
857771e39cPatrick Mooney			DTRACE_RANGE_REMAIN(remain, addr, &(p->p_pidp->pid_id),
858771e39cPatrick Mooney			    sizeof (pid_t));
859b0f673cBryan Cantrill			return (1);
860b0f673cBryan Cantrill		}
861b0f673cBryan Cantrill
862b0f673cBryan Cantrill		if (curthread->t_cpu != NULL && DTRACE_INRANGE(addr, sz,
863b0f673cBryan Cantrill		    curthread->t_cpu, offsetof(cpu_t, cpu_pause_thread))) {
864771e39cPatrick Mooney			DTRACE_RANGE_REMAIN(remain, addr, curthread->t_cpu,
865771e39cPatrick Mooney			    offsetof(cpu_t, cpu_pause_thread));
866b0f673cBryan Cantrill			return (1);
867b0f673cBryan Cantrill		}
868b0f673cBryan Cantrill	}
869b0f673cBryan Cantrill
870b0f673cBryan Cantrill	if ((fp = mstate->dtms_getf) != NULL) {
871b0f673cBryan Cantrill		uintptr_t psz = sizeof (void *);
872b0f673cBryan Cantrill		vnode_t *vp;
873b0f673cBryan Cantrill		vnodeops_t *op;
874b0f673cBryan Cantrill
875b0f673cBryan Cantrill		/*
876b0f673cBryan Cantrill		 * When getf() returns a file_t, the enabling is implicitly
877b0f673cBryan Cantrill		 * granted the (transient) right to read the returned file_t
878b0f673cBryan Cantrill		 * as well as the v_path and v_op->vnop_name of the underlying
879b0f673cBryan Cantrill		 * vnode.  These accesses are allowed after a successful
880b0f673cBryan Cantrill		 * getf() because the members that they refer to cannot change
881b0f673cBryan Cantrill		 * once set -- and the barrier logic in the kernel's closef()
882b0f673cBryan Cantrill		 * path assures that the file_t and its referenced vode_t
883b0f673cBryan Cantrill		 * cannot themselves be stale (that is, it impossible for
884b0f673cBryan Cantrill		 * either dtms_getf itself or its f_vnode member to reference
885b0f673cBryan Cantrill		 * freed memory).
886b0f673cBryan Cantrill		 */
887771e39cPatrick Mooney		if (DTRACE_INRANGE(addr, sz, fp, sizeof (file_t))) {
888771e39cPatrick Mooney			DTRACE_RANGE_REMAIN(remain, addr, fp, sizeof (file_t));
889b0f673cBryan Cantrill			return (1);
890771e39cPatrick Mooney		}
891b0f673cBryan Cantrill
892b0f673cBryan Cantrill		if ((vp = fp->f_vnode) != NULL) {
893771e39cPatrick Mooney			size_t slen;
894771e39cPatrick Mooney
895771e39cPatrick Mooney			if (DTRACE_INRANGE(addr, sz, &vp->v_path, psz)) {
896771e39cPatrick Mooney				DTRACE_RANGE_REMAIN(remain, addr, &vp->v_path,
897771e39cPatrick Mooney				    psz);
898b0f673cBryan Cantrill				return (1);
899771e39cPatrick Mooney			}
900b0f673cBryan Cantrill
901771e39cPatrick Mooney			slen = strlen(vp->v_path) + 1;
902771e39cPatrick Mooney			if (DTRACE_INRANGE(addr, sz, vp->v_path, slen)) {
903771e39cPatrick Mooney				DTRACE_RANGE_REMAIN(remain, addr, vp->v_path,
904771e39cPatrick Mooney				    slen);
905b0f673cBryan Cantrill				return (1);
906b0f673cBryan Cantrill			}
907b0f673cBryan Cantrill
908771e39cPatrick Mooney			if (DTRACE_INRANGE(addr, sz, &vp->v_op, psz)) {
909771e39cPatrick Mooney				DTRACE_RANGE_REMAIN(remain, addr, &vp->v_op,
910771e39cPatrick Mooney				    psz);
911b0f673cBryan Cantrill				return (1);
912771e39cPatrick Mooney			}
913b0f673cBryan Cantrill
914b0f673cBryan Cantrill			if ((op = vp->v_op) != NULL &&
915b0f673cBryan Cantrill			    DTRACE_INRANGE(addr, sz, &op->vnop_name, psz)) {
916771e39cPatrick Mooney				DTRACE_RANGE_REMAIN(remain, addr,
917771e39cPatrick Mooney				    &op->vnop_name, psz);
918b0f673cBryan Cantrill				return (1);
919b0f673cBryan Cantrill			}
920b0f673cBryan Cantrill
921b0f673cBryan Cantrill			if (op != NULL && op->vnop_name != NULL &&
922b0f673cBryan Cantrill			    DTRACE_INRANGE(addr, sz, op->vnop_name,
923771e39cPatrick Mooney			    (slen = strlen(op->vnop_name) + 1))) {
924771e39cPatrick Mooney				DTRACE_RANGE_REMAIN(remain, addr,
925771e39cPatrick Mooney				    op->vnop_name, slen);
926b0f673cBryan Cantrill				return (1);
927b0f673cBryan Cantrill			}
928b0f673cBryan Cantrill		}
929b0f673cBryan Cantrill	}
930b0f673cBryan Cantrill
931e0aad1edp	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
932e0aad1edp	*illval = addr;
933e0aad1edp	return (0);
934e0aad1edp}
935e0aad1edp
936e0aad1edp/*
937e0aad1edp * Convenience routine to check to see if a given string is within a memory
938e0aad1edp * region in which a load may be issued given the user's privilege level;
939e0aad1edp * this exists so that we don't need to issue unnecessary dtrace_strlen()
940e0aad1edp * calls in the event that the user has all privileges.
941e0aad1edp */
942e0aad1edpstatic int
943771e39cPatrick Mooneydtrace_strcanload(uint64_t addr, size_t sz, size_t *remain,
944771e39cPatrick Mooney    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
945e0aad1edp{
946771e39cPatrick Mooney	size_t rsize;
947e0aad1edp
948e0aad1edp	/*
949e0aad1edp	 * If we hold the privilege to read from kernel memory, then
950e0aad1edp	 * everything is readable.
951e0aad1edp	 */
952771e39cPatrick Mooney	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
953771e39cPatrick Mooney		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
954e0aad1edp		return (1);
955771e39cPatrick Mooney	}
956e0aad1edp
957771e39cPatrick Mooney	/*
958771e39cPatrick Mooney	 * Even if the caller is uninterested in querying the remaining valid
959771e39cPatrick Mooney	 * range, it is required to ensure that the access is allowed.
960771e39cPatrick Mooney	 */
961771e39cPatrick Mooney	if (remain == NULL) {
962771e39cPatrick Mooney		remain = &rsize;
963771e39cPatrick Mooney	}
964771e39cPatrick Mooney	if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) {
965771e39cPatrick Mooney		size_t strsz;
966771e39cPatrick Mooney		/*
967771e39cPatrick Mooney		 * Perform the strlen after determining the length of the
968771e39cPatrick Mooney		 * memory region which is accessible.  This prevents timing
969771e39cPatrick Mooney		 * information from being used to find NULs in memory which is
970771e39cPatrick Mooney		 * not accessible to the caller.
971771e39cPatrick Mooney		 */
972771e39cPatrick Mooney		strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
973771e39cPatrick Mooney		    MIN(sz, *remain));
974771e39cPatrick Mooney		if (strsz <= *remain) {
975771e39cPatrick Mooney			return (1);
976771e39cPatrick Mooney		}
977771e39cPatrick Mooney	}
978e0aad1edp
979e0aad1edp	return (0);
980e0aad1edp}
981e0aad1edp
982e0aad1edp/*
983e0aad1edp * Convenience routine to check to see if a given variable is within a memory
984e0aad1edp * region in which a load may be issued given the user's privilege level.
985e0aad1edp */
986e0aad1edpstatic int
987771e39cPatrick Mooneydtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain,
988771e39cPatrick Mooney    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
989e0aad1edp{
990e0aad1edp	size_t sz;
991e0aad1edp	ASSERT(type->dtdt_flags & DIF_TF_BYREF);
992e0aad1edp
993e0aad1edp	/*
994771e39cPatrick Mooney	 * Calculate the max size before performing any checks since even
995771e39cPatrick Mooney	 * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function
996771e39cPatrick Mooney	 * return the max length via 'remain'.
997771e39cPatrick Mooney	 */
998771e39cPatrick Mooney	if (type->dtdt_kind == DIF_TYPE_STRING) {
999771e39cPatrick Mooney		dtrace_state_t *state = vstate->dtvs_state;
1000771e39cPatrick Mooney
1001771e39cPatrick Mooney		if (state != NULL) {
1002771e39cPatrick Mooney			sz = state->dts_options[DTRACEOPT_STRSIZE];
1003771e39cPatrick Mooney		} else {
1004771e39cPatrick Mooney			/*
1005771e39cPatrick Mooney			 * In helper context, we have a NULL state; fall back
1006771e39cPatrick Mooney			 * to using the system-wide default for the string size
1007771e39cPatrick Mooney			 * in this case.
1008771e39cPatrick Mooney			 */
1009771e39cPatrick Mooney			sz = dtrace_strsize_default;
1010771e39cPatrick Mooney		}
1011771e39cPatrick Mooney	} else {
1012771e39cPatrick Mooney		sz = type->dtdt_size;
1013771e39cPatrick Mooney	}
1014771e39cPatrick Mooney
1015771e39cPatrick Mooney	/*
1016e0aad1edp	 * If we hold the privilege to read from kernel memory, then
1017e0aad1edp	 * everything is readable.
1018e0aad1edp	 */
1019771e39cPatrick Mooney	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
1020771e39cPatrick Mooney		DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz);
1021e0aad1edp		return (1);
1022771e39cPatrick Mooney	}
1023e0aad1edp
1024771e39cPatrick Mooney	if (type->dtdt_kind == DIF_TYPE_STRING) {
1025771e39cPatrick Mooney		return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate,
1026771e39cPatrick Mooney		    vstate));
1027771e39cPatrick Mooney	}
1028771e39cPatrick Mooney	return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate,
1029771e39cPatrick Mooney	    vstate));
1030e0aad1edp}
1031e0aad1edp
10327c478bdstevel@tonic-gate/*
1033f497f9fJoshua M. Clulow * Convert a string to a signed integer using safe loads.
1034f497f9fJoshua M. Clulow *
1035f497f9fJoshua M. Clulow * NOTE: This function uses various macros from strtolctype.h to manipulate
1036f497f9fJoshua M. Clulow * digit values, etc -- these have all been checked to ensure they make
1037f497f9fJoshua M. Clulow * no additional function calls.
1038f497f9fJoshua M. Clulow */
1039f497f9fJoshua M. Clulowstatic int64_t
1040f497f9fJoshua M. Clulowdtrace_strtoll(char *input, int base, size_t limit)
1041f497f9fJoshua M. Clulow{
1042f497f9fJoshua M. Clulow	uintptr_t pos = (uintptr_t)input;
1043f497f9fJoshua M. Clulow	int64_t val = 0;
1044f497f9fJoshua M. Clulow	int x;
1045f497f9fJoshua M. Clulow	boolean_t neg = B_FALSE;
1046f497f9fJoshua M. Clulow	char c, cc, ccc;
1047f497f9fJoshua M. Clulow	uintptr_t end = pos + limit;
1048f497f9fJoshua M. Clulow
1049f497f9fJoshua M. Clulow	/*
1050f497f9fJoshua M. Clulow	 * Consume any whitespace preceding digits.
1051f497f9fJoshua M. Clulow	 */
1052f497f9fJoshua M. Clulow	while ((c = dtrace_load8(pos)) == ' ' || c == '\t')
1053f497f9fJoshua M. Clulow		pos++;
1054f497f9fJoshua M. Clulow
1055f497f9fJoshua M. Clulow	/*
1056f497f9fJoshua M. Clulow	 * Handle an explicit sign if one is present.
1057f497f9fJoshua M. Clulow	 */
1058f497f9fJoshua M. Clulow	if (c == '-' || c == '+') {
1059f497f9fJoshua M. Clulow		if (c == '-')
1060f497f9fJoshua M. Clulow			neg = B_TRUE;
1061f497f9fJoshua M. Clulow		c = dtrace_load8(++pos);
1062f497f9fJoshua M. Clulow	}
1063f497f9fJoshua M. Clulow
1064f497f9fJoshua M. Clulow	/*
1065f497f9fJoshua M. Clulow	 * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it
1066f497f9fJoshua M. Clulow	 * if present.
1067f497f9fJoshua M. Clulow	 */
1068f497f9fJoshua M. Clulow	if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' ||
1069f497f9fJoshua M. Clulow	    cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) {
1070f497f9fJoshua M. Clulow		pos += 2;
1071f497f9fJoshua M. Clulow		c = ccc;
1072f497f9fJoshua M. Clulow	}
1073f497f9fJoshua M. Clulow
1074f497f9fJoshua M. Clulow	/*
1075f497f9fJoshua M. Clulow	 * Read in contiguous digits until the first non-digit character.
1076f497f9fJoshua M. Clulow	 */
1077f497f9fJoshua M. Clulow	for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base;
1078f497f9fJoshua M. Clulow	    c = dtrace_load8(++pos))
1079f497f9fJoshua M. Clulow		val = val * base + x;
1080f497f9fJoshua M. Clulow
1081f497f9fJoshua M. Clulow	return (neg ? -val : val);
1082f497f9fJoshua M. Clulow}
1083f497f9fJoshua M. Clulow
1084f497f9fJoshua M. Clulow/*
10857c478bdstevel@tonic-gate * Compare two strings using safe loads.
10867c478bdstevel@tonic-gate */
10877c478bdstevel@tonic-gatestatic int
10887c478bdstevel@tonic-gatedtrace_strncmp(char *s1, char *s2, size_t limit)
10897c478bdstevel@tonic-gate{
10907c478bdstevel@tonic-gate	uint8_t c1, c2;
10917c478bdstevel@tonic-gate	volatile uint16_t *flags;
10927c478bdstevel@tonic-gate
10937c478bdstevel@tonic-gate	if (s1 == s2 || limit == 0)
10947c478bdstevel@tonic-gate		return (0);
10957c478bdstevel@tonic-gate
10967c478bdstevel@tonic-gate	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
10977c478bdstevel@tonic-gate
10987c478bdstevel@tonic-gate	do {
10997c478bdstevel@tonic-gate		if (s1 == NULL) {
11007c478bdstevel@tonic-gate			c1 = '\0';
11017c478bdstevel@tonic-gate		} else {
11027c478bdstevel@tonic-gate			c1 = dtrace_load8((uintptr_t)s1++);
11037c478bdstevel@tonic-gate		}
11047c478bdstevel@tonic-gate
11057c478bdstevel@tonic-gate		if (s2 == NULL) {
11067c478bdstevel@tonic-gate			c2 = '\0';
11077c478bdstevel@tonic-gate		} else {
11087c478bdstevel@tonic-gate			c2 = dtrace_load8((uintptr_t)s2++);
11097c478bdstevel@tonic-gate		}
11107c478bdstevel@tonic-gate
11117c478bdstevel@tonic-gate		if (c1 != c2)
11127c478bdstevel@tonic-gate			return (c1 - c2);
11137c478bdstevel@tonic-gate	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));
11147c478bdstevel@tonic-gate
11157c478bdstevel@tonic-gate	return (0);
11167c478bdstevel@tonic-gate}
11177c478bdstevel@tonic-gate
/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * lim parameter is used to specify a maximum length to ensure completion:
 * if no NUL is found in the first lim bytes, lim is returned.
 *
 * The count is kept in a size_t to match both lim and the return type; a
 * narrower counter could wrap before ever reaching a large lim.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	size_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}
11347c478bdstevel@tonic-gate
11357c478bdstevel@tonic-gate/*
11367c478bdstevel@tonic-gate * Check if an address falls within a toxic region.
11377c478bdstevel@tonic-gate */
11387c478bdstevel@tonic-gatestatic int
11397c478bdstevel@tonic-gatedtrace_istoxic(uintptr_t kaddr, size_t size)
11407c478bdstevel@tonic-gate{
11417c478bdstevel@tonic-gate	uintptr_t taddr, tsize;
11427c478bdstevel@tonic-gate	int i;
11437c478bdstevel@tonic-gate
11447c478bdstevel@tonic-gate	for (i = 0; i < dtrace_toxranges; i++) {
11457c478bdstevel@tonic-gate		taddr = dtrace_toxrange[i].dtt_base;
11467c478bdstevel@tonic-gate		tsize = dtrace_toxrange[i].dtt_limit - taddr;
11477c478bdstevel@tonic-gate
11487c478bdstevel@tonic-gate		if (kaddr - taddr < tsize) {
11497c478bdstevel@tonic-gate			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
11507c478bdstevel@tonic-gate			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
11517c478bdstevel@tonic-gate			return (1);
11527c478bdstevel@tonic-gate		}
11537c478bdstevel@tonic-gate
11547c478bdstevel@tonic-gate		if (taddr - kaddr < size) {
11557c478bdstevel@tonic-gate			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
11567c478bdstevel@tonic-gate			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
11577c478bdstevel@tonic-gate			return (1);
11587c478bdstevel@tonic-gate		}
11597c478bdstevel@tonic-gate	}
11607c478bdstevel@tonic-gate
11617c478bdstevel@tonic-gate	return (0);
11627c478bdstevel@tonic-gate}
11637c478bdstevel@tonic-gate
/*
 * Copy len bytes from src to dst, reading the source through safe memory
 * accesses.  The src is assumed to be unsafe memory specified by the DIF
 * program; the dst is assumed to be safe, DTrace-managed memory that may be
 * stored to directly.  As with standard bcopy, overlapping regions are copied
 * correctly:  the copy direction is chosen from the relative position of the
 * two buffers.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	const uint8_t *from = src;
	uint8_t *to = dst;

	if (len == 0)
		return;

	if (to <= from) {
		/* Destination is not above the source:  copy ascending. */
		while (len-- != 0)
			*to++ = dtrace_load8((uintptr_t)from++);
		return;
	}

	/* Destination is above the source:  copy descending from the end. */
	from += len;
	to += len;

	while (len-- != 0)
		*--to = dtrace_load8((uintptr_t)--from);
}
11917c478bdstevel@tonic-gate
/*
 * Copy a string from src to dst using safe memory accesses.  Copying stops
 * after len bytes, or after a nul byte has been copied, whichever comes
 * first; the nul byte, if reached, is stored as well.  The src is assumed to
 * be unsafe memory specified by the DIF program; the dst is assumed to be
 * safe, DTrace-managed memory that may be stored to directly.  Unlike
 * dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	const uint8_t *from = src;
	uint8_t *to = dst;
	uint8_t byte;

	for (; len != 0; len--) {
		byte = dtrace_load8((uintptr_t)from++);
		*to++ = byte;

		if (byte == '\0')
			break;
	}
}
12117c478bdstevel@tonic-gate
12127c478bdstevel@tonic-gate/*
12137c478bdstevel@tonic-gate * Copy src to dst, deriving the size and type from the specified (BYREF)
12147c478bdstevel@tonic-gate * variable type.  The src is assumed to be unsafe memory specified by the DIF
12157c478bdstevel@tonic-gate * program.  The dst is assumed to be DTrace variable memory that is of the
12167c478bdstevel@tonic-gate * specified type; we assume that we can store to directly.
12177c478bdstevel@tonic-gate */
12187c478bdstevel@tonic-gatestatic void
1219771e39cPatrick Mooneydtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit)
12207c478bdstevel@tonic-gate{
12217c478bdstevel@tonic-gate	ASSERT(type->dtdt_flags & DIF_TF_BYREF);
12227c478bdstevel@tonic-gate
12237c478bdstevel@tonic-gate	if (type->dtdt_kind == DIF_TYPE_STRING) {
1224771e39cPatrick Mooney		dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit));
12257c478bdstevel@tonic-gate	} else {
1226771e39cPatrick Mooney		dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit));
12277c478bdstevel@tonic-gate	}
12287c478bdstevel@tonic-gate}
12297c478bdstevel@tonic-gate
12307c478bdstevel@tonic-gate/*
12317c478bdstevel@tonic-gate * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
12327c478bdstevel@tonic-gate * unsafe memory specified by the DIF program.  The s2 data is assumed to be
12337c478bdstevel@tonic-gate * safe memory that we can access directly because it is managed by DTrace.
12347c478bdstevel@tonic-gate */
12357c478bdstevel@tonic-gatestatic int
12367c478bdstevel@tonic-gatedtrace_bcmp(const void *s1, const void *s2, size_t len)
12377c478bdstevel@tonic-gate{
12387c478bdstevel@tonic-gate	volatile uint16_t *flags;
12397c478bdstevel@tonic-gate
12407c478bdstevel@tonic-gate	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
12417c478bdstevel@tonic-gate
12427c478bdstevel@tonic-gate	if (s1 == s2)
12437c478bdstevel@tonic-gate		return (0);
12447c478bdstevel@tonic-gate
12457c478bdstevel@tonic-gate	if (s1 == NULL || s2 == NULL)
12467c478bdstevel@tonic-gate		return (1);
12477c478bdstevel@tonic-gate
12487c478bdstevel@tonic-gate	if (s1 != s2 && len != 0) {
12497c478bdstevel@tonic-gate		const uint8_t *ps1 = s1;
12507c478bdstevel@tonic-gate		const uint8_t *ps2 = s2;
12517c478bdstevel@tonic-gate
12527c478bdstevel@tonic-gate		do {
12537c478bdstevel@tonic-gate			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
12547c478bdstevel@tonic-gate				return (1);
12557c478bdstevel@tonic-gate		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
12567c478bdstevel@tonic-gate	}
12577c478bdstevel@tonic-gate	return (0);
12587c478bdstevel@tonic-gate}
12597c478bdstevel@tonic-gate
/*
 * Clear len bytes starting at dst, one byte at a time.  The stores are made
 * directly, so this must only be used on safe, DTrace-managed memory.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uint8_t *bytep = dst;

	while (len != 0) {
		*bytep++ = 0;
		len--;
	}
}
12727c478bdstevel@tonic-gate
/*
 * Add two 128-bit unsigned values.  Each operand is a two-element array
 * holding the low 64 bits in element 0 and the high 64 bits in element 1.
 * A carry out of the low word is propagated into the high word; a carry out
 * of the high word is discarded.  The result is computed in full before it
 * is stored, so sum may refer to the same storage as either addend.
 */
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	uint64_t hi = addend1[1] + addend2[1];

	/* A wrapped low word is smaller than the operands it came from. */
	if (lo < addend1[0] || lo < addend2[0])
		hi += 1;

	sum[0] = lo;
	sum[1] = hi;
}
12856e0bee7jhaslam
12866e0bee7jhaslam/*
12876e0bee7jhaslam * Shift the 128-bit value in a by b. If b is positive, shift left.
12886e0bee7jhaslam * If b is negative, shift right.
12896e0bee7jhaslam */
12906e0bee7jhaslamstatic void
12916e0bee7jhaslamdtrace_shift_128(uint64_t *a, int b)
12926e0bee7jhaslam{
12936e0bee7jhaslam	uint64_t mask;
12946e0bee7jhaslam
12956e0bee7jhaslam	if (b == 0)
12966e0bee7jhaslam		return;
12976e0bee7jhaslam
12986e0bee7jhaslam	if (b < 0) {
12996e0bee7jhaslam		b = -b;
13006e0bee7jhaslam		if (b >= 64) {
13016e0bee7jhaslam			a[0] = a[1] >> (b - 64);
13026e0bee7jhaslam			a[1] = 0;
13036e0bee7jhaslam		} else {
13046e0bee7jhaslam			a[0] >>= b;
13056e0bee7jhaslam			mask = 1LL << (64 - b);
1306