1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/fasttrap_isa.h>
28#include <sys/fasttrap_impl.h>
29#include <sys/dtrace.h>
30#include <sys/dtrace_impl.h>
31#include <sys/cmn_err.h>
32#include <sys/frame.h>
33#include <sys/stack.h>
34#include <sys/sysmacros.h>
35#include <sys/trap.h>
36
37#include <v9/sys/machpcb.h>
38#include <v9/sys/privregs.h>
39
40/*
41 * Lossless User-Land Tracing on SPARC
42 * -----------------------------------
43 *
44 * The Basic Idea
45 *
46 * The most important design constraint is, of course, correct execution of
47 * the user thread above all else. The next most important goal is rapid
48 * execution. We combine execution of instructions in user-land with
49 * emulation of certain instructions in the kernel to aim for complete
50 * correctness and maximal performance.
51 *
52 * We take advantage of the split PC/NPC architecture to speed up logical
53 * single-stepping; when we copy an instruction out to the scratch space in
54 * the ulwp_t structure (held in the %g7 register on SPARC), we can
55 * effectively single step by setting the PC to our scratch space and leaving
56 * the NPC alone. This executes the replaced instruction and then continues
 * on without having to reenter the kernel as with single-stepping. The
 * obvious caveat is for instructions whose execution is PC dependent --
59 * branches, call and link instructions (call and jmpl), and the rdpc
60 * instruction. These instructions cannot be executed in the manner described
61 * so they must be emulated in the kernel.
62 *
 * Emulation for this small set of instructions is fairly simple; the most
64 * difficult part being emulating branch conditions.
65 *
66 *
67 * A Cache Heavy Portfolio
68 *
69 * It's important to note at this time that copying an instruction out to the
70 * ulwp_t scratch space in user-land is rather complicated. SPARC has
71 * separate data and instruction caches so any writes to the D$ (using a
72 * store instruction for example) aren't necessarily reflected in the I$.
73 * The flush instruction can be used to synchronize the two and must be used
74 * for any self-modifying code, but the flush instruction only applies to the
75 * primary address space (the absence of a flusha analogue to the flush
76 * instruction that accepts an ASI argument is an obvious omission from SPARC
77 * v9 where the notion of the alternate address space was introduced on
78 * SPARC). To correctly copy out the instruction we must use a block store
79 * that doesn't allocate in the D$ and ensures synchronization with the I$;
80 * see dtrace_blksuword32() for the implementation  (this function uses
81 * ASI_BLK_COMMIT_S to write a block through the secondary ASI in the manner
82 * described). Refer to the UltraSPARC I/II manual for details on the
83 * ASI_BLK_COMMIT_S ASI.
84 *
85 *
86 * Return Subtleties
87 *
88 * When we're firing a return probe we need to expose the value returned by
89 * the function being traced. Since the function can set the return value
90 * in its last instruction, we need to fire the return probe only _after_
91 * the effects of the instruction are apparent. For instructions that we
92 * emulate, we can call dtrace_probe() after we've performed the emulation;
93 * for instructions that we execute after we return to user-land, we set
94 * %pc to the instruction we copied out (as described above) and set %npc
95 * to a trap instruction stashed in the ulwp_t structure. After the traced
96 * instruction is executed, the trap instruction returns control to the
97 * kernel where we can fire the return probe.
98 *
99 * This need for a second trap in cases where we execute the traced
100 * instruction makes it all the more important to emulate the most common
101 * instructions to avoid the second trip in and out of the kernel.
102 *
103 *
104 * Making it Fast
105 *
106 * Since copying out an instruction is neither simple nor inexpensive for the
107 * CPU, we should attempt to avoid doing it in as many cases as possible.
108 * Since function entry and return are usually the most interesting probe
109 * sites, we attempt to tune the performance of the fasttrap provider around
110 * instructions typically in those places.
111 *
112 * Looking at a bunch of functions in libraries and executables reveals that
113 * most functions begin with either a save or a sethi (to setup a larger
114 * argument to the save) and end with a restore or an or (in the case of leaf
115 * functions). To try to improve performance, we emulate all of these
116 * instructions in the kernel.
117 *
118 * The save and restore instructions are a little tricky since they perform
 * register window manipulation. Rather than trying to tinker with the
120 * register windows from the kernel, we emulate the implicit add that takes
121 * place as part of those instructions and set the %pc to point to a simple
122 * save or restore we've hidden in the ulwp_t structure. If we're in a return
123 * probe so want to make it seem as though the tracepoint has been completely
124 * executed we need to remember that we've pulled this trick with restore and
125 * pull registers from the previous window (the one that we'll switch to once
 * the simple restore instruction is executed) rather than the current one.
 * This is why in the case of emulating a restore we set the DTrace CPU flag
 * CPU_DTRACE_FAKERESTORE before calling dtrace_probe() for the return probes
 * (see fasttrap_return_common()).
130 */
131
/*
 * Bit-field extractors applied to a 32-bit instruction word (for example
 * tp->ftt_instr). Each macro shifts the field of interest down to bit 0
 * and masks it to the field's width.
 */
#define	OP(x)		((x) >> 30)
#define	OP2(x)		(((x) >> 22) & 0x07)
#define	OP3(x)		(((x) >> 19) & 0x3f)
#define	RCOND(x)	(((x) >> 25) & 0x07)
#define	COND(x)		(((x) >> 25) & 0x0f)
#define	A(x)		(((x) >> 29) & 0x01)
#define	I(x)		(((x) >> 13) & 0x01)
#define	RD(x)		(((x) >> 25) & 0x1f)
#define	RS1(x)		(((x) >> 14) & 0x1f)
#define	RS2(x)		(((x) >> 0) & 0x1f)
#define	CC(x)		(((x) >> 20) & 0x03)
/* DISP16 is split in the word: bits 21:20 supply bits 15:14 of the result. */
#define	DISP16(x)	((((x) >> 6) & 0xc000) | ((x) & 0x3fff))
#define	DISP22(x)	((x) & 0x3fffff)
#define	DISP19(x)	((x) & 0x7ffff)
#define	DISP30(x)	((x) & 0x3fffffff)
#define	SW_TRAP(x)	((x) & 0x7f)

/* Values compared against OP3() for the instructions named. */
#define	OP3_OR		0x02
#define	OP3_RD		0x28
#define	OP3_JMPL	0x38
#define	OP3_RETURN	0x39
#define	OP3_TCC		0x3a
#define	OP3_SAVE	0x3c
#define	OP3_RESTORE	0x3d

/*
 * NOTE(review): these values overlap the group above (0x3c, 0x3d), so they
 * presumably apply to a different OP() class -- confirm against the decoder.
 */
#define	OP3_PREFETCH	0x2d
#define	OP3_CASA	0x3c
#define	OP3_PREFETCHA	0x3d
#define	OP3_CASXA	0x3e

/* Values compared against OP2() for the instructions named. */
#define	OP2_ILLTRAP	0x0
#define	OP2_BPcc	0x1
#define	OP2_Bicc	0x2
#define	OP2_BPr		0x3
#define	OP2_SETHI	0x4
#define	OP2_FBPfcc	0x5
#define	OP2_FBfcc	0x6

/*
 * Register numbers as they appear in the RD()/RS1()/RS2() fields and as
 * passed to fasttrap_getreg()/fasttrap_putreg().
 */
#define	R_G0		0
#define	R_O0		8
#define	R_SP		14
#define	R_I0		24
#define	R_I1		25
#define	R_I2		26
#define	R_I3		27
#define	R_I4		28

/*
 * Check the comment in fasttrap.h when changing these offsets or adding
 * new instructions.
 *
 * These are byte offsets added to %g7 (rp->r_g7) to locate the
 * instructions stashed in the user-land thread structure.
 */
#define	FASTTRAP_OFF_SAVE	64
#define	FASTTRAP_OFF_RESTORE	68
#define	FASTTRAP_OFF_FTRET	72
#define	FASTTRAP_OFF_RETURN	76

#define	BREAKPOINT_INSTR	0x91d02001	/* ta 1 */

/*
 * Tunable to let users turn off the fancy save instruction optimization.
 * If a program is non-ABI compliant, there's a possibility that the save
 * instruction optimization could cause an error.
 */
int fasttrap_optimize_save = 1;
196
197static uint64_t
198fasttrap_anarg(struct regs *rp, int argno)
199{
200	uint64_t value;
201
202	if (argno < 6)
203		return ((&rp->r_o0)[argno]);
204
205	if (curproc->p_model == DATAMODEL_NATIVE) {
206		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
207
208		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
209		value = dtrace_fulword(&fr->fr_argd[argno]);
210		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
211		    CPU_DTRACE_BADALIGN);
212	} else {
213		struct frame32 *fr = (struct frame32 *)rp->r_sp;
214
215		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
216		value = dtrace_fuword32(&fr->fr_argd[argno]);
217		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT | CPU_DTRACE_BADADDR |
218		    CPU_DTRACE_BADALIGN);
219	}
220
221	return (value);
222}
223
224static ulong_t fasttrap_getreg(struct regs *, uint_t);
225static void fasttrap_putreg(struct regs *, uint_t, ulong_t);
226
227static void
228fasttrap_usdt_args(fasttrap_probe_t *probe, struct regs *rp,
229    uint_t fake_restore, int argc, uintptr_t *argv)
230{
231	int i, x, cap = MIN(argc, probe->ftp_nargs);
232	int inc = (fake_restore ? 16 : 0);
233
234	/*
235	 * The only way we'll hit the fake_restore case is if a USDT probe is
236	 * invoked as a tail-call. While it wouldn't be incorrect, we can
237	 * avoid a call to fasttrap_getreg(), and safely use rp->r_sp
238	 * directly since a tail-call can't be made if the invoked function
239	 * would use the argument dump space (i.e. if there were more than
240	 * 6 arguments). We take this shortcut because unconditionally rooting
241	 * around for R_FP (R_SP + 16) would be unnecessarily painful.
242	 */
243
244	if (curproc->p_model == DATAMODEL_NATIVE) {
245		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
246		uintptr_t v;
247
248		for (i = 0; i < cap; i++) {
249			x = probe->ftp_argmap[i];
250
251			if (x < 6)
252				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
253			else if (fasttrap_fulword(&fr->fr_argd[x], &v) != 0)
254				argv[i] = 0;
255		}
256
257	} else {
258		struct frame32 *fr = (struct frame32 *)rp->r_sp;
259		uint32_t v;
260
261		for (i = 0; i < cap; i++) {
262			x = probe->ftp_argmap[i];
263
264			if (x < 6)
265				argv[i] = fasttrap_getreg(rp, R_O0 + x + inc);
266			else if (fasttrap_fuword32(&fr->fr_argd[x], &v) != 0)
267				argv[i] = 0;
268		}
269	}
270
271	for (; i < argc; i++) {
272		argv[i] = 0;
273	}
274}
275
276static void
277fasttrap_return_common(struct regs *rp, uintptr_t pc, pid_t pid,
278    uint_t fake_restore)
279{
280	fasttrap_tracepoint_t *tp;
281	fasttrap_bucket_t *bucket;
282	fasttrap_id_t *id;
283	kmutex_t *pid_mtx;
284	dtrace_icookie_t cookie;
285
286	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
287	mutex_enter(pid_mtx);
288	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];
289
290	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
291		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
292		    tp->ftt_proc->ftpc_acount != 0)
293			break;
294	}
295
296	/*
297	 * Don't sweat it if we can't find the tracepoint again; unlike
298	 * when we're in fasttrap_pid_probe(), finding the tracepoint here
299	 * is not essential to the correct execution of the process.
300	 */
301	if (tp == NULL || tp->ftt_retids == NULL) {
302		mutex_exit(pid_mtx);
303		return;
304	}
305
306	for (id = tp->ftt_retids; id != NULL; id = id->fti_next) {
307		fasttrap_probe_t *probe = id->fti_probe;
308
309		if (id->fti_ptype == DTFTP_POST_OFFSETS) {
310			if (probe->ftp_argmap != NULL && fake_restore) {
311				uintptr_t t[5];
312
313				fasttrap_usdt_args(probe, rp, fake_restore,
314				    sizeof (t) / sizeof (t[0]), t);
315
316				cookie = dtrace_interrupt_disable();
317				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
318				dtrace_probe(probe->ftp_id, t[0], t[1],
319				    t[2], t[3], t[4]);
320				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
321				dtrace_interrupt_enable(cookie);
322
323			} else if (probe->ftp_argmap != NULL) {
324				uintptr_t t[5];
325
326				fasttrap_usdt_args(probe, rp, fake_restore,
327				    sizeof (t) / sizeof (t[0]), t);
328
329				dtrace_probe(probe->ftp_id, t[0], t[1],
330				    t[2], t[3], t[4]);
331
332			} else if (fake_restore) {
333				uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
334				uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
335				uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
336				uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
337				uintptr_t arg4 = fasttrap_getreg(rp, R_I4);
338
339				cookie = dtrace_interrupt_disable();
340				DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
341				dtrace_probe(probe->ftp_id, arg0, arg1,
342				    arg2, arg3, arg4);
343				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
344				dtrace_interrupt_enable(cookie);
345
346			} else {
347				dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1,
348				    rp->r_o2, rp->r_o3, rp->r_o4);
349			}
350
351			continue;
352		}
353
354		/*
355		 * If this is only a possible return point, we must
356		 * be looking at a potential tail call in leaf context.
357		 * If the %npc is still within this function, then we
358		 * must have misidentified a jmpl as a tail-call when it
359		 * is, in fact, part of a jump table. It would be nice to
360		 * remove this tracepoint, but this is neither the time
361		 * nor the place.
362		 */
363		if ((tp->ftt_flags & FASTTRAP_F_RETMAYBE) &&
364		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
365			continue;
366
367		/*
368		 * It's possible for a function to branch to the delay slot
369		 * of an instruction that we've identified as a return site.
370		 * We can dectect this spurious return probe activation by
371		 * observing that in this case %npc will be %pc + 4 and %npc
372		 * will be inside the current function (unless the user is
373		 * doing _crazy_ instruction picking in which case there's
374		 * very little we can do). The second check is important
375		 * in case the last instructions of a function make a tail-
376		 * call to the function located immediately subsequent.
377		 */
378		if (rp->r_npc == rp->r_pc + 4 &&
379		    rp->r_npc - probe->ftp_faddr < probe->ftp_fsize)
380			continue;
381
382		/*
383		 * The first argument is the offset of return tracepoint
384		 * in the function; the remaining arguments are the return
385		 * values.
386		 *
387		 * If fake_restore is set, we need to pull the return values
388		 * out of the %i's rather than the %o's -- a little trickier.
389		 */
390		if (!fake_restore) {
391			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
392			    rp->r_o0, rp->r_o1, rp->r_o2, rp->r_o3);
393		} else {
394			uintptr_t arg0 = fasttrap_getreg(rp, R_I0);
395			uintptr_t arg1 = fasttrap_getreg(rp, R_I1);
396			uintptr_t arg2 = fasttrap_getreg(rp, R_I2);
397			uintptr_t arg3 = fasttrap_getreg(rp, R_I3);
398
399			cookie = dtrace_interrupt_disable();
400			DTRACE_CPUFLAG_SET(CPU_DTRACE_FAKERESTORE);
401			dtrace_probe(probe->ftp_id, pc - probe->ftp_faddr,
402			    arg0, arg1, arg2, arg3);
403			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_FAKERESTORE);
404			dtrace_interrupt_enable(cookie);
405		}
406	}
407
408	mutex_exit(pid_mtx);
409}
410
/*
 * Handle a thread hitting a fasttrap tracepoint.
 *
 * The tracepoint for (pid, %pc) is looked up, its non-return probes are
 * fired, and the displaced instruction is then either emulated here or
 * copied out to scratch space off of %g7 to be executed in user-land. On
 * the way out rp->r_pc and rp->r_npc are rewritten to resume the thread.
 *
 * Returns 0 when the trap was handled (including the paths that call
 * fasttrap_sigtrap()), or -1 when no active tracepoint matches.
 */
int
fasttrap_pid_probe(struct regs *rp)
{
	proc_t *p = curproc;
	fasttrap_tracepoint_t *tp, tp_local;
	fasttrap_id_t *id;
	pid_t pid;
	uintptr_t pc = rp->r_pc;
	uintptr_t npc = rp->r_npc;
	uintptr_t orig_pc = pc;
	fasttrap_bucket_t *bucket;
	kmutex_t *pid_mtx;
	uint_t fake_restore = 0, is_enabled = 0;
	dtrace_icookie_t cookie;

	/*
	 * It's possible that a user (in a veritable orgy of bad planning)
	 * could redirect this thread's flow of control before it reached the
	 * return probe fasttrap. In this case we need to kill the process
	 * since it's in an unrecoverable state.
	 */
	if (curthread->t_dtrace_step) {
		ASSERT(curthread->t_dtrace_on);
		fasttrap_sigtrap(p, curthread, pc);
		return (0);
	}

	/*
	 * Clear all user tracing flags.
	 */
	curthread->t_dtrace_ft = 0;
	curthread->t_dtrace_pc = 0;
	curthread->t_dtrace_npc = 0;
	curthread->t_dtrace_scrpc = 0;
	curthread->t_dtrace_astpc = 0;

	/*
	 * Treat a child created by a call to vfork(2) as if it were its
	 * parent. We know that there's only one thread of control in such a
	 * process: this one.
	 */
	while (p->p_flag & SVFORK) {
		p = p->p_parent;
	}

	pid = p->p_pid;
	pid_mtx = &cpu_core[CPU->cpu_id].cpuc_pid_lock;
	mutex_enter(pid_mtx);
	bucket = &fasttrap_tpoints.fth_table[FASTTRAP_TPOINTS_INDEX(pid, pc)];

	/*
	 * Lookup the tracepoint that the process just hit.
	 */
	for (tp = bucket->ftb_data; tp != NULL; tp = tp->ftt_next) {
		if (pid == tp->ftt_pid && pc == tp->ftt_pc &&
		    tp->ftt_proc->ftpc_acount != 0)
			break;
	}

	/*
	 * If we couldn't find a matching tracepoint, either a tracepoint has
	 * been inserted without using the pid<pid> ioctl interface (see
	 * fasttrap_ioctl), or somehow we have mislaid this tracepoint.
	 */
	if (tp == NULL) {
		mutex_exit(pid_mtx);
		return (-1);
	}

	for (id = tp->ftt_ids; id != NULL; id = id->fti_next) {
		fasttrap_probe_t *probe = id->fti_probe;
		int isentry = (id->fti_ptype == DTFTP_ENTRY);

		/*
		 * Is-enabled probes are not fired; we only remember that one
		 * is present so that %o0 can be set below.
		 */
		if (id->fti_ptype == DTFTP_IS_ENABLED) {
			is_enabled = 1;
			continue;
		}

		/*
		 * We note that this was an entry probe to help ustack() find
		 * the first caller.
		 */
		if (isentry) {
			cookie = dtrace_interrupt_disable();
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ENTRY);
		}
		dtrace_probe(probe->ftp_id, rp->r_o0, rp->r_o1, rp->r_o2,
		    rp->r_o3, rp->r_o4);
		if (isentry) {
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_ENTRY);
			dtrace_interrupt_enable(cookie);
		}
	}

	/*
	 * We're about to do a bunch of work so we cache a local copy of
	 * the tracepoint to emulate the instruction, and then find the
	 * tracepoint again later if we need to light up any return probes.
	 */
	tp_local = *tp;
	mutex_exit(pid_mtx);
	tp = &tp_local;

	/*
	 * If there's an is-enabled probe connected to this tracepoint it
	 * means that there was a 'mov %g0, %o0' instruction that was placed
	 * there by DTrace when the binary was linked. As this probe is, in
	 * fact, enabled, we need to stuff 1 into %o0. Accordingly, we can
	 * bypass all the instruction emulation logic since we know the
	 * inevitable result. It's possible that a user could construct a
	 * scenario where the 'is-enabled' probe was on some other
	 * instruction, but that would be a rather exotic way to shoot oneself
	 * in the foot.
	 */
	if (is_enabled) {
		rp->r_o0 = 1;
		pc = rp->r_npc;
		npc = pc + 4;
		goto done;
	}

	/*
	 * We emulate certain types of instructions to ensure correctness
	 * (in the case of position dependent instructions) or optimize
	 * common cases. The rest we have the thread execute back in user-
	 * land.
	 */
	switch (tp->ftt_type) {
	case FASTTRAP_T_SAVE:
	{
		int32_t imm;

		/*
		 * This is an optimization to let us handle function entry
		 * probes more efficiently. Many functions begin with a save
		 * instruction that follows the pattern:
		 *	save	%sp, <imm>, %sp
		 *
		 * Meanwhile, we've stashed the instruction:
		 *	save	%g1, %g0, %sp
		 *
		 * off of %g7, so all we have to do is stick the right value
		 * into %g1 and reset %pc to point to the instruction we've
		 * cleverly hidden (%npc should not be touched).
		 */

		/*
		 * Shifting the word up by 19 and back down as a signed value
		 * leaves the low 13 bits of the instruction sign-extended.
		 * The same idiom recurs in the cases below.
		 */
		imm = tp->ftt_instr << 19;
		imm >>= 19;
		rp->r_g1 = rp->r_sp + imm;
		pc = rp->r_g7 + FASTTRAP_OFF_SAVE;
		break;
	}

	case FASTTRAP_T_RESTORE:
	{
		ulong_t value;
		uint_t rd;

		/*
		 * This is an optimization to let us handle function
		 * return probes more efficiently. Most non-leaf functions
		 * end with the sequence:
		 *	ret
		 *	restore	<reg>, <reg_or_imm>, %oX
		 *
		 * We've stashed the instruction:
		 *	restore	%g0, %g0, %g0
		 *
		 * off of %g7 so we just need to place the correct value
		 * in the right %i register (since after our fake-o
		 * restore, the %i's will become the %o's) and set the %pc
		 * to point to our hidden restore. We also set fake_restore to
		 * let fasttrap_return_common() know that it will find the
		 * return values in the %i's rather than the %o's.
		 */

		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		/*
		 * Convert %o's to %i's; leave %g's as they are.
		 * (Registers 8-15 are bumped by 16 to 24-31.)
		 */
		rd = RD(tp->ftt_instr);
		fasttrap_putreg(rp, ((rd & 0x18) == 0x8) ? rd + 16 : rd, value);

		pc = rp->r_g7 + FASTTRAP_OFF_RESTORE;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_RETURN:
	{
		uintptr_t target;

		/*
		 * A return instruction is like a jmpl (without the link
		 * part) that executes an implicit restore. We've stashed
		 * the instruction:
		 *	return %o0
		 *
		 * off of %g7 so we just need to place the target in %o0
		 * and set the %pc to point to the stashed return instruction.
		 * We use %o0 since that register disappears after the return
		 * executes, erasing any evidence of this tampering.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) + imm;
		} else {
			target = fasttrap_getreg(rp, RS1(tp->ftt_instr)) +
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, R_O0, target);

		pc = rp->r_g7 + FASTTRAP_OFF_RETURN;
		fake_restore = 1;
		break;
	}

	case FASTTRAP_T_OR:
	{
		ulong_t value;

		/*
		 * Fully emulated: compute rs1 | (immediate or rs2), store it
		 * in rd, and step past the instruction.
		 */
		if (I(tp->ftt_instr)) {
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) | imm;
		} else {
			value = fasttrap_getreg(rp, RS1(tp->ftt_instr)) |
			    fasttrap_getreg(rp, RS2(tp->ftt_instr));
		}

		fasttrap_putreg(rp, RD(tp->ftt_instr), value);
		pc = rp->r_npc;
		npc = pc + 4;
		break;
	}

	case FASTTRAP_T_SETHI:
		if (RD(tp->ftt_instr) != R_G0) {
			/*
			 * Shifting the 32-bit word left by 10 moves its low
			 * 22 bits to the top and zeroes the low 10 bits.
			 */
			uint32_t imm32 = tp->ftt_instr << 10;
			fasttrap_putreg(rp, RD(tp->ftt_instr), (ulong_t)imm32);
		}
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CCR:
	{
		uint_t c, v, z, n, taken;
		uint_t ccr = rp->r_tstate >> TSTATE_CCR_SHIFT;

		/*
		 * A non-zero ftt_cc selects the upper four condition-code
		 * bits instead of the lower four.
		 */
		if (tp->ftt_cc != 0)
			ccr >>= 4;

		c = (ccr >> 0) & 1;
		v = (ccr >> 1) & 1;
		z = (ccr >> 2) & 1;
		n = (ccr >> 3) & 1;

		/*
		 * The negated conditions use ~, which also sets the upper
		 * bits of 'taken'; only bit 0 is examined below.
		 */
		switch (tp->ftt_code) {
		case 0x0:	/* BN */
			taken = 0;		break;
		case 0x1:	/* BE */
			taken = z;		break;
		case 0x2:	/* BLE */
			taken = z | (n ^ v);	break;
		case 0x3:	/* BL */
			taken = n ^ v;		break;
		case 0x4:	/* BLEU */
			taken = c | z;		break;
		case 0x5:	/* BCS (BLU) */
			taken = c;		break;
		case 0x6:	/* BNEG */
			taken = n;		break;
		case 0x7:	/* BVS */
			taken = v;		break;
		case 0x8:	/* BA */
			/*
			 * We handle the BA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = 1;		break;
		case 0x9:	/* BNE */
			taken = ~z;		break;
		case 0xa:	/* BG */
			taken = ~(z | (n ^ v));	break;
		case 0xb:	/* BGE */
			taken = ~(n ^ v);	break;
		case 0xc:	/* BGU */
			taken = ~(c | z);	break;
		case 0xd:	/* BCC (BGEU) */
			taken = ~c;		break;
		case 0xe:	/* BPOS */
			taken = ~n;		break;
		case 0xf:	/* BVC */
			taken = ~v;		break;
		}

		if (taken & 1) {
			/* Taken: run the delay slot, then the destination. */
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_FCC:
	{
		uint_t fcc;
		uint_t taken;
		uint64_t fsr;

		dtrace_getfsr(&fsr);

		/*
		 * Pick the two-bit floating-point condition field: bits
		 * 11:10 for ftt_cc 0, otherwise bits starting at 30 + 2 * cc.
		 */
		if (tp->ftt_cc == 0) {
			fcc = (fsr >> 10) & 0x3;
		} else {
			uint_t shift;
			ASSERT(tp->ftt_cc <= 3);
			shift = 30 + tp->ftt_cc * 2;
			fcc = (fsr >> shift) & 0x3;
		}

		/*
		 * Each condition is a four-bit mask indexed by the fcc
		 * value; the branch is taken when bit 'fcc' is set.
		 */
		switch (tp->ftt_code) {
		case 0x0:	/* FBN */
			taken = (1 << fcc) & (0|0|0|0);	break;
		case 0x1:	/* FBNE */
			taken = (1 << fcc) & (8|4|2|0);	break;
		case 0x2:	/* FBLG */
			taken = (1 << fcc) & (0|4|2|0);	break;
		case 0x3:	/* FBUL */
			taken = (1 << fcc) & (8|0|2|0);	break;
		case 0x4:	/* FBL */
			taken = (1 << fcc) & (0|0|2|0);	break;
		case 0x5:	/* FBUG */
			taken = (1 << fcc) & (8|4|0|0);	break;
		case 0x6:	/* FBG */
			taken = (1 << fcc) & (0|4|0|0);	break;
		case 0x7:	/* FBU */
			taken = (1 << fcc) & (8|0|0|0);	break;
		case 0x8:	/* FBA */
			/*
			 * We handle the FBA case differently since the annul
			 * bit means something slightly different.
			 */
			panic("fasttrap: mishandled a branch");
			taken = (1 << fcc) & (8|4|2|1);	break;
		case 0x9:	/* FBE */
			taken = (1 << fcc) & (0|0|0|1);	break;
		case 0xa:	/* FBUE */
			taken = (1 << fcc) & (8|0|0|1);	break;
		case 0xb:	/* FBGE */
			taken = (1 << fcc) & (0|4|0|1);	break;
		case 0xc:	/* FBUGE */
			taken = (1 << fcc) & (8|4|0|1);	break;
		case 0xd:	/* FBLE */
			taken = (1 << fcc) & (0|0|2|1);	break;
		case 0xe:	/* FBULE */
			taken = (1 << fcc) & (8|0|2|1);	break;
		case 0xf:	/* FBO */
			taken = (1 << fcc) & (0|4|2|1);	break;
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_REG:
	{
		int64_t value;
		uint_t taken;
		uint_t reg = RS1(tp->ftt_instr);

		/*
		 * An ILP32 process shouldn't be using a branch predicated on
		 * an %i or an %l since it would violate the ABI. It's a
		 * violation of the ABI because we can't ensure deterministic
		 * behavior. We should have identified this case when we
		 * enabled the probe.
		 */
		ASSERT(p->p_model == DATAMODEL_LP64 || reg < 16);

		/* The register is compared against zero as a signed value. */
		value = (int64_t)fasttrap_getreg(rp, reg);

		switch (tp->ftt_code) {
		case 0x1:	/* BRZ */
			taken = (value == 0);	break;
		case 0x2:	/* BRLEZ */
			taken = (value <= 0);	break;
		case 0x3:	/* BRLZ */
			taken = (value < 0);	break;
		case 0x5:	/* BRNZ */
			taken = (value != 0);	break;
		case 0x6:	/* BRGZ */
			taken = (value > 0);	break;
		case 0x7:	/* BRGEZ */
			taken = (value >= 0);	break;
		default:
		case 0x0:
		case 0x4:
			panic("fasttrap: mishandled a branch");
		}

		if (taken) {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		} else if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Untaken annulled branches don't execute the
			 * instruction in the delay slot.
			 */
			pc = rp->r_npc + 4;
			npc = pc + 4;
		} else {
			pc = rp->r_npc;
			npc = pc + 4;
		}
		break;
	}

	case FASTTRAP_T_ALWAYS:
		/*
		 * BAs, BA,As...
		 */

		if (tp->ftt_flags & FASTTRAP_F_ANNUL) {
			/*
			 * Annulled branch always instructions never execute
			 * the instruction in the delay slot.
			 */
			pc = tp->ftt_dest;
			npc = tp->ftt_dest + 4;
		} else {
			pc = rp->r_npc;
			npc = tp->ftt_dest;
		}
		break;

	case FASTTRAP_T_RDPC:
		/* rp->r_pc still holds the address of the traced instruction. */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);
		pc = rp->r_npc;
		npc = pc + 4;
		break;

	case FASTTRAP_T_CALL:
		/*
		 * It's a call _and_ link remember...
		 */
		rp->r_o7 = rp->r_pc;
		pc = rp->r_npc;
		npc = tp->ftt_dest;
		break;

	case FASTTRAP_T_JMPL:
		pc = rp->r_npc;

		/*
		 * The target is computed before the link register is
		 * written, so rd may name the same register as rs1 or rs2.
		 */
		if (I(tp->ftt_instr)) {
			uint_t rs1 = RS1(tp->ftt_instr);
			int32_t imm;

			imm = tp->ftt_instr << 19;
			imm >>= 19;
			npc = fasttrap_getreg(rp, rs1) + imm;
		} else {
			uint_t rs1 = RS1(tp->ftt_instr);
			uint_t rs2 = RS2(tp->ftt_instr);

			npc = fasttrap_getreg(rp, rs1) +
			    fasttrap_getreg(rp, rs2);
		}

		/*
		 * Do the link part of the jump-and-link instruction.
		 */
		fasttrap_putreg(rp, RD(tp->ftt_instr), rp->r_pc);

		break;

	case FASTTRAP_T_COMMON:
	{
		curthread->t_dtrace_scrpc = rp->r_g7;
		curthread->t_dtrace_astpc = rp->r_g7 + FASTTRAP_OFF_FTRET;

		/*
		 * Copy the instruction to a reserved location in the
		 * user-land thread structure, then set the PC to that
		 * location and leave the NPC alone. We take pains to ensure
		 * consistency in the instruction stream (See SPARC
		 * Architecture Manual Version 9, sections 8.4.7, A.20, and
		 * H.1.6; UltraSPARC I/II User's Manual, sections 3.1.1.1,
		 * and 13.6.4) by using the ASI ASI_BLK_COMMIT_S to copy the
		 * instruction into the user's address space without
		 * bypassing the I$. There's no AS_USER version of this ASI
		 * (as exist for other ASIs) so we use the lofault
		 * mechanism to catch faults.
		 */
		if (dtrace_blksuword32(rp->r_g7, &tp->ftt_instr, 1) == -1) {
			/*
			 * If the copyout fails, then the process's state
			 * is not consistent (the effects of the traced
			 * instruction will never be seen). This process
			 * cannot be allowed to continue execution.
			 */
			fasttrap_sigtrap(curproc, curthread, pc);
			return (0);
		}

		/* Remember where to resume once the copy has executed. */
		curthread->t_dtrace_pc = pc;
		curthread->t_dtrace_npc = npc;
		curthread->t_dtrace_on = 1;

		pc = curthread->t_dtrace_scrpc;

		/*
		 * With return probes pending, follow the copied instruction
		 * with the instruction at FASTTRAP_OFF_FTRET so that control
		 * comes back to the kernel.
		 */
		if (tp->ftt_retids != NULL) {
			curthread->t_dtrace_step = 1;
			curthread->t_dtrace_ret = 1;
			npc = curthread->t_dtrace_astpc;
		}
		break;
	}

	default:
		panic("fasttrap: mishandled an instruction");
	}

	/*
	 * This bit me in the ass a couple of times, so let's toss this
	 * in as a cursory sanity check.
	 */
	ASSERT(pc != rp->r_g7 + 4);
	ASSERT(pc != rp->r_g7 + 8);

done:
	/*
	 * If there were no return probes when we first found the tracepoint,
	 * we should feel no obligation to honor any return probes that were
	 * subsequently enabled -- they'll just have to wait until the next
	 * time around.
	 */
	if (tp->ftt_retids != NULL) {
		/*
		 * We need to wait until the results of the instruction are
		 * apparent before invoking any return probes. If this
		 * instruction was emulated we can just call
		 * fasttrap_return_common(); if it needs to be executed, we
		 * need to wait until we return to the kernel.
		 */
		if (tp->ftt_type != FASTTRAP_T_COMMON) {
			fasttrap_return_common(rp, orig_pc, pid, fake_restore);
		} else {
			ASSERT(curthread->t_dtrace_ret != 0);
			ASSERT(curthread->t_dtrace_pc == orig_pc);
			ASSERT(curthread->t_dtrace_scrpc == rp->r_g7);
			ASSERT(npc == curthread->t_dtrace_astpc);
		}
	}

	/* Commit the resume addresses computed above. */
	ASSERT(pc != 0);
	rp->r_pc = pc;
	rp->r_npc = npc;

	return (0);
}
1012
1013int
1014fasttrap_return_probe(struct regs *rp)
1015{
1016	proc_t *p = ttoproc(curthread);
1017	pid_t pid;
1018	uintptr_t pc = curthread->t_dtrace_pc;
1019	uintptr_t npc = curthread->t_dtrace_npc;
1020
1021	curthread->t_dtrace_pc = 0;
1022	curthread->t_dtrace_npc = 0;
1023	curthread->t_dtrace_scrpc = 0;
1024	curthread->t_dtrace_astpc = 0;
1025
1026	/*
1027	 * Treat a child created by a call to vfork(2) as if it were its
1028	 * parent. We know there's only one thread of control in such a
1029	 * process: this one.
1030	 */
1031	while (p->p_flag & SVFORK) {
1032		p = p->p_parent;
1033	}
1034
1035	/*
1036	 * We set the %pc and %npc to their values when the traced
1037	 * instruction was initially executed so that it appears to
1038	 * dtrace_probe() that we're on the original instruction, and so that
1039	 * the user can't easily detect our complex web of lies.
1040	 * dtrace_return_probe() (our caller) will correctly set %pc and %npc
1041	 * after we return.
1042	 */
1043	rp->r_pc = pc;
1044	rp->r_npc = npc;
1045
1046	pid = p->p_pid;
1047	fasttrap_return_common(rp, pc, pid, 0);
1048
1049	return (0);
1050}
1051
1052int
1053fasttrap_tracepoint_install(proc_t *p, fasttrap_tracepoint_t *tp)
1054{
1055	fasttrap_instr_t instr = FASTTRAP_INSTR;
1056
1057	if (uwrite(p, &instr, 4, tp->ftt_pc) != 0)
1058		return (-1);
1059
1060	return (0);
1061}
1062
1063int
1064fasttrap_tracepoint_remove(proc_t *p, fasttrap_tracepoint_t *tp)
1065{
1066	fasttrap_instr_t instr;
1067
1068	/*
1069	 * Distinguish between read or write failures and a changed
1070	 * instruction.
1071	 */
1072	if (uread(p, &instr, 4, tp->ftt_pc) != 0)
1073		return (0);
1074	if (instr != FASTTRAP_INSTR && instr != BREAKPOINT_INSTR)
1075		return (0);
1076	if (uwrite(p, &tp->ftt_instr, 4, tp->ftt_pc) != 0)
1077		return (-1);
1078
1079	return (0);
1080}
1081
1082int
1083fasttrap_tracepoint_init(proc_t *p, fasttrap_tracepoint_t *tp, uintptr_t pc,
1084    fasttrap_probe_type_t type)
1085{
1086	uint32_t instr;
1087	int32_t disp;
1088
1089	/*
1090	 * Read the instruction at the given address out of the process's
1091	 * address space. We don't have to worry about a debugger
1092	 * changing this instruction before we overwrite it with our trap
1093	 * instruction since P_PR_LOCK is set.
1094	 */
1095	if (uread(p, &instr, 4, pc) != 0)
1096		return (-1);
1097
1098	/*
1099	 * Decode the instruction to fill in the probe flags. We can have
1100	 * the process execute most instructions on its own using a pc/npc
1101	 * trick, but pc-relative control transfer present a problem since
1102	 * we're relocating the instruction. We emulate these instructions
1103	 * in the kernel. We assume a default type and over-write that as
1104	 * needed.
1105	 *
1106	 * pc-relative instructions must be emulated for correctness;
1107	 * other instructions (which represent a large set of commonly traced
1108	 * instructions) are emulated or otherwise optimized for performance.
1109	 */
1110	tp->ftt_type = FASTTRAP_T_COMMON;
1111	if (OP(instr) == 1) {
1112		/*
1113		 * Call instructions.
1114		 */
1115		tp->ftt_type = FASTTRAP_T_CALL;
1116		disp = DISP30(instr) << 2;
1117		tp->ftt_dest = pc + (intptr_t)disp;
1118
1119	} else if (OP(instr) == 0) {
1120		/*
1121		 * Branch instructions.
1122		 *
1123		 * Unconditional branches need careful attention when they're
1124		 * annulled: annulled unconditional branches never execute
1125		 * the instruction in the delay slot.
1126		 */
1127		switch (OP2(instr)) {
1128		case OP2_ILLTRAP:
1129		case 0x7:
1130			/*
1131			 * The compiler may place an illtrap after a call to
1132			 * a function that returns a structure. In the case of
1133			 * a returned structure, the compiler places an illtrap
1134			 * whose const22 field is the size of the returned
1135			 * structure immediately following the delay slot of
1136			 * the call. To stay out of the way, we refuse to
1137			 * place tracepoints on top of illtrap instructions.
1138			 *
1139			 * This is one of the dumbest architectural decisions
1140			 * I've ever had to work around.
1141			 *
1142			 * We also identify the only illegal op2 value (See
1143			 * SPARC Architecture Manual Version 9, E.2 table 31).
1144			 */
1145			return (-1);
1146
1147		case OP2_BPcc:
1148			if (COND(instr) == 8) {
1149				tp->ftt_type = FASTTRAP_T_ALWAYS;
1150			} else {
1151				/*
1152				 * Check for an illegal instruction.
1153				 */
1154				if (CC(instr) & 1)
1155					return (-1);
1156				tp->ftt_type = FASTTRAP_T_CCR;
1157				tp->ftt_cc = CC(instr);
1158				tp->ftt_code = COND(instr);
1159			}
1160
1161			if (A(instr) != 0)
1162				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1163
1164			disp = DISP19(instr);
1165			disp <<= 13;
1166			disp >>= 11;
1167			tp->ftt_dest = pc + (intptr_t)disp;
1168			break;
1169
1170		case OP2_Bicc:
1171			if (COND(instr) == 8) {
1172				tp->ftt_type = FASTTRAP_T_ALWAYS;
1173			} else {
1174				tp->ftt_type = FASTTRAP_T_CCR;
1175				tp->ftt_cc = 0;
1176				tp->ftt_code = COND(instr);
1177			}
1178
1179			if (A(instr) != 0)
1180				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1181
1182			disp = DISP22(instr);
1183			disp <<= 10;
1184			disp >>= 8;
1185			tp->ftt_dest = pc + (intptr_t)disp;
1186			break;
1187
1188		case OP2_BPr:
1189			/*
1190			 * Check for an illegal instruction.
1191			 */
1192			if ((RCOND(instr) & 3) == 0)
1193				return (-1);
1194
1195			/*
1196			 * It's a violation of the v8plus ABI to use a
1197			 * register-predicated branch in a 32-bit app if
1198			 * the register used is an %l or an %i (%gs and %os
1199			 * are legit because they're not saved to the stack
1200			 * in 32-bit words when we take a trap).
1201			 */
1202			if (p->p_model == DATAMODEL_ILP32 && RS1(instr) >= 16)
1203				return (-1);
1204
1205			tp->ftt_type = FASTTRAP_T_REG;
1206			if (A(instr) != 0)
1207				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1208			disp = DISP16(instr);
1209			disp <<= 16;
1210			disp >>= 14;
1211			tp->ftt_dest = pc + (intptr_t)disp;
1212			tp->ftt_code = RCOND(instr);
1213			break;
1214
1215		case OP2_SETHI:
1216			tp->ftt_type = FASTTRAP_T_SETHI;
1217			break;
1218
1219		case OP2_FBPfcc:
1220			if (COND(instr) == 8) {
1221				tp->ftt_type = FASTTRAP_T_ALWAYS;
1222			} else {
1223				tp->ftt_type = FASTTRAP_T_FCC;
1224				tp->ftt_cc = CC(instr);
1225				tp->ftt_code = COND(instr);
1226			}
1227
1228			if (A(instr) != 0)
1229				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1230
1231			disp = DISP19(instr);
1232			disp <<= 13;
1233			disp >>= 11;
1234			tp->ftt_dest = pc + (intptr_t)disp;
1235			break;
1236
1237		case OP2_FBfcc:
1238			if (COND(instr) == 8) {
1239				tp->ftt_type = FASTTRAP_T_ALWAYS;
1240			} else {
1241				tp->ftt_type = FASTTRAP_T_FCC;
1242				tp->ftt_cc = 0;
1243				tp->ftt_code = COND(instr);
1244			}
1245
1246			if (A(instr) != 0)
1247				tp->ftt_flags |= FASTTRAP_F_ANNUL;
1248
1249			disp = DISP22(instr);
1250			disp <<= 10;
1251			disp >>= 8;
1252			tp->ftt_dest = pc + (intptr_t)disp;
1253			break;
1254		}
1255
1256	} else if (OP(instr) == 2) {
1257		switch (OP3(instr)) {
1258		case OP3_RETURN:
1259			tp->ftt_type = FASTTRAP_T_RETURN;
1260			break;
1261
1262		case OP3_JMPL:
1263			tp->ftt_type = FASTTRAP_T_JMPL;
1264			break;
1265
1266		case OP3_RD:
1267			if (RS1(instr) == 5)
1268				tp->ftt_type = FASTTRAP_T_RDPC;
1269			break;
1270
1271		case OP3_SAVE:
1272			/*
1273			 * We optimize for save instructions at function
1274			 * entry; see the comment in fasttrap_pid_probe()
1275			 * (near FASTTRAP_T_SAVE) for details.
1276			 */
1277			if (fasttrap_optimize_save != 0 &&
1278			    type == DTFTP_ENTRY &&
1279			    I(instr) == 1 && RD(instr) == R_SP)
1280				tp->ftt_type = FASTTRAP_T_SAVE;
1281			break;
1282
1283		case OP3_RESTORE:
1284			/*
1285			 * We optimize restore instructions at function
1286			 * return; see the comment in fasttrap_pid_probe()
1287			 * (near FASTTRAP_T_RESTORE) for details.
1288			 *
1289			 * rd must be an %o or %g register.
1290			 */
1291			if ((RD(instr) & 0x10) == 0)
1292				tp->ftt_type = FASTTRAP_T_RESTORE;
1293			break;
1294
1295		case OP3_OR:
1296			/*
1297			 * A large proportion of instructions in the delay
1298			 * slot of retl instructions are or's so we emulate
1299			 * these downstairs as an optimization.
1300			 */
1301			tp->ftt_type = FASTTRAP_T_OR;
1302			break;
1303
1304		case OP3_TCC:
1305			/*
1306			 * Breakpoint instructions are effectively position-
1307			 * dependent since the debugger uses the %pc value
1308			 * to lookup which breakpoint was executed. As a
1309			 * result, we can't actually instrument breakpoints.
1310			 */
1311			if (SW_TRAP(instr) == ST_BREAKPOINT)
1312				return (-1);
1313			break;
1314
1315		case 0x19:
1316		case 0x1d:
1317		case 0x29:
1318		case 0x33:
1319		case 0x3f:
1320			/*
1321			 * Identify illegal instructions (See SPARC
1322			 * Architecture Manual Version 9, E.2 table 32).
1323			 */
1324			return (-1);
1325		}
1326	} else if (OP(instr) == 3) {
1327		uint32_t op3 = OP3(instr);
1328
1329		/*
1330		 * Identify illegal instructions (See SPARC Architecture
1331		 * Manual Version 9, E.2 table 33).
1332		 */
1333		if ((op3 & 0x28) == 0x28) {
1334			if (op3 != OP3_PREFETCH && op3 != OP3_CASA &&
1335			    op3 != OP3_PREFETCHA && op3 != OP3_CASXA)
1336				return (-1);
1337		} else {
1338			if ((op3 & 0x0f) == 0x0c || (op3 & 0x3b) == 0x31)
1339				return (-1);
1340		}
1341	}
1342
1343	tp->ftt_instr = instr;
1344
1345	/*
1346	 * We don't know how this tracepoint is going to be used, but in case
1347	 * it's used as part of a function return probe, we need to indicate
1348	 * whether it's always a return site or only potentially a return
1349	 * site. If it's part of a return probe, it's always going to be a
1350	 * return from that function if it's a restore instruction or if
1351	 * the previous instruction was a return. If we could reliably
1352	 * distinguish jump tables from return sites, this wouldn't be
1353	 * necessary.
1354	 */
1355	if (tp->ftt_type != FASTTRAP_T_RESTORE &&
1356	    (uread(p, &instr, 4, pc - sizeof (instr)) != 0 ||
1357	    !(OP(instr) == 2 && OP3(instr) == OP3_RETURN)))
1358		tp->ftt_flags |= FASTTRAP_F_RETMAYBE;
1359
1360	return (0);
1361}
1362
1363/*ARGSUSED*/
1364uint64_t
1365fasttrap_pid_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1366    int aframes)
1367{
1368	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1369}
1370
1371/*ARGSUSED*/
1372uint64_t
1373fasttrap_usdt_getarg(void *arg, dtrace_id_t id, void *parg, int argno,
1374    int aframes)
1375{
1376	return (fasttrap_anarg(ttolwp(curthread)->lwp_regs, argno));
1377}
1378
1379static uint64_t fasttrap_getreg_fast_cnt;
1380static uint64_t fasttrap_getreg_mpcb_cnt;
1381static uint64_t fasttrap_getreg_slow_cnt;
1382
1383static ulong_t
1384fasttrap_getreg(struct regs *rp, uint_t reg)
1385{
1386	ulong_t value;
1387	dtrace_icookie_t cookie;
1388	struct machpcb *mpcb;
1389	extern ulong_t dtrace_getreg_win(uint_t, uint_t);
1390
1391	/*
1392	 * We have the %os and %gs in our struct regs, but if we need to
1393	 * snag a %l or %i we need to go scrounging around in the process's
1394	 * address space.
1395	 */
1396	if (reg == 0)
1397		return (0);
1398
1399	if (reg < 16)
1400		return ((&rp->r_g1)[reg - 1]);
1401
1402	/*
1403	 * Before we look at the user's stack, we'll check the register
1404	 * windows to see if the information we want is in there.
1405	 */
1406	cookie = dtrace_interrupt_disable();
1407	if (dtrace_getotherwin() > 0) {
1408		value = dtrace_getreg_win(reg, 1);
1409		dtrace_interrupt_enable(cookie);
1410
1411		atomic_inc_64(&fasttrap_getreg_fast_cnt);
1412
1413		return (value);
1414	}
1415	dtrace_interrupt_enable(cookie);
1416
1417	/*
1418	 * First check the machpcb structure to see if we've already read
1419	 * in the register window we're looking for; if we haven't, (and
1420	 * we probably haven't) try to copy in the value of the register.
1421	 */
1422	/* LINTED - alignment */
1423	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1424
1425	if (get_udatamodel() == DATAMODEL_NATIVE) {
1426		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1427
1428		if (mpcb->mpcb_wbcnt > 0) {
1429			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
1430			int i = mpcb->mpcb_wbcnt;
1431			do {
1432				i--;
1433				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1434					continue;
1435
1436				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1437				return (rwin[i].rw_local[reg - 16]);
1438			} while (i > 0);
1439		}
1440
1441		if (fasttrap_fulword(&fr->fr_local[reg - 16], &value) != 0)
1442			goto err;
1443	} else {
1444		struct frame32 *fr =
1445		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1446		uint32_t *v32 = (uint32_t *)&value;
1447
1448		if (mpcb->mpcb_wbcnt > 0) {
1449			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
1450			int i = mpcb->mpcb_wbcnt;
1451			do {
1452				i--;
1453				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1454					continue;
1455
1456				atomic_inc_64(&fasttrap_getreg_mpcb_cnt);
1457				return (rwin[i].rw_local[reg - 16]);
1458			} while (i > 0);
1459		}
1460
1461		if (fasttrap_fuword32(&fr->fr_local[reg - 16], &v32[1]) != 0)
1462			goto err;
1463
1464		v32[0] = 0;
1465	}
1466
1467	atomic_inc_64(&fasttrap_getreg_slow_cnt);
1468	return (value);
1469
1470err:
1471	/*
1472	 * If the copy in failed, the process will be in a irrecoverable
1473	 * state, and we have no choice but to kill it.
1474	 */
1475	psignal(ttoproc(curthread), SIGILL);
1476	return (0);
1477}
1478
1479static uint64_t fasttrap_putreg_fast_cnt;
1480static uint64_t fasttrap_putreg_mpcb_cnt;
1481static uint64_t fasttrap_putreg_slow_cnt;
1482
1483static void
1484fasttrap_putreg(struct regs *rp, uint_t reg, ulong_t value)
1485{
1486	dtrace_icookie_t cookie;
1487	struct machpcb *mpcb;
1488	extern void dtrace_putreg_win(uint_t, ulong_t);
1489
1490	if (reg == 0)
1491		return;
1492
1493	if (reg < 16) {
1494		(&rp->r_g1)[reg - 1] = value;
1495		return;
1496	}
1497
1498	/*
1499	 * If the user process is still using some register windows, we
1500	 * can just place the value in the correct window.
1501	 */
1502	cookie = dtrace_interrupt_disable();
1503	if (dtrace_getotherwin() > 0) {
1504		dtrace_putreg_win(reg, value);
1505		dtrace_interrupt_enable(cookie);
1506		atomic_inc_64(&fasttrap_putreg_fast_cnt);
1507		return;
1508	}
1509	dtrace_interrupt_enable(cookie);
1510
1511	/*
1512	 * First see if there's a copy of the register window in the
1513	 * machpcb structure that we can modify; if there isn't try to
1514	 * copy out the value. If that fails, we try to create a new
1515	 * register window in the machpcb structure. While this isn't
1516	 * _precisely_ the intended use of the machpcb structure, it
1517	 * can't cause any problems since we know at this point in the
1518	 * code that all of the user's data have been flushed out of the
1519	 * register file (since %otherwin is 0).
1520	 */
1521	/* LINTED - alignment */
1522	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
1523
1524	if (get_udatamodel() == DATAMODEL_NATIVE) {
1525		struct frame *fr = (struct frame *)(rp->r_sp + STACK_BIAS);
1526		/* LINTED - alignment */
1527		struct rwindow *rwin = (struct rwindow *)mpcb->mpcb_wbuf;
1528
1529		if (mpcb->mpcb_wbcnt > 0) {
1530			int i = mpcb->mpcb_wbcnt;
1531			do {
1532				i--;
1533				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1534					continue;
1535
1536				rwin[i].rw_local[reg - 16] = value;
1537				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1538				return;
1539			} while (i > 0);
1540		}
1541
1542		if (fasttrap_sulword(&fr->fr_local[reg - 16], value) != 0) {
1543			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1544			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1545				goto err;
1546
1547			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = value;
1548			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1549			mpcb->mpcb_wbcnt++;
1550			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1551			return;
1552		}
1553	} else {
1554		struct frame32 *fr =
1555		    (struct frame32 *)(uintptr_t)(caddr32_t)rp->r_sp;
1556		/* LINTED - alignment */
1557		struct rwindow32 *rwin = (struct rwindow32 *)mpcb->mpcb_wbuf;
1558		uint32_t v32 = (uint32_t)value;
1559
1560		if (mpcb->mpcb_wbcnt > 0) {
1561			int i = mpcb->mpcb_wbcnt;
1562			do {
1563				i--;
1564				if ((long)mpcb->mpcb_spbuf[i] != rp->r_sp)
1565					continue;
1566
1567				rwin[i].rw_local[reg - 16] = v32;
1568				atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1569				return;
1570			} while (i > 0);
1571		}
1572
1573		if (fasttrap_suword32(&fr->fr_local[reg - 16], v32) != 0) {
1574			if (mpcb->mpcb_wbcnt >= MAXWIN || copyin(fr,
1575			    &rwin[mpcb->mpcb_wbcnt], sizeof (*rwin)) != 0)
1576				goto err;
1577
1578			rwin[mpcb->mpcb_wbcnt].rw_local[reg - 16] = v32;
1579			mpcb->mpcb_spbuf[mpcb->mpcb_wbcnt] = (caddr_t)rp->r_sp;
1580			mpcb->mpcb_wbcnt++;
1581			atomic_inc_64(&fasttrap_putreg_mpcb_cnt);
1582			return;
1583		}
1584	}
1585
1586	atomic_inc_64(&fasttrap_putreg_slow_cnt);
1587	return;
1588
1589err:
1590	/*
1591	 * If we couldn't record this register's value, the process is in an
1592	 * irrecoverable state and we have no choice but to euthanize it.
1593	 */
1594	psignal(ttoproc(curthread), SIGILL);
1595}
1596