1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/dtrace_impl.h>
30 #include <sys/atomic.h>
31 #include <sys/model.h>
32 #include <sys/frame.h>
33 #include <sys/stack.h>
34 #include <sys/machpcb.h>
35 #include <sys/procfs_isa.h>
36 #include <sys/cmn_err.h>
37 #include <sys/sysmacros.h>
38 
/*
 * SPARC instruction-decoding constants.  The mask/match pairs below are used
 * by the leaf-function heuristic in dtrace_getpcstack() to classify 32-bit
 * instruction words without performing a full disassembly; DTRACE_REG_I0 and
 * DTRACE_REG_I6 are additionally used by dtrace_getarg().
 */

/*
 * Mask/match pair used to recognize a format-3 instruction, followed by the
 * shifts that bring its rs1 and rd register fields down to bit 0.
 */
#define	DTRACE_FMT3OP3_MASK	0x81000000
#define	DTRACE_FMT3OP3		0x80000000
#define	DTRACE_FMT3RS1_SHIFT	14
#define	DTRACE_FMT3RD_SHIFT	25
/* Shift used (left, then arithmetic right) to sign-extend a disp22 field. */
#define	DTRACE_DISP22_SHIFT	10
/* Mask for a 5-bit register field, and the register numbers of interest. */
#define	DTRACE_RMASK		0x1f
#define	DTRACE_REG_L0		16
#define	DTRACE_REG_O7		15
#define	DTRACE_REG_I0		24
#define	DTRACE_REG_I6		30
/* Exact instruction words compared against for ret and retl. */
#define	DTRACE_RET		0x81c7e008
#define	DTRACE_RETL		0x81c3e008
/* One mask shared by the save and restore match values. */
#define	DTRACE_SAVE_MASK	0xc1f80000
#define	DTRACE_SAVE		0x81e00000
#define	DTRACE_RESTORE		0x81e80000
/* Mask/match pairs for call, jmpl and ba (the ba mask ignores the annul bit). */
#define	DTRACE_CALL_MASK	0xc0000000
#define	DTRACE_CALL		0x40000000
#define	DTRACE_JMPL_MASK	0x81f10000
#define	DTRACE_JMPL		0x81c00000
#define	DTRACE_BA_MASK		0xdfc00000
#define	DTRACE_BA		0x10800000
/* Bound on the number of ba instructions the heuristic will follow. */
#define	DTRACE_BA_MAX		10
61 
/*
 * Helper routines that are declared here but defined outside this file.
 */
extern int dtrace_getupcstack_top(uint64_t *, int, uintptr_t *);
extern int dtrace_getustackdepth_top(uintptr_t *);
extern ulong_t dtrace_getreg_win(uint_t, uint_t);
extern void dtrace_putreg_win(uint_t, ulong_t);
extern int dtrace_fish(int, int, uintptr_t *);

/*
 * Upper bound on the number of user frames that dtrace_getustack_common()
 * will walk before it sets CPU_DTRACE_BADSTACK and gives up; this guards
 * against circular user stacks.
 */
int	dtrace_ustackdepth_max = 2048;
69 
/*
 * Record the kernel call stack of the current thread into pcstack.
 *
 * This is similar in principle to getpcstack(), but there are several marked
 * differences in implementation:
 *
 * (a)	dtrace_getpcstack() is called from probe context.  Thus, the call
 *	to flush_windows() from getpcstack() is a call to the probe-safe
 *	equivalent here.
 *
 * (b)	dtrace_getpcstack() is willing to sacrifice some performance to get
 *	a correct stack.  While consumers of getpcstack() are largely
 *	subsystem-specific in-kernel debugging facilities, DTrace consumers
 *	are arbitrary user-level analysis tools; dtrace_getpcstack() must
 *	deliver as correct a stack as possible.  Details on the issues
 *	surrounding stack correctness are found below.
 *
 * (c)	dtrace_getpcstack() _always_ fills in pcstack_limit pc_t's -- filling
 *	in the difference between the stack depth and pcstack_limit with NULLs.
 *	Due to this behavior dtrace_getpcstack() returns void.
 *
 * (d)	dtrace_getpcstack() takes a third parameter, aframes, that
 *	denotes the number of _artificial frames_ on the bottom of the
 *	stack.  An artificial frame is one induced by the provider; all
 *	artificial frames are stripped off before frames are stored to
 *	pcstack.
 *
 * (e)	dtrace_getpcstack() takes a fourth parameter, pc, that indicates
 *	an interrupted program counter (if any).  This should be a non-NULL
 *	value if and only if the hit probe is unanchored.  (Anchored probes
 *	don't fire through an interrupt source.)  This parameter is used to
 *	assure (b), above.
 *
 * Parameters:
 *	pcstack		output array of program counters
 *	pcstack_limit	number of entries in pcstack
 *	aframes		number of artificial frames to skip
 *	pc		interrupted program counter, or NULL
 */
void
dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *pc)
{
	struct frame *fp, *nextfp, *minfp, *stacktop;
	int depth = 0;
	int on_intr, j = 0;
	uint32_t i, r;

	fp = (struct frame *)((caddr_t)dtrace_getfp() + STACK_BIAS);
	dtrace_flush_windows();

	if (pc != NULL) {
		/*
		 * If we've been passed a non-NULL pc, we need to determine
		 * whether or not the specified program counter falls in a leaf
		 * function.  If it falls within a leaf function, we know that
		 * %o7 is valid in its frame (and we can just drive on).  If
		 * it's a non-leaf, however, we know that %o7 is garbage in the
		 * bottom frame.  To trim this frame, we simply increment
		 * aframes and drop into the stack-walking loop.
		 *
		 * To quickly determine if the specified program counter is in
		 * a leaf function, we exploit the fact that leaf functions
		 * tend to be short and non-leaf functions tend to frequently
		 * perform operations that are only permitted in a non-leaf
		 * function (e.g., using the %i's or %l's; calling a function;
		 * performing a restore).  We exploit these tendencies by
		 * simply scanning forward from the specified %pc -- if we see
		 * an operation only permitted in a non-leaf, we know we're in
		 * a non-leaf; if we see a retl, we know we're in a leaf.
		 * Fortunately, one need not perform anywhere near full
		 * disassembly to effectively determine the former: determining
		 * that an instruction is a format-3 instruction and decoding
		 * its rd and rs1 fields, for example, requires very little
		 * manipulation.  Overall, this method of leaf determination
		 * performs quite well:  on average, we only examine between
		 * 1.5 and 2.5 instructions before making the determination.
		 * (Outliers do exist, however; of note is the non-leaf
		 * function ip_sioctl_not_ours() which -- as of this writing --
		 * has a whopping 455 straight instructions that manipulate
		 * only %g's and %o's.)
		 */
		int delay = 0, branches = 0, taken = 0;

		/*
		 * The interrupted pc itself is always the first entry.
		 */
		if (depth < pcstack_limit)
			pcstack[depth++] = (pc_t)(uintptr_t)pc;

		/*
		 * Our heuristic is exactly that -- a heuristic -- and there
		 * exists a possibility that we could either be vectored
		 * off into the weeds (by following a bogus branch) or could
		 * wander off the end of the function and off the end of a
		 * text mapping (by not following a conditional branch at the
		 * end of the function that is effectively always taken).  So
		 * as a precautionary measure, we set the NOFAULT flag.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		for (;;) {
			/*
			 * Fetch the next instruction word; j is the index
			 * (in instructions) relative to pc.
			 */
			i = pc[j++];

			if ((i & DTRACE_FMT3OP3_MASK) == DTRACE_FMT3OP3) {
				/*
				 * This is a format-3 instruction.  We can
				 * look at rd and rs1.
				 */
				r = (i >> DTRACE_FMT3RS1_SHIFT) & DTRACE_RMASK;

				if (r >= DTRACE_REG_L0)
					goto nonleaf;

				r = (i >> DTRACE_FMT3RD_SHIFT) & DTRACE_RMASK;

				if (r >= DTRACE_REG_L0)
					goto nonleaf;

				if ((i & DTRACE_JMPL_MASK) == DTRACE_JMPL) {
					delay = 1;
					continue;
				}

				/*
				 * If we see explicit manipulation with %o7
				 * as a destination register, we know that
				 * %o7 is likely bogus -- and we treat this
				 * function as a non-leaf.
				 */
				if (r == DTRACE_REG_O7) {
					if (delay)
						goto leaf;

					i &= DTRACE_JMPL_MASK;

					if (i == DTRACE_JMPL) {
						delay = 1;
						continue;
					}

					goto nonleaf;
				}
			} else {
				/*
				 * If this is a call, it may or may not be
				 * a leaf; we need to check the delay slot.
				 */
				if ((i & DTRACE_CALL_MASK) == DTRACE_CALL) {
					delay = 1;
					continue;
				}

				/*
				 * If we see a ret it's not a leaf; if we
				 * see a retl, it is a leaf.
				 */
				if (i == DTRACE_RET)
					goto nonleaf;

				if (i == DTRACE_RETL)
					goto leaf;

				/*
				 * If this is a ba (annulled or not), then we
				 * need to actually follow the branch.  No, we
				 * don't look at the delay slot -- hopefully
				 * anything that can be gleaned from the delay
				 * slot can also be gleaned from the branch
				 * target.  To prevent ourselves from iterating
				 * infinitely, we clamp the number of branches
				 * that we'll follow, and we refuse to follow
				 * the same branch twice consecutively.  In
				 * both cases, we abort by deciding that we're
				 * looking at a leaf.  While in theory this
				 * could be wrong (we could be in the middle of
				 * a loop in a non-leaf that ends with a ba and
				 * only manipulates outputs and globals in the
				 * body of the loop -- therefore leading us to
				 * the wrong conclusion), this doesn't seem to
				 * crop up in practice.  (Or rather, this
				 * condition could not be deliberately induced,
				 * despite concerted effort.)
				 *
				 * NOTE(review): the paragraph above says we
				 * abort by deciding "leaf", but the code below
				 * jumps to nonleaf in both abort cases.  One of
				 * the two is stale -- confirm the intent.
				 */
				if ((i & DTRACE_BA_MASK) == DTRACE_BA) {
					if (++branches == DTRACE_BA_MAX ||
					    taken == j)
						goto nonleaf;

					/*
					 * Remember which branch we took and
					 * apply the sign-extended disp22; the
					 * -1 compensates for the j++ above.
					 */
					taken = j;
					j += ((int)(i << DTRACE_DISP22_SHIFT) >>
					    DTRACE_DISP22_SHIFT) - 1;
					continue;
				}

				/*
				 * Finally, if it's a save, it should be
				 * treated as a leaf; if it's a restore it
				 * should not be treated as a leaf.
				 */
				if ((i & DTRACE_SAVE_MASK) == DTRACE_SAVE)
					goto leaf;

				if ((i & DTRACE_SAVE_MASK) == DTRACE_RESTORE)
					goto nonleaf;
			}

			if (delay) {
				/*
				 * If this was a delay slot instruction and
				 * we didn't pick it up elsewhere, this is a
				 * non-leaf.
				 */
				goto nonleaf;
			}
		}
nonleaf:
		/*
		 * Non-leaf: trim the bottom frame by treating it as one more
		 * artificial frame.  This falls through to clear NOFAULT.
		 */
		aframes++;
leaf:
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	}

	/*
	 * Establish the upper bound of the stack being walked:  the interrupt
	 * stack if we're on it, otherwise the current thread's stack.
	 */
	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
		stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME));
	else
		stacktop = (struct frame *)curthread->t_stk;
	minfp = fp;

	while (depth < pcstack_limit) {
		nextfp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
		if (nextfp <= minfp || nextfp >= stacktop) {
			if (!on_intr && nextfp == stacktop && aframes != 0) {
				/*
				 * If we are exactly at the top of the stack
				 * with a non-zero number of artificial frames,
				 * it must be that the stack is filled with
				 * nothing _but_ artificial frames.  In this
				 * case, we assert that this is so, zero
				 * pcstack, and return.
				 */
				ASSERT(aframes == 1);
				ASSERT(depth == 0);

				while (depth < pcstack_limit)
					pcstack[depth++] = NULL;
				return;
			}

			if (on_intr) {
				/*
				 * Hop from interrupt stack to thread stack.
				 */
				stacktop = (struct frame *)curthread->t_stk;
				minfp = (struct frame *)curthread->t_stkbase;

				on_intr = 0;

				if (nextfp > minfp && nextfp < stacktop)
					continue;
			} else {
				/*
				 * High-level interrupts may occur when %sp is
				 * not necessarily contained in the stack
				 * bounds implied by %g7 -- interrupt thread
				 * management runs with %pil at DISP_LEVEL,
				 * and high-level interrupts may thus occur
				 * in windows when %sp and %g7 are not self-
				 * consistent.  If we call dtrace_getpcstack()
				 * from a high-level interrupt that has occurred
				 * in such a window, we will fail the above test
				 * of nextfp against minfp/stacktop.  If the
				 * high-level interrupt has in turn interrupted
				 * a non-passivated interrupt thread, we
				 * will execute the below code with non-zero
				 * aframes.  We therefore want to assert that
				 * aframes is zero _or_ we are in a high-level
				 * interrupt -- but because cpu_intr_actv is
				 * updated with high-level interrupts enabled,
				 * we must reduce this to only asserting that
				 * %pil is greater than DISP_LEVEL.
				 */
				ASSERT(aframes == 0 ||
				    dtrace_getipl() > DISP_LEVEL);
				pcstack[depth++] = (pc_t)fp->fr_savpc;
			}

			/*
			 * The walk is over; NULL-fill the remainder.
			 */
			while (depth < pcstack_limit)
				pcstack[depth++] = NULL;
			return;
		}

		/*
		 * Skip artificial frames; record the saved pc of the rest.
		 */
		if (aframes > 0) {
			aframes--;
		} else {
			pcstack[depth++] = (pc_t)fp->fr_savpc;
		}

		fp = nextfp;
		minfp = fp;
	}
}
359 
360 static int
361 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t sp)
362 {
363 	proc_t *p = curproc;
364 	int ret = 0;
365 	uintptr_t oldsp;
366 	volatile uint16_t *flags =
367 	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
368 
369 	ASSERT(pcstack == NULL || pcstack_limit > 0);
370 	ASSERT(dtrace_ustackdepth_max > 0);
371 
372 	if (p->p_model == DATAMODEL_NATIVE) {
373 		for (;;) {
374 			struct frame *fr = (struct frame *)(sp + STACK_BIAS);
375 			uintptr_t pc;
376 
377 			if (sp == 0 || fr == NULL ||
378 			    !IS_P2ALIGNED((uintptr_t)fr, STACK_ALIGN))
379 				break;
380 
381 			oldsp = sp;
382 
383 			pc = dtrace_fulword(&fr->fr_savpc);
384 			sp = dtrace_fulword(&fr->fr_savfp);
385 
386 			if (pc == 0)
387 				break;
388 
389 			/*
390 			 * We limit the number of times we can go around this
391 			 * loop to account for a circular stack.
392 			 */
393 			if (sp == oldsp || ret++ >= dtrace_ustackdepth_max) {
394 				*flags |= CPU_DTRACE_BADSTACK;
395 				cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp;
396 				break;
397 			}
398 
399 			if (pcstack != NULL) {
400 				*pcstack++ = pc;
401 				pcstack_limit--;
402 				if (pcstack_limit == 0)
403 					break;
404 			}
405 		}
406 	} else {
407 		/*
408 		 * Truncate the stack pointer to 32-bits as there may be
409 		 * garbage in the upper bits which would normally be ignored
410 		 * by the processor in 32-bit mode.
411 		 */
412 		sp = (uint32_t)sp;
413 
414 		for (;;) {
415 			struct frame32 *fr = (struct frame32 *)sp;
416 			uint32_t pc;
417 
418 			if (sp == 0 ||
419 			    !IS_P2ALIGNED((uintptr_t)fr, STACK_ALIGN32))
420 				break;
421 
422 			oldsp = sp;
423 
424 			pc = dtrace_fuword32(&fr->fr_savpc);
425 			sp = dtrace_fuword32(&fr->fr_savfp);
426 
427 			if (pc == 0)
428 				break;
429 
430 			if (sp == oldsp || ret++ >= dtrace_ustackdepth_max) {
431 				*flags |= CPU_DTRACE_BADSTACK;
432 				cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp;
433 				break;
434 			}
435 
436 			if (pcstack != NULL) {
437 				*pcstack++ = pc;
438 				pcstack_limit--;
439 				if (pcstack_limit == 0)
440 					break;
441 			}
442 		}
443 	}
444 
445 	return (ret);
446 }
447 
448 void
449 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
450 {
451 	klwp_t *lwp = ttolwp(curthread);
452 	proc_t *p = curproc;
453 	struct regs *rp;
454 	uintptr_t sp;
455 	int n;
456 
457 	ASSERT(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT));
458 
459 	if (pcstack_limit <= 0)
460 		return;
461 
462 	/*
463 	 * If there's no user context we still need to zero the stack.
464 	 */
465 	if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL)
466 		goto zero;
467 
468 	*pcstack++ = (uint64_t)p->p_pid;
469 	pcstack_limit--;
470 
471 	if (pcstack_limit <= 0)
472 		return;
473 
474 	*pcstack++ = (uint64_t)rp->r_pc;
475 	pcstack_limit--;
476 
477 	if (pcstack_limit <= 0)
478 		return;
479 
480 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
481 		*pcstack++ = (uint64_t)rp->r_o7;
482 		pcstack_limit--;
483 		if (pcstack_limit <= 0)
484 			return;
485 	}
486 
487 	sp = rp->r_sp;
488 
489 	n = dtrace_getupcstack_top(pcstack, pcstack_limit, &sp);
490 	ASSERT(n >= 0);
491 	ASSERT(n <= pcstack_limit);
492 
493 	pcstack += n;
494 	pcstack_limit -= n;
495 	if (pcstack_limit <= 0)
496 		return;
497 
498 	n = dtrace_getustack_common(pcstack, pcstack_limit, sp);
499 	ASSERT(n >= 0);
500 	ASSERT(n <= pcstack_limit);
501 
502 	pcstack += n;
503 	pcstack_limit -= n;
504 
505 zero:
506 	while (pcstack_limit-- > 0)
507 		*pcstack++ = NULL;
508 }
509 
510 int
511 dtrace_getustackdepth(void)
512 {
513 	klwp_t *lwp = ttolwp(curthread);
514 	proc_t *p = curproc;
515 	struct regs *rp;
516 	uintptr_t sp;
517 	int n = 1;
518 
519 	if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL)
520 		return (0);
521 
522 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
523 		return (-1);
524 
525 	sp = rp->r_sp;
526 
527 	n += dtrace_getustackdepth_top(&sp);
528 	n += dtrace_getustack_common(NULL, 0, sp);
529 
530 	/*
531 	 * Add one more to the stack depth if we're in an entry probe as long
532 	 * as the return address is non-NULL or there are additional frames
533 	 * beyond that NULL return address.
534 	 */
535 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY) &&
536 	    (rp->r_o7 != NULL || n != 1))
537 		n++;
538 
539 	return (n);
540 }
541 
542 void
543 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
544 {
545 	klwp_t *lwp = ttolwp(curthread);
546 	proc_t *p = ttoproc(curthread);
547 	struct regs *rp;
548 	uintptr_t sp;
549 
550 	if (pcstack_limit <= 0)
551 		return;
552 
553 	/*
554 	 * If there's no user context we still need to zero the stack.
555 	 */
556 	if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL)
557 		goto zero;
558 
559 	*pcstack++ = (uint64_t)p->p_pid;
560 	pcstack_limit--;
561 
562 	if (pcstack_limit <= 0)
563 		return;
564 
565 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
566 		*fpstack++ = 0;
567 		*pcstack++ = (uint64_t)rp->r_pc;
568 		pcstack_limit--;
569 		if (pcstack_limit <= 0)
570 			return;
571 
572 		*fpstack++ = (uint64_t)rp->r_sp;
573 		*pcstack++ = (uint64_t)rp->r_o7;
574 		pcstack_limit--;
575 	} else {
576 		*fpstack++ = (uint64_t)rp->r_sp;
577 		*pcstack++ = (uint64_t)rp->r_pc;
578 		pcstack_limit--;
579 	}
580 
581 	if (pcstack_limit <= 0)
582 		return;
583 
584 	sp = rp->r_sp;
585 
586 	dtrace_flush_user_windows();
587 
588 	if (p->p_model == DATAMODEL_NATIVE) {
589 		while (pcstack_limit > 0) {
590 			struct frame *fr = (struct frame *)(sp + STACK_BIAS);
591 			uintptr_t pc;
592 
593 			if (sp == 0 || fr == NULL ||
594 			    ((uintptr_t)&fr->fr_savpc & 3) != 0 ||
595 			    ((uintptr_t)&fr->fr_savfp & 3) != 0)
596 				break;
597 
598 			pc = dtrace_fulword(&fr->fr_savpc);
599 			sp = dtrace_fulword(&fr->fr_savfp);
600 
601 			if (pc == 0)
602 				break;
603 
604 			*fpstack++ = sp;
605 			*pcstack++ = pc;
606 			pcstack_limit--;
607 		}
608 	} else {
609 		/*
610 		 * Truncate the stack pointer to 32-bits as there may be
611 		 * garbage in the upper bits which would normally be ignored
612 		 * by the processor in 32-bit mode.
613 		 */
614 		sp = (uint32_t)sp;
615 
616 		while (pcstack_limit > 0) {
617 			struct frame32 *fr = (struct frame32 *)sp;
618 			uint32_t pc;
619 
620 			if (sp == 0 ||
621 			    ((uintptr_t)&fr->fr_savpc & 3) != 0 ||
622 			    ((uintptr_t)&fr->fr_savfp & 3) != 0)
623 				break;
624 
625 			pc = dtrace_fuword32(&fr->fr_savpc);
626 			sp = dtrace_fuword32(&fr->fr_savfp);
627 
628 			if (pc == 0)
629 				break;
630 
631 			*fpstack++ = sp;
632 			*pcstack++ = pc;
633 			pcstack_limit--;
634 		}
635 	}
636 
637 zero:
638 	while (pcstack_limit-- > 0)
639 		*pcstack++ = NULL;
640 }
641 
642 uint64_t
643 dtrace_getarg(int arg, int aframes)
644 {
645 	uintptr_t val;
646 	struct frame *fp;
647 	uint64_t rval;
648 
649 	/*
650 	 * Account for the fact that dtrace_getarg() consumes an additional
651 	 * stack frame.
652 	 */
653 	aframes++;
654 
655 	if (arg < 6) {
656 		if (dtrace_fish(aframes, DTRACE_REG_I0 + arg, &val) == 0)
657 			return (val);
658 	} else {
659 		if (dtrace_fish(aframes, DTRACE_REG_I6, &val) == 0) {
660 			/*
661 			 * We have a stack pointer; grab the argument.
662 			 */
663 			fp = (struct frame *)(val + STACK_BIAS);
664 
665 			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
666 			rval = fp->fr_argx[arg - 6];
667 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
668 
669 			return (rval);
670 		}
671 	}
672 
673 	/*
674 	 * There are other ways to do this.  But the slow, painful way works
675 	 * just fine.  Because this requires some loads, we need to set
676 	 * CPU_DTRACE_NOFAULT to protect against looking for an argument that
677 	 * isn't there.
678 	 */
679 	fp = (struct frame *)((caddr_t)dtrace_getfp() + STACK_BIAS);
680 	dtrace_flush_windows();
681 
682 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
683 
684 	for (aframes -= 1; aframes; aframes--)
685 		fp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
686 
687 	if (arg < 6) {
688 		rval = fp->fr_arg[arg];
689 	} else {
690 		fp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
691 		rval = fp->fr_argx[arg - 6];
692 	}
693 
694 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
695 
696 	return (rval);
697 }
698 
699 int
700 dtrace_getstackdepth(int aframes)
701 {
702 	struct frame *fp, *nextfp, *minfp, *stacktop;
703 	int depth = 0;
704 	int on_intr;
705 
706 	fp = (struct frame *)((caddr_t)dtrace_getfp() + STACK_BIAS);
707 	dtrace_flush_windows();
708 
709 	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
710 		stacktop = (struct frame *)CPU->cpu_intr_stack + SA(MINFRAME);
711 	else
712 		stacktop = (struct frame *)curthread->t_stk;
713 	minfp = fp;
714 
715 	for (;;) {
716 		nextfp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
717 		if (nextfp <= minfp || nextfp >= stacktop) {
718 			if (on_intr) {
719 				/*
720 				 * Hop from interrupt stack to thread stack.
721 				 */
722 				stacktop = (struct frame *)curthread->t_stk;
723 				minfp = (struct frame *)curthread->t_stkbase;
724 				on_intr = 0;
725 				continue;
726 			}
727 
728 			return (++depth);
729 		}
730 
731 		if (aframes > 0) {
732 			aframes--;
733 		} else {
734 			depth++;
735 		}
736 
737 		fp = nextfp;
738 		minfp = fp;
739 	}
740 }
741 
/*
 * Return the value of user register reg, given the saved register state rp.
 * This uses the same register numbering scheme as in sys/procfs_isa.h.
 *
 *  - R_G0 always reads as zero.
 *  - The remaining globals -- and the outs, unless CPU_DTRACE_FAKERESTORE is
 *    set -- are read directly out of *rp.
 *  - Register numbers above R_I7 name special registers (R_CCR, R_PC, R_nPC,
 *    R_Y, R_ASI, R_FPRS); any other such number sets CPU_DTRACE_ILLOP and
 *    yields 0.
 *  - All other registers live in a register window.  They are fetched with
 *    dtrace_getreg_win() when dtrace_getotherwin() reports enough windows;
 *    failing that, from a window buffered in the machpcb whose saved stack
 *    pointer matches; failing that, by loading from the user stack frame
 *    with CPU_DTRACE_NOFAULT set.
 */
ulong_t
dtrace_getreg(struct regs *rp, uint_t reg)
{
	ulong_t value;
	uintptr_t fp;
	struct machpcb *mpcb;

	if (reg == R_G0)
		return (0);

	/*
	 * Index from r_g1; hence the "- 1" to account for %g0.
	 */
	if (reg <= R_G7)
		return ((&rp->r_g1)[reg - 1]);

	if (reg > R_I7) {
		switch (reg) {
		case R_CCR:
			return ((rp->r_tstate >> TSTATE_CCR_SHIFT) &
			    TSTATE_CCR_MASK);
		case R_PC:
			return (rp->r_pc);
		case R_nPC:
			return (rp->r_npc);
		case R_Y:
			return (rp->r_y);
		case R_ASI:
			return ((rp->r_tstate >> TSTATE_ASI_SHIFT) &
			    TSTATE_ASI_MASK);
		case R_FPRS:
			return (dtrace_getfprs());
		default:
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return (0);
		}
	}

	/*
	 * We go to the fake restore case if the probe we hit was a pid
	 * return probe on a restore instruction. We partially emulate the
	 * restore in the kernel and then execute a simple restore
	 * instruction that we've secreted away to do the actual register
	 * window manipulation. We need to go one register window further
	 * down to get at the %ls, and %is and we need to treat %os like %is
	 * to pull them out of the topmost user frame.
	 */
	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAKERESTORE)) {
		if (reg > R_O7)
			goto fake_restore;
		else
			reg += R_I0 - R_O0;

	} else if (reg <= R_O7) {
		return ((&rp->r_g1)[reg - 1]);
	}

	if (dtrace_getotherwin() > 0)
		return (dtrace_getreg_win(reg, 1));

	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	/*
	 * NOTE(review): "reg - 16" below is used as an index into rw_local /
	 * fr_local for both %l and %i registers; presumably this relies on
	 * R_L0 being 16 and on the ins immediately following the locals in
	 * those structures -- confirm against their definitions.
	 */
	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (void *)(rp->r_sp + STACK_BIAS);

		/*
		 * Scan the buffered windows from the highest index down for
		 * one whose saved stack pointer matches the user %sp.
		 */
		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp)
					return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		/*
		 * No buffered window matched; load from the user stack.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_local[reg - 16]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	} else {
		struct frame32 *fr = (void *)(uintptr_t)(caddr32_t)rp->r_sp;

		/*
		 * Same lookup as above, with the 32-bit window layout.
		 */
		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp)
					return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_local[reg - 16]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	}

	return (value);

fake_restore:
	ASSERT(R_L0 <= reg && reg <= R_I7);

	/*
	 * We first look two user windows down to see if we can dig out
	 * the register we're looking for.
	 */
	if (dtrace_getotherwin() > 1)
		return (dtrace_getreg_win(reg, 2));

	/*
	 * First we need to get the frame pointer and then we perform
	 * the same computation as in the non-fake-o-restore case.
	 */

	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);

	if (dtrace_getotherwin() > 0) {
		fp = dtrace_getreg_win(R_FP, 1);
		goto got_fp;
	}

	/*
	 * Find the frame pointer of the topmost user frame:  first in a
	 * buffered window matching the user %sp, otherwise by loading
	 * fr_savfp from the user stack.  If that load faults, give up and
	 * return 0.
	 */
	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (void *)(rp->r_sp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp) {
					fp = rwin[i].rw_fp;
					goto got_fp;
				}
			} while (i > 0);
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		fp = dtrace_fulword(&fr->fr_savfp);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		if (cpu_core[CPU->cpu_id].cpuc_dtrace_flags & CPU_DTRACE_FAULT)
			return (0);
	} else {
		struct frame32 *fr = (void *)(uintptr_t)(caddr32_t)rp->r_sp;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp) {
					fp = rwin[i].rw_fp;
					goto got_fp;
				}
			} while (i > 0);
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		fp = dtrace_fuword32(&fr->fr_savfp);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		if (cpu_core[CPU->cpu_id].cpuc_dtrace_flags & CPU_DTRACE_FAULT)
			return (0);
	}
got_fp:

	/*
	 * With the frame pointer in hand, repeat the window lookup using fp
	 * in place of the user %sp.
	 */
	if (curproc->p_model == DATAMODEL_NATIVE) {
		struct frame *fr = (void *)(fp + STACK_BIAS);

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] == fp)
					return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fulword(&fr->fr_local[reg - 16]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	} else {
		struct frame32 *fr = (void *)(uintptr_t)(caddr32_t)fp;

		if (mpcb->mpcb_wbcnt > 0) {
			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
			int i = mpcb->mpcb_wbcnt;
			do {
				i--;
				if ((long)mpcb->mpcb_spbuf[i] == fp)
					return (rwin[i].rw_local[reg - 16]);
			} while (i > 0);
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		value = dtrace_fuword32(&fr->fr_local[reg - 16]);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	}

	return (value);
}
941