1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /*
27  * Copyright 2011 Joyent, Inc.  All rights reserved.
28  */
29 
30 #include <sys/dtrace_impl.h>
31 #include <sys/atomic.h>
32 #include <sys/model.h>
33 #include <sys/frame.h>
34 #include <sys/stack.h>
35 #include <sys/machpcb.h>
36 #include <sys/procfs_isa.h>
37 #include <sys/cmn_err.h>
38 #include <sys/sysmacros.h>
39 
/*
 * Instruction-decoding constants for the leaf/non-leaf heuristic in
 * dtrace_getpcstack(), together with the register numbers used by that
 * heuristic and by dtrace_getarg().
 */
/* Mask/match pair: (insn & MASK) == DTRACE_FMT3OP3 selects format-3 words. */
#define	DTRACE_FMT3OP3_MASK	0x81000000
#define	DTRACE_FMT3OP3		0x80000000
/* Right shifts that bring the rs1 and rd fields to the low-order bits. */
#define	DTRACE_FMT3RS1_SHIFT	14
#define	DTRACE_FMT3RD_SHIFT	25
/* Shift left, then arithmetic right, by this to sign-extend a ba's disp22. */
#define	DTRACE_DISP22_SHIFT	10
/* Mask applied to a shifted register field (five bits). */
#define	DTRACE_RMASK		0x1f
/* Register numbers at or above this (the %l's and %i's) imply a non-leaf. */
#define	DTRACE_REG_L0		16
/* %o7: an explicit write to it marks the function as a non-leaf. */
#define	DTRACE_REG_O7		15
/* Base register number used by dtrace_getarg() for arguments 0 through 5. */
#define	DTRACE_REG_I0		24
/* Register dtrace_getarg() fishes for to locate arguments 6 and beyond. */
#define	DTRACE_REG_I6		30
/* Whole instruction words compared against for ret and retl. */
#define	DTRACE_RET		0x81c7e008
#define	DTRACE_RETL		0x81c3e008
/* Mask and match values used to recognize save and restore. */
#define	DTRACE_SAVE_MASK	0xc1f80000
#define	DTRACE_SAVE		0x81e00000
#define	DTRACE_RESTORE		0x81e80000
/* Mask and match value used to recognize call. */
#define	DTRACE_CALL_MASK	0xc0000000
#define	DTRACE_CALL		0x40000000
/* Mask and match value used to recognize jmpl. */
#define	DTRACE_JMPL_MASK	0x81f80000
#define	DTRACE_JMPL		0x81c00000
/* Mask and match value used to recognize ba (annulled or not). */
#define	DTRACE_BA_MASK		0xdfc00000
#define	DTRACE_BA		0x10800000
/* Upper bound on the number of ba instructions the heuristic will follow. */
#define	DTRACE_BA_MAX		10
62 
/*
 * Routines defined outside this file.
 * NOTE(review): the remarks below are inferred only from the call sites in
 * this file; confirm them against the actual definitions.
 */
/*
 * Called by dtrace_getupcstack() before the in-memory walk; its return value
 * is used as the number of pcstack entries consumed, and the stack pointer is
 * passed by reference (presumably so it can be advanced -- TODO confirm).
 */
extern int dtrace_getupcstack_top(uint64_t *, int, uintptr_t *);
/* Depth-only counterpart used by dtrace_getustackdepth(). */
extern int dtrace_getustackdepth_top(uintptr_t *);
/* Used by dtrace_getreg() to fetch a register from a given window depth. */
extern ulong_t dtrace_getreg_win(uint_t, uint_t);
extern void dtrace_putreg_win(uint_t, ulong_t);
/* Used by dtrace_getarg(); a return value of 0 is treated as success. */
extern int dtrace_fish(int, int, uintptr_t *);

/*
 * Maximum number of frames dtrace_getustack_common() will count before it
 * sets CPU_DTRACE_BADSTACK and stops walking the user stack.
 */
int	dtrace_ustackdepth_max = 2048;
70 
71 /*
72  * This is similar in principle to getpcstack(), but there are several marked
73  * differences in implementation:
74  *
75  * (a)	dtrace_getpcstack() is called from probe context.  Thus, the call
76  *	to flush_windows() from getpcstack() is a call to the probe-safe
77  *	equivalent here.
78  *
79  * (b)  dtrace_getpcstack() is willing to sacrifice some performance to get
80  *	a correct stack.  While consumers of getpcstack() are largely
81  *	subsystem-specific in-kernel debugging facilities, DTrace consumers
82  *	are arbitrary user-level analysis tools; dtrace_getpcstack() must
83  *	deliver as correct a stack as possible.  Details on the issues
84  *	surrounding stack correctness are found below.
85  *
86  * (c)	dtrace_getpcstack() _always_ fills in pcstack_limit pc_t's -- filling
87  *	in the difference between the stack depth and pcstack_limit with NULLs.
88  *	Due to this behavior dtrace_getpcstack() returns void.
89  *
90  * (d)	dtrace_getpcstack() takes a third parameter, aframes, that
91  *	denotes the number of _artificial frames_ on the bottom of the
92  *	stack.  An artificial frame is one induced by the provider; all
93  *	artificial frames are stripped off before frames are stored to
94  *	pcstack.
95  *
96  * (e)	dtrace_getpcstack() takes a fourth parameter, pc, that indicates
97  *	an interrupted program counter (if any).  This should be a non-NULL
98  *	value if and only if the hit probe is unanchored.  (Anchored probes
99  *	don't fire through an interrupt source.)  This parameter is used to
100  *	assure (b), above.
101  */
102 void
103 dtrace_getpcstack(pc_t *pcstack, int pcstack_limit, int aframes, uint32_t *pc)
104 {
105 	struct frame *fp, *nextfp, *minfp, *stacktop;
106 	int depth = 0;
107 	int on_intr, j = 0;
108 	uint32_t i, r;
109 
110 	fp = (struct frame *)((caddr_t)dtrace_getfp() + STACK_BIAS);
111 	dtrace_flush_windows();
112 
113 	if (pc != NULL) {
114 		/*
115 		 * If we've been passed a non-NULL pc, we need to determine
116 		 * whether or not the specified program counter falls in a leaf
117 		 * function.  If it falls within a leaf function, we know that
118 		 * %o7 is valid in its frame (and we can just drive on).  If
119 		 * it's a non-leaf, however, we know that %o7 is garbage in the
120 		 * bottom frame.  To trim this frame, we simply increment
121 		 * aframes and drop into the stack-walking loop.
122 		 *
123 		 * To quickly determine if the specified program counter is in
124 		 * a leaf function, we exploit the fact that leaf functions
125 		 * tend to be short and non-leaf functions tend to frequently
126 		 * perform operations that are only permitted in a non-leaf
127 		 * function (e.g., using the %i's or %l's; calling a function;
128 		 * performing a restore).  We exploit these tendencies by
129 		 * simply scanning forward from the specified %pc -- if we see
130 		 * an operation only permitted in a non-leaf, we know we're in
131 		 * a non-leaf; if we see a retl, we know we're in a leaf.
132 		 * Fortunately, one need not perform anywhere near full
133 		 * disassembly to effectively determine the former: determining
134 		 * that an instruction is a format-3 instruction and decoding
135 		 * its rd and rs1 fields, for example, requires very little
136 		 * manipulation.  Overall, this method of leaf determination
137 		 * performs quite well:  on average, we only examine between
138 		 * 1.5 and 2.5 instructions before making the determination.
139 		 * (Outliers do exist, however; of note is the non-leaf
140 		 * function ip_sioctl_not_ours() which -- as of this writing --
141 		 * has a whopping 455 straight instructions that manipulate
142 		 * only %g's and %o's.)
143 		 */
144 		int delay = 0, branches = 0, taken = 0;
145 
146 		if (depth < pcstack_limit)
147 			pcstack[depth++] = (pc_t)(uintptr_t)pc;
148 
149 		/*
150 		 * Our heuristic is exactly that -- a heuristic -- and there
151 		 * exists a possibility that we could be either be vectored
152 		 * off into the weeds (by following a bogus branch) or could
153 		 * wander off the end of the function and off the end of a
154 		 * text mapping (by not following a conditional branch at the
155 		 * end of the function that is effectively always taken).  So
156 		 * as a precautionary measure, we set the NOFAULT flag.
157 		 */
158 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
159 
160 		for (;;) {
161 			i = pc[j++];
162 
163 			if ((i & DTRACE_FMT3OP3_MASK) == DTRACE_FMT3OP3) {
164 				/*
165 				 * This is a format-3 instruction.  We can
166 				 * look at rd and rs1.
167 				 */
168 				r = (i >> DTRACE_FMT3RS1_SHIFT) & DTRACE_RMASK;
169 
170 				if (r >= DTRACE_REG_L0)
171 					goto nonleaf;
172 
173 				r = (i >> DTRACE_FMT3RD_SHIFT) & DTRACE_RMASK;
174 
175 				if (r >= DTRACE_REG_L0)
176 					goto nonleaf;
177 
178 				if ((i & DTRACE_JMPL_MASK) == DTRACE_JMPL) {
179 					delay = 1;
180 					continue;
181 				}
182 
183 				/*
184 				 * If we see explicit manipulation with %o7
185 				 * as a destination register, we know that
186 				 * %o7 is likely bogus -- and we treat this
187 				 * function as a non-leaf.
188 				 */
189 				if (r == DTRACE_REG_O7) {
190 					if (delay)
191 						goto leaf;
192 
193 					i &= DTRACE_JMPL_MASK;
194 
195 					if (i == DTRACE_JMPL) {
196 						delay = 1;
197 						continue;
198 					}
199 
200 					goto nonleaf;
201 				}
202 			} else {
203 				/*
204 				 * If this is a call, it may or may not be
205 				 * a leaf; we need to check the delay slot.
206 				 */
207 				if ((i & DTRACE_CALL_MASK) == DTRACE_CALL) {
208 					delay = 1;
209 					continue;
210 				}
211 
212 				/*
213 				 * If we see a ret it's not a leaf; if we
214 				 * see a retl, it is a leaf.
215 				 */
216 				if (i == DTRACE_RET)
217 					goto nonleaf;
218 
219 				if (i == DTRACE_RETL)
220 					goto leaf;
221 
222 				/*
223 				 * If this is a ba (annulled or not), then we
224 				 * need to actually follow the branch.  No, we
225 				 * don't look at the delay slot -- hopefully
226 				 * anything that can be gleaned from the delay
227 				 * slot can also be gleaned from the branch
228 				 * target.  To prevent ourselves from iterating
229 				 * infinitely, we clamp the number of branches
230 				 * that we'll follow, and we refuse to follow
231 				 * the same branch twice consecutively.  In
232 				 * both cases, we abort by deciding that we're
233 				 * looking at a leaf.  While in theory this
234 				 * could be wrong (we could be in the middle of
235 				 * a loop in a non-leaf that ends with a ba and
236 				 * only manipulates outputs and globals in the
237 				 * body of the loop -- therefore leading us to
238 				 * the wrong conclusion), this doesn't seem to
239 				 * crop up in practice.  (Or rather, this
240 				 * condition could not be deliberately induced,
241 				 * despite concerted effort.)
242 				 */
243 				if ((i & DTRACE_BA_MASK) == DTRACE_BA) {
244 					if (++branches == DTRACE_BA_MAX ||
245 					    taken == j)
246 						goto nonleaf;
247 
248 					taken = j;
249 					j += ((int)(i << DTRACE_DISP22_SHIFT) >>
250 					    DTRACE_DISP22_SHIFT) - 1;
251 					continue;
252 				}
253 
254 				/*
255 				 * Finally, if it's a save, it should be
256 				 * treated as a leaf; if it's a restore it
257 				 * should not be treated as a leaf.
258 				 */
259 				if ((i & DTRACE_SAVE_MASK) == DTRACE_SAVE)
260 					goto leaf;
261 
262 				if ((i & DTRACE_SAVE_MASK) == DTRACE_RESTORE)
263 					goto nonleaf;
264 			}
265 
266 			if (delay) {
267 				/*
268 				 * If this was a delay slot instruction and
269 				 * we didn't pick it up elsewhere, this is a
270 				 * non-leaf.
271 				 */
272 				goto nonleaf;
273 			}
274 		}
275 nonleaf:
276 		aframes++;
277 leaf:
278 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
279 	}
280 
281 	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
282 		stacktop = (struct frame *)(CPU->cpu_intr_stack + SA(MINFRAME));
283 	else
284 		stacktop = (struct frame *)curthread->t_stk;
285 	minfp = fp;
286 
287 	while (depth < pcstack_limit) {
288 		nextfp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
289 		if (nextfp <= minfp || nextfp >= stacktop) {
290 			if (!on_intr && nextfp == stacktop && aframes != 0) {
291 				/*
292 				 * If we are exactly at the top of the stack
293 				 * with a non-zero number of artificial frames,
294 				 * it must be that the stack is filled with
295 				 * nothing _but_ artificial frames.  In this
296 				 * case, we assert that this is so, zero
297 				 * pcstack, and return.
298 				 */
299 				ASSERT(aframes == 1);
300 				ASSERT(depth == 0);
301 
302 				while (depth < pcstack_limit)
303 					pcstack[depth++] = NULL;
304 				return;
305 			}
306 
307 			if (on_intr) {
308 				/*
309 				 * Hop from interrupt stack to thread stack.
310 				 */
311 				stacktop = (struct frame *)curthread->t_stk;
312 				minfp = (struct frame *)curthread->t_stkbase;
313 
314 				on_intr = 0;
315 
316 				if (nextfp > minfp && nextfp < stacktop)
317 					continue;
318 			} else {
319 				/*
320 				 * High-level interrupts may occur when %sp is
321 				 * not necessarily contained in the stack
322 				 * bounds implied by %g7 -- interrupt thread
323 				 * management runs with %pil at DISP_LEVEL,
324 				 * and high-level interrupts may thus occur
325 				 * in windows when %sp and %g7 are not self-
326 				 * consistent.  If we call dtrace_getpcstack()
327 				 * from a high-level interrupt that has occurred
328 				 * in such a window, we will fail the above test
329 				 * of nextfp against minfp/stacktop.  If the
330 				 * high-level interrupt has in turn interrupted
331 				 * a non-passivated interrupt thread, we
332 				 * will execute the below code with non-zero
333 				 * aframes.  We therefore want to assert that
334 				 * aframes is zero _or_ we are in a high-level
335 				 * interrupt -- but because cpu_intr_actv is
336 				 * updated with high-level interrupts enabled,
337 				 * we must reduce this to only asserting that
338 				 * %pil is greater than DISP_LEVEL.
339 				 */
340 				ASSERT(aframes == 0 ||
341 				    dtrace_getipl() > DISP_LEVEL);
342 				pcstack[depth++] = (pc_t)fp->fr_savpc;
343 			}
344 
345 			while (depth < pcstack_limit)
346 				pcstack[depth++] = NULL;
347 			return;
348 		}
349 
350 		if (aframes > 0) {
351 			aframes--;
352 		} else {
353 			pcstack[depth++] = (pc_t)fp->fr_savpc;
354 		}
355 
356 		fp = nextfp;
357 		minfp = fp;
358 	}
359 }
360 
361 static int
362 dtrace_getustack_common(uint64_t *pcstack, int pcstack_limit, uintptr_t sp)
363 {
364 	proc_t *p = curproc;
365 	int ret = 0;
366 	uintptr_t oldsp;
367 	volatile uint16_t *flags =
368 	    (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
369 
370 	ASSERT(pcstack == NULL || pcstack_limit > 0);
371 	ASSERT(dtrace_ustackdepth_max > 0);
372 
373 	if (p->p_model == DATAMODEL_NATIVE) {
374 		for (;;) {
375 			struct frame *fr = (struct frame *)(sp + STACK_BIAS);
376 			uintptr_t pc;
377 
378 			if (sp == 0 || fr == NULL ||
379 			    !IS_P2ALIGNED((uintptr_t)fr, STACK_ALIGN))
380 				break;
381 
382 			oldsp = sp;
383 
384 			pc = dtrace_fulword(&fr->fr_savpc);
385 			sp = dtrace_fulword(&fr->fr_savfp);
386 
387 			if (pc == 0)
388 				break;
389 
390 			/*
391 			 * We limit the number of times we can go around this
392 			 * loop to account for a circular stack.
393 			 */
394 			if (sp == oldsp || ret++ >= dtrace_ustackdepth_max) {
395 				*flags |= CPU_DTRACE_BADSTACK;
396 				cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp;
397 				break;
398 			}
399 
400 			if (pcstack != NULL) {
401 				*pcstack++ = pc;
402 				pcstack_limit--;
403 				if (pcstack_limit == 0)
404 					break;
405 			}
406 		}
407 	} else {
408 		/*
409 		 * Truncate the stack pointer to 32-bits as there may be
410 		 * garbage in the upper bits which would normally be ignored
411 		 * by the processor in 32-bit mode.
412 		 */
413 		sp = (uint32_t)sp;
414 
415 		for (;;) {
416 			struct frame32 *fr = (struct frame32 *)sp;
417 			uint32_t pc;
418 
419 			if (sp == 0 ||
420 			    !IS_P2ALIGNED((uintptr_t)fr, STACK_ALIGN32))
421 				break;
422 
423 			oldsp = sp;
424 
425 			pc = dtrace_fuword32(&fr->fr_savpc);
426 			sp = dtrace_fuword32(&fr->fr_savfp);
427 
428 			if (pc == 0)
429 				break;
430 
431 			if (sp == oldsp || ret++ >= dtrace_ustackdepth_max) {
432 				*flags |= CPU_DTRACE_BADSTACK;
433 				cpu_core[CPU->cpu_id].cpuc_dtrace_illval = sp;
434 				break;
435 			}
436 
437 			if (pcstack != NULL) {
438 				*pcstack++ = pc;
439 				pcstack_limit--;
440 				if (pcstack_limit == 0)
441 					break;
442 			}
443 		}
444 	}
445 
446 	return (ret);
447 }
448 
449 void
450 dtrace_getupcstack(uint64_t *pcstack, int pcstack_limit)
451 {
452 	klwp_t *lwp = ttolwp(curthread);
453 	proc_t *p = curproc;
454 	struct regs *rp;
455 	uintptr_t sp;
456 	int n;
457 
458 	ASSERT(DTRACE_CPUFLAG_ISSET(CPU_DTRACE_NOFAULT));
459 
460 	if (pcstack_limit <= 0)
461 		return;
462 
463 	/*
464 	 * If there's no user context we still need to zero the stack.
465 	 */
466 	if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL)
467 		goto zero;
468 
469 	*pcstack++ = (uint64_t)p->p_pid;
470 	pcstack_limit--;
471 
472 	if (pcstack_limit <= 0)
473 		return;
474 
475 	*pcstack++ = (uint64_t)rp->r_pc;
476 	pcstack_limit--;
477 
478 	if (pcstack_limit <= 0)
479 		return;
480 
481 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
482 		*pcstack++ = (uint64_t)rp->r_o7;
483 		pcstack_limit--;
484 		if (pcstack_limit <= 0)
485 			return;
486 	}
487 
488 	sp = rp->r_sp;
489 
490 	n = dtrace_getupcstack_top(pcstack, pcstack_limit, &sp);
491 	ASSERT(n >= 0);
492 	ASSERT(n <= pcstack_limit);
493 
494 	pcstack += n;
495 	pcstack_limit -= n;
496 	if (pcstack_limit <= 0)
497 		return;
498 
499 	n = dtrace_getustack_common(pcstack, pcstack_limit, sp);
500 	ASSERT(n >= 0);
501 	ASSERT(n <= pcstack_limit);
502 
503 	pcstack += n;
504 	pcstack_limit -= n;
505 
506 zero:
507 	while (pcstack_limit-- > 0)
508 		*pcstack++ = NULL;
509 }
510 
511 int
512 dtrace_getustackdepth(void)
513 {
514 	klwp_t *lwp = ttolwp(curthread);
515 	proc_t *p = curproc;
516 	struct regs *rp;
517 	uintptr_t sp;
518 	int n = 1;
519 
520 	if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL)
521 		return (0);
522 
523 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
524 		return (-1);
525 
526 	sp = rp->r_sp;
527 
528 	n += dtrace_getustackdepth_top(&sp);
529 	n += dtrace_getustack_common(NULL, 0, sp);
530 
531 	/*
532 	 * Add one more to the stack depth if we're in an entry probe as long
533 	 * as the return address is non-NULL or there are additional frames
534 	 * beyond that NULL return address.
535 	 */
536 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY) &&
537 	    (rp->r_o7 != NULL || n != 1))
538 		n++;
539 
540 	return (n);
541 }
542 
543 void
544 dtrace_getufpstack(uint64_t *pcstack, uint64_t *fpstack, int pcstack_limit)
545 {
546 	klwp_t *lwp = ttolwp(curthread);
547 	proc_t *p = ttoproc(curthread);
548 	struct regs *rp;
549 	uintptr_t sp;
550 
551 	if (pcstack_limit <= 0)
552 		return;
553 
554 	/*
555 	 * If there's no user context we still need to zero the stack.
556 	 */
557 	if (lwp == NULL || p == NULL || (rp = lwp->lwp_regs) == NULL)
558 		goto zero;
559 
560 	*pcstack++ = (uint64_t)p->p_pid;
561 	pcstack_limit--;
562 
563 	if (pcstack_limit <= 0)
564 		return;
565 
566 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_ENTRY)) {
567 		*fpstack++ = 0;
568 		*pcstack++ = (uint64_t)rp->r_pc;
569 		pcstack_limit--;
570 		if (pcstack_limit <= 0)
571 			return;
572 
573 		*fpstack++ = (uint64_t)rp->r_sp;
574 		*pcstack++ = (uint64_t)rp->r_o7;
575 		pcstack_limit--;
576 	} else {
577 		*fpstack++ = (uint64_t)rp->r_sp;
578 		*pcstack++ = (uint64_t)rp->r_pc;
579 		pcstack_limit--;
580 	}
581 
582 	if (pcstack_limit <= 0)
583 		return;
584 
585 	sp = rp->r_sp;
586 
587 	dtrace_flush_user_windows();
588 
589 	if (p->p_model == DATAMODEL_NATIVE) {
590 		while (pcstack_limit > 0) {
591 			struct frame *fr = (struct frame *)(sp + STACK_BIAS);
592 			uintptr_t pc;
593 
594 			if (sp == 0 || fr == NULL ||
595 			    ((uintptr_t)&fr->fr_savpc & 3) != 0 ||
596 			    ((uintptr_t)&fr->fr_savfp & 3) != 0)
597 				break;
598 
599 			pc = dtrace_fulword(&fr->fr_savpc);
600 			sp = dtrace_fulword(&fr->fr_savfp);
601 
602 			if (pc == 0)
603 				break;
604 
605 			*fpstack++ = sp;
606 			*pcstack++ = pc;
607 			pcstack_limit--;
608 		}
609 	} else {
610 		/*
611 		 * Truncate the stack pointer to 32-bits as there may be
612 		 * garbage in the upper bits which would normally be ignored
613 		 * by the processor in 32-bit mode.
614 		 */
615 		sp = (uint32_t)sp;
616 
617 		while (pcstack_limit > 0) {
618 			struct frame32 *fr = (struct frame32 *)sp;
619 			uint32_t pc;
620 
621 			if (sp == 0 ||
622 			    ((uintptr_t)&fr->fr_savpc & 3) != 0 ||
623 			    ((uintptr_t)&fr->fr_savfp & 3) != 0)
624 				break;
625 
626 			pc = dtrace_fuword32(&fr->fr_savpc);
627 			sp = dtrace_fuword32(&fr->fr_savfp);
628 
629 			if (pc == 0)
630 				break;
631 
632 			*fpstack++ = sp;
633 			*pcstack++ = pc;
634 			pcstack_limit--;
635 		}
636 	}
637 
638 zero:
639 	while (pcstack_limit-- > 0)
640 		*pcstack++ = NULL;
641 }
642 
643 uint64_t
644 dtrace_getarg(int arg, int aframes)
645 {
646 	uintptr_t val;
647 	struct frame *fp;
648 	uint64_t rval;
649 
650 	/*
651 	 * Account for the fact that dtrace_getarg() consumes an additional
652 	 * stack frame.
653 	 */
654 	aframes++;
655 
656 	if (arg < 6) {
657 		if (dtrace_fish(aframes, DTRACE_REG_I0 + arg, &val) == 0)
658 			return (val);
659 	} else {
660 		if (dtrace_fish(aframes, DTRACE_REG_I6, &val) == 0) {
661 			/*
662 			 * We have a stack pointer; grab the argument.
663 			 */
664 			fp = (struct frame *)(val + STACK_BIAS);
665 
666 			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
667 			rval = fp->fr_argx[arg - 6];
668 			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
669 
670 			return (rval);
671 		}
672 	}
673 
674 	/*
675 	 * There are other ways to do this.  But the slow, painful way works
676 	 * just fine.  Because this requires some loads, we need to set
677 	 * CPU_DTRACE_NOFAULT to protect against looking for an argument that
678 	 * isn't there.
679 	 */
680 	fp = (struct frame *)((caddr_t)dtrace_getfp() + STACK_BIAS);
681 	dtrace_flush_windows();
682 
683 	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
684 
685 	for (aframes -= 1; aframes; aframes--)
686 		fp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
687 
688 	if (arg < 6) {
689 		rval = fp->fr_arg[arg];
690 	} else {
691 		fp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
692 		rval = fp->fr_argx[arg - 6];
693 	}
694 
695 	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
696 
697 	return (rval);
698 }
699 
700 int
701 dtrace_getstackdepth(int aframes)
702 {
703 	struct frame *fp, *nextfp, *minfp, *stacktop;
704 	int depth = 0;
705 	int on_intr;
706 
707 	fp = (struct frame *)((caddr_t)dtrace_getfp() + STACK_BIAS);
708 	dtrace_flush_windows();
709 
710 	if ((on_intr = CPU_ON_INTR(CPU)) != 0)
711 		stacktop = (struct frame *)CPU->cpu_intr_stack + SA(MINFRAME);
712 	else
713 		stacktop = (struct frame *)curthread->t_stk;
714 	minfp = fp;
715 
716 	for (;;) {
717 		nextfp = (struct frame *)((caddr_t)fp->fr_savfp + STACK_BIAS);
718 		if (nextfp <= minfp || nextfp >= stacktop) {
719 			if (on_intr) {
720 				/*
721 				 * Hop from interrupt stack to thread stack.
722 				 */
723 				stacktop = (struct frame *)curthread->t_stk;
724 				minfp = (struct frame *)curthread->t_stkbase;
725 				on_intr = 0;
726 				continue;
727 			}
728 
729 			return (++depth);
730 		}
731 
732 		if (aframes > 0) {
733 			aframes--;
734 		} else {
735 			depth++;
736 		}
737 
738 		fp = nextfp;
739 		minfp = fp;
740 	}
741 }
742 
743 /*
744  * This uses the same register numbering scheme as in sys/procfs_isa.h.
745  */
746 ulong_t
747 dtrace_getreg(struct regs *rp, uint_t reg)
748 {
749 	ulong_t value;
750 	uintptr_t fp;
751 	struct machpcb *mpcb;
752 
753 	if (reg == R_G0)
754 		return (0);
755 
756 	if (reg <= R_G7)
757 		return ((&rp->r_g1)[reg - 1]);
758 
759 	if (reg > R_I7) {
760 		switch (reg) {
761 		case R_CCR:
762 			return ((rp->r_tstate >> TSTATE_CCR_SHIFT) &
763 			    TSTATE_CCR_MASK);
764 		case R_PC:
765 			return (rp->r_pc);
766 		case R_nPC:
767 			return (rp->r_npc);
768 		case R_Y:
769 			return (rp->r_y);
770 		case R_ASI:
771 			return ((rp->r_tstate >> TSTATE_ASI_SHIFT) &
772 			    TSTATE_ASI_MASK);
773 		case R_FPRS:
774 			return (dtrace_getfprs());
775 		default:
776 			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
777 			return (0);
778 		}
779 	}
780 
781 	/*
782 	 * We reach go to the fake restore case if the probe we hit was a pid
783 	 * return probe on a restore instruction. We partially emulate the
784 	 * restore in the kernel and then execute a simple restore
785 	 * instruction that we've secreted away to do the actual register
786 	 * window manipulation. We need to go one register window further
787 	 * down to get at the %ls, and %is and we need to treat %os like %is
788 	 * to pull them out of the topmost user frame.
789 	 */
790 	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAKERESTORE)) {
791 		if (reg > R_O7)
792 			goto fake_restore;
793 		else
794 			reg += R_I0 - R_O0;
795 
796 	} else if (reg <= R_O7) {
797 		return ((&rp->r_g1)[reg - 1]);
798 	}
799 
800 	if (dtrace_getotherwin() > 0)
801 		return (dtrace_getreg_win(reg, 1));
802 
803 	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
804 
805 	if (curproc->p_model == DATAMODEL_NATIVE) {
806 		struct frame *fr = (void *)(rp->r_sp + STACK_BIAS);
807 
808 		if (mpcb->mpcb_wbcnt > 0) {
809 			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
810 			int i = mpcb->mpcb_wbcnt;
811 			do {
812 				i--;
813 				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp)
814 					return (rwin[i].rw_local[reg - 16]);
815 			} while (i > 0);
816 		}
817 
818 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
819 		value = dtrace_fulword(&fr->fr_local[reg - 16]);
820 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
821 	} else {
822 		struct frame32 *fr = (void *)(uintptr_t)(caddr32_t)rp->r_sp;
823 
824 		if (mpcb->mpcb_wbcnt > 0) {
825 			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
826 			int i = mpcb->mpcb_wbcnt;
827 			do {
828 				i--;
829 				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp)
830 					return (rwin[i].rw_local[reg - 16]);
831 			} while (i > 0);
832 		}
833 
834 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
835 		value = dtrace_fuword32(&fr->fr_local[reg - 16]);
836 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
837 	}
838 
839 	return (value);
840 
841 fake_restore:
842 	ASSERT(R_L0 <= reg && reg <= R_I7);
843 
844 	/*
845 	 * We first look two user windows down to see if we can dig out
846 	 * the register we're looking for.
847 	 */
848 	if (dtrace_getotherwin() > 1)
849 		return (dtrace_getreg_win(reg, 2));
850 
851 	/*
852 	 * First we need to get the frame pointer and then we perform
853 	 * the same computation as in the non-fake-o-restore case.
854 	 */
855 
856 	mpcb = (struct machpcb *)((caddr_t)rp - REGOFF);
857 
858 	if (dtrace_getotherwin() > 0) {
859 		fp = dtrace_getreg_win(R_FP, 1);
860 		goto got_fp;
861 	}
862 
863 	if (curproc->p_model == DATAMODEL_NATIVE) {
864 		struct frame *fr = (void *)(rp->r_sp + STACK_BIAS);
865 
866 		if (mpcb->mpcb_wbcnt > 0) {
867 			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
868 			int i = mpcb->mpcb_wbcnt;
869 			do {
870 				i--;
871 				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp) {
872 					fp = rwin[i].rw_fp;
873 					goto got_fp;
874 				}
875 			} while (i > 0);
876 		}
877 
878 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
879 		fp = dtrace_fulword(&fr->fr_savfp);
880 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
881 		if (cpu_core[CPU->cpu_id].cpuc_dtrace_flags & CPU_DTRACE_FAULT)
882 			return (0);
883 	} else {
884 		struct frame32 *fr = (void *)(uintptr_t)(caddr32_t)rp->r_sp;
885 
886 		if (mpcb->mpcb_wbcnt > 0) {
887 			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
888 			int i = mpcb->mpcb_wbcnt;
889 			do {
890 				i--;
891 				if ((long)mpcb->mpcb_spbuf[i] == rp->r_sp) {
892 					fp = rwin[i].rw_fp;
893 					goto got_fp;
894 				}
895 			} while (i > 0);
896 		}
897 
898 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
899 		fp = dtrace_fuword32(&fr->fr_savfp);
900 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
901 		if (cpu_core[CPU->cpu_id].cpuc_dtrace_flags & CPU_DTRACE_FAULT)
902 			return (0);
903 	}
904 got_fp:
905 
906 	if (curproc->p_model == DATAMODEL_NATIVE) {
907 		struct frame *fr = (void *)(fp + STACK_BIAS);
908 
909 		if (mpcb->mpcb_wbcnt > 0) {
910 			struct rwindow *rwin = (void *)mpcb->mpcb_wbuf;
911 			int i = mpcb->mpcb_wbcnt;
912 			do {
913 				i--;
914 				if ((long)mpcb->mpcb_spbuf[i] == fp)
915 					return (rwin[i].rw_local[reg - 16]);
916 			} while (i > 0);
917 		}
918 
919 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
920 		value = dtrace_fulword(&fr->fr_local[reg - 16]);
921 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
922 	} else {
923 		struct frame32 *fr = (void *)(uintptr_t)(caddr32_t)fp;
924 
925 		if (mpcb->mpcb_wbcnt > 0) {
926 			struct rwindow32 *rwin = (void *)mpcb->mpcb_wbuf;
927 			int i = mpcb->mpcb_wbcnt;
928 			do {
929 				i--;
930 				if ((long)mpcb->mpcb_spbuf[i] == fp)
931 					return (rwin[i].rw_local[reg - 16]);
932 			} while (i > 0);
933 		}
934 
935 		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
936 		value = dtrace_fuword32(&fr->fr_local[reg - 16]);
937 		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
938 	}
939 
940 	return (value);
941 }
942 
943 /*ARGSUSED*/
944 uint64_t
945 dtrace_getvmreg(uint_t ndx, volatile uint16_t *flags)
946 {
947 	*flags |= CPU_DTRACE_ILLOP;
948 
949 	return (0);
950 }
951