xref: /illumos-gate/usr/src/uts/common/fs/proc/prsubr.c (revision 8950e535f42dd006f8cfb2122c94f6b7557757e0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved	*/
30 
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/cmn_err.h>
35 #include <sys/cred.h>
36 #include <sys/priv.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
39 #include <sys/inline.h>
40 #include <sys/kmem.h>
41 #include <sys/mman.h>
42 #include <sys/proc.h>
43 #include <sys/brand.h>
44 #include <sys/sobject.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/uio.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/session.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/disp.h>
56 #include <sys/class.h>
57 #include <sys/ts.h>
58 #include <sys/bitmap.h>
59 #include <sys/poll.h>
60 #include <sys/shm_impl.h>
61 #include <sys/fault.h>
62 #include <sys/syscall.h>
63 #include <sys/procfs.h>
64 #include <sys/processor.h>
65 #include <sys/cpuvar.h>
66 #include <sys/copyops.h>
67 #include <sys/time.h>
68 #include <sys/msacct.h>
69 #include <sys/flock_impl.h>
70 #include <sys/stropts.h>
71 #include <sys/strsubr.h>
72 #include <sys/pathname.h>
73 #include <sys/mode.h>
74 #include <sys/socketvar.h>
75 #include <sys/autoconf.h>
76 #include <sys/dtrace.h>
77 #include <sys/timod.h>
78 #include <sys/fs/namenode.h>
79 #include <netinet/udp.h>
80 #include <netinet/tcp.h>
81 #include <inet/cc.h>
82 #include <vm/as.h>
83 #include <vm/rm.h>
84 #include <vm/seg.h>
85 #include <vm/seg_vn.h>
86 #include <vm/seg_dev.h>
87 #include <vm/seg_spt.h>
88 #include <vm/page.h>
89 #include <sys/vmparam.h>
90 #include <sys/swap.h>
91 #include <fs/proc/prdata.h>
92 #include <sys/task.h>
93 #include <sys/project.h>
94 #include <sys/contract_impl.h>
95 #include <sys/contract/process.h>
96 #include <sys/contract/process_impl.h>
97 #include <sys/schedctl.h>
98 #include <sys/pool.h>
99 #include <sys/zone.h>
100 #include <sys/atomic.h>
101 #include <sys/sdt.h>
102 
/*
 * NOTE(review): presumably bounds a spin/retry loop later in this file —
 * confirm at the use site (not visible in this chunk).
 */
#define	MAX_ITERS_SPIN	5

/*
 * Per-segment vectors of page protection and incore state, filled in
 * a bounded chunk at a time (see pagev_lim below for the upper bound).
 * The consumers of this structure appear later in the file.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/* Watchpoint page management; definitions appear later in the file. */
static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
119 
120 /*
121  * Choose an lwp from the complete set of lwps for the process.
122  * This is called for any operation applied to the process
123  * file descriptor that requires an lwp to operate upon.
124  *
125  * Returns a pointer to the thread for the selected LWP,
126  * and with the dispatcher lock held for the thread.
127  *
128  * The algorithm for choosing an lwp is critical for /proc semantics;
129  * don't touch this code unless you know all of the implications.
130  */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			/* remember only the first candidate in each class */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/* this is filthy */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				/* sleeping in holdlwps() */
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * Select the best candidate in strict priority order; this
	 * ordering is part of the documented /proc semantics.
	 */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);
	return (t);
}
266 
267 /*
268  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
269  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
270  * on the /proc file descriptor.  Called from stop() when a traced
271  * process stops on an event of interest.  Also called from exit()
272  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
273  */
274 void
275 prnotify(struct vnode *vp)
276 {
277 	prcommon_t *pcp = VTOP(vp)->pr_common;
278 
279 	mutex_enter(&pcp->prc_mutex);
280 	cv_broadcast(&pcp->prc_wait);
281 	mutex_exit(&pcp->prc_mutex);
282 	if (pcp->prc_flags & PRC_POLL) {
283 		/*
284 		 * We call pollwakeup() with POLLHUP to ensure that
285 		 * the pollers are awakened even if they are polling
286 		 * for nothing (i.e., waiting for the process to exit).
287 		 * This enables the use of the PRC_POLL flag for optimization
288 		 * (we can turn off PRC_POLL only if we know no pollers remain).
289 		 */
290 		pcp->prc_flags &= ~PRC_POLL;
291 		pollwakeup(&pcp->prc_pollhead, POLLHUP);
292 	}
293 }
294 
295 /* called immediately below, in prfree() */
296 static void
297 prfreenotify(vnode_t *vp)
298 {
299 	prnode_t *pnp;
300 	prcommon_t *pcp;
301 
302 	while (vp != NULL) {
303 		pnp = VTOP(vp);
304 		pcp = pnp->pr_common;
305 		ASSERT(pcp->prc_thread == NULL);
306 		pcp->prc_proc = NULL;
307 		/*
308 		 * We can't call prnotify() here because we are holding
309 		 * pidlock.  We assert that there is no need to.
310 		 */
311 		mutex_enter(&pcp->prc_mutex);
312 		cv_broadcast(&pcp->prc_wait);
313 		mutex_exit(&pcp->prc_mutex);
314 		ASSERT(!(pcp->prc_flags & PRC_POLL));
315 
316 		vp = pnp->pr_next;
317 		pnp->pr_next = NULL;
318 	}
319 }
320 
321 /*
322  * Called from a hook in freeproc() when a traced process is removed
323  * from the process table.  The proc-table pointers of all associated
324  * /proc vnodes are cleared to indicate that the process has gone away.
325  */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * Drop pr_pidlock before sleeping and reacquire both
		 * locks in the order documented above before retesting.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	/* Clear the proc pointers of all /proc vnodes for this process. */
	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}
366 
367 /*
368  * Called from a hook in exit() when a traced process is becoming a zombie.
369  */
void
prexit(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	if (pr_watch_active(p)) {
		/* Tear down watchpoints before the process goes away. */
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
	if (p->p_trace) {
		/* Mark the process /proc common defunct and wake waiters. */
		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
		prnotify(p->p_trace);
	}
	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
}
386 
387 /*
388  * Called when a thread calls lwp_exit().
389  */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	/*
	 * Mark defunct any process-list /proc vnode whose common
	 * structure refers to this exiting lwp.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	/*
	 * Mark defunct, and notify waiters on, every vnode on this
	 * lwp's own trace list.
	 */
	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}
429 
430 /*
431  * Called when a zombie thread is joined or when a
432  * detached lwp exits.  Called from lwp_hash_out().
433  */
434 void
435 prlwpfree(proc_t *p, lwpent_t *lep)
436 {
437 	vnode_t *vp;
438 	prnode_t *pnp;
439 	prcommon_t *pcp;
440 
441 	ASSERT(MUTEX_HELD(&p->p_lock));
442 
443 	/*
444 	 * The process must be blocked against /proc to do this safely.
445 	 * The lwp must not disappear while the process is marked P_PR_LOCK.
446 	 * It is the caller's responsibility to have called prbarrier(p).
447 	 */
448 	ASSERT(!(p->p_proc_flag & P_PR_LOCK));
449 
450 	vp = lep->le_trace;
451 	lep->le_trace = NULL;
452 	while (vp) {
453 		prnotify(vp);
454 		pnp = VTOP(vp);
455 		pcp = pnp->pr_common;
456 		ASSERT(pcp->prc_thread == NULL &&
457 		    (pcp->prc_flags & PRC_DESTROY));
458 		pcp->prc_tslot = -1;
459 		vp = pnp->pr_next;
460 		pnp->pr_next = NULL;
461 	}
462 
463 	if (p->p_trace)
464 		prnotify(p->p_trace);
465 }
466 
467 /*
468  * Called from a hook in exec() when a thread starts exec().
469  */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);
	lwp->lwp_nostop++;	/* decremented in prexecend() */
	p->p_proc_flag |= P_PR_EXEC;
}
490 
491 /*
492  * Called from a hook in exec() when a thread finishes exec().
493  * The thread may or may not have succeeded.  Some other thread
494  * may have beat it to the punch.
495  */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;	/* balances prexecstart() */
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		/* record the post-exec data model in every process vnode */
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		/*
		 * Update each lwp vnode with the data model and the
		 * identity of the (single) surviving post-exec lwp.
		 */
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}
552 
553 /*
554  * Called from a hook in relvm() just before freeing the address space.
555  * We free all the watched areas now.
556  */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	/* the watched-page list is freed without p_lock held */
	pr_free_watched_pages(p);
}
571 
572 /*
573  * Called from hooks in exec-related code when a traced process
574  * attempts to exec(2) a setuid/setgid program or an unreadable
575  * file.  Rather than fail the exec we invalidate the associated
576  * /proc vnodes so that subsequent attempts to use them will fail.
577  *
578  * All /proc vnodes, except directory vnodes, are retained on a linked
579  * list (rooted at p_plist in the process structure) until last close.
580  *
581  * A controlling process must re-open the /proc files in order to
582  * regain control.
583  */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);		/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}
683 
684 /*
685  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
686  * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
688  * Return value is the process pointer if the process still exists, else NULL.
689  * If we lock the process, give ourself kernel priority to avoid deadlocks;
690  * this is undone in prunlock().
691  */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);	/* pr_pidlock still held, as documented */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/* drop pr_pidlock while sleeping, per the lock ordering */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);	/* process disappeared while asleep */
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	THREAD_KPRI_REQUEST();	/* released in prunmark() */
	return (p);
}
723 
724 /*
725  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
726  * This prevents any lwp of the process from disappearing and
727  * blocks most operations that a process can perform on itself.
728  * Returns 0 on success, a non-zero error number on failure.
729  *
730  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
731  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
732  *
733  * error returns:
734  *	ENOENT: process or lwp has disappeared or process is exiting
735  *		(or has become a zombie and zdisp == ZNO).
736  *	EAGAIN: procfs vnode has become invalid.
737  *	EINTR:  signal arrived while waiting for exec to complete.
738  */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			/* interrupted by a signal */
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
809 
810 /*
811  * Undo prlock() and pr_p_lock().
812  * p->p_lock is still held; pr_pidlock is no longer held.
813  *
814  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
815  * if any, waiting for the flag to be dropped; it retains p->p_lock.
816  *
817  * prunlock() calls prunmark() and then drops p->p_lock.
818  */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* wake one waiter, drop the /proc lock flag; p_lock is retained */
	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	THREAD_KPRI_RELEASE();	/* balances pr_p_lock() */
}
829 
void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}
848 
849 /*
850  * Called while holding p->p_lock to delay until the process is unlocked.
851  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
852  * The process cannot become locked again until p->p_lock is dropped.
853  */
854 void
855 prbarrier(proc_t *p)
856 {
857 	ASSERT(MUTEX_HELD(&p->p_lock));
858 
859 	if (p->p_proc_flag & P_PR_LOCK) {
860 		/* The process is locked; delay until not locked */
861 		uint_t slot = p->p_slot;
862 
863 		while (p->p_proc_flag & P_PR_LOCK)
864 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
865 		cv_signal(&pr_pid_cv[slot]);
866 	}
867 }
868 
869 /*
870  * Return process/lwp status.
871  * The u-block is mapped in by this routine and unmapped at the end.
872  */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid  = p->p_pgrp;
	sp->pr_sid   = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	/* aggregate user/system CPU time across all lwps */
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
935 
936 #ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	/* translate kernel thread/process state into PR_* status flags */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	schedctl_finish_sigblock(t);
	prassignset(&sp->pr_lwphold, &t->t_hold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	/*
	 * Sanitize cross-zone siginfo: a non-global-zone observer must
	 * not see identifying data from a sender outside its zone.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			/*
			 * Reconstruct execve() arguments; arg 0 (the path)
			 * is recovered from the AT_SUN_EXECNAME aux vector
			 * entry when present.
			 */
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);	/* reacquire before returning */
}
1095 
/*
 * Return process status (32-bit /proc view) in *sp.
 * Fills in the process-wide part of the pstatus32_t; the representative
 * lwp's status is filled in by prgetlwpstatus32().  zp is the observer's
 * zone, used to sanitize cross-zone siginfo.  Caller holds p->p_lock.
 */
void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	/* Addresses/sizes are truncated to 32 bits for the ILP32 view. */
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid   = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid  = p->p_pgrp;
	sp->pr_sid   = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
1158 #endif	/* _SYSCALL32_IMPL */
1159 
1160 /*
1161  * Return lwp status.
1162  */
1163 void
1164 prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
1165 {
1166 	proc_t *p = ttoproc(t);
1167 	klwp_t *lwp = ttolwp(t);
1168 	struct mstate *ms = &lwp->lwp_mstate;
1169 	hrtime_t usr, sys;
1170 	int flags;
1171 	ulong_t instr;
1172 
1173 	ASSERT(MUTEX_HELD(&p->p_lock));
1174 
1175 	bzero(sp, sizeof (*sp));
1176 	flags = 0L;
1177 	if (t->t_state == TS_STOPPED) {
1178 		flags |= PR_STOPPED;
1179 		if ((t->t_schedflag & TS_PSTART) == 0)
1180 			flags |= PR_ISTOP;
1181 	} else if (VSTOPPED(t)) {
1182 		flags |= PR_STOPPED|PR_ISTOP;
1183 	}
1184 	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
1185 		flags |= PR_DSTOP;
1186 	if (lwp->lwp_asleep)
1187 		flags |= PR_ASLEEP;
1188 	if (t == p->p_agenttp)
1189 		flags |= PR_AGENT;
1190 	if (!(t->t_proc_flag & TP_TWAIT))
1191 		flags |= PR_DETACH;
1192 	if (t->t_proc_flag & TP_DAEMON)
1193 		flags |= PR_DAEMON;
1194 	if (p->p_proc_flag & P_PR_FORK)
1195 		flags |= PR_FORK;
1196 	if (p->p_proc_flag & P_PR_RUNLCL)
1197 		flags |= PR_RLC;
1198 	if (p->p_proc_flag & P_PR_KILLCL)
1199 		flags |= PR_KLC;
1200 	if (p->p_proc_flag & P_PR_ASYNC)
1201 		flags |= PR_ASYNC;
1202 	if (p->p_proc_flag & P_PR_BPTADJ)
1203 		flags |= PR_BPTADJ;
1204 	if (p->p_proc_flag & P_PR_PTRACE)
1205 		flags |= PR_PTRACE;
1206 	if (p->p_flag & SMSACCT)
1207 		flags |= PR_MSACCT;
1208 	if (p->p_flag & SMSFORK)
1209 		flags |= PR_MSFORK;
1210 	if (p->p_flag & SVFWAIT)
1211 		flags |= PR_VFORKP;
1212 	if (p->p_pgidp->pid_pgorphaned)
1213 		flags |= PR_ORPHAN;
1214 	if (p->p_pidflag & CLDNOSIGCHLD)
1215 		flags |= PR_NOSIGCHLD;
1216 	if (p->p_pidflag & CLDWAITPID)
1217 		flags |= PR_WAITPID;
1218 	sp->pr_flags = flags;
1219 	if (VSTOPPED(t)) {
1220 		sp->pr_why   = PR_REQUESTED;
1221 		sp->pr_what  = 0;
1222 	} else {
1223 		sp->pr_why   = t->t_whystop;
1224 		sp->pr_what  = t->t_whatstop;
1225 	}
1226 	sp->pr_lwpid = t->t_tid;
1227 	sp->pr_cursig  = lwp->lwp_cursig;
1228 	prassignset(&sp->pr_lwppend, &t->t_sig);
1229 	schedctl_finish_sigblock(t);
1230 	prassignset(&sp->pr_lwphold, &t->t_hold);
1231 	if (t->t_whystop == PR_FAULTED)
1232 		bcopy(&lwp->lwp_siginfo,
1233 		    &sp->pr_info, sizeof (k_siginfo_t));
1234 	else if (lwp->lwp_curinfo)
1235 		bcopy(&lwp->lwp_curinfo->sq_info,
1236 		    &sp->pr_info, sizeof (k_siginfo_t));
1237 	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1238 	    sp->pr_info.si_zoneid != zp->zone_id) {
1239 		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1240 		sp->pr_info.si_uid = 0;
1241 		sp->pr_info.si_ctid = -1;
1242 		sp->pr_info.si_zoneid = zp->zone_id;
1243 	}
1244 	sp->pr_altstack = lwp->lwp_sigaltstack;
1245 	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1246 	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
1247 	sp->pr_ustack = lwp->lwp_ustack;
1248 	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1249 	    sizeof (sp->pr_clname) - 1);
1250 	if (flags & PR_STOPPED)
1251 		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
1252 	usr = ms->ms_acct[LMS_USER];
1253 	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1254 	scalehrtime(&usr);
1255 	scalehrtime(&sys);
1256 	hrt2ts(usr, &sp->pr_utime);
1257 	hrt2ts(sys, &sp->pr_stime);
1258 
1259 	/*
1260 	 * Fetch the current instruction, if not a system process.
1261 	 * We don't attempt this unless the lwp is stopped.
1262 	 */
1263 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1264 		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1265 	else if (!(flags & PR_STOPPED))
1266 		sp->pr_flags |= PR_PCINVAL;
1267 	else if (!prfetchinstr(lwp, &instr))
1268 		sp->pr_flags |= PR_PCINVAL;
1269 	else
1270 		sp->pr_instr = instr;
1271 
1272 	/*
1273 	 * Drop p_lock while touching the lwp's stack.
1274 	 */
1275 	mutex_exit(&p->p_lock);
1276 	if (prisstep(lwp))
1277 		sp->pr_flags |= PR_STEP;
1278 	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1279 		int i;
1280 
1281 		sp->pr_syscall = get_syscall_args(lwp,
1282 		    (long *)sp->pr_sysarg, &i);
1283 		sp->pr_nsysarg = (ushort_t)i;
1284 	}
1285 	if ((flags & PR_STOPPED) || t == curthread)
1286 		prgetprregs(lwp, sp->pr_reg);
1287 	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1288 	    (flags & PR_VFORKP)) {
1289 		user_t *up;
1290 		auxv_t *auxp;
1291 		int i;
1292 
1293 		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
1294 		if (sp->pr_errno == 0)
1295 			sp->pr_errpriv = PRIV_NONE;
1296 		else
1297 			sp->pr_errpriv = lwp->lwp_badpriv;
1298 
1299 		if (t->t_sysnum == SYS_execve) {
1300 			up = PTOU(p);
1301 			sp->pr_sysarg[0] = 0;
1302 			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
1303 			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
1304 			for (i = 0, auxp = up->u_auxv;
1305 			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1306 			    i++, auxp++) {
1307 				if (auxp->a_type == AT_SUN_EXECNAME) {
1308 					sp->pr_sysarg[0] =
1309 					    (uintptr_t)auxp->a_un.a_ptr;
1310 					break;
1311 				}
1312 			}
1313 		}
1314 	}
1315 	if (prhasfp())
1316 		prgetprfpregs(lwp, &sp->pr_fpreg);
1317 	mutex_enter(&p->p_lock);
1318 }
1319 
1320 /*
1321  * Get the sigaction structure for the specified signal.  The u-block
1322  * must already have been mapped in by the caller.
1323  */
1324 void
1325 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1326 {
1327 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1328 
1329 	bzero(sp, sizeof (*sp));
1330 
1331 	if (sig != 0 && (unsigned)sig < nsig) {
1332 		sp->sa_handler = up->u_signal[sig-1];
1333 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1334 		if (sigismember(&up->u_sigonstack, sig))
1335 			sp->sa_flags |= SA_ONSTACK;
1336 		if (sigismember(&up->u_sigresethand, sig))
1337 			sp->sa_flags |= SA_RESETHAND;
1338 		if (sigismember(&up->u_sigrestart, sig))
1339 			sp->sa_flags |= SA_RESTART;
1340 		if (sigismember(&p->p_siginfo, sig))
1341 			sp->sa_flags |= SA_SIGINFO;
1342 		if (sigismember(&up->u_signodefer, sig))
1343 			sp->sa_flags |= SA_NODEFER;
1344 		if (sig == SIGCLD) {
1345 			if (p->p_flag & SNOWAIT)
1346 				sp->sa_flags |= SA_NOCLDWAIT;
1347 			if ((p->p_flag & SJCTL) == 0)
1348 				sp->sa_flags |= SA_NOCLDSTOP;
1349 		}
1350 	}
1351 }
1352 
1353 #ifdef _SYSCALL32_IMPL
1354 void
1355 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1356 {
1357 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1358 
1359 	bzero(sp, sizeof (*sp));
1360 
1361 	if (sig != 0 && (unsigned)sig < nsig) {
1362 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1363 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1364 		if (sigismember(&up->u_sigonstack, sig))
1365 			sp->sa_flags |= SA_ONSTACK;
1366 		if (sigismember(&up->u_sigresethand, sig))
1367 			sp->sa_flags |= SA_RESETHAND;
1368 		if (sigismember(&up->u_sigrestart, sig))
1369 			sp->sa_flags |= SA_RESTART;
1370 		if (sigismember(&p->p_siginfo, sig))
1371 			sp->sa_flags |= SA_SIGINFO;
1372 		if (sigismember(&up->u_signodefer, sig))
1373 			sp->sa_flags |= SA_NODEFER;
1374 		if (sig == SIGCLD) {
1375 			if (p->p_flag & SNOWAIT)
1376 				sp->sa_flags |= SA_NOCLDWAIT;
1377 			if ((p->p_flag & SJCTL) == 0)
1378 				sp->sa_flags |= SA_NOCLDSTOP;
1379 		}
1380 	}
1381 }
1382 #endif	/* _SYSCALL32_IMPL */
1383 
1384 /*
1385  * Count the number of segments in this process's address space.
1386  */
1387 int
1388 prnsegs(struct as *as, int reserved)
1389 {
1390 	int n = 0;
1391 	struct seg *seg;
1392 
1393 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1394 
1395 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1396 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1397 		caddr_t saddr, naddr;
1398 		void *tmp = NULL;
1399 
1400 		if ((seg->s_flags & S_HOLE) != 0) {
1401 			continue;
1402 		}
1403 
1404 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1405 			(void) pr_getprot(seg, reserved, &tmp,
1406 			    &saddr, &naddr, eaddr);
1407 			if (saddr != naddr)
1408 				n++;
1409 		}
1410 
1411 		ASSERT(tmp == NULL);
1412 	}
1413 
1414 	return (n);
1415 }
1416 
1417 /*
1418  * Convert uint32_t to decimal string w/o leading zeros.
1419  * Add trailing null characters if 'len' is greater than string length.
1420  * Return the string length.
1421  */
1422 int
1423 pr_u32tos(uint32_t n, char *s, int len)
1424 {
1425 	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
1426 	char *cp = cbuf;
1427 	char *end = s + len;
1428 
1429 	do {
1430 		*cp++ = (char)(n % 10 + '0');
1431 		n /= 10;
1432 	} while (n);
1433 
1434 	len = (int)(cp - cbuf);
1435 
1436 	do {
1437 		*s++ = *--cp;
1438 	} while (cp > cbuf);
1439 
1440 	while (s < end)		/* optional pad */
1441 		*s++ = '\0';
1442 
1443 	return (len);
1444 }
1445 
1446 /*
1447  * Convert uint64_t to decimal string w/o leading zeros.
1448  * Return the string length.
1449  */
1450 static int
1451 pr_u64tos(uint64_t n, char *s)
1452 {
1453 	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
1454 	char *cp = cbuf;
1455 	int len;
1456 
1457 	do {
1458 		*cp++ = (char)(n % 10 + '0');
1459 		n /= 10;
1460 	} while (n);
1461 
1462 	len = (int)(cp - cbuf);
1463 
1464 	do {
1465 		*s++ = *--cp;
1466 	} while (cp > cbuf);
1467 
1468 	return (len);
1469 }
1470 
/*
 * Look up file descriptor 'fd' in process 'p' and return the file_t
 * with a hold placed on its uf_entry (uf_refcnt), or NULL if the fd
 * is out of range or not fully open.  If 'flag' is non-NULL the fd's
 * uf_flag is returned through it.  The hold must be dropped with
 * pr_releasef().
 *
 * Caller must hold p->p_lock with P_PR_LOCK set.  p_lock is dropped
 * and reacquired around fi_lock (lock ordering), so p_lock-protected
 * state may change across this call.
 */
file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	/* f_count > 0 excludes entries still being opened or closed. */
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (flag != NULL)
			*flag = ufp->uf_flag;
		ufp->uf_refcnt++;
	} else {
		fp = NULL;
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	return (fp);
}
1501 
/*
 * Release the hold on fd 'fd' acquired by a prior pr_getf() on
 * process 'p', decrementing uf_refcnt.  Caller must hold p->p_lock
 * with P_PR_LOCK set; as in pr_getf(), p_lock is dropped and
 * reacquired around fi_lock.
 */
void
pr_releasef(proc_t *p, uint_t fd)
{
	uf_entry_t *ufp;
	uf_info_t *fip;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

	fip = P_FINFO(p);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	ASSERT3U(ufp->uf_refcnt, >, 0);
	ufp->uf_refcnt--;
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);
}
1521 
/*
 * Construct the /proc "object" name for a mapped vnode:
 *   [<fstype>.]<major>.<minor>.<nodeid>
 * 'vattr' must already contain the AT_FSID and AT_NODEID attributes
 * for vp.  NOTE(review): no bound is enforced on 'name' here; the
 * caller must supply a buffer large enough for the longest possible
 * string -- presumably PRMAPSZ, confirm against callers.
 */
void
pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
{
	char *s = name;
	struct vfs *vfsp;
	struct vfssw *vfsswp;

	/* Prefix with the filesystem type name, when one is registered. */
	if ((vfsp = vp->v_vfsp) != NULL &&
	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
	    *vfsswp->vsw_name) {
		(void) strcpy(s, vfsswp->vsw_name);
		s += strlen(s);
		*s++ = '.';
	}
	/* len of 0 => pr_u32tos() adds no trailing NUL padding. */
	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
	*s++ = '.';
	s += pr_u64tos(vattr->va_nodeid, s);
	*s++ = '\0';
}
1543 
/*
 * Return the segment containing the process's break (brk) area, or
 * NULL if there is none.  The brk segment is the anonymous segvn
 * segment (no backing vnode) at the last byte of the brk region.
 */
struct seg *
break_seg(proc_t *p)
{
	caddr_t addr = p->p_brkbase;
	struct seg *seg;
	struct vnode *vp;

	if (p->p_brksize != 0)
		addr += p->p_brksize - 1;	/* last byte of the brk area */
	seg = as_segat(p->p_as, addr);
	if (seg != NULL && seg->s_ops == &segvn_ops &&
	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
		return (seg);
	return (NULL);
}
1559 
1560 /*
1561  * Implementation of service functions to handle procfs generic chained
1562  * copyout buffers.
1563  */
1564 typedef struct pr_iobuf_list {
1565 	list_node_t	piol_link;	/* buffer linkage */
1566 	size_t		piol_size;	/* total size (header + data) */
1567 	size_t		piol_usedsize;	/* amount to copy out from this buf */
1568 } piol_t;
1569 
1570 #define	MAPSIZE	(64 * 1024)
1571 #define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1572 
1573 void
1574 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1575 {
1576 	piol_t	*iol;
1577 	size_t	initial_size = MIN(1, n) * itemsize;
1578 
1579 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1580 
1581 	ASSERT(list_head(iolhead) == NULL);
1582 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1583 	ASSERT(initial_size > 0);
1584 
1585 	/*
1586 	 * Someone creating chained copyout buffers may ask for less than
1587 	 * MAPSIZE if the amount of data to be buffered is known to be
1588 	 * smaller than that.
1589 	 * But in order to prevent involuntary self-denial of service,
1590 	 * the requested input size is clamped at MAPSIZE.
1591 	 */
1592 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1593 	iol = kmem_alloc(initial_size, KM_SLEEP);
1594 	list_insert_head(iolhead, iol);
1595 	iol->piol_usedsize = 0;
1596 	iol->piol_size = initial_size;
1597 }
1598 
/*
 * Reserve and return a zeroed, 'itemsize'-byte slot at the end of the
 * chained copyout buffer list, allocating a fresh MAPSIZE buffer when
 * the current tail buffer is out of room.
 */
void *
pr_iol_newbuf(list_t *iolhead, size_t itemsize)
{
	piol_t	*iol;
	char	*new;

	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
	ASSERT(list_head(iolhead) != NULL);

	iol = (piol_t *)list_tail(iolhead);

	/* Note: piol_size includes the header, piol_usedsize does not. */
	if (iol->piol_size <
	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
		/*
		 * Out of space in the current buffer. Allocate more.
		 */
		piol_t *newiol;

		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
		newiol->piol_size = MAPSIZE;
		newiol->piol_usedsize = 0;

		list_insert_after(iolhead, iol, newiol);
		iol = list_next(iolhead, iol);
		ASSERT(iol == newiol);
	}
	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
	iol->piol_usedsize += itemsize;
	bzero(new, itemsize);
	return (new);
}
1630 
1631 void
1632 pr_iol_freelist(list_t *iolhead)
1633 {
1634 	piol_t	*iol;
1635 
1636 	while ((iol = list_head(iolhead)) != NULL) {
1637 		list_remove(iolhead, iol);
1638 		kmem_free(iol, iol->piol_size);
1639 	}
1640 	list_destroy(iolhead);
1641 }
1642 
/*
 * Copy the used portion of each chained buffer out to the user address
 * *tgt, advancing *tgt past each buffer, then free the buffers and
 * destroy the list.  'errin' is a prior error to propagate: once an
 * error is set (including EFAULT from copyout) no further copying is
 * attempted, but every buffer is still freed.  Returns the first error.
 */
int
pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
{
	int error = errin;
	piol_t	*iol;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		if (!error) {
			if (copyout(PIOL_DATABUF(iol), *tgt,
			    iol->piol_usedsize))
				error = EFAULT;
			*tgt += iol->piol_usedsize;
		}
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}
1663 
/*
 * uiomove() the used portion of each chained buffer, honoring the
 * caller's initial uio_offset (data wholly before the offset is
 * skipped), then free the buffers and destroy the list.  'errin' is a
 * prior error to propagate; buffers are freed regardless.  Returns the
 * first error encountered.
 */
int
pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
{
	offset_t	off = uiop->uio_offset;
	char		*base;
	size_t		size;
	piol_t		*iol;
	int		error = errin;

	while ((iol = list_head(iolhead)) != NULL) {
		list_remove(iolhead, iol);
		base = PIOL_DATABUF(iol);
		size = iol->piol_usedsize;
		if (off <= size && error == 0 && uiop->uio_resid > 0)
			error = uiomove(base + off, size - off,
			    UIO_READ, uiop);
		/* Consume the remaining offset buffer by buffer. */
		off = MAX(0, off - (offset_t)size);
		kmem_free(iol, iol->piol_size);
	}
	list_destroy(iolhead);

	return (error);
}
1687 
1688 /*
1689  * Return an array of structures with memory map information.
1690  * We allocate here; the caller must deallocate.
1691  */
1692 int
1693 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1694 {
1695 	struct as *as = p->p_as;
1696 	prmap_t *mp;
1697 	struct seg *seg;
1698 	struct seg *brkseg, *stkseg;
1699 	struct vnode *vp;
1700 	struct vattr vattr;
1701 	uint_t prot;
1702 
1703 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1704 
1705 	/*
1706 	 * Request an initial buffer size that doesn't waste memory
1707 	 * if the address space has only a small number of segments.
1708 	 */
1709 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1710 
1711 	if ((seg = AS_SEGFIRST(as)) == NULL)
1712 		return (0);
1713 
1714 	brkseg = break_seg(p);
1715 	stkseg = as_segat(as, prgetstackbase(p));
1716 
1717 	do {
1718 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1719 		caddr_t saddr, naddr;
1720 		void *tmp = NULL;
1721 
1722 		if ((seg->s_flags & S_HOLE) != 0) {
1723 			continue;
1724 		}
1725 
1726 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1727 			prot = pr_getprot(seg, reserved, &tmp,
1728 			    &saddr, &naddr, eaddr);
1729 			if (saddr == naddr)
1730 				continue;
1731 
1732 			mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1733 
1734 			mp->pr_vaddr = (uintptr_t)saddr;
1735 			mp->pr_size = naddr - saddr;
1736 			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1737 			mp->pr_mflags = 0;
1738 			if (prot & PROT_READ)
1739 				mp->pr_mflags |= MA_READ;
1740 			if (prot & PROT_WRITE)
1741 				mp->pr_mflags |= MA_WRITE;
1742 			if (prot & PROT_EXEC)
1743 				mp->pr_mflags |= MA_EXEC;
1744 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1745 				mp->pr_mflags |= MA_SHARED;
1746 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1747 				mp->pr_mflags |= MA_NORESERVE;
1748 			if (seg->s_ops == &segspt_shmops ||
1749 			    (seg->s_ops == &segvn_ops &&
1750 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1751 				mp->pr_mflags |= MA_ANON;
1752 			if (seg == brkseg)
1753 				mp->pr_mflags |= MA_BREAK;
1754 			else if (seg == stkseg) {
1755 				mp->pr_mflags |= MA_STACK;
1756 				if (reserved) {
1757 					size_t maxstack =
1758 					    ((size_t)p->p_stk_ctl +
1759 					    PAGEOFFSET) & PAGEMASK;
1760 					mp->pr_vaddr =
1761 					    (uintptr_t)prgetstackbase(p) +
1762 					    p->p_stksize - maxstack;
1763 					mp->pr_size = (uintptr_t)naddr -
1764 					    mp->pr_vaddr;
1765 				}
1766 			}
1767 			if (seg->s_ops == &segspt_shmops)
1768 				mp->pr_mflags |= MA_ISM | MA_SHM;
1769 			mp->pr_pagesize = PAGESIZE;
1770 
1771 			/*
1772 			 * Manufacture a filename for the "object" directory.
1773 			 */
1774 			vattr.va_mask = AT_FSID|AT_NODEID;
1775 			if (seg->s_ops == &segvn_ops &&
1776 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1777 			    vp != NULL && vp->v_type == VREG &&
1778 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1779 				if (vp == p->p_exec)
1780 					(void) strcpy(mp->pr_mapname, "a.out");
1781 				else
1782 					pr_object_name(mp->pr_mapname,
1783 					    vp, &vattr);
1784 			}
1785 
1786 			/*
1787 			 * Get the SysV shared memory id, if any.
1788 			 */
1789 			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1790 			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1791 			    SHMID_NONE) {
1792 				if (mp->pr_shmid == SHMID_FREE)
1793 					mp->pr_shmid = -1;
1794 
1795 				mp->pr_mflags |= MA_SHM;
1796 			} else {
1797 				mp->pr_shmid = -1;
1798 			}
1799 		}
1800 		ASSERT(tmp == NULL);
1801 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1802 
1803 	return (0);
1804 }
1805 
1806 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit version of prgetmap(): identical traversal, but emits
 * prmap32_t entries with addresses and sizes truncated to 32 bits
 * for the ILP32 view.
 */
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/* 'continue' advances seg in the while clause below. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * Report the full reservable stack,
					 * sized by the stack resource limit.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1921 #endif	/* _SYSCALL32_IMPL */
1922 
1923 /*
1924  * Return the size of the /proc page data file.
1925  */
1926 size_t
1927 prpdsize(struct as *as)
1928 {
1929 	struct seg *seg;
1930 	size_t size;
1931 
1932 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1933 
1934 	if ((seg = AS_SEGFIRST(as)) == NULL)
1935 		return (0);
1936 
1937 	size = sizeof (prpageheader_t);
1938 	do {
1939 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1940 		caddr_t saddr, naddr;
1941 		void *tmp = NULL;
1942 		size_t npage;
1943 
1944 		if ((seg->s_flags & S_HOLE) != 0) {
1945 			continue;
1946 		}
1947 
1948 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1949 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1950 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1951 				size += sizeof (prasmap_t) + round8(npage);
1952 		}
1953 		ASSERT(tmp == NULL);
1954 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1955 
1956 	return (size);
1957 }
1958 
1959 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit version of prpdsize(): same computation using the ILP32
 * header and per-mapping structure sizes.
 */
size_t
prpdsize32(struct as *as)
{
	struct seg *seg;
	size_t size;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	size = sizeof (prpageheader32_t);
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;
		size_t npage;

		/* 'continue' advances seg in the while clause below. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
				size += sizeof (prasmap32_t) + round8(npage);
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (size);
}
1992 #endif	/* _SYSCALL32_IMPL */
1993 
1994 /*
1995  * Read page data information.
1996  */
1997 int
1998 prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
1999 {
2000 	struct as *as = p->p_as;
2001 	caddr_t buf;
2002 	size_t size;
2003 	prpageheader_t *php;
2004 	prasmap_t *pmp;
2005 	struct seg *seg;
2006 	int error;
2007 
2008 again:
2009 	AS_LOCK_ENTER(as, RW_WRITER);
2010 
2011 	if ((seg = AS_SEGFIRST(as)) == NULL) {
2012 		AS_LOCK_EXIT(as);
2013 		return (0);
2014 	}
2015 	size = prpdsize(as);
2016 	if (uiop->uio_resid < size) {
2017 		AS_LOCK_EXIT(as);
2018 		return (E2BIG);
2019 	}
2020 
2021 	buf = kmem_zalloc(size, KM_SLEEP);
2022 	php = (prpageheader_t *)buf;
2023 	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));
2024 
2025 	hrt2ts(gethrtime(), &php->pr_tstamp);
2026 	php->pr_nmap = 0;
2027 	php->pr_npage = 0;
2028 	do {
2029 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
2030 		caddr_t saddr, naddr;
2031 		void *tmp = NULL;
2032 
2033 		if ((seg->s_flags & S_HOLE) != 0) {
2034 			continue;
2035 		}
2036 
2037 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2038 			struct vnode *vp;
2039 			struct vattr vattr;
2040 			size_t len;
2041 			size_t npage;
2042 			uint_t prot;
2043 			uintptr_t next;
2044 
2045 			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2046 			if ((len = (size_t)(naddr - saddr)) == 0)
2047 				continue;
2048 			npage = len / PAGESIZE;
2049 			next = (uintptr_t)(pmp + 1) + round8(npage);
2050 			/*
2051 			 * It's possible that the address space can change
2052 			 * subtlely even though we're holding as->a_lock
2053 			 * due to the nondeterminism of page_exists() in
2054 			 * the presence of asychronously flushed pages or
2055 			 * mapped files whose sizes are changing.
2056 			 * page_exists() may be called indirectly from
2057 			 * pr_getprot() by a SEGOP_INCORE() routine.
2058 			 * If this happens we need to make sure we don't
2059 			 * overrun the buffer whose size we computed based
2060 			 * on the initial iteration through the segments.
2061 			 * Once we've detected an overflow, we need to clean
2062 			 * up the temporary memory allocated in pr_getprot()
2063 			 * and retry. If there's a pending signal, we return
2064 			 * EINTR so that this thread can be dislodged if
2065 			 * a latent bug causes us to spin indefinitely.
2066 			 */
2067 			if (next > (uintptr_t)buf + size) {
2068 				pr_getprot_done(&tmp);
2069 				AS_LOCK_EXIT(as);
2070 
2071 				kmem_free(buf, size);
2072 
2073 				if (ISSIG(curthread, JUSTLOOKING))
2074 					return (EINTR);
2075 
2076 				goto again;
2077 			}
2078 
2079 			php->pr_nmap++;
2080 			php->pr_npage += npage;
2081 			pmp->pr_vaddr = (uintptr_t)saddr;
2082 			pmp->pr_npage = npage;
2083 			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
2084 			pmp->pr_mflags = 0;
2085 			if (prot & PROT_READ)
2086 				pmp->pr_mflags |= MA_READ;
2087 			if (prot & PROT_WRITE)
2088 				pmp->pr_mflags |= MA_WRITE;
2089 			if (prot & PROT_EXEC)
2090 				pmp->pr_mflags |= MA_EXEC;
2091 			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
2092 				pmp->pr_mflags |= MA_SHARED;
2093 			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
2094 				pmp->pr_mflags |= MA_NORESERVE;
2095 			if (seg->s_ops == &segspt_shmops ||
2096 			    (seg->s_ops == &segvn_ops &&
2097 			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
2098 				pmp->pr_mflags |= MA_ANON;
2099 			if (seg->s_ops == &segspt_shmops)
2100 				pmp->pr_mflags |= MA_ISM | MA_SHM;
2101 			pmp->pr_pagesize = PAGESIZE;
2102 			/*
2103 			 * Manufacture a filename for the "object" directory.
2104 			 */
2105 			vattr.va_mask = AT_FSID|AT_NODEID;
2106 			if (seg->s_ops == &segvn_ops &&
2107 			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
2108 			    vp != NULL && vp->v_type == VREG &&
2109 			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
2110 				if (vp == p->p_exec)
2111 					(void) strcpy(pmp->pr_mapname, "a.out");
2112 				else
2113 					pr_object_name(pmp->pr_mapname,
2114 					    vp, &vattr);
2115 			}
2116 
2117 			/*
2118 			 * Get the SysV shared memory id, if any.
2119 			 */
2120 			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
2121 			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
2122 			    SHMID_NONE) {
2123 				if (pmp->pr_shmid == SHMID_FREE)
2124 					pmp->pr_shmid = -1;
2125 
2126 				pmp->pr_mflags |= MA_SHM;
2127 			} else {
2128 				pmp->pr_shmid = -1;
2129 			}
2130 
2131 			hat_getstat(as, saddr, len, hatid,
2132 			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
2133 			pmp = (prasmap_t *)next;
2134 		}
2135 		ASSERT(tmp == NULL);
2136 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2137 
2138 	AS_LOCK_EXIT(as);
2139 
2140 	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
2141 	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
2142 	kmem_free(buf, size);
2143 
2144 	return (error);
2145 }
2146 
2147 #ifdef _SYSCALL32_IMPL
/*
 * Read page data for a process, for a 32-bit consumer
 * (/proc/<pid>/pagedata via the _SYSCALL32_IMPL path).
 *
 * The data copied out to 'uiop' is a prpageheader32_t followed, for
 * each mapping, by a prasmap32_t and one byte of hat statistics per
 * page (gathered by hat_getstat() with the given 'hatid').
 *
 * Returns 0 on success (including the no-segments case), E2BIG if the
 * caller's buffer is too small, EINTR if dislodged by a signal while
 * retrying, or an errno from uiomove().
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	/*
	 * Size the output buffer from a first pass over the segments;
	 * the copying pass below re-checks against this bound since the
	 * apparent in-core page counts can change (see comment below).
	 */
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/* Reserved address-space holes carry no page data. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Walk the segment in runs of uniform protection, emitting
		 * one prasmap32_t (plus per-page stats) per run.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtlely even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asychronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/*
			 * ISM/DISM segments, and vnode segments with no
			 * backing vnode, are reported as anonymous.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/*
			 * Per-page statistics follow the prasmap32_t;
			 * reading them also clears the ref/mod bits
			 * (HAT_SYNC_ZERORM).
			 */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2297 #endif	/* _SYSCALL32_IMPL */
2298 
2299 ushort_t
2300 prgetpctcpu(uint64_t pct)
2301 {
2302 	/*
2303 	 * The value returned will be relevant in the zone of the examiner,
2304 	 * which may not be the same as the zone which performed the procfs
2305 	 * mount.
2306 	 */
2307 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2308 
2309 	/*
2310 	 * Prorate over online cpus so we don't exceed 100%
2311 	 */
2312 	if (nonline > 1)
2313 		pct /= nonline;
2314 	pct >>= 16;		/* convert to 16-bit scaled integer */
2315 	if (pct > 0x8000)	/* might happen, due to rounding */
2316 		pct = 0x8000;
2317 	return ((ushort_t)pct);
2318 }
2319 
2320 /*
2321  * Return information used by ps(1).
2322  */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	/* The caller must hold p->p_lock for the duration. */
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * prchoose() selects a representative lwp and returns it
	 * thread-locked; we only need its identity here, so drop the
	 * thread lock at once.  If there is no representative lwp
	 * (fully zombied process), zero everything including pr_lwp;
	 * otherwise preserve pr_lwp for prgetlwpsinfo() below.
	 */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* Credentials are read under p_crlock. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* Zombie: synthesize the lwp fields. */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		/* Copies are bounded; psp was zeroed, so NUL is implied. */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			/* Sum decayed %cpu over every lwp in the process. */
			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			/* System processes have no meaningful sizes. */
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * p_lock is dropped while the address-space lock
			 * is held -- NOTE(review): presumably to avoid
			 * holding p_lock across AS operations; confirm
			 * the required lock ordering.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			/* Sizes are reported in kilobytes. */
			psp->pr_size = btopr(as->a_resvsize) *
			    (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}
}
2454 
2455 static size_t
2456 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen)
2457 {
2458 	pr_misc_header_t *misc;
2459 	size_t len;
2460 
2461 	len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2462 
2463 	if (data != NULL) {
2464 		misc = pr_iol_newbuf(data, len);
2465 		misc->pr_misc_type = type;
2466 		misc->pr_misc_size = len;
2467 		misc++;
2468 		bcopy((char *)val, (char *)misc, vlen);
2469 	}
2470 
2471 	return (len);
2472 }
2473 
2474 /*
2475  * There's no elegant way to determine if a character device
2476  * supports TLI, so just check a hardcoded list of known TLI
2477  * devices.
2478  */
2479 
2480 static boolean_t
2481 pristli(vnode_t *vp)
2482 {
2483 	static const char *tlidevs[] = {
2484 	    "udp", "udp6", "tcp", "tcp6"
2485 	};
2486 	char *devname;
2487 	uint_t i;
2488 
2489 	ASSERT(vp != NULL);
2490 
2491 	if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0)
2492 		return (B_FALSE);
2493 
2494 	if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL)
2495 		return (B_FALSE);
2496 
2497 	for (i = 0; i < ARRAY_SIZE(tlidevs); i++) {
2498 		if (strcmp(devname, tlidevs[i]) == 0)
2499 			return (B_TRUE);
2500 	}
2501 
2502 	return (B_FALSE);
2503 }
2504 
2505 static size_t
2506 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2507 {
2508 	char *pathname;
2509 	size_t pathlen;
2510 	size_t sz = 0;
2511 
2512 	/*
2513 	 * The global zone's path to a file in a non-global zone can exceed
2514 	 * MAXPATHLEN.
2515 	 */
2516 	pathlen = MAXPATHLEN * 2 + 1;
2517 	pathname = kmem_alloc(pathlen, KM_SLEEP);
2518 
2519 	if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
2520 		sz += prfdinfomisc(data, PR_PATHNAME,
2521 		    pathname, strlen(pathname) + 1);
2522 	}
2523 
2524 	kmem_free(pathname, pathlen);
2525 
2526 	return (sz);
2527 }
2528 
2529 static size_t
2530 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2531 {
2532 	strcmd_t strcmd;
2533 	int32_t rval;
2534 	size_t sz = 0;
2535 
2536 	strcmd.sc_cmd = TI_GETMYNAME;
2537 	strcmd.sc_timeout = 1;
2538 	strcmd.sc_len = STRCMDBUFSIZE;
2539 
2540 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2541 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2542 		sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2543 		    strcmd.sc_len);
2544 	}
2545 
2546 	strcmd.sc_cmd = TI_GETPEERNAME;
2547 	strcmd.sc_timeout = 1;
2548 	strcmd.sc_len = STRCMDBUFSIZE;
2549 
2550 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2551 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2552 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2553 		    strcmd.sc_len);
2554 	}
2555 
2556 	return (sz);
2557 }
2558 
2559 static size_t
2560 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2561 {
2562 	sonode_t *so;
2563 	socklen_t vlen;
2564 	size_t sz = 0;
2565 	uint_t i;
2566 
2567 	if (vp->v_stream != NULL) {
2568 		so = VTOSO(vp->v_stream->sd_vnode);
2569 
2570 		if (so->so_version == SOV_STREAM)
2571 			so = NULL;
2572 	} else {
2573 		so = VTOSO(vp);
2574 	}
2575 
2576 	if (so == NULL)
2577 		return (0);
2578 
2579 	DTRACE_PROBE1(sonode, sonode_t *, so);
2580 
2581 	/* prmisc - PR_SOCKETNAME */
2582 
2583 	struct sockaddr_storage buf;
2584 	struct sockaddr *name = (struct sockaddr *)&buf;
2585 
2586 	vlen = sizeof (buf);
2587 	if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2588 		sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2589 
2590 	/* prmisc - PR_PEERSOCKNAME */
2591 
2592 	vlen = sizeof (buf);
2593 	if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2594 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2595 
2596 	/* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2597 
2598 	static struct boolopt {
2599 		int		level;
2600 		int		opt;
2601 		int		bopt;
2602 	} boolopts[] = {
2603 		{ SOL_SOCKET, SO_DEBUG,		PR_SO_DEBUG },
2604 		{ SOL_SOCKET, SO_REUSEADDR,	PR_SO_REUSEADDR },
2605 #ifdef SO_REUSEPORT
2606 		/* SmartOS and OmniOS have SO_REUSEPORT */
2607 		{ SOL_SOCKET, SO_REUSEPORT,	PR_SO_REUSEPORT },
2608 #endif
2609 		{ SOL_SOCKET, SO_KEEPALIVE,	PR_SO_KEEPALIVE },
2610 		{ SOL_SOCKET, SO_DONTROUTE,	PR_SO_DONTROUTE },
2611 		{ SOL_SOCKET, SO_BROADCAST,	PR_SO_BROADCAST },
2612 		{ SOL_SOCKET, SO_OOBINLINE,	PR_SO_OOBINLINE },
2613 		{ SOL_SOCKET, SO_DGRAM_ERRIND,	PR_SO_DGRAM_ERRIND },
2614 		{ SOL_SOCKET, SO_ALLZONES,	PR_SO_ALLZONES },
2615 		{ SOL_SOCKET, SO_MAC_EXEMPT,	PR_SO_MAC_EXEMPT },
2616 		{ SOL_SOCKET, SO_MAC_IMPLICIT,	PR_SO_MAC_IMPLICIT },
2617 		{ SOL_SOCKET, SO_EXCLBIND,	PR_SO_EXCLBIND },
2618 		{ SOL_SOCKET, SO_VRRP,		PR_SO_VRRP },
2619 		{ IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2620 		    PR_UDP_NAT_T_ENDPOINT }
2621 	};
2622 	prsockopts_bool_opts_t opts;
2623 	int val;
2624 
2625 	if (data != NULL) {
2626 		opts.prsock_bool_opts = 0;
2627 
2628 		for (i = 0; i < ARRAY_SIZE(boolopts); i++) {
2629 			vlen = sizeof (val);
2630 			if (SOP_GETSOCKOPT(so, boolopts[i].level,
2631 			    boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2632 			    val != 0) {
2633 				opts.prsock_bool_opts |= boolopts[i].bopt;
2634 			}
2635 		}
2636 	}
2637 
2638 	sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2639 
2640 	/* prmisc - PR_SOCKOPT_LINGER */
2641 
2642 	struct linger l;
2643 
2644 	vlen = sizeof (l);
2645 	if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2646 	    0, cred) == 0 && vlen > 0) {
2647 		sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2648 	}
2649 
2650 	/* prmisc - PR_SOCKOPT_* int types */
2651 
2652 	static struct sopt {
2653 		int		level;
2654 		int		opt;
2655 		int		bopt;
2656 	} sopts[] = {
2657 		{ SOL_SOCKET, SO_TYPE,		PR_SOCKOPT_TYPE },
2658 		{ SOL_SOCKET, SO_SNDBUF,	PR_SOCKOPT_SNDBUF },
2659 		{ SOL_SOCKET, SO_RCVBUF,	PR_SOCKOPT_RCVBUF }
2660 	};
2661 
2662 	for (i = 0; i < ARRAY_SIZE(sopts); i++) {
2663 		vlen = sizeof (val);
2664 		if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2665 		    &val, &vlen, 0, cred) == 0 && vlen > 0) {
2666 			sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2667 		}
2668 	}
2669 
2670 	/* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2671 
2672 	in_addr_t nexthop_val;
2673 
2674 	vlen = sizeof (nexthop_val);
2675 	if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2676 	    &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2677 		sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2678 		    &nexthop_val, vlen);
2679 	}
2680 
2681 	/* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2682 
2683 	struct sockaddr_in6 nexthop6_val;
2684 
2685 	vlen = sizeof (nexthop6_val);
2686 	if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2687 	    &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2688 		sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2689 		    &nexthop6_val, vlen);
2690 	}
2691 
2692 	/* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2693 
2694 	char cong[CC_ALGO_NAME_MAX];
2695 
2696 	vlen = sizeof (cong);
2697 	if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2698 	    &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2699 		sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2700 	}
2701 
2702 	/* prmisc - PR_SOCKFILTERS_PRIV */
2703 
2704 	struct fil_info fi;
2705 
2706 	vlen = sizeof (fi);
2707 	if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2708 	    &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2709 		pr_misc_header_t *misc;
2710 		size_t len;
2711 
2712 		/*
2713 		 * We limit the number of returned filters to 32.
2714 		 * This is the maximum number that pfiles will print
2715 		 * anyway.
2716 		 */
2717 		vlen = MIN(32, fi.fi_pos + 1);
2718 		vlen *= sizeof (fi);
2719 
2720 		len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2721 		sz += len;
2722 
2723 		if (data != NULL) {
2724 			/*
2725 			 * So that the filter list can be built incrementally,
2726 			 * prfdinfomisc() is not used here. Instead we
2727 			 * allocate a buffer directly on the copyout list using
2728 			 * pr_iol_newbuf()
2729 			 */
2730 			misc = pr_iol_newbuf(data, len);
2731 			misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2732 			misc->pr_misc_size = len;
2733 			misc++;
2734 			len = vlen;
2735 			if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2736 			    misc, &vlen, 0, cred) == 0) {
2737 				/*
2738 				 * In case the number of filters has reduced
2739 				 * since the first call, explicitly zero out
2740 				 * any unpopulated space.
2741 				 */
2742 				if (vlen < len)
2743 					bzero(misc + vlen, len - vlen);
2744 			} else {
2745 				/* Something went wrong, zero out the result */
2746 				bzero(misc, vlen);
2747 			}
2748 		}
2749 	}
2750 
2751 	return (sz);
2752 }
2753 
/*
 * Callback state passed through nm_walk_mounts() to prfdinfo_nm_path(),
 * used to accumulate the size of (and optionally emit) pathname records
 * for the vnodes mounted over a door vnode.
 */
typedef struct prfdinfo_nm_path_cbdata {
	proc_t		*nmp_p;		/* process being examined */
	u_offset_t	nmp_sz;		/* accumulated record size */
	list_t		*nmp_data;	/* output list; NULL to size only */
} prfdinfo_nm_path_cbdata_t;
2759 
2760 static int
2761 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
2762 {
2763 	prfdinfo_nm_path_cbdata_t *cb = arg;
2764 
2765 	cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
2766 
2767 	return (0);
2768 }
2769 
2770 u_offset_t
2771 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2772 {
2773 	u_offset_t sz;
2774 
2775 	/*
2776 	 * All fdinfo files will be at least this big -
2777 	 * sizeof fdinfo struct + zero length trailer
2778 	 */
2779 	sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
2780 
2781 	/* Pathname */
2782 	switch (vp->v_type) {
2783 	case VDOOR: {
2784 		prfdinfo_nm_path_cbdata_t cb = {
2785 			.nmp_p		= p,
2786 			.nmp_data	= NULL,
2787 			.nmp_sz		= 0
2788 		};
2789 
2790 		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
2791 		sz += cb.nmp_sz;
2792 		break;
2793 	}
2794 	case VSOCK:
2795 		break;
2796 	default:
2797 		sz += prfdinfopath(p, vp, NULL, cred);
2798 	}
2799 
2800 	/* Socket options */
2801 	if (vp->v_type == VSOCK)
2802 		sz += prfdinfosockopt(vp, NULL, cred);
2803 
2804 	/* TLI/XTI sockets */
2805 	if (pristli(vp))
2806 		sz += prfdinfotlisockopt(vp, NULL, cred);
2807 
2808 	return (sz);
2809 }
2810 
/*
 * Populate an fdinfo structure (/proc/<pid>/fdinfo/<fd>) for the file
 * referred to by vp.  'cred' is the examining caller's credential;
 * 'file_cred' is the credential associated with the file itself and is
 * used for the attribute lookup.  Variable-length trailing records
 * (pathnames, socket options) are appended to 'data' as they are
 * generated.  Always returns 0.
 */
int
prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
    cred_t *file_cred, list_t *data)
{
	vattr_t vattr;
	int error;

	/*
	 * The buffer has been initialised to zero by pr_iol_newbuf().
	 * Initialise defaults for any values that should not default to zero.
	 */
	fdinfo->pr_uid = (uid_t)-1;
	fdinfo->pr_gid = (gid_t)-1;
	fdinfo->pr_size = -1;
	fdinfo->pr_locktype = F_UNLCK;
	fdinfo->pr_lockpid = -1;
	fdinfo->pr_locksysid = -1;
	fdinfo->pr_peerpid = -1;

	/* Offset */

	/*
	 * pr_offset has already been set from the underlying file_t.
	 * Check if it is plausible and reset to -1 if not.
	 */
	if (fdinfo->pr_offset != -1 &&
	    VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
		fdinfo->pr_offset = -1;

	/*
	 * Attributes
	 *
	 * We have two cred_t structures available here.
	 * 'cred' is the caller's credential, and 'file_cred' is the credential
	 * for the file being inspected.
	 *
	 * When looking up the file attributes, file_cred is used in order
	 * that the correct ownership is set for doors and FIFOs. Since the
	 * caller has permission to read the fdinfo file in proc, this does
	 * not expose any additional information.
	 */
	vattr.va_mask = AT_STAT;
	if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
		fdinfo->pr_major = getmajor(vattr.va_fsid);
		fdinfo->pr_minor = getminor(vattr.va_fsid);
		fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
		fdinfo->pr_rminor = getminor(vattr.va_rdev);
		fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
		fdinfo->pr_size = (off64_t)vattr.va_size;
		fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
		fdinfo->pr_uid = vattr.va_uid;
		fdinfo->pr_gid = vattr.va_gid;
		if (vp->v_type == VSOCK)
			fdinfo->pr_fileflags |= sock_getfasync(vp);
	}

	/* locks */

	flock64_t bf;

	bzero(&bf, sizeof (bf));
	bf.l_type = F_WRLCK;

	/*
	 * Probe with a write lock over the whole file; if F_GETLK
	 * reports a conflicting lock, record its details.
	 */
	if (VOP_FRLOCK(vp, F_GETLK, &bf,
	    (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
	    cred, NULL) == 0 && bf.l_type != F_UNLCK) {
		fdinfo->pr_locktype = bf.l_type;
		fdinfo->pr_lockpid = bf.l_pid;
		fdinfo->pr_locksysid = bf.l_sysid;
	}

	/* peer cred */

	k_peercred_t kpc;

	/*
	 * Retrieve the peer credential, where supported: directly via
	 * ioctl for FIFOs and sockets, via a streams message for
	 * streams character devices.
	 */
	switch (vp->v_type) {
	case VFIFO:
	case VSOCK: {
		int32_t rval;

		error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
		    FKIOCTL, cred, &rval, NULL);
		break;
	}
	case VCHR: {
		struct strioctl strioc;
		int32_t rval;

		if (vp->v_stream == NULL) {
			error = ENOTSUP;
			break;
		}
		strioc.ic_cmd = _I_GETPEERCRED;
		strioc.ic_timout = INFTIM;
		strioc.ic_len = (int)sizeof (k_peercred_t);
		strioc.ic_dp = (char *)&kpc;

		error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
		    STR_NOSIG | K_TO_K, cred, &rval);
		break;
	}
	default:
		error = ENOTSUP;
		break;
	}

	if (error == 0 && kpc.pc_cr != NULL) {
		proc_t *peerp;

		fdinfo->pr_peerpid = kpc.pc_cpid;

		/* Release the hold returned with the peer credential. */
		crfree(kpc.pc_cr);

		/*
		 * Look up the peer process to capture its command name;
		 * pidlock is swapped for the peer's p_lock once found.
		 */
		mutex_enter(&pidlock);
		if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
			user_t *up;

			mutex_enter(&peerp->p_lock);
			mutex_exit(&pidlock);

			up = PTOU(peerp);
			bcopy(up->u_comm, fdinfo->pr_peername,
			    MIN(sizeof (up->u_comm),
			    sizeof (fdinfo->pr_peername) - 1));

			mutex_exit(&peerp->p_lock);
		} else {
			mutex_exit(&pidlock);
		}
	}

	/* pathname */

	switch (vp->v_type) {
	case VDOOR: {
		prfdinfo_nm_path_cbdata_t cb = {
			.nmp_p		= p,
			.nmp_data	= data,
			.nmp_sz		= 0
		};

		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
		break;
	}
	case VSOCK:
		/*
		 * Don't attempt to determine the path for a socket as the
		 * vnode has no associated v_path. It will cause a linear scan
		 * of the dnlc table and result in no path being found.
		 */
		break;
	default:
		(void) prfdinfopath(p, vp, data, cred);
	}

	/* socket options */
	if (vp->v_type == VSOCK)
		(void) prfdinfosockopt(vp, data, cred);

	/* TLI/XTI stream sockets */
	if (pristli(vp))
		(void) prfdinfotlisockopt(vp, data, cred);

	/*
	 * Add a terminating header with a zero size.
	 */
	pr_misc_header_t *misc;

	misc = pr_iol_newbuf(data, sizeof (*misc));
	misc->pr_misc_size = 0;
	misc->pr_misc_type = (uint_t)-1;

	return (0);
}
2985 
2986 #ifdef _SYSCALL32_IMPL
/*
 * Return information used by ps(1), for a 32-bit consumer.
 * The caller must hold p->p_lock.  This mirrors prgetpsinfo(), with
 * 64-bit-only fields zeroed or truncated as noted inline.
 */
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * prchoose() returns a representative lwp, thread-locked; drop
	 * the thread lock immediately.  With no lwp (fully zombied
	 * process) zero everything; otherwise preserve pr_lwp for
	 * prgetlwpsinfo32() below.
	 */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* Credentials are read under p_crlock. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* Zombie: synthesize the lwp fields. */
		extern int wstat(int, int);	/* needs a header file */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV32;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		(void) cmpldev(&psp->pr_ttydev, d);
		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
		/* Copies are bounded; psp was zeroed, so NUL is implied. */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = (caddr32_t)up->u_argv;
		psp->pr_envp = (caddr32_t)up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo32(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time;

			/* Sum decayed %cpu over every lwp in the process. */
			t = p->p_tlist;
			cur_time = gethrtime_unscaled();
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			/* System processes have no meaningful sizes. */
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/* p_lock is dropped while the AS lock is held. */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			/* Sizes are reported in kilobytes. */
			psp->pr_size = (size32_t)
			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
			psp->pr_rssize = (size32_t)
			    (rm_asrss(as) * (PAGESIZE / 1024));
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}

	/*
	 * If we are looking at an LP64 process, zero out
	 * the fields that cannot be represented in ILP32.
	 */
	if (p->p_model != DATAMODEL_ILP32) {
		psp->pr_size = 0;
		psp->pr_rssize = 0;
		psp->pr_argv = 0;
		psp->pr_envp = 0;
	}
}
3130 
3131 #endif	/* _SYSCALL32_IMPL */
3132 
/*
 * Fill in an lwpsinfo_t (the per-lwp portion of ps(1) data) for
 * thread t.  The caller must hold the owning process's p_lock.
 */
void
prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
{
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char c, state;
	uint64_t pct;
	int retval, niceval;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = (uintptr_t)t;
	psp->pr_wchan = (uintptr_t)t->t_wchan;

	/* map the thread state enum into a process state enum */
	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (state) {
	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
	case TS_RUN:		state = SRUN;		c = 'R';	break;
	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
	default:		state = 0;		c = '?';	break;
	}
	psp->pr_state = state;
	psp->pr_sname = c;
	if ((sobj = t->t_sobj_ops) != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);
	/*
	 * On CL_DONICE failure, pr_oldpri and pr_nice stay zero from
	 * the bzero above.
	 */
	retval = CL_DONICE(t, NULL, 0, &niceval);
	if (retval == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = t->t_start;
	psp->pr_start.tv_nsec = 0L;
	/* Sum user time plus system and trap time, scaled to real time. */
	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&hrutime);
	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&hrstime);
	hrt2ts(hrutime + hrstime, &psp->pr_time);
	/* compute %cpu for the lwp */
	pct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(pct);
	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	/* Redundant: psp was already zeroed in full above. */
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
3197 
3198 #ifdef _SYSCALL32_IMPL
/*
 * Fill in an lwpsinfo32_t (the 32-bit /proc view of per-lwp status)
 * for the given kernel thread.
 *
 * The caller must hold p->p_lock for the thread's process.
 */
void
prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char c, state;
	uint64_t pct;
	int retval, niceval;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */

	/* map the thread state enum into a process state enum */
	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (state) {
	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
	case TS_RUN:		state = SRUN;		c = 'R';	break;
	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
	default:		state = 0;		c = '?';	break;
	}
	psp->pr_state = state;
	psp->pr_sname = c;
	if ((sobj = t->t_sobj_ops) != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);
	/* ask the scheduling class for the nice value, if it has one */
	retval = CL_DONICE(t, NULL, 0, &niceval);
	if (retval == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	} else {
		psp->pr_oldpri = 0;
		psp->pr_nice = 0;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = (time32_t)t->t_start;
	psp->pr_start.tv_nsec = 0L;
	/* charge user time plus system and trap time to this lwp */
	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&hrutime);
	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&hrstime);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	/* compute %cpu for the lwp */
	pct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(pct);
	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
3267 #endif	/* _SYSCALL32_IMPL */
3268 
3269 #ifdef _SYSCALL32_IMPL
3270 
/*
 * Helper macros for converting a native structure to its 32-bit
 * counterpart, one field at a time.
 */
#define	PR_COPY_FIELD(s, d, field)	 d->field = s->field

/*
 * Copy a field only when the source process is itself 32-bit; for a
 * 64-bit process the value may not fit in the 32-bit field, which is
 * left zeroed by the caller's initial bzero().
 */
#define	PR_COPY_FIELD_ILP32(s, d, field)				\
	if (s->pr_dmodel == PR_MODEL_ILP32) {			\
		d->field = s->field;				\
	}

/* Narrow a timestruc_t field into its 32-bit representation. */
#define	PR_COPY_TIMESPEC(s, d, field)				\
	TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);

/* Byte-copy a fixed-size buffer field, sized by the destination. */
#define	PR_COPY_BUF(s, d, field)				\
	bcopy(s->field, d->field, sizeof (d->field));

/* Field deliberately not converted (e.g. kernel addresses). */
#define	PR_IGNORE_FIELD(s, d, field)
3285 
/*
 * Convert a native lwpsinfo_t to its 32-bit representation.
 * Kernel addresses (pr_addr, pr_wchan) cannot be represented in
 * 32 bits and are left zeroed by the initial bzero().
 */
void
lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
{
	bzero(dest, sizeof (*dest));

	PR_COPY_FIELD(src, dest, pr_flag);
	PR_COPY_FIELD(src, dest, pr_lwpid);
	PR_IGNORE_FIELD(src, dest, pr_addr);
	PR_IGNORE_FIELD(src, dest, pr_wchan);
	PR_COPY_FIELD(src, dest, pr_stype);
	PR_COPY_FIELD(src, dest, pr_state);
	PR_COPY_FIELD(src, dest, pr_sname);
	PR_COPY_FIELD(src, dest, pr_nice);
	PR_COPY_FIELD(src, dest, pr_syscall);
	PR_COPY_FIELD(src, dest, pr_oldpri);
	PR_COPY_FIELD(src, dest, pr_cpu);
	PR_COPY_FIELD(src, dest, pr_pri);
	PR_COPY_FIELD(src, dest, pr_pctcpu);
	PR_COPY_TIMESPEC(src, dest, pr_start);
	PR_COPY_BUF(src, dest, pr_clname);
	PR_COPY_BUF(src, dest, pr_name);
	PR_COPY_FIELD(src, dest, pr_onpro);
	PR_COPY_FIELD(src, dest, pr_bindpro);
	PR_COPY_FIELD(src, dest, pr_bindpset);
	PR_COPY_FIELD(src, dest, pr_lgrp);
}
3312 
3313 void
3314 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
3315 {
3316 	bzero(dest, sizeof (*dest));
3317 
3318 	PR_COPY_FIELD(src, dest, pr_flag);
3319 	PR_COPY_FIELD(src, dest, pr_nlwp);
3320 	PR_COPY_FIELD(src, dest, pr_pid);
3321 	PR_COPY_FIELD(src, dest, pr_ppid);
3322 	PR_COPY_FIELD(src, dest, pr_pgid);
3323 	PR_COPY_FIELD(src, dest, pr_sid);
3324 	PR_COPY_FIELD(src, dest, pr_uid);
3325 	PR_COPY_FIELD(src, dest, pr_euid);
3326 	PR_COPY_FIELD(src, dest, pr_gid);
3327 	PR_COPY_FIELD(src, dest, pr_egid);
3328 	PR_IGNORE_FIELD(src, dest, pr_addr);
3329 	PR_COPY_FIELD_ILP32(src, dest, pr_size);
3330 	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
3331 	PR_COPY_FIELD(src, dest, pr_ttydev);
3332 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3333 	PR_COPY_FIELD(src, dest, pr_pctmem);
3334 	PR_COPY_TIMESPEC(src, dest, pr_start);
3335 	PR_COPY_TIMESPEC(src, dest, pr_time);
3336 	PR_COPY_TIMESPEC(src, dest, pr_ctime);
3337 	PR_COPY_BUF(src, dest, pr_fname);
3338 	PR_COPY_BUF(src, dest, pr_psargs);
3339 	PR_COPY_FIELD(src, dest, pr_wstat);
3340 	PR_COPY_FIELD(src, dest, pr_argc);
3341 	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
3342 	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
3343 	PR_COPY_FIELD(src, dest, pr_dmodel);
3344 	PR_COPY_FIELD(src, dest, pr_taskid);
3345 	PR_COPY_FIELD(src, dest, pr_projid);
3346 	PR_COPY_FIELD(src, dest, pr_nzomb);
3347 	PR_COPY_FIELD(src, dest, pr_poolid);
3348 	PR_COPY_FIELD(src, dest, pr_contract);
3349 	PR_COPY_FIELD(src, dest, pr_poolid);
3350 	PR_COPY_FIELD(src, dest, pr_poolid);
3351 
3352 	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
3353 }
3354 
3355 #undef	PR_COPY_FIELD
3356 #undef	PR_COPY_FIELD_ILP32
3357 #undef	PR_COPY_TIMESPEC
3358 #undef	PR_COPY_BUF
3359 #undef	PR_IGNORE_FIELD
3360 
3361 #endif	/* _SYSCALL32_IMPL */
3362 
3363 /*
3364  * This used to get called when microstate accounting was disabled but
3365  * microstate information was requested.  Since Microstate accounting is on
3366  * regardless of the proc flags, this simply makes it appear to procfs that
3367  * microstate accounting is on.  This is relatively meaningless since you
3368  * can't turn it off, but this is here for the sake of appearances.
3369  */
3370 
/*
 * Mark every lwp in the process, and the process itself, as doing
 * microstate accounting.  The curtime argument is unused (historical).
 * The caller must hold p->p_lock.
 */
/*ARGSUSED*/
void
estimate_msacct(kthread_t *t, hrtime_t curtime)
{
	proc_t *p;

	if (t == NULL)
		return;

	p = ttoproc(t);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * A system process (p0) could be referenced if the thread is
	 * in the process of exiting.  Don't turn on microstate accounting
	 * in that case.
	 */
	if (p->p_flag & SSYS)
		return;

	/*
	 * Loop through all the LWPs (kernel threads) in the process,
	 * setting the per-thread microstate accounting flag.
	 */
	t = p->p_tlist;
	do {
		t->t_proc_flag |= TP_MSACCT;
	} while ((t = t->t_forw) != p->p_tlist);

	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
}
3401 
3402 /*
3403  * It's not really possible to disable microstate accounting anymore.
3404  * However, this routine simply turns off the ms accounting flags in a process
3405  * This way procfs can still pretend to turn microstate accounting on and
3406  * off for a process, but it actually doesn't do anything.  This is
3407  * a neutered form of preemptive idiot-proofing.
3408  */
3409 void
3410 disable_msacct(proc_t *p)
3411 {
3412 	kthread_t *t;
3413 
3414 	ASSERT(MUTEX_HELD(&p->p_lock));
3415 
3416 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
3417 	/*
3418 	 * Loop through all the LWPs (kernel threads) in the process.
3419 	 */
3420 	if ((t = p->p_tlist) != NULL) {
3421 		do {
3422 			/* clear per-thread flag */
3423 			t->t_proc_flag &= ~TP_MSACCT;
3424 		} while ((t = t->t_forw) != p->p_tlist);
3425 	}
3426 }
3427 
3428 /*
3429  * Return resource usage information.
3430  */
3431 void
3432 prgetusage(kthread_t *t, prhusage_t *pup)
3433 {
3434 	klwp_t *lwp = ttolwp(t);
3435 	hrtime_t *mstimep;
3436 	struct mstate *ms = &lwp->lwp_mstate;
3437 	int state;
3438 	int i;
3439 	hrtime_t curtime;
3440 	hrtime_t waitrq;
3441 	hrtime_t tmp1;
3442 
3443 	curtime = gethrtime_unscaled();
3444 
3445 	pup->pr_lwpid	= t->t_tid;
3446 	pup->pr_count	= 1;
3447 	pup->pr_create	= ms->ms_start;
3448 	pup->pr_term    = ms->ms_term;
3449 	scalehrtime(&pup->pr_create);
3450 	scalehrtime(&pup->pr_term);
3451 	if (ms->ms_term == 0) {
3452 		pup->pr_rtime = curtime - ms->ms_start;
3453 		scalehrtime(&pup->pr_rtime);
3454 	} else {
3455 		pup->pr_rtime = ms->ms_term - ms->ms_start;
3456 		scalehrtime(&pup->pr_rtime);
3457 	}
3458 
3459 
3460 	pup->pr_utime    = ms->ms_acct[LMS_USER];
3461 	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
3462 	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
3463 	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
3464 	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
3465 	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
3466 	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
3467 	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
3468 	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
3469 	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];
3470 
3471 	prscaleusage(pup);
3472 
3473 	/*
3474 	 * Adjust for time waiting in the dispatcher queue.
3475 	 */
3476 	waitrq = t->t_waitrq;	/* hopefully atomic */
3477 	if (waitrq != 0) {
3478 		if (waitrq > curtime) {
3479 			curtime = gethrtime_unscaled();
3480 		}
3481 		tmp1 = curtime - waitrq;
3482 		scalehrtime(&tmp1);
3483 		pup->pr_wtime += tmp1;
3484 		curtime = waitrq;
3485 	}
3486 
3487 	/*
3488 	 * Adjust for time spent in current microstate.
3489 	 */
3490 	if (ms->ms_state_start > curtime) {
3491 		curtime = gethrtime_unscaled();
3492 	}
3493 
3494 	i = 0;
3495 	do {
3496 		switch (state = t->t_mstate) {
3497 		case LMS_SLEEP:
3498 			/*
3499 			 * Update the timer for the current sleep state.
3500 			 */
3501 			switch (state = ms->ms_prev) {
3502 			case LMS_TFAULT:
3503 			case LMS_DFAULT:
3504 			case LMS_KFAULT:
3505 			case LMS_USER_LOCK:
3506 				break;
3507 			default:
3508 				state = LMS_SLEEP;
3509 				break;
3510 			}
3511 			break;
3512 		case LMS_TFAULT:
3513 		case LMS_DFAULT:
3514 		case LMS_KFAULT:
3515 		case LMS_USER_LOCK:
3516 			state = LMS_SYSTEM;
3517 			break;
3518 		}
3519 		switch (state) {
3520 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3521 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3522 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3523 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3524 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3525 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3526 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3527 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3528 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3529 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3530 		default:		panic("prgetusage: unknown microstate");
3531 		}
3532 		tmp1 = curtime - ms->ms_state_start;
3533 		if (tmp1 < 0) {
3534 			curtime = gethrtime_unscaled();
3535 			i++;
3536 			continue;
3537 		}
3538 		scalehrtime(&tmp1);
3539 	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);
3540 
3541 	*mstimep += tmp1;
3542 
3543 	/* update pup timestamp */
3544 	pup->pr_tstamp = curtime;
3545 	scalehrtime(&pup->pr_tstamp);
3546 
3547 	/*
3548 	 * Resource usage counters.
3549 	 */
3550 	pup->pr_minf  = lwp->lwp_ru.minflt;
3551 	pup->pr_majf  = lwp->lwp_ru.majflt;
3552 	pup->pr_nswap = lwp->lwp_ru.nswap;
3553 	pup->pr_inblk = lwp->lwp_ru.inblock;
3554 	pup->pr_oublk = lwp->lwp_ru.oublock;
3555 	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
3556 	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
3557 	pup->pr_sigs  = lwp->lwp_ru.nsignals;
3558 	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
3559 	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
3560 	pup->pr_sysc  = lwp->lwp_ru.sysc;
3561 	pup->pr_ioch  = lwp->lwp_ru.ioch;
3562 }
3563 
3564 /*
3565  * Convert ms_acct stats from unscaled high-res time to nanoseconds
3566  */
3567 void
3568 prscaleusage(prhusage_t *usg)
3569 {
3570 	scalehrtime(&usg->pr_utime);
3571 	scalehrtime(&usg->pr_stime);
3572 	scalehrtime(&usg->pr_ttime);
3573 	scalehrtime(&usg->pr_tftime);
3574 	scalehrtime(&usg->pr_dftime);
3575 	scalehrtime(&usg->pr_kftime);
3576 	scalehrtime(&usg->pr_ltime);
3577 	scalehrtime(&usg->pr_slptime);
3578 	scalehrtime(&usg->pr_wtime);
3579 	scalehrtime(&usg->pr_stoptime);
3580 }
3581 
3582 
3583 /*
3584  * Sum resource usage information.
3585  */
3586 void
3587 praddusage(kthread_t *t, prhusage_t *pup)
3588 {
3589 	klwp_t *lwp = ttolwp(t);
3590 	hrtime_t *mstimep;
3591 	struct mstate *ms = &lwp->lwp_mstate;
3592 	int state;
3593 	int i;
3594 	hrtime_t curtime;
3595 	hrtime_t waitrq;
3596 	hrtime_t tmp;
3597 	prhusage_t conv;
3598 
3599 	curtime = gethrtime_unscaled();
3600 
3601 	if (ms->ms_term == 0) {
3602 		tmp = curtime - ms->ms_start;
3603 		scalehrtime(&tmp);
3604 		pup->pr_rtime += tmp;
3605 	} else {
3606 		tmp = ms->ms_term - ms->ms_start;
3607 		scalehrtime(&tmp);
3608 		pup->pr_rtime += tmp;
3609 	}
3610 
3611 	conv.pr_utime = ms->ms_acct[LMS_USER];
3612 	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
3613 	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
3614 	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
3615 	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
3616 	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
3617 	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
3618 	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
3619 	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
3620 	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];
3621 
3622 	prscaleusage(&conv);
3623 
3624 	pup->pr_utime	+= conv.pr_utime;
3625 	pup->pr_stime	+= conv.pr_stime;
3626 	pup->pr_ttime	+= conv.pr_ttime;
3627 	pup->pr_tftime	+= conv.pr_tftime;
3628 	pup->pr_dftime	+= conv.pr_dftime;
3629 	pup->pr_kftime	+= conv.pr_kftime;
3630 	pup->pr_ltime	+= conv.pr_ltime;
3631 	pup->pr_slptime	+= conv.pr_slptime;
3632 	pup->pr_wtime	+= conv.pr_wtime;
3633 	pup->pr_stoptime += conv.pr_stoptime;
3634 
3635 	/*
3636 	 * Adjust for time waiting in the dispatcher queue.
3637 	 */
3638 	waitrq = t->t_waitrq;	/* hopefully atomic */
3639 	if (waitrq != 0) {
3640 		if (waitrq > curtime) {
3641 			curtime = gethrtime_unscaled();
3642 		}
3643 		tmp = curtime - waitrq;
3644 		scalehrtime(&tmp);
3645 		pup->pr_wtime += tmp;
3646 		curtime = waitrq;
3647 	}
3648 
3649 	/*
3650 	 * Adjust for time spent in current microstate.
3651 	 */
3652 	if (ms->ms_state_start > curtime) {
3653 		curtime = gethrtime_unscaled();
3654 	}
3655 
3656 	i = 0;
3657 	do {
3658 		switch (state = t->t_mstate) {
3659 		case LMS_SLEEP:
3660 			/*
3661 			 * Update the timer for the current sleep state.
3662 			 */
3663 			switch (state = ms->ms_prev) {
3664 			case LMS_TFAULT:
3665 			case LMS_DFAULT:
3666 			case LMS_KFAULT:
3667 			case LMS_USER_LOCK:
3668 				break;
3669 			default:
3670 				state = LMS_SLEEP;
3671 				break;
3672 			}
3673 			break;
3674 		case LMS_TFAULT:
3675 		case LMS_DFAULT:
3676 		case LMS_KFAULT:
3677 		case LMS_USER_LOCK:
3678 			state = LMS_SYSTEM;
3679 			break;
3680 		}
3681 		switch (state) {
3682 		case LMS_USER:		mstimep = &pup->pr_utime;	break;
3683 		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
3684 		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
3685 		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
3686 		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
3687 		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
3688 		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
3689 		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
3690 		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
3691 		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
3692 		default:		panic("praddusage: unknown microstate");
3693 		}
3694 		tmp = curtime - ms->ms_state_start;
3695 		if (tmp < 0) {
3696 			curtime = gethrtime_unscaled();
3697 			i++;
3698 			continue;
3699 		}
3700 		scalehrtime(&tmp);
3701 	} while (tmp < 0 && i < MAX_ITERS_SPIN);
3702 
3703 	*mstimep += tmp;
3704 
3705 	/* update pup timestamp */
3706 	pup->pr_tstamp = curtime;
3707 	scalehrtime(&pup->pr_tstamp);
3708 
3709 	/*
3710 	 * Resource usage counters.
3711 	 */
3712 	pup->pr_minf  += lwp->lwp_ru.minflt;
3713 	pup->pr_majf  += lwp->lwp_ru.majflt;
3714 	pup->pr_nswap += lwp->lwp_ru.nswap;
3715 	pup->pr_inblk += lwp->lwp_ru.inblock;
3716 	pup->pr_oublk += lwp->lwp_ru.oublock;
3717 	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
3718 	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
3719 	pup->pr_sigs  += lwp->lwp_ru.nsignals;
3720 	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
3721 	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
3722 	pup->pr_sysc  += lwp->lwp_ru.sysc;
3723 	pup->pr_ioch  += lwp->lwp_ru.ioch;
3724 }
3725 
3726 /*
3727  * Convert a prhusage_t to a prusage_t.
3728  * This means convert each hrtime_t to a timestruc_t
3729  * and copy the count fields uint64_t => ulong_t.
3730  */
3731 void
3732 prcvtusage(prhusage_t *pup, prusage_t *upup)
3733 {
3734 	uint64_t *ullp;
3735 	ulong_t *ulp;
3736 	int i;
3737 
3738 	upup->pr_lwpid = pup->pr_lwpid;
3739 	upup->pr_count = pup->pr_count;
3740 
3741 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3742 	hrt2ts(pup->pr_create,	&upup->pr_create);
3743 	hrt2ts(pup->pr_term,	&upup->pr_term);
3744 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3745 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3746 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3747 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3748 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3749 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3750 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3751 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3752 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3753 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3754 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3755 	bzero(upup->filltime, sizeof (upup->filltime));
3756 
3757 	ullp = &pup->pr_minf;
3758 	ulp = &upup->pr_minf;
3759 	for (i = 0; i < 22; i++)
3760 		*ulp++ = (ulong_t)*ullp++;
3761 }
3762 
3763 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit counterpart of prcvtusage(): convert a prhusage_t to a
 * prusage32_t, narrowing each hrtime_t to a timestruc32_t and each
 * uint64_t counter to uint32_t.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *ullp;
	uint32_t *ulp;
	int i;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
	hrt2ts32(pup->pr_create,	&upup->pr_create);
	hrt2ts32(pup->pr_term,		&upup->pr_term);
	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Copy the 22 event counters, pr_minf through pr_ioch.
	 * NOTE(review): this pointer walk assumes the counters are
	 * laid out contiguously and in the same order in both
	 * structures — verify against the struct definitions if
	 * either is ever changed.
	 */
	ullp = &pup->pr_minf;
	ulp = &upup->pr_minf;
	for (i = 0; i < 22; i++)
		*ulp++ = (uint32_t)*ullp++;
}
3795 #endif	/* _SYSCALL32_IMPL */
3796 
3797 /*
3798  * Determine whether a set is empty.
3799  */
3800 int
3801 setisempty(uint32_t *sp, uint_t n)
3802 {
3803 	while (n--)
3804 		if (*sp++)
3805 			return (0);
3806 	return (1);
3807 }
3808 
3809 /*
3810  * Utility routine for establishing a watched area in the process.
3811  * Keep the list of watched areas sorted by virtual address.
3812  */
3813 int
3814 set_watched_area(proc_t *p, struct watched_area *pwa)
3815 {
3816 	caddr_t vaddr = pwa->wa_vaddr;
3817 	caddr_t eaddr = pwa->wa_eaddr;
3818 	ulong_t flags = pwa->wa_flags;
3819 	struct watched_area *target;
3820 	avl_index_t where;
3821 	int error = 0;
3822 
3823 	/* we must not be holding p->p_lock, but the process must be locked */
3824 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3825 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3826 
3827 	/*
3828 	 * If this is our first watchpoint, enable watchpoints for the process.
3829 	 */
3830 	if (!pr_watch_active(p)) {
3831 		kthread_t *t;
3832 
3833 		mutex_enter(&p->p_lock);
3834 		if ((t = p->p_tlist) != NULL) {
3835 			do {
3836 				watch_enable(t);
3837 			} while ((t = t->t_forw) != p->p_tlist);
3838 		}
3839 		mutex_exit(&p->p_lock);
3840 	}
3841 
3842 	target = pr_find_watched_area(p, pwa, &where);
3843 	if (target != NULL) {
3844 		/*
3845 		 * We discovered an existing, overlapping watched area.
3846 		 * Allow it only if it is an exact match.
3847 		 */
3848 		if (target->wa_vaddr != vaddr ||
3849 		    target->wa_eaddr != eaddr)
3850 			error = EINVAL;
3851 		else if (target->wa_flags != flags) {
3852 			error = set_watched_page(p, vaddr, eaddr,
3853 			    flags, target->wa_flags);
3854 			target->wa_flags = flags;
3855 		}
3856 		kmem_free(pwa, sizeof (struct watched_area));
3857 	} else {
3858 		avl_insert(&p->p_warea, pwa, where);
3859 		error = set_watched_page(p, vaddr, eaddr, flags, 0);
3860 	}
3861 
3862 	return (error);
3863 }
3864 
3865 /*
3866  * Utility routine for clearing a watched area in the process.
3867  * Must be an exact match of the virtual address.
3868  * size and flags don't matter.
3869  */
3870 int
3871 clear_watched_area(proc_t *p, struct watched_area *pwa)
3872 {
3873 	struct watched_area *found;
3874 
3875 	/* we must not be holding p->p_lock, but the process must be locked */
3876 	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
3877 	ASSERT(p->p_proc_flag & P_PR_LOCK);
3878 
3879 
3880 	if (!pr_watch_active(p)) {
3881 		kmem_free(pwa, sizeof (struct watched_area));
3882 		return (0);
3883 	}
3884 
3885 	/*
3886 	 * Look for a matching address in the watched areas.  If a match is
3887 	 * found, clear the old watched area and adjust the watched page(s).  It
3888 	 * is not an error if there is no match.
3889 	 */
3890 	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
3891 	    found->wa_vaddr == pwa->wa_vaddr) {
3892 		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
3893 		    found->wa_flags);
3894 		avl_remove(&p->p_warea, found);
3895 		kmem_free(found, sizeof (struct watched_area));
3896 	}
3897 
3898 	kmem_free(pwa, sizeof (struct watched_area));
3899 
3900 	/*
3901 	 * If we removed the last watched area from the process, disable
3902 	 * watchpoints.
3903 	 */
3904 	if (!pr_watch_active(p)) {
3905 		kthread_t *t;
3906 
3907 		mutex_enter(&p->p_lock);
3908 		if ((t = p->p_tlist) != NULL) {
3909 			do {
3910 				watch_disable(t);
3911 			} while ((t = t->t_forw) != p->p_tlist);
3912 		}
3913 		mutex_exit(&p->p_lock);
3914 	}
3915 
3916 	return (0);
3917 }
3918 
3919 /*
3920  * Frees all the watched_area structures
3921  */
3922 void
3923 pr_free_watchpoints(proc_t *p)
3924 {
3925 	struct watched_area *delp;
3926 	void *cookie;
3927 
3928 	cookie = NULL;
3929 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3930 		kmem_free(delp, sizeof (struct watched_area));
3931 
3932 	avl_destroy(&p->p_warea);
3933 }
3934 
3935 /*
3936  * This one is called by the traced process to unwatch all the
3937  * pages while deallocating the list of watched_page structs.
3938  */
3939 void
3940 pr_free_watched_pages(proc_t *p)
3941 {
3942 	struct as *as = p->p_as;
3943 	struct watched_page *pwp;
3944 	uint_t prot;
3945 	int    retrycnt, err;
3946 	void *cookie;
3947 
3948 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3949 		return;
3950 
3951 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3952 	AS_LOCK_ENTER(as, RW_WRITER);
3953 
3954 	pwp = avl_first(&as->a_wpage);
3955 
3956 	cookie = NULL;
3957 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3958 		retrycnt = 0;
3959 		if ((prot = pwp->wp_oprot) != 0) {
3960 			caddr_t addr = pwp->wp_vaddr;
3961 			struct seg *seg;
3962 		retry:
3963 
3964 			if ((pwp->wp_prot != prot ||
3965 			    (pwp->wp_flags & WP_NOWATCH)) &&
3966 			    (seg = as_segat(as, addr)) != NULL) {
3967 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3968 				if (err == IE_RETRY) {
3969 					ASSERT(retrycnt == 0);
3970 					retrycnt++;
3971 					goto retry;
3972 				}
3973 			}
3974 		}
3975 		kmem_free(pwp, sizeof (struct watched_page));
3976 	}
3977 
3978 	avl_destroy(&as->a_wpage);
3979 	p->p_wprot = NULL;
3980 
3981 	AS_LOCK_EXIT(as);
3982 }
3983 
3984 /*
3985  * Insert a watched area into the list of watched pages.
3986  * If oflags is zero then we are adding a new watched area.
3987  * Otherwise we are changing the flags of an existing watched area.
3988  */
3989 static int
3990 set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
3991     ulong_t flags, ulong_t oflags)
3992 {
3993 	struct as *as = p->p_as;
3994 	avl_tree_t *pwp_tree;
3995 	struct watched_page *pwp, *newpwp;
3996 	struct watched_page tpw;
3997 	avl_index_t where;
3998 	struct seg *seg;
3999 	uint_t prot;
4000 	caddr_t addr;
4001 
4002 	/*
4003 	 * We need to pre-allocate a list of structures before we grab the
4004 	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
4005 	 * held.
4006 	 */
4007 	newpwp = NULL;
4008 	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4009 	    addr < eaddr; addr += PAGESIZE) {
4010 		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
4011 		pwp->wp_list = newpwp;
4012 		newpwp = pwp;
4013 	}
4014 
4015 	AS_LOCK_ENTER(as, RW_WRITER);
4016 
4017 	/*
4018 	 * Search for an existing watched page to contain the watched area.
4019 	 * If none is found, grab a new one from the available list
4020 	 * and insert it in the active list, keeping the list sorted
4021 	 * by user-level virtual address.
4022 	 */
4023 	if (p->p_flag & SVFWAIT)
4024 		pwp_tree = &p->p_wpage;
4025 	else
4026 		pwp_tree = &as->a_wpage;
4027 
4028 again:
4029 	if (avl_numnodes(pwp_tree) > prnwatch) {
4030 		AS_LOCK_EXIT(as);
4031 		while (newpwp != NULL) {
4032 			pwp = newpwp->wp_list;
4033 			kmem_free(newpwp, sizeof (struct watched_page));
4034 			newpwp = pwp;
4035 		}
4036 		return (E2BIG);
4037 	}
4038 
4039 	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4040 	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
4041 		pwp = newpwp;
4042 		newpwp = newpwp->wp_list;
4043 		pwp->wp_list = NULL;
4044 		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
4045 		    (uintptr_t)PAGEMASK);
4046 		avl_insert(pwp_tree, pwp, where);
4047 	}
4048 
4049 	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);
4050 
4051 	if (oflags & WA_READ)
4052 		pwp->wp_read--;
4053 	if (oflags & WA_WRITE)
4054 		pwp->wp_write--;
4055 	if (oflags & WA_EXEC)
4056 		pwp->wp_exec--;
4057 
4058 	ASSERT(pwp->wp_read >= 0);
4059 	ASSERT(pwp->wp_write >= 0);
4060 	ASSERT(pwp->wp_exec >= 0);
4061 
4062 	if (flags & WA_READ)
4063 		pwp->wp_read++;
4064 	if (flags & WA_WRITE)
4065 		pwp->wp_write++;
4066 	if (flags & WA_EXEC)
4067 		pwp->wp_exec++;
4068 
4069 	if (!(p->p_flag & SVFWAIT)) {
4070 		vaddr = pwp->wp_vaddr;
4071 		if (pwp->wp_oprot == 0 &&
4072 		    (seg = as_segat(as, vaddr)) != NULL) {
4073 			SEGOP_GETPROT(seg, vaddr, 0, &prot);
4074 			pwp->wp_oprot = (uchar_t)prot;
4075 			pwp->wp_prot = (uchar_t)prot;
4076 		}
4077 		if (pwp->wp_oprot != 0) {
4078 			prot = pwp->wp_oprot;
4079 			if (pwp->wp_read)
4080 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4081 			if (pwp->wp_write)
4082 				prot &= ~PROT_WRITE;
4083 			if (pwp->wp_exec)
4084 				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4085 			if (!(pwp->wp_flags & WP_NOWATCH) &&
4086 			    pwp->wp_prot != prot &&
4087 			    (pwp->wp_flags & WP_SETPROT) == 0) {
4088 				pwp->wp_flags |= WP_SETPROT;
4089 				pwp->wp_list = p->p_wprot;
4090 				p->p_wprot = pwp;
4091 			}
4092 			pwp->wp_prot = (uchar_t)prot;
4093 		}
4094 	}
4095 
4096 	/*
4097 	 * If the watched area extends into the next page then do
4098 	 * it over again with the virtual address of the next page.
4099 	 */
4100 	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
4101 		goto again;
4102 
4103 	AS_LOCK_EXIT(as);
4104 
4105 	/*
4106 	 * Free any pages we may have over-allocated
4107 	 */
4108 	while (newpwp != NULL) {
4109 		pwp = newpwp->wp_list;
4110 		kmem_free(newpwp, sizeof (struct watched_page));
4111 		newpwp = pwp;
4112 	}
4113 
4114 	return (0);
4115 }
4116 
4117 /*
4118  * Remove a watched area from the list of watched pages.
4119  * A watched area may extend over more than one page.
4120  */
4121 static void
4122 clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
4123 {
4124 	struct as *as = p->p_as;
4125 	struct watched_page *pwp;
4126 	struct watched_page tpw;
4127 	avl_tree_t *tree;
4128 	avl_index_t where;
4129 
4130 	AS_LOCK_ENTER(as, RW_WRITER);
4131 
4132 	if (p->p_flag & SVFWAIT)
4133 		tree = &p->p_wpage;
4134 	else
4135 		tree = &as->a_wpage;
4136 
4137 	tpw.wp_vaddr = vaddr =
4138 	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
4139 	pwp = avl_find(tree, &tpw, &where);
4140 	if (pwp == NULL)
4141 		pwp = avl_nearest(tree, where, AVL_AFTER);
4142 
4143 	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
4144 		ASSERT(vaddr <=  pwp->wp_vaddr);
4145 
4146 		if (flags & WA_READ)
4147 			pwp->wp_read--;
4148 		if (flags & WA_WRITE)
4149 			pwp->wp_write--;
4150 		if (flags & WA_EXEC)
4151 			pwp->wp_exec--;
4152 
4153 		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
4154 			/*
4155 			 * Reset the hat layer's protections on this page.
4156 			 */
4157 			if (pwp->wp_oprot != 0) {
4158 				uint_t prot = pwp->wp_oprot;
4159 
4160 				if (pwp->wp_read)
4161 					prot &=
4162 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4163 				if (pwp->wp_write)
4164 					prot &= ~PROT_WRITE;
4165 				if (pwp->wp_exec)
4166 					prot &=
4167 					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
4168 				if (!(pwp->wp_flags & WP_NOWATCH) &&
4169 				    pwp->wp_prot != prot &&
4170 				    (pwp->wp_flags & WP_SETPROT) == 0) {
4171 					pwp->wp_flags |= WP_SETPROT;
4172 					pwp->wp_list = p->p_wprot;
4173 					p->p_wprot = pwp;
4174 				}
4175 				pwp->wp_prot = (uchar_t)prot;
4176 			}
4177 		} else {
4178 			/*
4179 			 * No watched areas remain in this page.
4180 			 * Reset everything to normal.
4181 			 */
4182 			if (pwp->wp_oprot != 0) {
4183 				pwp->wp_prot = pwp->wp_oprot;
4184 				if ((pwp->wp_flags & WP_SETPROT) == 0) {
4185 					pwp->wp_flags |= WP_SETPROT;
4186 					pwp->wp_list = p->p_wprot;
4187 					p->p_wprot = pwp;
4188 				}
4189 			}
4190 		}
4191 
4192 		pwp = AVL_NEXT(tree, pwp);
4193 	}
4194 
4195 	AS_LOCK_EXIT(as);
4196 }
4197 
4198 /*
4199  * Return the original protections for the specified page.
4200  */
4201 static void
4202 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4203 {
4204 	struct watched_page *pwp;
4205 	struct watched_page tpw;
4206 
4207 	ASSERT(AS_LOCK_HELD(as));
4208 
4209 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4210 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4211 		*prot = pwp->wp_oprot;
4212 }
4213 
4214 static prpagev_t *
4215 pr_pagev_create(struct seg *seg, int check_noreserve)
4216 {
4217 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4218 	size_t total_pages = seg_pages(seg);
4219 
4220 	/*
4221 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
4222 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
4223 	 * to about a megabyte of kernel heap by default.
4224 	 */
4225 	pagev->pg_npages = MIN(total_pages, pagev_lim);
4226 	pagev->pg_pnbase = 0;
4227 
4228 	pagev->pg_protv =
4229 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4230 
4231 	if (check_noreserve)
4232 		pagev->pg_incore =
4233 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4234 	else
4235 		pagev->pg_incore = NULL;
4236 
4237 	return (pagev);
4238 }
4239 
4240 static void
4241 pr_pagev_destroy(prpagev_t *pagev)
4242 {
4243 	if (pagev->pg_incore != NULL)
4244 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4245 
4246 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4247 	kmem_free(pagev, sizeof (prpagev_t));
4248 }
4249 
/*
 * Fill the page vector with protection (and optionally incore)
 * information for the pages of seg starting at addr, up to eaddr.
 *
 * When the incore vector is in use, pages with no backing store are
 * skipped; returns the address of the first page with backing store
 * (or eaddr if none).  The vector's pg_pnbase/pg_protv then describe
 * the range beginning at the returned address.
 */
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	/* clamp the query length to the end of the requested range */
	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here:  We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory.  If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}
4310 
/*
 * Starting from *saddrp, locate the next contiguous run of backed pages
 * whose (watchpoint-adjusted) protections are all identical.  On return,
 * *saddrp is the start of the run, *protp is its protection (0 if no
 * backed pages remain), and the returned address is the end of the run
 * (exclusive), which is at most eaddr.  The page vector is refilled via
 * pr_pagev_fill() whenever the scan advances past its coverage.
 */
static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				/* Ran off the end: no backed pages remain. */
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				/* Exhausted the vector; refill and re-anchor. */
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}
4403 
4404 size_t
4405 pr_getsegsize(struct seg *seg, int reserved)
4406 {
4407 	size_t size = seg->s_size;
4408 
4409 	/*
4410 	 * If we're interested in the reserved space, return the size of the
4411 	 * segment itself.  Everything else in this function is a special case
4412 	 * to determine the actual underlying size of various segment types.
4413 	 */
4414 	if (reserved)
4415 		return (size);
4416 
4417 	/*
4418 	 * If this is a segvn mapping of a regular file, return the smaller
4419 	 * of the segment size and the remaining size of the file beyond
4420 	 * the file offset corresponding to seg->s_base.
4421 	 */
4422 	if (seg->s_ops == &segvn_ops) {
4423 		vattr_t vattr;
4424 		vnode_t *vp;
4425 
4426 		vattr.va_mask = AT_SIZE;
4427 
4428 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4429 		    vp != NULL && vp->v_type == VREG &&
4430 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4431 
4432 			u_offset_t fsize = vattr.va_size;
4433 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4434 
4435 			if (fsize < offset)
4436 				fsize = 0;
4437 			else
4438 				fsize -= offset;
4439 
4440 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4441 
4442 			if (fsize < (u_offset_t)size)
4443 				size = (size_t)fsize;
4444 		}
4445 
4446 		return (size);
4447 	}
4448 
4449 	/*
4450 	 * If this is an ISM shared segment, don't include pages that are
4451 	 * beyond the real size of the spt segment that backs it.
4452 	 */
4453 	if (seg->s_ops == &segspt_shmops)
4454 		return (MIN(spt_realsize(seg), size));
4455 
4456 	/*
4457 	 * If this is segment is a mapping from /dev/null, then this is a
4458 	 * reservation of virtual address space and has no actual size.
4459 	 * Such segments are backed by segdev and have type set to neither
4460 	 * MAP_SHARED nor MAP_PRIVATE.
4461 	 */
4462 	if (seg->s_ops == &segdev_ops &&
4463 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
4464 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
4465 		return (0);
4466 
4467 	/*
4468 	 * If this segment doesn't match one of the special types we handle,
4469 	 * just return the size of the segment itself.
4470 	 */
4471 	return (size);
4472 }
4473 
/*
 * Return the protections for the range beginning at *saddrp within the
 * given segment, and set *naddrp to the end (exclusive) of the run of
 * pages sharing those protections.  *tmp carries a prpagev_t between
 * successive calls over the same segment: it is allocated on the first
 * call (when *saddrp == seg->s_base) and freed automatically once the
 * scan reaches eaddr (see pr_getprot_done()).  When 'reserved' is
 * clear, unmaterialized MAP_NORESERVE pages are excluded from the
 * reported ranges.
 */
uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	/* View seg->s_data as whichever private type matches seg->s_ops. */
	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			/* No backed pages at all in this segment. */
			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	/* Scan complete: release the page vector and clear *tmp. */
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}
4568 
4569 void
4570 pr_getprot_done(void **tmp)
4571 {
4572 	if (*tmp != NULL) {
4573 		pr_pagev_destroy((prpagev_t *)*tmp);
4574 		*tmp = NULL;
4575 	}
4576 }
4577 
4578 /*
4579  * Return true iff the vnode is a /proc file from the object directory.
4580  */
4581 int
4582 pr_isobject(vnode_t *vp)
4583 {
4584 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
4585 }
4586 
4587 /*
4588  * Return true iff the vnode is a /proc file opened by the process itself.
4589  */
4590 int
4591 pr_isself(vnode_t *vp)
4592 {
4593 	/*
4594 	 * XXX: To retain binary compatibility with the old
4595 	 * ioctl()-based version of /proc, we exempt self-opens
4596 	 * of /proc/<pid> from being marked close-on-exec.
4597 	 */
4598 	return (vn_matchops(vp, prvnodeops) &&
4599 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
4600 	    VTOP(vp)->pr_type != PR_PIDDIR);
4601 }
4602 
4603 static ssize_t
4604 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
4605 {
4606 	ssize_t pagesize, hatsize;
4607 
4608 	ASSERT(AS_WRITE_HELD(seg->s_as));
4609 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4610 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4611 	ASSERT(saddr < eaddr);
4612 
4613 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4614 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4615 	ASSERT(pagesize != 0);
4616 
4617 	if (pagesize == -1)
4618 		pagesize = PAGESIZE;
4619 
4620 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4621 
4622 	while (saddr < eaddr) {
4623 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4624 			break;
4625 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
4626 		saddr += pagesize;
4627 	}
4628 
4629 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
4630 	return (hatsize);
4631 }
4632 
4633 /*
4634  * Return an array of structures with extended memory map information.
4635  * We allocate here; the caller must deallocate.
4636  */
4637 int
4638 prgetxmap(proc_t *p, list_t *iolhead)
4639 {
4640 	struct as *as = p->p_as;
4641 	prxmap_t *mp;
4642 	struct seg *seg;
4643 	struct seg *brkseg, *stkseg;
4644 	struct vnode *vp;
4645 	struct vattr vattr;
4646 	uint_t prot;
4647 
4648 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4649 
4650 	/*
4651 	 * Request an initial buffer size that doesn't waste memory
4652 	 * if the address space has only a small number of segments.
4653 	 */
4654 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4655 
4656 	if ((seg = AS_SEGFIRST(as)) == NULL)
4657 		return (0);
4658 
4659 	brkseg = break_seg(p);
4660 	stkseg = as_segat(as, prgetstackbase(p));
4661 
4662 	do {
4663 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4664 		caddr_t saddr, naddr, baddr;
4665 		void *tmp = NULL;
4666 		ssize_t psz;
4667 		char *parr;
4668 		uint64_t npages;
4669 		uint64_t pagenum;
4670 
4671 		if ((seg->s_flags & S_HOLE) != 0) {
4672 			continue;
4673 		}
4674 		/*
4675 		 * Segment loop part one: iterate from the base of the segment
4676 		 * to its end, pausing at each address boundary (baddr) between
4677 		 * ranges that have different virtual memory protections.
4678 		 */
4679 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4680 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4681 			ASSERT(baddr >= saddr && baddr <= eaddr);
4682 
4683 			/*
4684 			 * Segment loop part two: iterate from the current
4685 			 * position to the end of the protection boundary,
4686 			 * pausing at each address boundary (naddr) between
4687 			 * ranges that have different underlying page sizes.
4688 			 */
4689 			for (; saddr < baddr; saddr = naddr) {
4690 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4691 				ASSERT(naddr >= saddr && naddr <= baddr);
4692 
4693 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4694 
4695 				mp->pr_vaddr = (uintptr_t)saddr;
4696 				mp->pr_size = naddr - saddr;
4697 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4698 				mp->pr_mflags = 0;
4699 				if (prot & PROT_READ)
4700 					mp->pr_mflags |= MA_READ;
4701 				if (prot & PROT_WRITE)
4702 					mp->pr_mflags |= MA_WRITE;
4703 				if (prot & PROT_EXEC)
4704 					mp->pr_mflags |= MA_EXEC;
4705 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4706 					mp->pr_mflags |= MA_SHARED;
4707 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4708 					mp->pr_mflags |= MA_NORESERVE;
4709 				if (seg->s_ops == &segspt_shmops ||
4710 				    (seg->s_ops == &segvn_ops &&
4711 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4712 				    vp == NULL)))
4713 					mp->pr_mflags |= MA_ANON;
4714 				if (seg == brkseg)
4715 					mp->pr_mflags |= MA_BREAK;
4716 				else if (seg == stkseg)
4717 					mp->pr_mflags |= MA_STACK;
4718 				if (seg->s_ops == &segspt_shmops)
4719 					mp->pr_mflags |= MA_ISM | MA_SHM;
4720 
4721 				mp->pr_pagesize = PAGESIZE;
4722 				if (psz == -1) {
4723 					mp->pr_hatpagesize = 0;
4724 				} else {
4725 					mp->pr_hatpagesize = psz;
4726 				}
4727 
4728 				/*
4729 				 * Manufacture a filename for the "object" dir.
4730 				 */
4731 				mp->pr_dev = PRNODEV;
4732 				vattr.va_mask = AT_FSID|AT_NODEID;
4733 				if (seg->s_ops == &segvn_ops &&
4734 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4735 				    vp != NULL && vp->v_type == VREG &&
4736 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4737 				    NULL) == 0) {
4738 					mp->pr_dev = vattr.va_fsid;
4739 					mp->pr_ino = vattr.va_nodeid;
4740 					if (vp == p->p_exec)
4741 						(void) strcpy(mp->pr_mapname,
4742 						    "a.out");
4743 					else
4744 						pr_object_name(mp->pr_mapname,
4745 						    vp, &vattr);
4746 				}
4747 
4748 				/*
4749 				 * Get the SysV shared memory id, if any.
4750 				 */
4751 				if ((mp->pr_mflags & MA_SHARED) &&
4752 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4753 				    seg->s_base)) != SHMID_NONE) {
4754 					if (mp->pr_shmid == SHMID_FREE)
4755 						mp->pr_shmid = -1;
4756 
4757 					mp->pr_mflags |= MA_SHM;
4758 				} else {
4759 					mp->pr_shmid = -1;
4760 				}
4761 
4762 				npages = ((uintptr_t)(naddr - saddr)) >>
4763 				    PAGESHIFT;
4764 				parr = kmem_zalloc(npages, KM_SLEEP);
4765 
4766 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4767 
4768 				for (pagenum = 0; pagenum < npages; pagenum++) {
4769 					if (parr[pagenum] & SEG_PAGE_INCORE)
4770 						mp->pr_rss++;
4771 					if (parr[pagenum] & SEG_PAGE_ANON)
4772 						mp->pr_anon++;
4773 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4774 						mp->pr_locked++;
4775 				}
4776 				kmem_free(parr, npages);
4777 			}
4778 		}
4779 		ASSERT(tmp == NULL);
4780 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4781 
4782 	return (0);
4783 }
4784 
4785 /*
4786  * Return the process's credentials.  We don't need a 32-bit equivalent of
4787  * this function because prcred_t and prcred32_t are actually the same.
4788  */
4789 void
4790 prgetcred(proc_t *p, prcred_t *pcrp)
4791 {
4792 	mutex_enter(&p->p_crlock);
4793 	cred2prcred(p->p_cred, pcrp);
4794 	mutex_exit(&p->p_crlock);
4795 }
4796 
4797 void
4798 prgetsecflags(proc_t *p, prsecflags_t *psfp)
4799 {
4800 	ASSERT(psfp != NULL);
4801 
4802 	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
4803 	psfp->pr_lower = p->p_secflags.psf_lower;
4804 	psfp->pr_upper = p->p_secflags.psf_upper;
4805 	psfp->pr_effective = p->p_secflags.psf_effective;
4806 	psfp->pr_inherit = p->p_secflags.psf_inherit;
4807 }
4808 
4809 /*
4810  * Compute actual size of the prpriv_t structure.
4811  */
4812 
4813 size_t
4814 prgetprivsize(void)
4815 {
4816 	return (priv_prgetprivsize(NULL));
4817 }
4818 
4819 /*
4820  * Return the process's privileges.  We don't need a 32-bit equivalent of
4821  * this function because prpriv_t and prpriv32_t are actually the same.
4822  */
4823 void
4824 prgetpriv(proc_t *p, prpriv_t *pprp)
4825 {
4826 	mutex_enter(&p->p_crlock);
4827 	cred2prpriv(p->p_cred, pprp);
4828 	mutex_exit(&p->p_crlock);
4829 }
4830 
4831 #ifdef _SYSCALL32_IMPL
4832 /*
4833  * Return an array of structures with HAT memory map information.
4834  * We allocate here; the caller must deallocate.
4835  */
4836 int
4837 prgetxmap32(proc_t *p, list_t *iolhead)
4838 {
4839 	struct as *as = p->p_as;
4840 	prxmap32_t *mp;
4841 	struct seg *seg;
4842 	struct seg *brkseg, *stkseg;
4843 	struct vnode *vp;
4844 	struct vattr vattr;
4845 	uint_t prot;
4846 
4847 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4848 
4849 	/*
4850 	 * Request an initial buffer size that doesn't waste memory
4851 	 * if the address space has only a small number of segments.
4852 	 */
4853 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4854 
4855 	if ((seg = AS_SEGFIRST(as)) == NULL)
4856 		return (0);
4857 
4858 	brkseg = break_seg(p);
4859 	stkseg = as_segat(as, prgetstackbase(p));
4860 
4861 	do {
4862 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4863 		caddr_t saddr, naddr, baddr;
4864 		void *tmp = NULL;
4865 		ssize_t psz;
4866 		char *parr;
4867 		uint64_t npages;
4868 		uint64_t pagenum;
4869 
4870 		if ((seg->s_flags & S_HOLE) != 0) {
4871 			continue;
4872 		}
4873 
4874 		/*
4875 		 * Segment loop part one: iterate from the base of the segment
4876 		 * to its end, pausing at each address boundary (baddr) between
4877 		 * ranges that have different virtual memory protections.
4878 		 */
4879 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4880 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4881 			ASSERT(baddr >= saddr && baddr <= eaddr);
4882 
4883 			/*
4884 			 * Segment loop part two: iterate from the current
4885 			 * position to the end of the protection boundary,
4886 			 * pausing at each address boundary (naddr) between
4887 			 * ranges that have different underlying page sizes.
4888 			 */
4889 			for (; saddr < baddr; saddr = naddr) {
4890 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4891 				ASSERT(naddr >= saddr && naddr <= baddr);
4892 
4893 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4894 
4895 				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
4896 				mp->pr_size = (size32_t)(naddr - saddr);
4897 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4898 				mp->pr_mflags = 0;
4899 				if (prot & PROT_READ)
4900 					mp->pr_mflags |= MA_READ;
4901 				if (prot & PROT_WRITE)
4902 					mp->pr_mflags |= MA_WRITE;
4903 				if (prot & PROT_EXEC)
4904 					mp->pr_mflags |= MA_EXEC;
4905 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4906 					mp->pr_mflags |= MA_SHARED;
4907 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4908 					mp->pr_mflags |= MA_NORESERVE;
4909 				if (seg->s_ops == &segspt_shmops ||
4910 				    (seg->s_ops == &segvn_ops &&
4911 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4912 				    vp == NULL)))
4913 					mp->pr_mflags |= MA_ANON;
4914 				if (seg == brkseg)
4915 					mp->pr_mflags |= MA_BREAK;
4916 				else if (seg == stkseg)
4917 					mp->pr_mflags |= MA_STACK;
4918 				if (seg->s_ops == &segspt_shmops)
4919 					mp->pr_mflags |= MA_ISM | MA_SHM;
4920 
4921 				mp->pr_pagesize = PAGESIZE;
4922 				if (psz == -1) {
4923 					mp->pr_hatpagesize = 0;
4924 				} else {
4925 					mp->pr_hatpagesize = psz;
4926 				}
4927 
4928 				/*
4929 				 * Manufacture a filename for the "object" dir.
4930 				 */
4931 				mp->pr_dev = PRNODEV32;
4932 				vattr.va_mask = AT_FSID|AT_NODEID;
4933 				if (seg->s_ops == &segvn_ops &&
4934 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4935 				    vp != NULL && vp->v_type == VREG &&
4936 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4937 				    NULL) == 0) {
4938 					(void) cmpldev(&mp->pr_dev,
4939 					    vattr.va_fsid);
4940 					mp->pr_ino = vattr.va_nodeid;
4941 					if (vp == p->p_exec)
4942 						(void) strcpy(mp->pr_mapname,
4943 						    "a.out");
4944 					else
4945 						pr_object_name(mp->pr_mapname,
4946 						    vp, &vattr);
4947 				}
4948 
4949 				/*
4950 				 * Get the SysV shared memory id, if any.
4951 				 */
4952 				if ((mp->pr_mflags & MA_SHARED) &&
4953 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4954 				    seg->s_base)) != SHMID_NONE) {
4955 					if (mp->pr_shmid == SHMID_FREE)
4956 						mp->pr_shmid = -1;
4957 
4958 					mp->pr_mflags |= MA_SHM;
4959 				} else {
4960 					mp->pr_shmid = -1;
4961 				}
4962 
4963 				npages = ((uintptr_t)(naddr - saddr)) >>
4964 				    PAGESHIFT;
4965 				parr = kmem_zalloc(npages, KM_SLEEP);
4966 
4967 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4968 
4969 				for (pagenum = 0; pagenum < npages; pagenum++) {
4970 					if (parr[pagenum] & SEG_PAGE_INCORE)
4971 						mp->pr_rss++;
4972 					if (parr[pagenum] & SEG_PAGE_ANON)
4973 						mp->pr_anon++;
4974 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4975 						mp->pr_locked++;
4976 				}
4977 				kmem_free(parr, npages);
4978 			}
4979 		}
4980 		ASSERT(tmp == NULL);
4981 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4982 
4983 	return (0);
4984 }
4985 #endif	/* _SYSCALL32_IMPL */
4986