xref: /illumos-gate/usr/src/uts/common/fs/proc/prsubr.c (revision 72a6dc127431d372b6b6136087c736300544f8b7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Copyright (c) 2013, Joyent, Inc. All rights reserved.
25  * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26  */
27 
28 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
29 /*	  All Rights Reserved	*/
30 
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/cmn_err.h>
35 #include <sys/cred.h>
36 #include <sys/priv.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
39 #include <sys/inline.h>
40 #include <sys/kmem.h>
41 #include <sys/mman.h>
42 #include <sys/proc.h>
43 #include <sys/brand.h>
44 #include <sys/sobject.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/uio.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/session.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/disp.h>
56 #include <sys/class.h>
57 #include <sys/ts.h>
58 #include <sys/bitmap.h>
59 #include <sys/poll.h>
60 #include <sys/shm_impl.h>
61 #include <sys/fault.h>
62 #include <sys/syscall.h>
63 #include <sys/procfs.h>
64 #include <sys/processor.h>
65 #include <sys/cpuvar.h>
66 #include <sys/copyops.h>
67 #include <sys/time.h>
68 #include <sys/msacct.h>
69 #include <sys/flock_impl.h>
70 #include <sys/stropts.h>
71 #include <sys/strsubr.h>
72 #include <sys/pathname.h>
73 #include <sys/mode.h>
74 #include <sys/socketvar.h>
75 #include <sys/autoconf.h>
76 #include <sys/dtrace.h>
77 #include <sys/timod.h>
78 #include <sys/fs/namenode.h>
79 #include <netinet/udp.h>
80 #include <netinet/tcp.h>
81 #include <inet/cc.h>
82 #include <vm/as.h>
83 #include <vm/rm.h>
84 #include <vm/seg.h>
85 #include <vm/seg_vn.h>
86 #include <vm/seg_dev.h>
87 #include <vm/seg_spt.h>
88 #include <vm/page.h>
89 #include <sys/vmparam.h>
90 #include <sys/swap.h>
91 #include <fs/proc/prdata.h>
92 #include <sys/task.h>
93 #include <sys/project.h>
94 #include <sys/contract_impl.h>
95 #include <sys/contract/process.h>
96 #include <sys/contract/process_impl.h>
97 #include <sys/schedctl.h>
98 #include <sys/pool.h>
99 #include <sys/zone.h>
100 #include <sys/atomic.h>
101 #include <sys/sdt.h>
102 
/* Max spins before giving up and sleeping (used by lock helpers below). */
#define	MAX_ITERS_SPIN	5

/*
 * Scratch vectors used when gathering per-page protection/incore
 * information for a mapping; sized in chunks of up to pagev_lim pages.
 */
typedef struct prpagev {
	uint_t *pg_protv;	/* vector of page permissions */
	char *pg_incore;	/* vector of incore flags */
	size_t pg_npages;	/* number of pages in protv and incore */
	ulong_t pg_pnbase;	/* pn within segment of first protv element */
} prpagev_t;

size_t pagev_lim = 256 * 1024;	/* limit on number of pages in prpagev_t */

extern struct seg_ops segdev_ops;	/* needs a header file */
extern struct seg_ops segspt_shmops;	/* needs a header file */

/* Watchpoint helpers defined later in this file. */
static	int	set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
static	void	clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
119 
120 /*
121  * Choose an lwp from the complete set of lwps for the process.
122  * This is called for any operation applied to the process
123  * file descriptor that requires an lwp to operate upon.
124  *
125  * Returns a pointer to the thread for the selected LWP,
126  * and with the dispatcher lock held for the thread.
127  *
128  * The algorithm for choosing an lwp is critical for /proc semantics;
129  * don't touch this code unless you know all of the implications.
130  */
kthread_t *
prchoose(proc_t *p)
{
	kthread_t *t;
	kthread_t *t_onproc = NULL;	/* running on processor */
	kthread_t *t_run = NULL;	/* runnable, on disp queue */
	kthread_t *t_sleep = NULL;	/* sleeping */
	kthread_t *t_hold = NULL;	/* sleeping, performing hold */
	kthread_t *t_susp = NULL;	/* suspended stop */
	kthread_t *t_jstop = NULL;	/* jobcontrol stop, w/o directed stop */
	kthread_t *t_jdstop = NULL;	/* jobcontrol stop with directed stop */
	kthread_t *t_req = NULL;	/* requested stop */
	kthread_t *t_istop = NULL;	/* event-of-interest stop */
	kthread_t *t_dtrace = NULL;	/* DTrace stop */

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * If the agent lwp exists, it takes precedence over all others.
	 */
	if ((t = p->p_agenttp) != NULL) {
		thread_lock(t);
		return (t);
	}

	if ((t = p->p_tlist) == NULL)	/* start at the head of the list */
		return (t);
	do {		/* for each lwp in the process */
		if (VSTOPPED(t)) {	/* virtually stopped */
			/* remember only the first lwp found in each state */
			if (t_req == NULL)
				t_req = t;
			continue;
		}

		thread_lock(t);		/* make sure thread is in good state */
		switch (t->t_state) {
		default:
			panic("prchoose: bad thread state %d, thread 0x%p",
			    t->t_state, (void *)t);
			/*NOTREACHED*/
		case TS_SLEEP:
			/*
			 * This is filthy: identify an lwp sleeping in
			 * holdlwps() by its wait channel.
			 */
			if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
			    t->t_wchan0 == NULL) {
				if (t_hold == NULL)
					t_hold = t;
			} else {
				if (t_sleep == NULL)
					t_sleep = t;
			}
			break;
		case TS_RUN:
		case TS_WAIT:
			if (t_run == NULL)
				t_run = t;
			break;
		case TS_ONPROC:
			if (t_onproc == NULL)
				t_onproc = t;
			break;
		case TS_ZOMB:		/* last possible choice */
			break;
		case TS_STOPPED:
			switch (t->t_whystop) {
			case PR_SUSPENDED:
				if (t_susp == NULL)
					t_susp = t;
				break;
			case PR_JOBCONTROL:
				if (t->t_proc_flag & TP_PRSTOP) {
					if (t_jdstop == NULL)
						t_jdstop = t;
				} else {
					if (t_jstop == NULL)
						t_jstop = t;
				}
				break;
			case PR_REQUESTED:
				if (t->t_dtrace_stop && t_dtrace == NULL)
					t_dtrace = t;
				else if (t_req == NULL)
					t_req = t;
				break;
			case PR_SYSENTRY:
			case PR_SYSEXIT:
			case PR_SIGNALLED:
			case PR_FAULTED:
				/*
				 * Make an lwp calling exit() be the
				 * last lwp seen in the process.
				 */
				if (t_istop == NULL ||
				    (t_istop->t_whystop == PR_SYSENTRY &&
				    t_istop->t_whatstop == SYS_exit))
					t_istop = t;
				break;
			case PR_CHECKPOINT:	/* can't happen? */
				break;
			default:
				panic("prchoose: bad t_whystop %d, thread 0x%p",
				    t->t_whystop, (void *)t);
				/*NOTREACHED*/
			}
			break;
		}
		thread_unlock(t);
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * Pick the most interesting candidate, in strict priority order.
	 * This ordering defines /proc semantics; do not rearrange it.
	 */
	if (t_onproc)
		t = t_onproc;
	else if (t_run)
		t = t_run;
	else if (t_sleep)
		t = t_sleep;
	else if (t_jstop)
		t = t_jstop;
	else if (t_jdstop)
		t = t_jdstop;
	else if (t_istop)
		t = t_istop;
	else if (t_dtrace)
		t = t_dtrace;
	else if (t_req)
		t = t_req;
	else if (t_hold)
		t = t_hold;
	else if (t_susp)
		t = t_susp;
	else			/* TS_ZOMB */
		t = p->p_tlist;

	if (t != NULL)
		thread_lock(t);	/* return with the dispatcher lock held */
	return (t);
}
266 
267 /*
268  * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
269  * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
270  * on the /proc file descriptor.  Called from stop() when a traced
271  * process stops on an event of interest.  Also called from exit()
272  * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
273  */
void
prnotify(struct vnode *vp)
{
	prcommon_t *pcp = VTOP(vp)->pr_common;

	/* Wake sleepers first, under the prcommon mutex... */
	mutex_enter(&pcp->prc_mutex);
	cv_broadcast(&pcp->prc_wait);
	mutex_exit(&pcp->prc_mutex);
	/* ...then wake any pollers, outside the mutex. */
	if (pcp->prc_flags & PRC_POLL) {
		/*
		 * We call pollwakeup() with POLLHUP to ensure that
		 * the pollers are awakened even if they are polling
		 * for nothing (i.e., waiting for the process to exit).
		 * This enables the use of the PRC_POLL flag for optimization
		 * (we can turn off PRC_POLL only if we know no pollers remain).
		 */
		pcp->prc_flags &= ~PRC_POLL;
		pollwakeup(&pcp->prc_pollhead, POLLHUP);
	}
}
294 
/*
 * Called immediately below, in prfree(): walk a list of /proc vnodes,
 * severing each one's link to the now-departing process and waking any
 * waiters.  The caller holds pidlock, which constrains what we may do.
 */
static void
prfreenotify(vnode_t *vp)
{
	prnode_t *pnp;
	prcommon_t *pcp;

	while (vp != NULL) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL);
		pcp->prc_proc = NULL;
		/*
		 * We can't call prnotify() here because we are holding
		 * pidlock.  We assert that there is no need to.
		 */
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		ASSERT(!(pcp->prc_flags & PRC_POLL));

		/* unlink as we go; the list is being dismantled */
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}
}
320 
321 /*
322  * Called from a hook in freeproc() when a traced process is removed
323  * from the process table.  The proc-table pointers of all associated
324  * /proc vnodes are cleared to indicate that the process has gone away.
325  */
void
prfree(proc_t *p)
{
	uint_t slot = p->p_slot;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Block the process against /proc so it can be freed.
	 * It cannot be freed while locked by some controlling process.
	 * Lock ordering:
	 *	pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
	 */
	mutex_enter(&pr_pidlock);	/* protects pcp->prc_proc */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * Drop pr_pidlock before sleeping (cv_wait releases only
		 * p_lock), then re-acquire both in the proper order.
		 */
		mutex_exit(&pr_pidlock);
		cv_wait(&pr_pid_cv[slot], &p->p_lock);
		mutex_exit(&p->p_lock);
		mutex_enter(&pr_pidlock);
		mutex_enter(&p->p_lock);
	}

	ASSERT(p->p_tlist == NULL);

	prfreenotify(p->p_plist);
	p->p_plist = NULL;

	prfreenotify(p->p_trace);
	p->p_trace = NULL;

	/*
	 * We broadcast to wake up everyone waiting for this process.
	 * No one can reach this process from this point on.
	 */
	cv_broadcast(&pr_pid_cv[slot]);

	mutex_exit(&p->p_lock);
	mutex_exit(&pr_pidlock);
}
366 
367 /*
368  * Called from a hook in exit() when a traced process is becoming a zombie.
369  */
370 void
371 prexit(proc_t *p)
372 {
373 	ASSERT(MUTEX_HELD(&p->p_lock));
374 
375 	if (pr_watch_active(p)) {
376 		pr_free_watchpoints(p);
377 		watch_disable(curthread);
378 	}
379 	/* pr_free_watched_pages() is called in exit(), after dropping p_lock */
380 	if (p->p_trace) {
381 		VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
382 		prnotify(p->p_trace);
383 	}
384 	cv_broadcast(&pr_pid_cv[p->p_slot]);	/* pauselwps() */
385 }
386 
387 /*
388  * Called when a thread calls lwp_exit().
389  */
void
prlwpexit(kthread_t *t)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	proc_t *p = ttoproc(t);
	lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;

	ASSERT(t == curthread);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	/*
	 * Detach this lwp from any process-list vnodes that currently
	 * reference it.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		if (pcp->prc_thread == t) {
			pcp->prc_thread = NULL;
			pcp->prc_flags |= PRC_DESTROY;
		}
	}

	/*
	 * Mark the lwp's own trace vnodes destroyed and wake their waiters.
	 */
	for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		pcp->prc_thread = NULL;
		pcp->prc_flags |= PRC_DESTROY;
		prnotify(vp);
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}
429 
430 /*
431  * Called when a zombie thread is joined or when a
432  * detached lwp exits.  Called from lwp_hash_out().
433  */
void
prlwpfree(proc_t *p, lwpent_t *lep)
{
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * The process must be blocked against /proc to do this safely.
	 * The lwp must not disappear while the process is marked P_PR_LOCK.
	 * It is the caller's responsibility to have called prbarrier(p).
	 */
	ASSERT(!(p->p_proc_flag & P_PR_LOCK));

	/*
	 * Detach and dismantle the lwp's trace-vnode list, notifying
	 * waiters on each vnode as we go.  prlwpexit() has already set
	 * PRC_DESTROY and cleared prc_thread on each of these.
	 */
	vp = lep->le_trace;
	lep->le_trace = NULL;
	while (vp) {
		prnotify(vp);
		pnp = VTOP(vp);
		pcp = pnp->pr_common;
		ASSERT(pcp->prc_thread == NULL &&
		    (pcp->prc_flags & PRC_DESTROY));
		pcp->prc_tslot = -1;	/* lwpdir slot is being recycled */
		vp = pnp->pr_next;
		pnp->pr_next = NULL;
	}

	if (p->p_trace)
		prnotify(p->p_trace);
}
466 
467 /*
468  * Called from a hook in exec() when a thread starts exec().
469  */
void
prexecstart(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);

	/*
	 * The P_PR_EXEC flag blocks /proc operations for
	 * the duration of the exec().
	 * We can't start exec() while the process is
	 * locked by /proc, so we call prbarrier().
	 * lwp_nostop keeps the process from being stopped
	 * via job control for the duration of the exec().
	 */

	ASSERT(MUTEX_HELD(&p->p_lock));
	prbarrier(p);		/* must precede setting P_PR_EXEC */
	lwp->lwp_nostop++;
	p->p_proc_flag |= P_PR_EXEC;
}
490 
491 /*
492  * Called from a hook in exec() when a thread finishes exec().
493  * The thread may or may not have succeeded.  Some other thread
494  * may have beat it to the punch.
495  */
void
prexecend(void)
{
	proc_t *p = ttoproc(curthread);
	klwp_t *lwp = ttolwp(curthread);
	vnode_t *vp;
	prnode_t *pnp;
	prcommon_t *pcp;
	model_t model = p->p_model;
	id_t tid = curthread->t_tid;
	int tslot = curthread->t_dslot;

	ASSERT(MUTEX_HELD(&p->p_lock));

	lwp->lwp_nostop--;
	if (p->p_flag & SEXITLWPS) {
		/*
		 * We are on our way to exiting because some
		 * other thread beat us in the race to exec().
		 * Don't clear the P_PR_EXEC flag in this case.
		 */
		return;
	}

	/*
	 * Wake up anyone waiting in /proc for the process to complete exec().
	 */
	p->p_proc_flag &= ~P_PR_EXEC;
	if ((vp = p->p_trace) != NULL) {
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		/* propagate the (possibly new) data model to every vnode */
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pnp->pr_common->prc_datamodel = model;
		}
	}
	if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
		/*
		 * We dealt with the process common above.
		 */
		ASSERT(p->p_trace != NULL);
		pcp = VTOP(vp)->pr_common;
		mutex_enter(&pcp->prc_mutex);
		cv_broadcast(&pcp->prc_wait);
		mutex_exit(&pcp->prc_mutex);
		/* the exec()ing lwp is now the only lwp; retarget the vnodes */
		for (; vp != NULL; vp = pnp->pr_next) {
			pnp = VTOP(vp);
			pcp = pnp->pr_common;
			pcp->prc_datamodel = model;
			pcp->prc_tid = tid;
			pcp->prc_tslot = tslot;
		}
	}
}
552 
553 /*
554  * Called from a hook in relvm() just before freeing the address space.
555  * We free all the watched areas now.
556  */
void
prrelvm(void)
{
	proc_t *p = ttoproc(curthread);

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */
	if (pr_watch_active(p)) {
		pr_free_watchpoints(p);
		watch_disable(curthread);
	}
	mutex_exit(&p->p_lock);
	/* must be done after dropping p_lock */
	pr_free_watched_pages(p);
}
571 
572 /*
573  * Called from hooks in exec-related code when a traced process
574  * attempts to exec(2) a setuid/setgid program or an unreadable
575  * file.  Rather than fail the exec we invalidate the associated
576  * /proc vnodes so that subsequent attempts to use them will fail.
577  *
578  * All /proc vnodes, except directory vnodes, are retained on a linked
579  * list (rooted at p_plist in the process structure) until last close.
580  *
581  * A controlling process must re-open the /proc files in order to
582  * regain control.
583  */
void
prinvalidate(struct user *up)
{
	kthread_t *t = curthread;
	proc_t *p = ttoproc(t);
	vnode_t *vp;
	prnode_t *pnp;
	int writers = 0;

	mutex_enter(&p->p_lock);
	prbarrier(p);	/* block all other /proc operations */

	/*
	 * At this moment, there can be only one lwp in the process.
	 */
	ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);

	/*
	 * Invalidate any currently active /proc vnodes.
	 */
	for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
		pnp = VTOP(vp);
		switch (pnp->pr_type) {
		case PR_PSINFO:		/* these files can be read by anyone */
		case PR_LPSINFO:
		case PR_LWPSINFO:
		case PR_LWPDIR:
		case PR_LWPIDDIR:
		case PR_USAGE:
		case PR_LUSAGE:
		case PR_LWPUSAGE:
			break;
		default:
			pnp->pr_flags |= PR_INVAL;
			break;
		}
	}
	/*
	 * Wake up anyone waiting for the process or lwp.
	 * p->p_trace is guaranteed to be non-NULL if there
	 * are any open /proc files for this process.
	 */
	if ((vp = p->p_trace) != NULL) {
		prcommon_t *pcp = VTOP(vp)->pr_pcommon;

		prnotify(vp);
		/*
		 * Are there any writers?
		 */
		if ((writers = pcp->prc_writers) != 0) {
			/*
			 * Clear the exclusive open flag (old /proc interface).
			 * Set prc_selfopens equal to prc_writers so that
			 * the next O_EXCL|O_WRITE open will succeed
			 * even with existing (though invalid) writers.
			 * prclose() must decrement prc_selfopens when
			 * the invalid files are closed.
			 */
			pcp->prc_flags &= ~PRC_EXCL;
			ASSERT(pcp->prc_selfopens <= writers);
			pcp->prc_selfopens = writers;
		}
	}
	vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
	while (vp != NULL) {
		/*
		 * We should not invalidate the lwpiddir vnodes,
		 * but the necessities of maintaining the old
		 * ioctl()-based version of /proc require it.
		 */
		pnp = VTOP(vp);
		pnp->pr_flags |= PR_INVAL;
		prnotify(vp);
		vp = pnp->pr_next;
	}

	/*
	 * If any tracing flags are in effect and any vnodes are open for
	 * writing then set the requested-stop and run-on-last-close flags.
	 * Otherwise, clear all tracing flags.
	 */
	t->t_proc_flag &= ~TP_PAUSE;
	if ((p->p_proc_flag & P_PR_TRACE) && writers) {
		t->t_proc_flag |= TP_PRSTOP;
		aston(t);		/* so ISSIG will see the flag */
		p->p_proc_flag |= P_PR_RUNLCL;
	} else {
		premptyset(&up->u_entrymask);		/* syscalls */
		premptyset(&up->u_exitmask);
		up->u_systrap = 0;
		premptyset(&p->p_sigmask);		/* signals */
		premptyset(&p->p_fltmask);		/* faults */
		t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
		p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
		prnostep(ttolwp(t));
	}

	mutex_exit(&p->p_lock);
}
683 
684 /*
685  * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
686  * Return with pr_pidlock held in all cases.
 * Return with p_lock held if the process still exists.
688  * Return value is the process pointer if the process still exists, else NULL.
689  * If we lock the process, give ourself kernel priority to avoid deadlocks;
690  * this is undone in prunlock().
691  */
proc_t *
pr_p_lock(prnode_t *pnp)
{
	proc_t *p;
	prcommon_t *pcp;

	mutex_enter(&pr_pidlock);
	if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
		return (NULL);	/* process is gone; pr_pidlock stays held */
	mutex_enter(&p->p_lock);
	while (p->p_proc_flag & P_PR_LOCK) {
		/*
		 * This cv/mutex pair is persistent even if
		 * the process disappears while we sleep.
		 */
		kcondvar_t *cv = &pr_pid_cv[p->p_slot];
		kmutex_t *mp = &p->p_lock;

		/* drop pr_pidlock before sleeping, per lock ordering */
		mutex_exit(&pr_pidlock);
		cv_wait(cv, mp);
		mutex_exit(mp);
		mutex_enter(&pr_pidlock);
		if (pcp->prc_proc == NULL)
			return (NULL);	/* process vanished while we slept */
		ASSERT(p == pcp->prc_proc);
		mutex_enter(&p->p_lock);
	}
	p->p_proc_flag |= P_PR_LOCK;
	return (p);
}
722 
723 /*
724  * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
725  * This prevents any lwp of the process from disappearing and
726  * blocks most operations that a process can perform on itself.
727  * Returns 0 on success, a non-zero error number on failure.
728  *
729  * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
730  * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
731  *
732  * error returns:
733  *	ENOENT: process or lwp has disappeared or process is exiting
734  *		(or has become a zombie and zdisp == ZNO).
735  *	EAGAIN: procfs vnode has become invalid.
736  *	EINTR:  signal arrived while waiting for exec to complete.
737  */
int
prlock(prnode_t *pnp, int zdisp)
{
	prcommon_t *pcp;
	proc_t *p;

again:
	pcp = pnp->pr_common;
	p = pr_p_lock(pnp);
	mutex_exit(&pr_pidlock);

	/*
	 * Return ENOENT immediately if there is no process.
	 */
	if (p == NULL)
		return (ENOENT);

	ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);

	/*
	 * Return ENOENT if process entered zombie state or is exiting
	 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
	 */
	if (zdisp == ZNO &&
	    ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
		prunlock(pnp);
		return (ENOENT);
	}

	/*
	 * If lwp-specific, check to see if lwp has disappeared.
	 */
	if (pcp->prc_flags & PRC_LWP) {
		if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
		    pcp->prc_tslot == -1) {
			prunlock(pnp);
			return (ENOENT);
		}
	}

	/*
	 * Return EAGAIN if we have encountered a security violation.
	 * (The process exec'd a set-id or unreadable executable file.)
	 */
	if (pnp->pr_flags & PR_INVAL) {
		prunlock(pnp);
		return (EAGAIN);
	}

	/*
	 * If process is undergoing an exec(), wait for
	 * completion and then start all over again.
	 */
	if (p->p_proc_flag & P_PR_EXEC) {
		pcp = pnp->pr_pcommon;	/* Put on the correct sleep queue */
		mutex_enter(&pcp->prc_mutex);
		prunlock(pnp);
		/* cv_wait_sig() returns 0 if interrupted by a signal */
		if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
			mutex_exit(&pcp->prc_mutex);
			return (EINTR);
		}
		mutex_exit(&pcp->prc_mutex);
		goto again;
	}

	/*
	 * We return holding p->p_lock.
	 */
	return (0);
}
808 
809 /*
810  * Undo prlock() and pr_p_lock().
811  * p->p_lock is still held; pr_pidlock is no longer held.
812  *
813  * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
814  * if any, waiting for the flag to be dropped; it retains p->p_lock.
815  *
816  * prunlock() calls prunmark() and then drops p->p_lock.
817  */
void
prunmark(proc_t *p)
{
	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* wake one waiter; it re-checks P_PR_LOCK once we release p_lock */
	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
}
827 
void
prunlock(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;

	/*
	 * If we (or someone) gave it a SIGKILL, and it is not
	 * already a zombie, set it running unconditionally.
	 */
	if ((p->p_flag & SKILLED) &&
	    !(p->p_flag & SEXITING) &&
	    !(pcp->prc_flags & PRC_DESTROY) &&
	    !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
		(void) pr_setrun(pnp, 0);
	prunmark(p);
	mutex_exit(&p->p_lock);
}
846 
847 /*
848  * Called while holding p->p_lock to delay until the process is unlocked.
849  * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
850  * The process cannot become locked again until p->p_lock is dropped.
851  */
852 void
853 prbarrier(proc_t *p)
854 {
855 	ASSERT(MUTEX_HELD(&p->p_lock));
856 
857 	if (p->p_proc_flag & P_PR_LOCK) {
858 		/* The process is locked; delay until not locked */
859 		uint_t slot = p->p_slot;
860 
861 		while (p->p_proc_flag & P_PR_LOCK)
862 			cv_wait(&pr_pid_cv[slot], &p->p_lock);
863 		cv_signal(&pr_pid_cv[slot]);
864 	}
865 }
866 
867 /*
868  * Return process/lwp status.
869  * The u-block is mapped in by this routine and unmapped at the end.
870  */
void
prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus() does the rest */
	bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	sp->pr_brkbase = (uintptr_t)p->p_brkbase;
	sp->pr_brksize = p->p_brksize;
	sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
	sp->pr_stksize = p->p_stksize;
	sp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid  = p->p_pgrp;
	sp->pr_sid   = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
933 
934 /*
935  * Query mask of held signals for a given thread.
936  *
937  * This makes use of schedctl_sigblock() to query if userspace has requested
938  * that all maskable signals be held.  While it would be tempting to call
939  * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
940  * done safely without the risk of racing with the thread under consideration.
941  */
942 void
943 prgethold(kthread_t *t, sigset_t *sp)
944 {
945 	k_sigset_t set;
946 
947 	if (schedctl_sigblock(t)) {
948 		set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
949 		set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
950 		set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
951 	} else {
952 		set = t->t_hold;
953 	}
954 	sigktou(&set, sp);
955 }
956 
957 #ifdef _SYSCALL32_IMPL
void
prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	sp->pr_flags = flags;
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	if (t->t_whystop == PR_FAULTED) {
		siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
		if (t->t_whatstop == FLTPAGE)
			sp->pr_info.si_addr =
			    (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
	} else if (lwp->lwp_curinfo)
		siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
	/*
	 * Scrub cross-zone siginfo: inside a non-global zone, report
	 * zsched as the sender for signals that came from outside.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack.ss_sp =
	    (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
	sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
	sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
	prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
	sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts32(usr, &sp->pr_utime);
	hrt2ts32(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = (uint32_t)instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall32_args(lwp,
		    (int *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs32(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		long r1, r2;
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &r1, &r2);
		if (sp->pr_errno == 0) {
			sp->pr_rval1 = (int32_t)r1;
			sp->pr_rval2 = (int32_t)r2;
			sp->pr_errpriv = PRIV_NONE;
		} else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
			sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
			/* recover the exec path from the aux vector */
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (caddr32_t)
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs32(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);	/* re-acquire before returning */
}
1115 
/*
 * Return the 32-bit process status (pstatus32_t) for ILP32 consumers
 * of /proc.  A representative lwp chosen by prchoose() supplies the
 * embedded lwpstatus.  The caller must hold p->p_lock.
 */
void
prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
{
	kthread_t *t;

	ASSERT(MUTEX_HELD(&p->p_lock));

	t = prchoose(p);	/* returns locked thread */
	ASSERT(t != NULL);
	thread_unlock(t);

	/* just bzero the process part, prgetlwpstatus32() does the rest */
	bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
	sp->pr_nlwp = p->p_lwpcnt;
	sp->pr_nzomb = p->p_zombcnt;
	prassignset(&sp->pr_sigpend, &p->p_sig);
	/* Addresses and sizes are truncated to 32 bits for the ILP32 view. */
	sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
	sp->pr_brksize = (uint32_t)p->p_brksize;
	sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
	sp->pr_stksize = (uint32_t)p->p_stksize;
	sp->pr_pid   = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		sp->pr_ppid = p->p_ppid;
	}
	sp->pr_pgid  = p->p_pgrp;
	sp->pr_sid   = p->p_sessp->s_sid;
	sp->pr_taskid = p->p_task->tk_tkid;
	sp->pr_projid = p->p_task->tk_proj->kpj_id;
	sp->pr_zoneid = p->p_zone->zone_id;
	/* CPU times aggregated via micro-state accounting. */
	hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
	hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
	/* Children's accumulated times are kept in clock ticks. */
	TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
	TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
	prassignset(&sp->pr_sigtrace, &p->p_sigmask);
	prassignset(&sp->pr_flttrace, &p->p_fltmask);
	prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
	prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		sp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		sp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	if (p->p_agenttp)
		sp->pr_agentid = p->p_agenttp->t_tid;

	/* get the chosen lwp's status */
	prgetlwpstatus32(t, &sp->pr_lwp, zp);

	/* replicate the flags */
	sp->pr_flags = sp->pr_lwp.pr_flags;
}
1178 #endif	/* _SYSCALL32_IMPL */
1179 
1180 /*
1181  * Return lwp status.
1182  */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));
	flags = 0L;
	/*
	 * Translate the lwp's scheduling state and the process's /proc
	 * control state into the PR_* flag bits exported via pr_flags.
	 */
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;
	/* A virtually-stopped lwp is reported as stopped on request. */
	if (VSTOPPED(t)) {
		sp->pr_why   = PR_REQUESTED;
		sp->pr_what  = 0;
	} else {
		sp->pr_why   = t->t_whystop;
		sp->pr_what  = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig  = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);
	/* Report fault info when stopped on a fault, else any current signal */
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));
	/*
	 * If the signal was sent from userland in a zone other than the
	 * observer's, scrub the sender's identity: report zsched as the
	 * sender and hide the foreign uid and contract id.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);
	/* Micro-state accounting: user time; system includes trap time. */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;
	/* In or stopped on a system call: record its number and arguments. */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		if (t->t_sysnum == SYS_execve) {
			/*
			 * At exec exit the original arguments are gone;
			 * reconstruct them from the u-area: the execname
			 * from the aux vector, plus u_argv and u_envp.
			 */
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);
	mutex_enter(&p->p_lock);
}
1338 
1339 /*
1340  * Get the sigaction structure for the specified signal.  The u-block
1341  * must already have been mapped in by the caller.
1342  */
1343 void
1344 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1345 {
1346 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1347 
1348 	bzero(sp, sizeof (*sp));
1349 
1350 	if (sig != 0 && (unsigned)sig < nsig) {
1351 		sp->sa_handler = up->u_signal[sig-1];
1352 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1353 		if (sigismember(&up->u_sigonstack, sig))
1354 			sp->sa_flags |= SA_ONSTACK;
1355 		if (sigismember(&up->u_sigresethand, sig))
1356 			sp->sa_flags |= SA_RESETHAND;
1357 		if (sigismember(&up->u_sigrestart, sig))
1358 			sp->sa_flags |= SA_RESTART;
1359 		if (sigismember(&p->p_siginfo, sig))
1360 			sp->sa_flags |= SA_SIGINFO;
1361 		if (sigismember(&up->u_signodefer, sig))
1362 			sp->sa_flags |= SA_NODEFER;
1363 		if (sig == SIGCLD) {
1364 			if (p->p_flag & SNOWAIT)
1365 				sp->sa_flags |= SA_NOCLDWAIT;
1366 			if ((p->p_flag & SJCTL) == 0)
1367 				sp->sa_flags |= SA_NOCLDSTOP;
1368 		}
1369 	}
1370 }
1371 
1372 #ifdef _SYSCALL32_IMPL
1373 void
1374 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1375 {
1376 	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1377 
1378 	bzero(sp, sizeof (*sp));
1379 
1380 	if (sig != 0 && (unsigned)sig < nsig) {
1381 		sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1382 		prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1383 		if (sigismember(&up->u_sigonstack, sig))
1384 			sp->sa_flags |= SA_ONSTACK;
1385 		if (sigismember(&up->u_sigresethand, sig))
1386 			sp->sa_flags |= SA_RESETHAND;
1387 		if (sigismember(&up->u_sigrestart, sig))
1388 			sp->sa_flags |= SA_RESTART;
1389 		if (sigismember(&p->p_siginfo, sig))
1390 			sp->sa_flags |= SA_SIGINFO;
1391 		if (sigismember(&up->u_signodefer, sig))
1392 			sp->sa_flags |= SA_NODEFER;
1393 		if (sig == SIGCLD) {
1394 			if (p->p_flag & SNOWAIT)
1395 				sp->sa_flags |= SA_NOCLDWAIT;
1396 			if ((p->p_flag & SJCTL) == 0)
1397 				sp->sa_flags |= SA_NOCLDSTOP;
1398 		}
1399 	}
1400 }
1401 #endif	/* _SYSCALL32_IMPL */
1402 
1403 /*
1404  * Count the number of segments in this process's address space.
1405  */
1406 int
1407 prnsegs(struct as *as, int reserved)
1408 {
1409 	int n = 0;
1410 	struct seg *seg;
1411 
1412 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1413 
1414 	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1415 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1416 		caddr_t saddr, naddr;
1417 		void *tmp = NULL;
1418 
1419 		if ((seg->s_flags & S_HOLE) != 0) {
1420 			continue;
1421 		}
1422 
1423 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1424 			(void) pr_getprot(seg, reserved, &tmp,
1425 			    &saddr, &naddr, eaddr);
1426 			if (saddr != naddr)
1427 				n++;
1428 		}
1429 
1430 		ASSERT(tmp == NULL);
1431 	}
1432 
1433 	return (n);
1434 }
1435 
1436 /*
1437  * Convert uint32_t to decimal string w/o leading zeros.
1438  * Add trailing null characters if 'len' is greater than string length.
1439  * Return the string length.
1440  */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char digits[11];	/* 32-bit unsigned fits in 10 digits */
	char *out = s;
	char *pad_end = s + len;
	int ndig = 0;
	int i;

	/* Generate the decimal digits, least significant first. */
	do {
		digits[ndig++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* Emit them in the conventional most-significant-first order. */
	for (i = ndig - 1; i >= 0; i--)
		*out++ = digits[i];

	/* Null-pad out to 'len' bytes if the caller asked for more. */
	while (out < pad_end)
		*out++ = '\0';

	return (ndig);
}
1464 
1465 /*
1466  * Convert uint64_t to decimal string w/o leading zeros.
1467  * Return the string length.
1468  */
static int
pr_u64tos(uint64_t n, char *s)
{
	char digits[21];	/* 64-bit unsigned fits in 20 digits */
	int ndig = 0;
	int i;

	/* Generate the decimal digits, least significant first. */
	do {
		digits[ndig++] = (char)('0' + n % 10);
		n /= 10;
	} while (n != 0);

	/* Emit them most-significant-first; no null terminator is added. */
	for (i = ndig - 1; i >= 0; i--)
		*s++ = digits[i];

	return (ndig);
}
1489 
/*
 * Return a held file_t for file descriptor 'fd' of process 'p', or NULL
 * if the descriptor is out of range or not an open file.  The caller
 * must hold p->p_lock and have the process P_PR_LOCKed.  On success the
 * entry's uf_refcnt is bumped (balance with pr_releasef()) and, if
 * 'flag' is non-NULL, the descriptor's uf_flag is copied out.
 */
file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	/* Drop p_lock while the file-table locks are held, then retake it. */
	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (flag != NULL)
			*flag = ufp->uf_flag;
		ufp->uf_refcnt++;	/* hold the entry for the caller */
	} else {
		fp = NULL;
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	return (fp);
}
1520 
/*
 * Release the hold on file descriptor 'fd' taken by pr_getf().  As in
 * pr_getf(), p->p_lock is dropped around the file-table locking and
 * reacquired before returning.
 */
void
pr_releasef(proc_t *p, uint_t fd)
{
	uf_entry_t *ufp;
	uf_info_t *fip;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

	fip = P_FINFO(p);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	ASSERT3U(ufp->uf_refcnt, >, 0);	/* must balance a prior pr_getf() */
	ufp->uf_refcnt--;
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);
}
1540 
1541 void
1542 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1543 {
1544 	char *s = name;
1545 	struct vfs *vfsp;
1546 	struct vfssw *vfsswp;
1547 
1548 	if ((vfsp = vp->v_vfsp) != NULL &&
1549 	    ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1550 	    *vfsswp->vsw_name) {
1551 		(void) strcpy(s, vfsswp->vsw_name);
1552 		s += strlen(s);
1553 		*s++ = '.';
1554 	}
1555 	s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1556 	*s++ = '.';
1557 	s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1558 	*s++ = '.';
1559 	s += pr_u64tos(vattr->va_nodeid, s);
1560 	*s++ = '\0';
1561 }
1562 
1563 struct seg *
1564 break_seg(proc_t *p)
1565 {
1566 	caddr_t addr = p->p_brkbase;
1567 	struct seg *seg;
1568 	struct vnode *vp;
1569 
1570 	if (p->p_brksize != 0)
1571 		addr += p->p_brksize - 1;
1572 	seg = as_segat(p->p_as, addr);
1573 	if (seg != NULL && seg->s_ops == &segvn_ops &&
1574 	    (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1575 		return (seg);
1576 	return (NULL);
1577 }
1578 
1579 /*
1580  * Implementation of service functions to handle procfs generic chained
1581  * copyout buffers.
1582  */
/* One link in a chain of copyout buffers; data follows the header. */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

/* Allocation unit for overflow buffers; also clamps the initial buffer. */
#define	MAPSIZE	(64 * 1024)
/* The data area begins immediately after the piol_t header. */
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1591 
1592 void
1593 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1594 {
1595 	piol_t	*iol;
1596 	size_t	initial_size = MIN(1, n) * itemsize;
1597 
1598 	list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1599 
1600 	ASSERT(list_head(iolhead) == NULL);
1601 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1602 	ASSERT(initial_size > 0);
1603 
1604 	/*
1605 	 * Someone creating chained copyout buffers may ask for less than
1606 	 * MAPSIZE if the amount of data to be buffered is known to be
1607 	 * smaller than that.
1608 	 * But in order to prevent involuntary self-denial of service,
1609 	 * the requested input size is clamped at MAPSIZE.
1610 	 */
1611 	initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1612 	iol = kmem_alloc(initial_size, KM_SLEEP);
1613 	list_insert_head(iolhead, iol);
1614 	iol->piol_usedsize = 0;
1615 	iol->piol_size = initial_size;
1616 }
1617 
1618 void *
1619 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1620 {
1621 	piol_t	*iol;
1622 	char	*new;
1623 
1624 	ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1625 	ASSERT(list_head(iolhead) != NULL);
1626 
1627 	iol = (piol_t *)list_tail(iolhead);
1628 
1629 	if (iol->piol_size <
1630 	    iol->piol_usedsize + sizeof (*iol) + itemsize) {
1631 		/*
1632 		 * Out of space in the current buffer. Allocate more.
1633 		 */
1634 		piol_t *newiol;
1635 
1636 		newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1637 		newiol->piol_size = MAPSIZE;
1638 		newiol->piol_usedsize = 0;
1639 
1640 		list_insert_after(iolhead, iol, newiol);
1641 		iol = list_next(iolhead, iol);
1642 		ASSERT(iol == newiol);
1643 	}
1644 	new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1645 	iol->piol_usedsize += itemsize;
1646 	bzero(new, itemsize);
1647 	return (new);
1648 }
1649 
1650 void
1651 pr_iol_freelist(list_t *iolhead)
1652 {
1653 	piol_t	*iol;
1654 
1655 	while ((iol = list_head(iolhead)) != NULL) {
1656 		list_remove(iolhead, iol);
1657 		kmem_free(iol, iol->piol_size);
1658 	}
1659 	list_destroy(iolhead);
1660 }
1661 
1662 int
1663 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1664 {
1665 	int error = errin;
1666 	piol_t	*iol;
1667 
1668 	while ((iol = list_head(iolhead)) != NULL) {
1669 		list_remove(iolhead, iol);
1670 		if (!error) {
1671 			if (copyout(PIOL_DATABUF(iol), *tgt,
1672 			    iol->piol_usedsize))
1673 				error = EFAULT;
1674 			*tgt += iol->piol_usedsize;
1675 		}
1676 		kmem_free(iol, iol->piol_size);
1677 	}
1678 	list_destroy(iolhead);
1679 
1680 	return (error);
1681 }
1682 
1683 int
1684 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1685 {
1686 	offset_t	off = uiop->uio_offset;
1687 	char		*base;
1688 	size_t		size;
1689 	piol_t		*iol;
1690 	int		error = errin;
1691 
1692 	while ((iol = list_head(iolhead)) != NULL) {
1693 		list_remove(iolhead, iol);
1694 		base = PIOL_DATABUF(iol);
1695 		size = iol->piol_usedsize;
1696 		if (off <= size && error == 0 && uiop->uio_resid > 0)
1697 			error = uiomove(base + off, size - off,
1698 			    UIO_READ, uiop);
1699 		off = MAX(0, off - (offset_t)size);
1700 		kmem_free(iol, iol->piol_size);
1701 	}
1702 	list_destroy(iolhead);
1703 
1704 	return (error);
1705 }
1706 
1707 /*
1708  * Return an array of structures with memory map information.
1709  * We allocate here; the caller must deallocate.
1710  */
int
prgetmap(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* Identify the break and stack segments so they can be flagged. */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/* Hole segments are not reported. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Walk the segment in maximal runs of constant protection;
		 * each non-empty run becomes one prmap_t entry.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (uintptr_t)saddr;
			mp->pr_size = naddr - saddr;
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/* ISM or vnode-less segvn mappings count as anon. */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * With 'reserved', report the full
					 * page-aligned reservable stack
					 * (sized by p_stk_ctl), not just
					 * the currently mapped portion.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					mp->pr_vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_size = (uintptr_t)naddr -
					    mp->pr_vaddr;
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1824 
1825 #ifdef _SYSCALL32_IMPL
int
prgetmap32(proc_t *p, int reserved, list_t *iolhead)
{
	struct as *as = p->p_as;
	prmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* Identify the break and stack segments so they can be flagged. */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/* Hole segments are not reported. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Walk the segment in maximal runs of constant protection;
		 * each non-empty run becomes one prmap32_t entry, with
		 * addresses and sizes truncated for the ILP32 view.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			prot = pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr == naddr)
				continue;

			mp = pr_iol_newbuf(iolhead, sizeof (*mp));

			mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			mp->pr_size = (size32_t)(naddr - saddr);
			mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			mp->pr_mflags = 0;
			if (prot & PROT_READ)
				mp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				mp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				mp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				mp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				mp->pr_mflags |= MA_NORESERVE;
			/* ISM or vnode-less segvn mappings count as anon. */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				mp->pr_mflags |= MA_ANON;
			if (seg == brkseg)
				mp->pr_mflags |= MA_BREAK;
			else if (seg == stkseg) {
				mp->pr_mflags |= MA_STACK;
				if (reserved) {
					/*
					 * With 'reserved', report the full
					 * page-aligned reservable stack
					 * (sized by p_stk_ctl), not just
					 * the currently mapped portion.
					 */
					size_t maxstack =
					    ((size_t)p->p_stk_ctl +
					    PAGEOFFSET) & PAGEMASK;
					uintptr_t vaddr =
					    (uintptr_t)prgetstackbase(p) +
					    p->p_stksize - maxstack;
					mp->pr_vaddr = (caddr32_t)vaddr;
					mp->pr_size = (size32_t)
					    ((uintptr_t)naddr - vaddr);
				}
			}
			if (seg->s_ops == &segspt_shmops)
				mp->pr_mflags |= MA_ISM | MA_SHM;
			mp->pr_pagesize = PAGESIZE;

			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(mp->pr_mapname, "a.out");
				else
					pr_object_name(mp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (mp->pr_shmid == SHMID_FREE)
					mp->pr_shmid = -1;

				mp->pr_mflags |= MA_SHM;
			} else {
				mp->pr_shmid = -1;
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
1940 #endif	/* _SYSCALL32_IMPL */
1941 
1942 /*
1943  * Return the size of the /proc page data file.
1944  */
1945 size_t
1946 prpdsize(struct as *as)
1947 {
1948 	struct seg *seg;
1949 	size_t size;
1950 
1951 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1952 
1953 	if ((seg = AS_SEGFIRST(as)) == NULL)
1954 		return (0);
1955 
1956 	size = sizeof (prpageheader_t);
1957 	do {
1958 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1959 		caddr_t saddr, naddr;
1960 		void *tmp = NULL;
1961 		size_t npage;
1962 
1963 		if ((seg->s_flags & S_HOLE) != 0) {
1964 			continue;
1965 		}
1966 
1967 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1968 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1969 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1970 				size += sizeof (prasmap_t) + round8(npage);
1971 		}
1972 		ASSERT(tmp == NULL);
1973 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1974 
1975 	return (size);
1976 }
1977 
1978 #ifdef _SYSCALL32_IMPL
1979 size_t
1980 prpdsize32(struct as *as)
1981 {
1982 	struct seg *seg;
1983 	size_t size;
1984 
1985 	ASSERT(as != &kas && AS_WRITE_HELD(as));
1986 
1987 	if ((seg = AS_SEGFIRST(as)) == NULL)
1988 		return (0);
1989 
1990 	size = sizeof (prpageheader32_t);
1991 	do {
1992 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1993 		caddr_t saddr, naddr;
1994 		void *tmp = NULL;
1995 		size_t npage;
1996 
1997 		if ((seg->s_flags & S_HOLE) != 0) {
1998 			continue;
1999 		}
2000 
2001 		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2002 			(void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2003 			if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2004 				size += sizeof (prasmap32_t) + round8(npage);
2005 		}
2006 		ASSERT(tmp == NULL);
2007 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2008 
2009 	return (size);
2010 }
2011 #endif	/* _SYSCALL32_IMPL */
2012 
2013 /*
2014  * Read page data information.
2015  */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	/* Layout: header, then one prasmap_t + per-page data per range. */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/* Hole segments are not reported. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/* ISM or vnode-less segvn mappings count as anon. */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* Gather per-page stats, zeroing ref/mod bits. */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2165 
2166 #ifdef _SYSCALL32_IMPL
/*
 * Read page data for a 32-bit consumer of /proc/<pid>/pagedata.
 * The entire address space is snapshotted into one kmem buffer -- a
 * prpageheader32_t followed, for each run of pages with uniform
 * protections, by a prasmap32_t and its per-page data -- and copied
 * out with a single uiomove().  Returns 0 on success or an errno.
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	/* An empty address space produces no page data at all. */
	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	/* Size the buffer from the current segment list. */
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/* Hole segments reserve address space only; skip them. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/* Walk the segment in runs of uniform protection. */
		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* Per-page data follows the map entry, 8-byte aligned. */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtlely even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asychronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/* Anonymous: ISM/shm segments, or segvn with no vnode. */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/*
			 * Collect the per-page statistics, zeroing the
			 * ref/mod bits as we go (HAT_SYNC_ZERORM).
			 */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2316 #endif	/* _SYSCALL32_IMPL */
2317 
2318 ushort_t
2319 prgetpctcpu(uint64_t pct)
2320 {
2321 	/*
2322 	 * The value returned will be relevant in the zone of the examiner,
2323 	 * which may not be the same as the zone which performed the procfs
2324 	 * mount.
2325 	 */
2326 	int nonline = zone_ncpus_online_get(curproc->p_zone);
2327 
2328 	/*
2329 	 * Prorate over online cpus so we don't exceed 100%
2330 	 */
2331 	if (nonline > 1)
2332 		pct /= nonline;
2333 	pct >>= 16;		/* convert to 16-bit scaled integer */
2334 	if (pct > 0x8000)	/* might happen, due to rounding */
2335 		pct = 0x8000;
2336 	return ((ushort_t)pct);
2337 }
2338 
2339 /*
2340  * Return information used by ps(1).
2341  */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Choose a representative lwp.  If there is none, the process
	 * is fully zombied and the entire structure (including pr_lwp)
	 * is zeroed; otherwise pr_lwp is preserved and filled in by
	 * prgetlwpsinfo() below.
	 */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* p_crlock keeps p_cred stable while the ids are read. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* A contract id of 0 is reported to userland as -1 (none). */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	/* Aggregate user + system microstate time across all lwps. */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* No lwp: report the process as a zombie. */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			/* Sum the decayed %cpu of every lwp on p_tlist. */
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		/* System processes and users of kas report zero sizes. */
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * Drop p_lock while taking the address-space
			 * lock; sizes are reported in kilobytes.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			psp->pr_size = btopr(as->a_resvsize) *
			    (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}
}
2473 
2474 static size_t
2475 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen)
2476 {
2477 	pr_misc_header_t *misc;
2478 	size_t len;
2479 
2480 	len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2481 
2482 	if (data != NULL) {
2483 		misc = pr_iol_newbuf(data, len);
2484 		misc->pr_misc_type = type;
2485 		misc->pr_misc_size = len;
2486 		misc++;
2487 		bcopy((char *)val, (char *)misc, vlen);
2488 	}
2489 
2490 	return (len);
2491 }
2492 
2493 /*
2494  * There's no elegant way to determine if a character device
2495  * supports TLI, so just check a hardcoded list of known TLI
2496  * devices.
2497  */
2498 
2499 static boolean_t
2500 pristli(vnode_t *vp)
2501 {
2502 	static const char *tlidevs[] = {
2503 	    "udp", "udp6", "tcp", "tcp6"
2504 	};
2505 	char *devname;
2506 	uint_t i;
2507 
2508 	ASSERT(vp != NULL);
2509 
2510 	if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0)
2511 		return (B_FALSE);
2512 
2513 	if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL)
2514 		return (B_FALSE);
2515 
2516 	for (i = 0; i < ARRAY_SIZE(tlidevs); i++) {
2517 		if (strcmp(devname, tlidevs[i]) == 0)
2518 			return (B_TRUE);
2519 	}
2520 
2521 	return (B_FALSE);
2522 }
2523 
2524 static size_t
2525 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2526 {
2527 	char *pathname;
2528 	size_t pathlen;
2529 	size_t sz = 0;
2530 
2531 	/*
2532 	 * The global zone's path to a file in a non-global zone can exceed
2533 	 * MAXPATHLEN.
2534 	 */
2535 	pathlen = MAXPATHLEN * 2 + 1;
2536 	pathname = kmem_alloc(pathlen, KM_SLEEP);
2537 
2538 	if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
2539 		sz += prfdinfomisc(data, PR_PATHNAME,
2540 		    pathname, strlen(pathname) + 1);
2541 	}
2542 
2543 	kmem_free(pathname, pathlen);
2544 
2545 	return (sz);
2546 }
2547 
2548 static size_t
2549 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2550 {
2551 	strcmd_t strcmd;
2552 	int32_t rval;
2553 	size_t sz = 0;
2554 
2555 	strcmd.sc_cmd = TI_GETMYNAME;
2556 	strcmd.sc_timeout = 1;
2557 	strcmd.sc_len = STRCMDBUFSIZE;
2558 
2559 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2560 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2561 		sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2562 		    strcmd.sc_len);
2563 	}
2564 
2565 	strcmd.sc_cmd = TI_GETPEERNAME;
2566 	strcmd.sc_timeout = 1;
2567 	strcmd.sc_len = STRCMDBUFSIZE;
2568 
2569 	if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2570 	    &rval, NULL) == 0 && strcmd.sc_len > 0) {
2571 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2572 		    strcmd.sc_len);
2573 	}
2574 
2575 	return (sz);
2576 }
2577 
2578 static size_t
2579 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2580 {
2581 	sonode_t *so;
2582 	socklen_t vlen;
2583 	size_t sz = 0;
2584 	uint_t i;
2585 
2586 	if (vp->v_stream != NULL) {
2587 		so = VTOSO(vp->v_stream->sd_vnode);
2588 
2589 		if (so->so_version == SOV_STREAM)
2590 			so = NULL;
2591 	} else {
2592 		so = VTOSO(vp);
2593 	}
2594 
2595 	if (so == NULL)
2596 		return (0);
2597 
2598 	DTRACE_PROBE1(sonode, sonode_t *, so);
2599 
2600 	/* prmisc - PR_SOCKETNAME */
2601 
2602 	struct sockaddr_storage buf;
2603 	struct sockaddr *name = (struct sockaddr *)&buf;
2604 
2605 	vlen = sizeof (buf);
2606 	if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2607 		sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2608 
2609 	/* prmisc - PR_PEERSOCKNAME */
2610 
2611 	vlen = sizeof (buf);
2612 	if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2613 		sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2614 
2615 	/* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2616 
2617 	static struct boolopt {
2618 		int		level;
2619 		int		opt;
2620 		int		bopt;
2621 	} boolopts[] = {
2622 		{ SOL_SOCKET, SO_DEBUG,		PR_SO_DEBUG },
2623 		{ SOL_SOCKET, SO_REUSEADDR,	PR_SO_REUSEADDR },
2624 #ifdef SO_REUSEPORT
2625 		/* SmartOS and OmniOS have SO_REUSEPORT */
2626 		{ SOL_SOCKET, SO_REUSEPORT,	PR_SO_REUSEPORT },
2627 #endif
2628 		{ SOL_SOCKET, SO_KEEPALIVE,	PR_SO_KEEPALIVE },
2629 		{ SOL_SOCKET, SO_DONTROUTE,	PR_SO_DONTROUTE },
2630 		{ SOL_SOCKET, SO_BROADCAST,	PR_SO_BROADCAST },
2631 		{ SOL_SOCKET, SO_OOBINLINE,	PR_SO_OOBINLINE },
2632 		{ SOL_SOCKET, SO_DGRAM_ERRIND,	PR_SO_DGRAM_ERRIND },
2633 		{ SOL_SOCKET, SO_ALLZONES,	PR_SO_ALLZONES },
2634 		{ SOL_SOCKET, SO_MAC_EXEMPT,	PR_SO_MAC_EXEMPT },
2635 		{ SOL_SOCKET, SO_MAC_IMPLICIT,	PR_SO_MAC_IMPLICIT },
2636 		{ SOL_SOCKET, SO_EXCLBIND,	PR_SO_EXCLBIND },
2637 		{ SOL_SOCKET, SO_VRRP,		PR_SO_VRRP },
2638 		{ IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2639 		    PR_UDP_NAT_T_ENDPOINT }
2640 	};
2641 	prsockopts_bool_opts_t opts;
2642 	int val;
2643 
2644 	if (data != NULL) {
2645 		opts.prsock_bool_opts = 0;
2646 
2647 		for (i = 0; i < ARRAY_SIZE(boolopts); i++) {
2648 			vlen = sizeof (val);
2649 			if (SOP_GETSOCKOPT(so, boolopts[i].level,
2650 			    boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2651 			    val != 0) {
2652 				opts.prsock_bool_opts |= boolopts[i].bopt;
2653 			}
2654 		}
2655 	}
2656 
2657 	sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2658 
2659 	/* prmisc - PR_SOCKOPT_LINGER */
2660 
2661 	struct linger l;
2662 
2663 	vlen = sizeof (l);
2664 	if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2665 	    0, cred) == 0 && vlen > 0) {
2666 		sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2667 	}
2668 
2669 	/* prmisc - PR_SOCKOPT_* int types */
2670 
2671 	static struct sopt {
2672 		int		level;
2673 		int		opt;
2674 		int		bopt;
2675 	} sopts[] = {
2676 		{ SOL_SOCKET, SO_TYPE,		PR_SOCKOPT_TYPE },
2677 		{ SOL_SOCKET, SO_SNDBUF,	PR_SOCKOPT_SNDBUF },
2678 		{ SOL_SOCKET, SO_RCVBUF,	PR_SOCKOPT_RCVBUF }
2679 	};
2680 
2681 	for (i = 0; i < ARRAY_SIZE(sopts); i++) {
2682 		vlen = sizeof (val);
2683 		if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2684 		    &val, &vlen, 0, cred) == 0 && vlen > 0) {
2685 			sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2686 		}
2687 	}
2688 
2689 	/* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2690 
2691 	in_addr_t nexthop_val;
2692 
2693 	vlen = sizeof (nexthop_val);
2694 	if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2695 	    &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2696 		sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2697 		    &nexthop_val, vlen);
2698 	}
2699 
2700 	/* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2701 
2702 	struct sockaddr_in6 nexthop6_val;
2703 
2704 	vlen = sizeof (nexthop6_val);
2705 	if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2706 	    &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2707 		sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2708 		    &nexthop6_val, vlen);
2709 	}
2710 
2711 	/* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2712 
2713 	char cong[CC_ALGO_NAME_MAX];
2714 
2715 	vlen = sizeof (cong);
2716 	if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2717 	    &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2718 		sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2719 	}
2720 
2721 	/* prmisc - PR_SOCKFILTERS_PRIV */
2722 
2723 	struct fil_info fi;
2724 
2725 	vlen = sizeof (fi);
2726 	if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2727 	    &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2728 		pr_misc_header_t *misc;
2729 		size_t len;
2730 
2731 		/*
2732 		 * We limit the number of returned filters to 32.
2733 		 * This is the maximum number that pfiles will print
2734 		 * anyway.
2735 		 */
2736 		vlen = MIN(32, fi.fi_pos + 1);
2737 		vlen *= sizeof (fi);
2738 
2739 		len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2740 		sz += len;
2741 
2742 		if (data != NULL) {
2743 			/*
2744 			 * So that the filter list can be built incrementally,
2745 			 * prfdinfomisc() is not used here. Instead we
2746 			 * allocate a buffer directly on the copyout list using
2747 			 * pr_iol_newbuf()
2748 			 */
2749 			misc = pr_iol_newbuf(data, len);
2750 			misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2751 			misc->pr_misc_size = len;
2752 			misc++;
2753 			len = vlen;
2754 			if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2755 			    misc, &vlen, 0, cred) == 0) {
2756 				/*
2757 				 * In case the number of filters has reduced
2758 				 * since the first call, explicitly zero out
2759 				 * any unpopulated space.
2760 				 */
2761 				if (vlen < len)
2762 					bzero(misc + vlen, len - vlen);
2763 			} else {
2764 				/* Something went wrong, zero out the result */
2765 				bzero(misc, vlen);
2766 			}
2767 		}
2768 	}
2769 
2770 	return (sz);
2771 }
2772 
/*
 * Callback state for prfdinfo_nm_path(), used with nm_walk_mounts()
 * to size or emit PR_PATHNAME records for namefs (door) vnodes.
 */
typedef struct prfdinfo_nm_path_cbdata {
	proc_t		*nmp_p;		/* process being inspected */
	u_offset_t	nmp_sz;		/* accumulated record size */
	list_t		*nmp_data;	/* output list; NULL when sizing */
} prfdinfo_nm_path_cbdata_t;
2778 
2779 static int
2780 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
2781 {
2782 	prfdinfo_nm_path_cbdata_t *cb = arg;
2783 
2784 	cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
2785 
2786 	return (0);
2787 }
2788 
2789 u_offset_t
2790 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2791 {
2792 	u_offset_t sz;
2793 
2794 	/*
2795 	 * All fdinfo files will be at least this big -
2796 	 * sizeof fdinfo struct + zero length trailer
2797 	 */
2798 	sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
2799 
2800 	/* Pathname */
2801 	switch (vp->v_type) {
2802 	case VDOOR: {
2803 		prfdinfo_nm_path_cbdata_t cb = {
2804 			.nmp_p		= p,
2805 			.nmp_data	= NULL,
2806 			.nmp_sz		= 0
2807 		};
2808 
2809 		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
2810 		sz += cb.nmp_sz;
2811 		break;
2812 	}
2813 	case VSOCK:
2814 		break;
2815 	default:
2816 		sz += prfdinfopath(p, vp, NULL, cred);
2817 	}
2818 
2819 	/* Socket options */
2820 	if (vp->v_type == VSOCK)
2821 		sz += prfdinfosockopt(vp, NULL, cred);
2822 
2823 	/* TLI/XTI sockets */
2824 	if (pristli(vp))
2825 		sz += prfdinfotlisockopt(vp, NULL, cred);
2826 
2827 	return (sz);
2828 }
2829 
/*
 * Populate the prfdinfo_t for vp and append the variable-length misc
 * records (pathname, socket options, peer credentials, terminator) to
 * 'data' for copyout by the caller.  'cred' is the caller's credential
 * and 'file_cred' is the credential for the file being inspected (see
 * the attribute comment below).  Always returns 0.
 */
int
prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
    cred_t *file_cred, list_t *data)
{
	vattr_t vattr;
	int error;

	/*
	 * The buffer has been initialised to zero by pr_iol_newbuf().
	 * Initialise defaults for any values that should not default to zero.
	 */
	fdinfo->pr_uid = (uid_t)-1;
	fdinfo->pr_gid = (gid_t)-1;
	fdinfo->pr_size = -1;
	fdinfo->pr_locktype = F_UNLCK;
	fdinfo->pr_lockpid = -1;
	fdinfo->pr_locksysid = -1;
	fdinfo->pr_peerpid = -1;

	/* Offset */

	/*
	 * pr_offset has already been set from the underlying file_t.
	 * Check if it is plausible and reset to -1 if not.
	 */
	if (fdinfo->pr_offset != -1 &&
	    VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
		fdinfo->pr_offset = -1;

	/*
	 * Attributes
	 *
	 * We have two cred_t structures available here.
	 * 'cred' is the caller's credential, and 'file_cred' is the credential
	 * for the file being inspected.
	 *
	 * When looking up the file attributes, file_cred is used in order
	 * that the correct ownership is set for doors and FIFOs. Since the
	 * caller has permission to read the fdinfo file in proc, this does
	 * not expose any additional information.
	 */
	vattr.va_mask = AT_STAT;
	if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
		fdinfo->pr_major = getmajor(vattr.va_fsid);
		fdinfo->pr_minor = getminor(vattr.va_fsid);
		fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
		fdinfo->pr_rminor = getminor(vattr.va_rdev);
		fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
		fdinfo->pr_size = (off64_t)vattr.va_size;
		fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
		fdinfo->pr_uid = vattr.va_uid;
		fdinfo->pr_gid = vattr.va_gid;
		/* Fold the socket's FASYNC state into the file flags. */
		if (vp->v_type == VSOCK)
			fdinfo->pr_fileflags |= sock_getfasync(vp);
	}

	/* locks */

	flock64_t bf;

	/* Query for any lock that would block a whole-file write lock. */
	bzero(&bf, sizeof (bf));
	bf.l_type = F_WRLCK;

	if (VOP_FRLOCK(vp, F_GETLK, &bf,
	    (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL,
	    cred, NULL) == 0 && bf.l_type != F_UNLCK) {
		fdinfo->pr_locktype = bf.l_type;
		fdinfo->pr_lockpid = bf.l_pid;
		fdinfo->pr_locksysid = bf.l_sysid;
	}

	/* peer cred */

	k_peercred_t kpc;

	/* _I_GETPEERCRED is only meaningful for FIFOs, sockets and streams. */
	switch (vp->v_type) {
	case VFIFO:
	case VSOCK: {
		int32_t rval;

		error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc,
		    FKIOCTL, cred, &rval, NULL);
		break;
	}
	case VCHR: {
		struct strioctl strioc;
		int32_t rval;

		if (vp->v_stream == NULL) {
			error = ENOTSUP;
			break;
		}
		strioc.ic_cmd = _I_GETPEERCRED;
		strioc.ic_timout = INFTIM;
		strioc.ic_len = (int)sizeof (k_peercred_t);
		strioc.ic_dp = (char *)&kpc;

		error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL,
		    STR_NOSIG | K_TO_K, cred, &rval);
		break;
	}
	default:
		error = ENOTSUP;
		break;
	}

	if (error == 0 && kpc.pc_cr != NULL) {
		proc_t *peerp;

		/* Record the peer pid; the credential itself is not needed. */
		fdinfo->pr_peerpid = kpc.pc_cpid;

		crfree(kpc.pc_cr);

		/*
		 * If the peer process still exists, capture its command
		 * name under its p_lock; pidlock is dropped as soon as
		 * the process is locked.
		 */
		mutex_enter(&pidlock);
		if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) {
			user_t *up;

			mutex_enter(&peerp->p_lock);
			mutex_exit(&pidlock);

			up = PTOU(peerp);
			bcopy(up->u_comm, fdinfo->pr_peername,
			    MIN(sizeof (up->u_comm),
			    sizeof (fdinfo->pr_peername) - 1));

			mutex_exit(&peerp->p_lock);
		} else {
			mutex_exit(&pidlock);
		}
	}

	/* pathname */

	switch (vp->v_type) {
	case VDOOR: {
		/* Doors may appear under several namefs mounts. */
		prfdinfo_nm_path_cbdata_t cb = {
			.nmp_p		= p,
			.nmp_data	= data,
			.nmp_sz		= 0
		};

		(void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
		break;
	}
	case VSOCK:
		/*
		 * Don't attempt to determine the path for a socket as the
		 * vnode has no associated v_path. It will cause a linear scan
		 * of the dnlc table and result in no path being found.
		 */
		break;
	default:
		(void) prfdinfopath(p, vp, data, cred);
	}

	/* socket options */
	if (vp->v_type == VSOCK)
		(void) prfdinfosockopt(vp, data, cred);

	/* TLI/XTI stream sockets */
	if (pristli(vp))
		(void) prfdinfotlisockopt(vp, data, cred);

	/*
	 * Add a terminating header with a zero size.
	 */
	pr_misc_header_t *misc;

	misc = pr_iol_newbuf(data, sizeof (*misc));
	misc->pr_misc_size = 0;
	misc->pr_misc_type = (uint_t)-1;

	return (0);
}
3004 
3005 #ifdef _SYSCALL32_IMPL
/*
 * Return information used by ps(1): 32-bit view (psinfo32_t).
 * Mirrors prgetpsinfo(); 64-bit-only values are zeroed at the end.
 */
void
prgetpsinfo32(proc_t *p, psinfo32_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * Choose a representative lwp.  If there is none, the process
	 * is fully zombied and the entire structure (including pr_lwp)
	 * is zeroed; otherwise pr_lwp is preserved and filled in by
	 * prgetlwpsinfo32() below.
	 */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* p_crlock keeps p_cred stable while the ids are read. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* A contract id of 0 is reported to userland as -1 (none). */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	/* Aggregate user + system microstate time across all lwps. */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* No lwp: report the process as a zombie. */
		extern int wstat(int, int);	/* needs a header file */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV32;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		(void) cmpldev(&psp->pr_ttydev, d);
		TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start);
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = (caddr32_t)up->u_argv;
		psp->pr_envp = (caddr32_t)up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo32(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			/* Sum the decayed %cpu of every lwp on p_tlist. */
			uint64_t pct = 0;
			hrtime_t cur_time;

			t = p->p_tlist;
			cur_time = gethrtime_unscaled();
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		/* System processes and users of kas report zero sizes. */
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * Drop p_lock while taking the address-space
			 * lock; sizes are reported in kilobytes.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			psp->pr_size = (size32_t)
			    (btopr(as->a_resvsize) * (PAGESIZE / 1024));
			psp->pr_rssize = (size32_t)
			    (rm_asrss(as) * (PAGESIZE / 1024));
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}

	/*
	 * If we are looking at an LP64 process, zero out
	 * the fields that cannot be represented in ILP32.
	 */
	if (p->p_model != DATAMODEL_ILP32) {
		psp->pr_size = 0;
		psp->pr_rssize = 0;
		psp->pr_argv = 0;
		psp->pr_envp = 0;
	}
}
3149 
3150 #endif	/* _SYSCALL32_IMPL */
3151 
/*
 * Fill in the lwpsinfo_t (the per-lwp data used by ps(1)) for thread t.
 * The owning process's p_lock must be held by the caller.
 */
void
prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp)
{
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char c, state;
	uint64_t pct;
	int retval, niceval;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = (uintptr_t)t;
	psp->pr_wchan = (uintptr_t)t->t_wchan;

	/* map the thread state enum into a process state enum */
	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (state) {
	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
	case TS_RUN:		state = SRUN;		c = 'R';	break;
	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
	default:		state = 0;		c = '?';	break;
	}
	psp->pr_state = state;
	psp->pr_sname = c;
	if ((sobj = t->t_sobj_ops) != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);
	/*
	 * CL_DONICE may fail for scheduling classes with no notion of a
	 * nice value; pr_oldpri and pr_nice stay zero in that case.
	 */
	retval = CL_DONICE(t, NULL, 0, &niceval);
	if (retval == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = t->t_start;
	psp->pr_start.tv_nsec = 0L;
	/* Microstate times are unscaled; convert before summing. */
	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&hrutime);
	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&hrstime);
	hrt2ts(hrutime + hrstime, &psp->pr_time);
	/* compute %cpu for the lwp */
	pct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(pct);
	/* Scale the 16-bit fraction (0x8000 == 100%) to a [0..99] value. */
	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
3216 
3217 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit analogue of prgetlwpsinfo(): fill in an lwpsinfo32_t for the
 * given kernel thread.  The caller must hold the process's p_lock.
 * Kernel addresses cannot be represented in 32 bits, so pr_addr and
 * pr_wchan are left zero.
 */
void
prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp)
{
	klwp_t *lwp = ttolwp(t);
	sobj_ops_t *sobj;
	char c, state;
	uint64_t pct;
	int retval, niceval;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock));

	/* Zero the whole structure so any field not set below reads as 0. */
	bzero(psp, sizeof (*psp));

	psp->pr_flag = 0;	/* lwpsinfo_t.pr_flag is deprecated */
	psp->pr_lwpid = t->t_tid;
	psp->pr_addr = 0;	/* cannot represent 64-bit addr in 32 bits */
	psp->pr_wchan = 0;	/* cannot represent 64-bit addr in 32 bits */

	/* map the thread state enum into a process state enum */
	state = VSTOPPED(t) ? TS_STOPPED : t->t_state;
	switch (state) {
	case TS_SLEEP:		state = SSLEEP;		c = 'S';	break;
	case TS_RUN:		state = SRUN;		c = 'R';	break;
	case TS_ONPROC:		state = SONPROC;	c = 'O';	break;
	case TS_ZOMB:		state = SZOMB;		c = 'Z';	break;
	case TS_STOPPED:	state = SSTOP;		c = 'T';	break;
	case TS_WAIT:		state = SWAIT;		c = 'W';	break;
	default:		state = 0;		c = '?';	break;
	}
	psp->pr_state = state;
	psp->pr_sname = c;
	/* record the type of synchronization object blocked on, if any */
	if ((sobj = t->t_sobj_ops) != NULL)
		psp->pr_stype = SOBJ_TYPE(sobj);
	/* pr_oldpri/pr_nice only apply when the class supports nice */
	retval = CL_DONICE(t, NULL, 0, &niceval);
	if (retval == 0) {
		psp->pr_oldpri = v.v_maxsyspri - t->t_pri;
		psp->pr_nice = niceval + NZERO;
	} else {
		psp->pr_oldpri = 0;
		psp->pr_nice = 0;
	}
	psp->pr_syscall = t->t_sysnum;
	psp->pr_pri = t->t_pri;
	psp->pr_start.tv_sec = (time32_t)t->t_start;
	psp->pr_start.tv_nsec = 0L;
	/* total CPU time = user + (system + trap) microstates, scaled */
	hrutime = lwp->lwp_mstate.ms_acct[LMS_USER];
	scalehrtime(&hrutime);
	hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] +
	    lwp->lwp_mstate.ms_acct[LMS_TRAP];
	scalehrtime(&hrstime);
	hrt2ts32(hrutime + hrstime, &psp->pr_time);
	/* compute %cpu for the lwp */
	pct = cpu_update_pct(t, gethrtime_unscaled());
	psp->pr_pctcpu = prgetpctcpu(pct);
	/* round the 15-bit binary fraction to an integer percentage */
	psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15;	/* [0..99] */
	if (psp->pr_cpu > 99)
		psp->pr_cpu = 99;

	(void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (psp->pr_clname) - 1);
	bzero(psp->pr_name, sizeof (psp->pr_name));	/* XXX ??? */
	psp->pr_onpro = t->t_cpu->cpu_id;
	psp->pr_bindpro = t->t_bind_cpu;
	psp->pr_bindpset = t->t_bind_pset;
	psp->pr_lgrp = t->t_lpl->lpl_lgrpid;
}
3285 #endif	/* _SYSCALL32_IMPL */
3286 
3287 #ifdef _SYSCALL32_IMPL
3288 
3289 #define	PR_COPY_FIELD(s, d, field)	 d->field = s->field
3290 
3291 #define	PR_COPY_FIELD_ILP32(s, d, field)				\
3292 	if (s->pr_dmodel == PR_MODEL_ILP32) {			\
3293 		d->field = s->field;				\
3294 	}
3295 
3296 #define	PR_COPY_TIMESPEC(s, d, field)				\
3297 	TIMESPEC_TO_TIMESPEC32(&d->field, &s->field);
3298 
3299 #define	PR_COPY_BUF(s, d, field)				\
3300 	bcopy(s->field, d->field, sizeof (d->field));
3301 
3302 #define	PR_IGNORE_FIELD(s, d, field)
3303 
3304 void
3305 lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest)
3306 {
3307 	bzero(dest, sizeof (*dest));
3308 
3309 	PR_COPY_FIELD(src, dest, pr_flag);
3310 	PR_COPY_FIELD(src, dest, pr_lwpid);
3311 	PR_IGNORE_FIELD(src, dest, pr_addr);
3312 	PR_IGNORE_FIELD(src, dest, pr_wchan);
3313 	PR_COPY_FIELD(src, dest, pr_stype);
3314 	PR_COPY_FIELD(src, dest, pr_state);
3315 	PR_COPY_FIELD(src, dest, pr_sname);
3316 	PR_COPY_FIELD(src, dest, pr_nice);
3317 	PR_COPY_FIELD(src, dest, pr_syscall);
3318 	PR_COPY_FIELD(src, dest, pr_oldpri);
3319 	PR_COPY_FIELD(src, dest, pr_cpu);
3320 	PR_COPY_FIELD(src, dest, pr_pri);
3321 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3322 	PR_COPY_TIMESPEC(src, dest, pr_start);
3323 	PR_COPY_BUF(src, dest, pr_clname);
3324 	PR_COPY_BUF(src, dest, pr_name);
3325 	PR_COPY_FIELD(src, dest, pr_onpro);
3326 	PR_COPY_FIELD(src, dest, pr_bindpro);
3327 	PR_COPY_FIELD(src, dest, pr_bindpset);
3328 	PR_COPY_FIELD(src, dest, pr_lgrp);
3329 }
3330 
3331 void
3332 psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest)
3333 {
3334 	bzero(dest, sizeof (*dest));
3335 
3336 	PR_COPY_FIELD(src, dest, pr_flag);
3337 	PR_COPY_FIELD(src, dest, pr_nlwp);
3338 	PR_COPY_FIELD(src, dest, pr_pid);
3339 	PR_COPY_FIELD(src, dest, pr_ppid);
3340 	PR_COPY_FIELD(src, dest, pr_pgid);
3341 	PR_COPY_FIELD(src, dest, pr_sid);
3342 	PR_COPY_FIELD(src, dest, pr_uid);
3343 	PR_COPY_FIELD(src, dest, pr_euid);
3344 	PR_COPY_FIELD(src, dest, pr_gid);
3345 	PR_COPY_FIELD(src, dest, pr_egid);
3346 	PR_IGNORE_FIELD(src, dest, pr_addr);
3347 	PR_COPY_FIELD_ILP32(src, dest, pr_size);
3348 	PR_COPY_FIELD_ILP32(src, dest, pr_rssize);
3349 	PR_COPY_FIELD(src, dest, pr_ttydev);
3350 	PR_COPY_FIELD(src, dest, pr_pctcpu);
3351 	PR_COPY_FIELD(src, dest, pr_pctmem);
3352 	PR_COPY_TIMESPEC(src, dest, pr_start);
3353 	PR_COPY_TIMESPEC(src, dest, pr_time);
3354 	PR_COPY_TIMESPEC(src, dest, pr_ctime);
3355 	PR_COPY_BUF(src, dest, pr_fname);
3356 	PR_COPY_BUF(src, dest, pr_psargs);
3357 	PR_COPY_FIELD(src, dest, pr_wstat);
3358 	PR_COPY_FIELD(src, dest, pr_argc);
3359 	PR_COPY_FIELD_ILP32(src, dest, pr_argv);
3360 	PR_COPY_FIELD_ILP32(src, dest, pr_envp);
3361 	PR_COPY_FIELD(src, dest, pr_dmodel);
3362 	PR_COPY_FIELD(src, dest, pr_taskid);
3363 	PR_COPY_FIELD(src, dest, pr_projid);
3364 	PR_COPY_FIELD(src, dest, pr_nzomb);
3365 	PR_COPY_FIELD(src, dest, pr_poolid);
3366 	PR_COPY_FIELD(src, dest, pr_contract);
3367 	PR_COPY_FIELD(src, dest, pr_poolid);
3368 	PR_COPY_FIELD(src, dest, pr_poolid);
3369 
3370 	lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp);
3371 }
3372 
3373 #undef	PR_COPY_FIELD
3374 #undef	PR_COPY_FIELD_ILP32
3375 #undef	PR_COPY_TIMESPEC
3376 #undef	PR_COPY_BUF
3377 #undef	PR_IGNORE_FIELD
3378 
3379 #endif	/* _SYSCALL32_IMPL */
3380 
3381 /*
3382  * This used to get called when microstate accounting was disabled but
3383  * microstate information was requested.  Since Microstate accounting is on
3384  * regardless of the proc flags, this simply makes it appear to procfs that
3385  * microstate accounting is on.  This is relatively meaningless since you
3386  * can't turn it off, but this is here for the sake of appearances.
3387  */
3388 
3389 /*ARGSUSED*/
3390 void
3391 estimate_msacct(kthread_t *t, hrtime_t curtime)
3392 {
3393 	proc_t *p;
3394 
3395 	if (t == NULL)
3396 		return;
3397 
3398 	p = ttoproc(t);
3399 	ASSERT(MUTEX_HELD(&p->p_lock));
3400 
3401 	/*
3402 	 * A system process (p0) could be referenced if the thread is
3403 	 * in the process of exiting.  Don't turn on microstate accounting
3404 	 * in that case.
3405 	 */
3406 	if (p->p_flag & SSYS)
3407 		return;
3408 
3409 	/*
3410 	 * Loop through all the LWPs (kernel threads) in the process.
3411 	 */
3412 	t = p->p_tlist;
3413 	do {
3414 		t->t_proc_flag |= TP_MSACCT;
3415 	} while ((t = t->t_forw) != p->p_tlist);
3416 
3417 	p->p_flag |= SMSACCT;			/* set process-wide MSACCT */
3418 }
3419 
3420 /*
3421  * It's not really possible to disable microstate accounting anymore.
3422  * However, this routine simply turns off the ms accounting flags in a process
3423  * This way procfs can still pretend to turn microstate accounting on and
3424  * off for a process, but it actually doesn't do anything.  This is
3425  * a neutered form of preemptive idiot-proofing.
3426  */
3427 void
3428 disable_msacct(proc_t *p)
3429 {
3430 	kthread_t *t;
3431 
3432 	ASSERT(MUTEX_HELD(&p->p_lock));
3433 
3434 	p->p_flag &= ~SMSACCT;		/* clear process-wide MSACCT */
3435 	/*
3436 	 * Loop through all the LWPs (kernel threads) in the process.
3437 	 */
3438 	if ((t = p->p_tlist) != NULL) {
3439 		do {
3440 			/* clear per-thread flag */
3441 			t->t_proc_flag &= ~TP_MSACCT;
3442 		} while ((t = t->t_forw) != p->p_tlist);
3443 	}
3444 }
3445 
3446 /*
3447  * Return resource usage information.
3448  */
/*
 * Fill in a prhusage_t with resource usage for a single lwp.
 * Times are returned scaled (nanoseconds); see prscaleusage().
 */
void
prgetusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp1;

	curtime = gethrtime_unscaled();

	pup->pr_lwpid	= t->t_tid;
	pup->pr_count	= 1;
	pup->pr_create	= ms->ms_start;
	pup->pr_term    = ms->ms_term;
	scalehrtime(&pup->pr_create);
	scalehrtime(&pup->pr_term);
	if (ms->ms_term == 0) {
		/* lwp still running: real time runs from start to now */
		pup->pr_rtime = curtime - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	} else {
		/* lwp has terminated: real time is start to termination */
		pup->pr_rtime = ms->ms_term - ms->ms_start;
		scalehrtime(&pup->pr_rtime);
	}


	/* snapshot the per-microstate accumulators (still unscaled here) */
	pup->pr_utime    = ms->ms_acct[LMS_USER];
	pup->pr_stime    = ms->ms_acct[LMS_SYSTEM];
	pup->pr_ttime    = ms->ms_acct[LMS_TRAP];
	pup->pr_tftime   = ms->ms_acct[LMS_TFAULT];
	pup->pr_dftime   = ms->ms_acct[LMS_DFAULT];
	pup->pr_kftime   = ms->ms_acct[LMS_KFAULT];
	pup->pr_ltime    = ms->ms_acct[LMS_USER_LOCK];
	pup->pr_slptime  = ms->ms_acct[LMS_SLEEP];
	pup->pr_wtime    = ms->ms_acct[LMS_WAIT_CPU];
	pup->pr_stoptime = ms->ms_acct[LMS_STOPPED];

	prscaleusage(pup);

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		/* guard against a stale curtime snapshot (no locking here) */
		if (waitrq > curtime) {
			curtime = gethrtime_unscaled();
		}
		tmp1 = curtime - waitrq;
		scalehrtime(&tmp1);
		pup->pr_wtime += tmp1;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * Credit the time spent so far in the current microstate to the
	 * appropriate accumulator.  A sleep state is refined using
	 * ms_prev so that e.g. a page-fault sleep is charged as such.
	 * Because the clock and ms_state_start are sampled without
	 * synchronization, the difference can transiently appear
	 * negative; in that case re-read the clock and retry, up to
	 * MAX_ITERS_SPIN times.  Note that "continue" in a do/while
	 * jumps to the loop condition, re-testing tmp1 < 0.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			state = LMS_SYSTEM;
			break;
		}
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:		panic("prgetusage: unknown microstate");
		}
		tmp1 = curtime - ms->ms_state_start;
		if (tmp1 < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;
		}
		scalehrtime(&tmp1);
	} while (tmp1 < 0 && i < MAX_ITERS_SPIN);

	*mstimep += tmp1;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf  = lwp->lwp_ru.minflt;
	pup->pr_majf  = lwp->lwp_ru.majflt;
	pup->pr_nswap = lwp->lwp_ru.nswap;
	pup->pr_inblk = lwp->lwp_ru.inblock;
	pup->pr_oublk = lwp->lwp_ru.oublock;
	pup->pr_msnd  = lwp->lwp_ru.msgsnd;
	pup->pr_mrcv  = lwp->lwp_ru.msgrcv;
	pup->pr_sigs  = lwp->lwp_ru.nsignals;
	pup->pr_vctx  = lwp->lwp_ru.nvcsw;
	pup->pr_ictx  = lwp->lwp_ru.nivcsw;
	pup->pr_sysc  = lwp->lwp_ru.sysc;
	pup->pr_ioch  = lwp->lwp_ru.ioch;
}
3581 
3582 /*
3583  * Convert ms_acct stats from unscaled high-res time to nanoseconds
3584  */
3585 void
3586 prscaleusage(prhusage_t *usg)
3587 {
3588 	scalehrtime(&usg->pr_utime);
3589 	scalehrtime(&usg->pr_stime);
3590 	scalehrtime(&usg->pr_ttime);
3591 	scalehrtime(&usg->pr_tftime);
3592 	scalehrtime(&usg->pr_dftime);
3593 	scalehrtime(&usg->pr_kftime);
3594 	scalehrtime(&usg->pr_ltime);
3595 	scalehrtime(&usg->pr_slptime);
3596 	scalehrtime(&usg->pr_wtime);
3597 	scalehrtime(&usg->pr_stoptime);
3598 }
3599 
3600 
3601 /*
3602  * Sum resource usage information.
3603  */
/*
 * Accumulate (sum) this lwp's resource usage into *pup, which already
 * holds usage gathered from other lwps.  Mirrors prgetusage(), but adds
 * into the accumulators instead of overwriting them.
 */
void
praddusage(kthread_t *t, prhusage_t *pup)
{
	klwp_t *lwp = ttolwp(t);
	hrtime_t *mstimep;
	struct mstate *ms = &lwp->lwp_mstate;
	int state;
	int i;
	hrtime_t curtime;
	hrtime_t waitrq;
	hrtime_t tmp;
	prhusage_t conv;	/* scratch copy used to scale the times */

	curtime = gethrtime_unscaled();

	if (ms->ms_term == 0) {
		/* lwp still running: real time runs from start to now */
		tmp = curtime - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	} else {
		/* lwp has terminated: real time is start to termination */
		tmp = ms->ms_term - ms->ms_start;
		scalehrtime(&tmp);
		pup->pr_rtime += tmp;
	}

	/* snapshot the unscaled per-microstate accumulators */
	conv.pr_utime = ms->ms_acct[LMS_USER];
	conv.pr_stime = ms->ms_acct[LMS_SYSTEM];
	conv.pr_ttime = ms->ms_acct[LMS_TRAP];
	conv.pr_tftime = ms->ms_acct[LMS_TFAULT];
	conv.pr_dftime = ms->ms_acct[LMS_DFAULT];
	conv.pr_kftime = ms->ms_acct[LMS_KFAULT];
	conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK];
	conv.pr_slptime = ms->ms_acct[LMS_SLEEP];
	conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU];
	conv.pr_stoptime = ms->ms_acct[LMS_STOPPED];

	/* scale to nanoseconds, then fold into the running totals */
	prscaleusage(&conv);

	pup->pr_utime	+= conv.pr_utime;
	pup->pr_stime	+= conv.pr_stime;
	pup->pr_ttime	+= conv.pr_ttime;
	pup->pr_tftime	+= conv.pr_tftime;
	pup->pr_dftime	+= conv.pr_dftime;
	pup->pr_kftime	+= conv.pr_kftime;
	pup->pr_ltime	+= conv.pr_ltime;
	pup->pr_slptime	+= conv.pr_slptime;
	pup->pr_wtime	+= conv.pr_wtime;
	pup->pr_stoptime += conv.pr_stoptime;

	/*
	 * Adjust for time waiting in the dispatcher queue.
	 */
	waitrq = t->t_waitrq;	/* hopefully atomic */
	if (waitrq != 0) {
		/* guard against a stale curtime snapshot (no locking here) */
		if (waitrq > curtime) {
			curtime = gethrtime_unscaled();
		}
		tmp = curtime - waitrq;
		scalehrtime(&tmp);
		pup->pr_wtime += tmp;
		curtime = waitrq;
	}

	/*
	 * Adjust for time spent in current microstate.
	 */
	if (ms->ms_state_start > curtime) {
		curtime = gethrtime_unscaled();
	}

	/*
	 * Credit the time spent so far in the current microstate, exactly
	 * as in prgetusage(): sleep states are refined via ms_prev, and a
	 * transiently negative difference (unsynchronized clock reads)
	 * causes a bounded retry of up to MAX_ITERS_SPIN iterations.
	 */
	i = 0;
	do {
		switch (state = t->t_mstate) {
		case LMS_SLEEP:
			/*
			 * Update the timer for the current sleep state.
			 */
			switch (state = ms->ms_prev) {
			case LMS_TFAULT:
			case LMS_DFAULT:
			case LMS_KFAULT:
			case LMS_USER_LOCK:
				break;
			default:
				state = LMS_SLEEP;
				break;
			}
			break;
		case LMS_TFAULT:
		case LMS_DFAULT:
		case LMS_KFAULT:
		case LMS_USER_LOCK:
			state = LMS_SYSTEM;
			break;
		}
		switch (state) {
		case LMS_USER:		mstimep = &pup->pr_utime;	break;
		case LMS_SYSTEM:	mstimep = &pup->pr_stime;	break;
		case LMS_TRAP:		mstimep = &pup->pr_ttime;	break;
		case LMS_TFAULT:	mstimep = &pup->pr_tftime;	break;
		case LMS_DFAULT:	mstimep = &pup->pr_dftime;	break;
		case LMS_KFAULT:	mstimep = &pup->pr_kftime;	break;
		case LMS_USER_LOCK:	mstimep = &pup->pr_ltime;	break;
		case LMS_SLEEP:		mstimep = &pup->pr_slptime;	break;
		case LMS_WAIT_CPU:	mstimep = &pup->pr_wtime;	break;
		case LMS_STOPPED:	mstimep = &pup->pr_stoptime;	break;
		default:		panic("praddusage: unknown microstate");
		}
		tmp = curtime - ms->ms_state_start;
		if (tmp < 0) {
			curtime = gethrtime_unscaled();
			i++;
			continue;
		}
		scalehrtime(&tmp);
	} while (tmp < 0 && i < MAX_ITERS_SPIN);

	*mstimep += tmp;

	/* update pup timestamp */
	pup->pr_tstamp = curtime;
	scalehrtime(&pup->pr_tstamp);

	/*
	 * Resource usage counters.
	 */
	pup->pr_minf  += lwp->lwp_ru.minflt;
	pup->pr_majf  += lwp->lwp_ru.majflt;
	pup->pr_nswap += lwp->lwp_ru.nswap;
	pup->pr_inblk += lwp->lwp_ru.inblock;
	pup->pr_oublk += lwp->lwp_ru.oublock;
	pup->pr_msnd  += lwp->lwp_ru.msgsnd;
	pup->pr_mrcv  += lwp->lwp_ru.msgrcv;
	pup->pr_sigs  += lwp->lwp_ru.nsignals;
	pup->pr_vctx  += lwp->lwp_ru.nvcsw;
	pup->pr_ictx  += lwp->lwp_ru.nivcsw;
	pup->pr_sysc  += lwp->lwp_ru.sysc;
	pup->pr_ioch  += lwp->lwp_ru.ioch;
}
3743 
3744 /*
3745  * Convert a prhusage_t to a prusage_t.
3746  * This means convert each hrtime_t to a timestruc_t
3747  * and copy the count fields uint64_t => ulong_t.
3748  */
3749 void
3750 prcvtusage(prhusage_t *pup, prusage_t *upup)
3751 {
3752 	uint64_t *ullp;
3753 	ulong_t *ulp;
3754 	int i;
3755 
3756 	upup->pr_lwpid = pup->pr_lwpid;
3757 	upup->pr_count = pup->pr_count;
3758 
3759 	hrt2ts(pup->pr_tstamp,	&upup->pr_tstamp);
3760 	hrt2ts(pup->pr_create,	&upup->pr_create);
3761 	hrt2ts(pup->pr_term,	&upup->pr_term);
3762 	hrt2ts(pup->pr_rtime,	&upup->pr_rtime);
3763 	hrt2ts(pup->pr_utime,	&upup->pr_utime);
3764 	hrt2ts(pup->pr_stime,	&upup->pr_stime);
3765 	hrt2ts(pup->pr_ttime,	&upup->pr_ttime);
3766 	hrt2ts(pup->pr_tftime,	&upup->pr_tftime);
3767 	hrt2ts(pup->pr_dftime,	&upup->pr_dftime);
3768 	hrt2ts(pup->pr_kftime,	&upup->pr_kftime);
3769 	hrt2ts(pup->pr_ltime,	&upup->pr_ltime);
3770 	hrt2ts(pup->pr_slptime,	&upup->pr_slptime);
3771 	hrt2ts(pup->pr_wtime,	&upup->pr_wtime);
3772 	hrt2ts(pup->pr_stoptime, &upup->pr_stoptime);
3773 	bzero(upup->filltime, sizeof (upup->filltime));
3774 
3775 	ullp = &pup->pr_minf;
3776 	ulp = &upup->pr_minf;
3777 	for (i = 0; i < 22; i++)
3778 		*ulp++ = (ulong_t)*ullp++;
3779 }
3780 
3781 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit analogue of prcvtusage(): convert a prhusage_t to a
 * prusage32_t, converting each hrtime_t to a timestruc32_t and
 * narrowing each 64-bit counter to 32 bits.
 */
void
prcvtusage32(prhusage_t *pup, prusage32_t *upup)
{
	uint64_t *ullp;
	uint32_t *ulp;
	int i;

	upup->pr_lwpid = pup->pr_lwpid;
	upup->pr_count = pup->pr_count;

	hrt2ts32(pup->pr_tstamp,	&upup->pr_tstamp);
	hrt2ts32(pup->pr_create,	&upup->pr_create);
	hrt2ts32(pup->pr_term,		&upup->pr_term);
	hrt2ts32(pup->pr_rtime,		&upup->pr_rtime);
	hrt2ts32(pup->pr_utime,		&upup->pr_utime);
	hrt2ts32(pup->pr_stime,		&upup->pr_stime);
	hrt2ts32(pup->pr_ttime,		&upup->pr_ttime);
	hrt2ts32(pup->pr_tftime,	&upup->pr_tftime);
	hrt2ts32(pup->pr_dftime,	&upup->pr_dftime);
	hrt2ts32(pup->pr_kftime,	&upup->pr_kftime);
	hrt2ts32(pup->pr_ltime,		&upup->pr_ltime);
	hrt2ts32(pup->pr_slptime,	&upup->pr_slptime);
	hrt2ts32(pup->pr_wtime,		&upup->pr_wtime);
	hrt2ts32(pup->pr_stoptime,	&upup->pr_stoptime);
	bzero(upup->filltime, sizeof (upup->filltime));

	/*
	 * Narrow the 22 consecutive 64-bit counters beginning at pr_minf.
	 * NOTE(review): this assumes the counter fields are laid out
	 * contiguously in both structures.
	 */
	ullp = &pup->pr_minf;
	ulp = &upup->pr_minf;
	for (i = 0; i < 22; i++)
		*ulp++ = (uint32_t)*ullp++;
}
3813 #endif	/* _SYSCALL32_IMPL */
3814 
3815 /*
3816  * Determine whether a set is empty.
3817  */
3818 int
3819 setisempty(uint32_t *sp, uint_t n)
3820 {
3821 	while (n--)
3822 		if (*sp++)
3823 			return (0);
3824 	return (1);
3825 }
3826 
3827 /*
3828  * Utility routine for establishing a watched area in the process.
3829  * Keep the list of watched areas sorted by virtual address.
3830  */
/*
 * Establish the watched area described by pwa in process p, keeping the
 * p_warea AVL tree sorted by virtual address.  Ownership of pwa passes
 * to this function: it is either inserted into the tree or freed here,
 * so the caller must not reference it afterward.  Returns 0 or an errno.
 */
int
set_watched_area(proc_t *p, struct watched_area *pwa)
{
	caddr_t vaddr = pwa->wa_vaddr;
	caddr_t eaddr = pwa->wa_eaddr;
	ulong_t flags = pwa->wa_flags;
	struct watched_area *target;
	avl_index_t where;
	int error = 0;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	/*
	 * If this is our first watchpoint, enable watchpoints for the process.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		/* p_lock protects the walk of the lwp list */
		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_enable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	target = pr_find_watched_area(p, pwa, &where);
	if (target != NULL) {
		/*
		 * We discovered an existing, overlapping watched area.
		 * Allow it only if it is an exact match.
		 */
		if (target->wa_vaddr != vaddr ||
		    target->wa_eaddr != eaddr)
			error = EINVAL;
		else if (target->wa_flags != flags) {
			/* same area, new flags: update the watched pages */
			error = set_watched_page(p, vaddr, eaddr,
			    flags, target->wa_flags);
			target->wa_flags = flags;
		}
		/* the existing entry stays; the caller's copy is consumed */
		kmem_free(pwa, sizeof (struct watched_area));
	} else {
		/* no overlap: insert pwa at the position found above */
		avl_insert(&p->p_warea, pwa, where);
		error = set_watched_page(p, vaddr, eaddr, flags, 0);
	}

	return (error);
}
3882 
3883 /*
3884  * Utility routine for clearing a watched area in the process.
3885  * Must be an exact match of the virtual address.
3886  * size and flags don't matter.
3887  */
/*
 * Remove the watched area matching pwa's virtual address from process p.
 * pwa is always freed here, whether or not a match is found; the caller
 * must not reference it afterward.  Always returns 0.
 */
int
clear_watched_area(proc_t *p, struct watched_area *pwa)
{
	struct watched_area *found;

	/* we must not be holding p->p_lock, but the process must be locked */
	ASSERT(MUTEX_NOT_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);


	/* nothing to do if the process has no active watchpoints */
	if (!pr_watch_active(p)) {
		kmem_free(pwa, sizeof (struct watched_area));
		return (0);
	}

	/*
	 * Look for a matching address in the watched areas.  If a match is
	 * found, clear the old watched area and adjust the watched page(s).  It
	 * is not an error if there is no match.
	 */
	if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL &&
	    found->wa_vaddr == pwa->wa_vaddr) {
		clear_watched_page(p, found->wa_vaddr, found->wa_eaddr,
		    found->wa_flags);
		avl_remove(&p->p_warea, found);
		kmem_free(found, sizeof (struct watched_area));
	}

	kmem_free(pwa, sizeof (struct watched_area));

	/*
	 * If we removed the last watched area from the process, disable
	 * watchpoints.
	 */
	if (!pr_watch_active(p)) {
		kthread_t *t;

		/* p_lock protects the walk of the lwp list */
		mutex_enter(&p->p_lock);
		if ((t = p->p_tlist) != NULL) {
			do {
				watch_disable(t);
			} while ((t = t->t_forw) != p->p_tlist);
		}
		mutex_exit(&p->p_lock);
	}

	return (0);
}
3936 
3937 /*
3938  * Frees all the watched_area structures
3939  */
3940 void
3941 pr_free_watchpoints(proc_t *p)
3942 {
3943 	struct watched_area *delp;
3944 	void *cookie;
3945 
3946 	cookie = NULL;
3947 	while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL)
3948 		kmem_free(delp, sizeof (struct watched_area));
3949 
3950 	avl_destroy(&p->p_warea);
3951 }
3952 
3953 /*
3954  * This one is called by the traced process to unwatch all the
3955  * pages while deallocating the list of watched_page structs.
3956  */
3957 void
3958 pr_free_watched_pages(proc_t *p)
3959 {
3960 	struct as *as = p->p_as;
3961 	struct watched_page *pwp;
3962 	uint_t prot;
3963 	int    retrycnt, err;
3964 	void *cookie;
3965 
3966 	if (as == NULL || avl_numnodes(&as->a_wpage) == 0)
3967 		return;
3968 
3969 	ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
3970 	AS_LOCK_ENTER(as, RW_WRITER);
3971 
3972 	pwp = avl_first(&as->a_wpage);
3973 
3974 	cookie = NULL;
3975 	while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) {
3976 		retrycnt = 0;
3977 		if ((prot = pwp->wp_oprot) != 0) {
3978 			caddr_t addr = pwp->wp_vaddr;
3979 			struct seg *seg;
3980 		retry:
3981 
3982 			if ((pwp->wp_prot != prot ||
3983 			    (pwp->wp_flags & WP_NOWATCH)) &&
3984 			    (seg = as_segat(as, addr)) != NULL) {
3985 				err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot);
3986 				if (err == IE_RETRY) {
3987 					ASSERT(retrycnt == 0);
3988 					retrycnt++;
3989 					goto retry;
3990 				}
3991 			}
3992 		}
3993 		kmem_free(pwp, sizeof (struct watched_page));
3994 	}
3995 
3996 	avl_destroy(&as->a_wpage);
3997 	p->p_wprot = NULL;
3998 
3999 	AS_LOCK_EXIT(as);
4000 }
4001 
4002 /*
4003  * Insert a watched area into the list of watched pages.
4004  * If oflags is zero then we are adding a new watched area.
4005  * Otherwise we are changing the flags of an existing watched area.
4006  */
/*
 * Insert a watched area into the list of watched pages.
 * If oflags is zero then we are adding a new watched area.
 * Otherwise we are changing the flags of an existing watched area.
 * Returns 0 on success or E2BIG if the per-process watched-page limit
 * (prnwatch) would be exceeded.
 */
static int
set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr,
    ulong_t flags, ulong_t oflags)
{
	struct as *as = p->p_as;
	avl_tree_t *pwp_tree;
	struct watched_page *pwp, *newpwp;
	struct watched_page tpw;
	avl_index_t where;
	struct seg *seg;
	uint_t prot;
	caddr_t addr;

	/*
	 * We need to pre-allocate a list of structures before we grab the
	 * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks
	 * held.
	 */
	newpwp = NULL;
	for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	    addr < eaddr; addr += PAGESIZE) {
		/* build a singly-linked free list through wp_list */
		pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP);
		pwp->wp_list = newpwp;
		newpwp = pwp;
	}

	AS_LOCK_ENTER(as, RW_WRITER);

	/*
	 * Search for an existing watched page to contain the watched area.
	 * If none is found, grab a new one from the available list
	 * and insert it in the active list, keeping the list sorted
	 * by user-level virtual address.
	 */
	if (p->p_flag & SVFWAIT)
		pwp_tree = &p->p_wpage;
	else
		pwp_tree = &as->a_wpage;

again:
	/* enforce the watched-page limit; free the unused pre-allocations */
	if (avl_numnodes(pwp_tree) > prnwatch) {
		AS_LOCK_EXIT(as);
		while (newpwp != NULL) {
			pwp = newpwp->wp_list;
			kmem_free(newpwp, sizeof (struct watched_page));
			newpwp = pwp;
		}
		return (E2BIG);
	}

	tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) {
		/* no entry for this page yet: take one from the free list */
		pwp = newpwp;
		newpwp = newpwp->wp_list;
		pwp->wp_list = NULL;
		pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr &
		    (uintptr_t)PAGEMASK);
		avl_insert(pwp_tree, pwp, where);
	}

	ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE);

	/* drop the old reference counts when changing an existing area */
	if (oflags & WA_READ)
		pwp->wp_read--;
	if (oflags & WA_WRITE)
		pwp->wp_write--;
	if (oflags & WA_EXEC)
		pwp->wp_exec--;

	ASSERT(pwp->wp_read >= 0);
	ASSERT(pwp->wp_write >= 0);
	ASSERT(pwp->wp_exec >= 0);

	/* add the new reference counts */
	if (flags & WA_READ)
		pwp->wp_read++;
	if (flags & WA_WRITE)
		pwp->wp_write++;
	if (flags & WA_EXEC)
		pwp->wp_exec++;

	if (!(p->p_flag & SVFWAIT)) {
		vaddr = pwp->wp_vaddr;
		/* capture the page's original protections on first use */
		if (pwp->wp_oprot == 0 &&
		    (seg = as_segat(as, vaddr)) != NULL) {
			SEGOP_GETPROT(seg, vaddr, 0, &prot);
			pwp->wp_oprot = (uchar_t)prot;
			pwp->wp_prot = (uchar_t)prot;
		}
		if (pwp->wp_oprot != 0) {
			/*
			 * Compute the effective protections: a read or exec
			 * watchpoint removes all access; a write watchpoint
			 * removes only write access.
			 */
			prot = pwp->wp_oprot;
			if (pwp->wp_read)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			if (pwp->wp_write)
				prot &= ~PROT_WRITE;
			if (pwp->wp_exec)
				prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC);
			/* queue the page on p_wprot for protection update */
			if (!(pwp->wp_flags & WP_NOWATCH) &&
			    pwp->wp_prot != prot &&
			    (pwp->wp_flags & WP_SETPROT) == 0) {
				pwp->wp_flags |= WP_SETPROT;
				pwp->wp_list = p->p_wprot;
				p->p_wprot = pwp;
			}
			pwp->wp_prot = (uchar_t)prot;
		}
	}

	/*
	 * If the watched area extends into the next page then do
	 * it over again with the virtual address of the next page.
	 */
	if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr)
		goto again;

	AS_LOCK_EXIT(as);

	/*
	 * Free any pages we may have over-allocated
	 */
	while (newpwp != NULL) {
		pwp = newpwp->wp_list;
		kmem_free(newpwp, sizeof (struct watched_page));
		newpwp = pwp;
	}

	return (0);
}
4134 
4135 /*
4136  * Remove a watched area from the list of watched pages.
4137  * A watched area may extend over more than one page.
4138  */
/*
 * Remove a watched area from the list of watched pages.
 * A watched area may extend over more than one page.
 * For each affected page, drop the per-flag reference counts and queue
 * the page on p_wprot so its protections can be restored.
 */
static void
clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags)
{
	struct as *as = p->p_as;
	struct watched_page *pwp;
	struct watched_page tpw;
	avl_tree_t *tree;
	avl_index_t where;

	AS_LOCK_ENTER(as, RW_WRITER);

	if (p->p_flag & SVFWAIT)
		tree = &p->p_wpage;
	else
		tree = &as->a_wpage;

	/* page-align, then find the first watched page at or after vaddr */
	tpw.wp_vaddr = vaddr =
	    (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK);
	pwp = avl_find(tree, &tpw, &where);
	if (pwp == NULL)
		pwp = avl_nearest(tree, where, AVL_AFTER);

	while (pwp != NULL && pwp->wp_vaddr < eaddr) {
		ASSERT(vaddr <=  pwp->wp_vaddr);

		/* drop this area's contribution to the reference counts */
		if (flags & WA_READ)
			pwp->wp_read--;
		if (flags & WA_WRITE)
			pwp->wp_write--;
		if (flags & WA_EXEC)
			pwp->wp_exec--;

		if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) {
			/*
			 * Reset the hat layer's protections on this page.
			 */
			if (pwp->wp_oprot != 0) {
				uint_t prot = pwp->wp_oprot;

				/* recompute from the remaining watchers */
				if (pwp->wp_read)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				if (pwp->wp_write)
					prot &= ~PROT_WRITE;
				if (pwp->wp_exec)
					prot &=
					    ~(PROT_READ|PROT_WRITE|PROT_EXEC);
				/* queue for protection update if changed */
				if (!(pwp->wp_flags & WP_NOWATCH) &&
				    pwp->wp_prot != prot &&
				    (pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
				pwp->wp_prot = (uchar_t)prot;
			}
		} else {
			/*
			 * No watched areas remain in this page.
			 * Reset everything to normal.
			 */
			if (pwp->wp_oprot != 0) {
				pwp->wp_prot = pwp->wp_oprot;
				if ((pwp->wp_flags & WP_SETPROT) == 0) {
					pwp->wp_flags |= WP_SETPROT;
					pwp->wp_list = p->p_wprot;
					p->p_wprot = pwp;
				}
			}
		}

		pwp = AVL_NEXT(tree, pwp);
	}

	AS_LOCK_EXIT(as);
}
4215 
4216 /*
4217  * Return the original protections for the specified page.
4218  */
4219 static void
4220 getwatchprot(struct as *as, caddr_t addr, uint_t *prot)
4221 {
4222 	struct watched_page *pwp;
4223 	struct watched_page tpw;
4224 
4225 	ASSERT(AS_LOCK_HELD(as));
4226 
4227 	tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK);
4228 	if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL)
4229 		*prot = pwp->wp_oprot;
4230 }
4231 
4232 static prpagev_t *
4233 pr_pagev_create(struct seg *seg, int check_noreserve)
4234 {
4235 	prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP);
4236 	size_t total_pages = seg_pages(seg);
4237 
4238 	/*
4239 	 * Limit the size of our vectors to pagev_lim pages at a time.  We need
4240 	 * 4 or 5 bytes of storage per page, so this means we limit ourself
4241 	 * to about a megabyte of kernel heap by default.
4242 	 */
4243 	pagev->pg_npages = MIN(total_pages, pagev_lim);
4244 	pagev->pg_pnbase = 0;
4245 
4246 	pagev->pg_protv =
4247 	    kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP);
4248 
4249 	if (check_noreserve)
4250 		pagev->pg_incore =
4251 		    kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP);
4252 	else
4253 		pagev->pg_incore = NULL;
4254 
4255 	return (pagev);
4256 }
4257 
4258 static void
4259 pr_pagev_destroy(prpagev_t *pagev)
4260 {
4261 	if (pagev->pg_incore != NULL)
4262 		kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char));
4263 
4264 	kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t));
4265 	kmem_free(pagev, sizeof (prpagev_t));
4266 }
4267 
/*
 * Fill the page vector with protection (and, when pg_incore is present,
 * incore) information for the pages of seg beginning at addr, bounded
 * by eaddr.  When checking MAP_NORESERVE pages, skip forward over pages
 * with no backing store, refilling the vector as needed.  Returns the
 * address of the first backed page found (or eaddr if none remains);
 * the protection vector is filled starting at that point.
 */
static caddr_t
pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr)
{
	ulong_t lastpg = seg_page(seg, eaddr - 1);
	ulong_t pn, pnlim;
	caddr_t saddr;
	size_t len;

	ASSERT(addr >= seg->s_base && addr <= eaddr);

	if (addr == eaddr)
		return (eaddr);

refill:
	ASSERT(addr < eaddr);
	pagev->pg_pnbase = seg_page(seg, addr);
	pnlim = pagev->pg_pnbase + pagev->pg_npages;
	saddr = addr;

	/* Clamp the query length so we never look past eaddr. */
	if (lastpg < pnlim)
		len = (size_t)(eaddr - addr);
	else
		len = pagev->pg_npages * PAGESIZE;

	if (pagev->pg_incore != NULL) {
		/*
		 * INCORE cleverly has different semantics than GETPROT:
		 * it returns info on pages up to but NOT including addr + len.
		 */
		SEGOP_INCORE(seg, addr, len, pagev->pg_incore);
		pn = pagev->pg_pnbase;

		do {
			/*
			 * Guilty knowledge here:  We know that segvn_incore
			 * returns more than just the low-order bit that
			 * indicates the page is actually in memory.  If any
			 * bits are set, then the page has backing store.
			 */
			if (pagev->pg_incore[pn++ - pagev->pg_pnbase])
				goto out;

		} while ((addr += PAGESIZE) < eaddr && pn < pnlim);

		/*
		 * If we examined all the pages in the vector but we're not
		 * at the end of the segment, take another lap.
		 */
		if (addr < eaddr)
			goto refill;
	}

	/*
	 * Need to take len - 1 because addr + len is the address of the
	 * first byte of the page just past the end of what we want.
	 */
out:
	SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv);
	return (addr);
}
4328 
/*
 * Scan forward from *saddrp (bounded by eaddr) for the run of
 * consecutive backed pages that share the same protections.  On
 * return, *saddrp is the first backed page of the run, *protp its
 * protections (0 if no backed page remains before eaddr), and the
 * returned address is the end of the run.  The page vector is
 * refilled transparently as the scan advances.
 */
static caddr_t
pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg,
    caddr_t *saddrp, caddr_t eaddr, uint_t *protp)
{
	/*
	 * Our starting address is either the specified address, or the base
	 * address from the start of the pagev.  If the latter is greater,
	 * this means a previous call to pr_pagev_fill has already scanned
	 * further than the end of the previous mapping.
	 */
	caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE;
	caddr_t addr = MAX(*saddrp, base);
	ulong_t pn = seg_page(seg, addr);
	uint_t prot, nprot;

	/*
	 * If we're dealing with noreserve pages, then advance addr to
	 * the address of the next page which has backing store.
	 */
	if (pagev->pg_incore != NULL) {
		while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) {
			if ((addr += PAGESIZE) == eaddr) {
				*saddrp = addr;
				prot = 0;
				goto out;
			}
			if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
				addr = pr_pagev_fill(pagev, seg, addr, eaddr);
				if (addr == eaddr) {
					*saddrp = addr;
					prot = 0;
					goto out;
				}
				pn = seg_page(seg, addr);
			}
		}
	}

	/*
	 * Get the protections on the page corresponding to addr.
	 */
	pn = seg_page(seg, addr);
	ASSERT(pn >= pagev->pg_pnbase);
	ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages));

	prot = pagev->pg_protv[pn - pagev->pg_pnbase];
	getwatchprot(seg->s_as, addr, &prot);
	*saddrp = addr;

	/*
	 * Now loop until we find a backed page with different protections
	 * or we reach the end of this segment.
	 */
	while ((addr += PAGESIZE) < eaddr) {
		/*
		 * If pn has advanced to the page number following what we
		 * have information on, refill the page vector and reset
		 * addr and pn.  If pr_pagev_fill does not return the
		 * address of the next page, we have a discontiguity and
		 * thus have reached the end of the current mapping.
		 */
		if (++pn == pagev->pg_pnbase + pagev->pg_npages) {
			caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr);
			if (naddr != addr)
				goto out;
			pn = seg_page(seg, addr);
		}

		/*
		 * The previous page's protections are in prot, and it has
		 * backing.  If this page is MAP_NORESERVE and has no backing,
		 * then end this mapping and return the previous protections.
		 */
		if (pagev->pg_incore != NULL &&
		    pagev->pg_incore[pn - pagev->pg_pnbase] == 0)
			break;

		/*
		 * Otherwise end the mapping if this page's protections (nprot)
		 * are different than those in the previous page (prot).
		 */
		nprot = pagev->pg_protv[pn - pagev->pg_pnbase];
		getwatchprot(seg->s_as, addr, &nprot);

		if (nprot != prot)
			break;
	}

out:
	*protp = prot;
	return (addr);
}
4421 
4422 size_t
4423 pr_getsegsize(struct seg *seg, int reserved)
4424 {
4425 	size_t size = seg->s_size;
4426 
4427 	/*
4428 	 * If we're interested in the reserved space, return the size of the
4429 	 * segment itself.  Everything else in this function is a special case
4430 	 * to determine the actual underlying size of various segment types.
4431 	 */
4432 	if (reserved)
4433 		return (size);
4434 
4435 	/*
4436 	 * If this is a segvn mapping of a regular file, return the smaller
4437 	 * of the segment size and the remaining size of the file beyond
4438 	 * the file offset corresponding to seg->s_base.
4439 	 */
4440 	if (seg->s_ops == &segvn_ops) {
4441 		vattr_t vattr;
4442 		vnode_t *vp;
4443 
4444 		vattr.va_mask = AT_SIZE;
4445 
4446 		if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 &&
4447 		    vp != NULL && vp->v_type == VREG &&
4448 		    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
4449 
4450 			u_offset_t fsize = vattr.va_size;
4451 			u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base);
4452 
4453 			if (fsize < offset)
4454 				fsize = 0;
4455 			else
4456 				fsize -= offset;
4457 
4458 			fsize = roundup(fsize, (u_offset_t)PAGESIZE);
4459 
4460 			if (fsize < (u_offset_t)size)
4461 				size = (size_t)fsize;
4462 		}
4463 
4464 		return (size);
4465 	}
4466 
4467 	/*
4468 	 * If this is an ISM shared segment, don't include pages that are
4469 	 * beyond the real size of the spt segment that backs it.
4470 	 */
4471 	if (seg->s_ops == &segspt_shmops)
4472 		return (MIN(spt_realsize(seg), size));
4473 
4474 	/*
4475 	 * If this is segment is a mapping from /dev/null, then this is a
4476 	 * reservation of virtual address space and has no actual size.
4477 	 * Such segments are backed by segdev and have type set to neither
4478 	 * MAP_SHARED nor MAP_PRIVATE.
4479 	 */
4480 	if (seg->s_ops == &segdev_ops &&
4481 	    ((SEGOP_GETTYPE(seg, seg->s_base) &
4482 	    (MAP_SHARED | MAP_PRIVATE)) == 0))
4483 		return (0);
4484 
4485 	/*
4486 	 * If this segment doesn't match one of the special types we handle,
4487 	 * just return the size of the segment itself.
4488 	 */
4489 	return (size);
4490 }
4491 
/*
 * Return the protections for the range starting at *saddrp in seg, and
 * set *naddrp to the end of the contiguous range that shares those
 * protections (bounded by eaddr).  *tmp carries a prpagev_t across
 * successive calls over the same segment; it is allocated on the first
 * call (saddr == s_base) and freed automatically once the walk reaches
 * eaddr, or explicitly via pr_getprot_done().
 */
uint_t
pr_getprot(struct seg *seg, int reserved, void **tmp,
    caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr)
{
	struct as *as = seg->s_as;

	caddr_t saddr = *saddrp;
	caddr_t naddr;

	int check_noreserve;
	uint_t prot;

	/* Guilty knowledge: interpret s_data per the driver checked below. */
	union {
		struct segvn_data *svd;
		struct segdev_data *sdp;
		void *data;
	} s;

	s.data = seg->s_data;

	ASSERT(AS_WRITE_HELD(as));
	ASSERT(saddr >= seg->s_base && saddr < eaddr);
	ASSERT(eaddr <= seg->s_base + seg->s_size);

	/*
	 * Don't include MAP_NORESERVE pages in the address range
	 * unless their mappings have actually materialized.
	 * We cheat by knowing that segvn is the only segment
	 * driver that supports MAP_NORESERVE.
	 */
	check_noreserve =
	    (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL &&
	    (s.svd->vp == NULL || s.svd->vp->v_type != VREG) &&
	    (s.svd->flags & MAP_NORESERVE));

	/*
	 * Examine every page only as a last resort.  We use guilty knowledge
	 * of segvn and segdev to avoid this: if there are no per-page
	 * protections present in the segment and we don't care about
	 * MAP_NORESERVE, then s_data->prot is the prot for the whole segment.
	 */
	if (!check_noreserve && saddr == seg->s_base &&
	    seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) {
		prot = s.svd->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else if (saddr == seg->s_base && seg->s_ops == &segdev_ops &&
	    s.sdp != NULL && s.sdp->pageprot == 0) {
		prot = s.sdp->prot;
		getwatchprot(as, saddr, &prot);
		naddr = eaddr;

	} else {
		prpagev_t *pagev;

		/*
		 * If addr is sitting at the start of the segment, then
		 * create a page vector to store protection and incore
		 * information for pages in the segment, and fill it.
		 * Otherwise, we expect *tmp to address the prpagev_t
		 * allocated by a previous call to this function.
		 */
		if (saddr == seg->s_base) {
			pagev = pr_pagev_create(seg, check_noreserve);
			saddr = pr_pagev_fill(pagev, seg, saddr, eaddr);

			ASSERT(*tmp == NULL);
			*tmp = pagev;

			ASSERT(saddr <= eaddr);
			*saddrp = saddr;

			/* No backed pages at all: report an empty range. */
			if (saddr == eaddr) {
				naddr = saddr;
				prot = 0;
				goto out;
			}

		} else {
			ASSERT(*tmp != NULL);
			pagev = (prpagev_t *)*tmp;
		}

		naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot);
		ASSERT(naddr <= eaddr);
	}

out:
	/* Walk complete: release the cached page vector, if any. */
	if (naddr == eaddr)
		pr_getprot_done(tmp);
	*naddrp = naddr;
	return (prot);
}
4586 
4587 void
4588 pr_getprot_done(void **tmp)
4589 {
4590 	if (*tmp != NULL) {
4591 		pr_pagev_destroy((prpagev_t *)*tmp);
4592 		*tmp = NULL;
4593 	}
4594 }
4595 
4596 /*
4597  * Return true iff the vnode is a /proc file from the object directory.
4598  */
4599 int
4600 pr_isobject(vnode_t *vp)
4601 {
4602 	return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT);
4603 }
4604 
4605 /*
4606  * Return true iff the vnode is a /proc file opened by the process itself.
4607  */
4608 int
4609 pr_isself(vnode_t *vp)
4610 {
4611 	/*
4612 	 * XXX: To retain binary compatibility with the old
4613 	 * ioctl()-based version of /proc, we exempt self-opens
4614 	 * of /proc/<pid> from being marked close-on-exec.
4615 	 */
4616 	return (vn_matchops(vp, prvnodeops) &&
4617 	    (VTOP(vp)->pr_flags & PR_ISSELF) &&
4618 	    VTOP(vp)->pr_type != PR_PIDDIR);
4619 }
4620 
4621 static ssize_t
4622 pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr)
4623 {
4624 	ssize_t pagesize, hatsize;
4625 
4626 	ASSERT(AS_WRITE_HELD(seg->s_as));
4627 	ASSERT(IS_P2ALIGNED(saddr, PAGESIZE));
4628 	ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE));
4629 	ASSERT(saddr < eaddr);
4630 
4631 	pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr);
4632 	ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize));
4633 	ASSERT(pagesize != 0);
4634 
4635 	if (pagesize == -1)
4636 		pagesize = PAGESIZE;
4637 
4638 	saddr += P2NPHASE((uintptr_t)saddr, pagesize);
4639 
4640 	while (saddr < eaddr) {
4641 		if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr))
4642 			break;
4643 		ASSERT(IS_P2ALIGNED(saddr, pagesize));
4644 		saddr += pagesize;
4645 	}
4646 
4647 	*naddrp = ((saddr < eaddr) ? saddr : eaddr);
4648 	return (hatsize);
4649 }
4650 
4651 /*
4652  * Return an array of structures with extended memory map information.
4653  * We allocate here; the caller must deallocate.
4654  */
4655 int
4656 prgetxmap(proc_t *p, list_t *iolhead)
4657 {
4658 	struct as *as = p->p_as;
4659 	prxmap_t *mp;
4660 	struct seg *seg;
4661 	struct seg *brkseg, *stkseg;
4662 	struct vnode *vp;
4663 	struct vattr vattr;
4664 	uint_t prot;
4665 
4666 	ASSERT(as != &kas && AS_WRITE_HELD(as));
4667 
4668 	/*
4669 	 * Request an initial buffer size that doesn't waste memory
4670 	 * if the address space has only a small number of segments.
4671 	 */
4672 	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
4673 
4674 	if ((seg = AS_SEGFIRST(as)) == NULL)
4675 		return (0);
4676 
4677 	brkseg = break_seg(p);
4678 	stkseg = as_segat(as, prgetstackbase(p));
4679 
4680 	do {
4681 		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
4682 		caddr_t saddr, naddr, baddr;
4683 		void *tmp = NULL;
4684 		ssize_t psz;
4685 		char *parr;
4686 		uint64_t npages;
4687 		uint64_t pagenum;
4688 
4689 		if ((seg->s_flags & S_HOLE) != 0) {
4690 			continue;
4691 		}
4692 		/*
4693 		 * Segment loop part one: iterate from the base of the segment
4694 		 * to its end, pausing at each address boundary (baddr) between
4695 		 * ranges that have different virtual memory protections.
4696 		 */
4697 		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
4698 			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
4699 			ASSERT(baddr >= saddr && baddr <= eaddr);
4700 
4701 			/*
4702 			 * Segment loop part two: iterate from the current
4703 			 * position to the end of the protection boundary,
4704 			 * pausing at each address boundary (naddr) between
4705 			 * ranges that have different underlying page sizes.
4706 			 */
4707 			for (; saddr < baddr; saddr = naddr) {
4708 				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
4709 				ASSERT(naddr >= saddr && naddr <= baddr);
4710 
4711 				mp = pr_iol_newbuf(iolhead, sizeof (*mp));
4712 
4713 				mp->pr_vaddr = (uintptr_t)saddr;
4714 				mp->pr_size = naddr - saddr;
4715 				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
4716 				mp->pr_mflags = 0;
4717 				if (prot & PROT_READ)
4718 					mp->pr_mflags |= MA_READ;
4719 				if (prot & PROT_WRITE)
4720 					mp->pr_mflags |= MA_WRITE;
4721 				if (prot & PROT_EXEC)
4722 					mp->pr_mflags |= MA_EXEC;
4723 				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
4724 					mp->pr_mflags |= MA_SHARED;
4725 				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
4726 					mp->pr_mflags |= MA_NORESERVE;
4727 				if (seg->s_ops == &segspt_shmops ||
4728 				    (seg->s_ops == &segvn_ops &&
4729 				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
4730 				    vp == NULL)))
4731 					mp->pr_mflags |= MA_ANON;
4732 				if (seg == brkseg)
4733 					mp->pr_mflags |= MA_BREAK;
4734 				else if (seg == stkseg)
4735 					mp->pr_mflags |= MA_STACK;
4736 				if (seg->s_ops == &segspt_shmops)
4737 					mp->pr_mflags |= MA_ISM | MA_SHM;
4738 
4739 				mp->pr_pagesize = PAGESIZE;
4740 				if (psz == -1) {
4741 					mp->pr_hatpagesize = 0;
4742 				} else {
4743 					mp->pr_hatpagesize = psz;
4744 				}
4745 
4746 				/*
4747 				 * Manufacture a filename for the "object" dir.
4748 				 */
4749 				mp->pr_dev = PRNODEV;
4750 				vattr.va_mask = AT_FSID|AT_NODEID;
4751 				if (seg->s_ops == &segvn_ops &&
4752 				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
4753 				    vp != NULL && vp->v_type == VREG &&
4754 				    VOP_GETATTR(vp, &vattr, 0, CRED(),
4755 				    NULL) == 0) {
4756 					mp->pr_dev = vattr.va_fsid;
4757 					mp->pr_ino = vattr.va_nodeid;
4758 					if (vp == p->p_exec)
4759 						(void) strcpy(mp->pr_mapname,
4760 						    "a.out");
4761 					else
4762 						pr_object_name(mp->pr_mapname,
4763 						    vp, &vattr);
4764 				}
4765 
4766 				/*
4767 				 * Get the SysV shared memory id, if any.
4768 				 */
4769 				if ((mp->pr_mflags & MA_SHARED) &&
4770 				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
4771 				    seg->s_base)) != SHMID_NONE) {
4772 					if (mp->pr_shmid == SHMID_FREE)
4773 						mp->pr_shmid = -1;
4774 
4775 					mp->pr_mflags |= MA_SHM;
4776 				} else {
4777 					mp->pr_shmid = -1;
4778 				}
4779 
4780 				npages = ((uintptr_t)(naddr - saddr)) >>
4781 				    PAGESHIFT;
4782 				parr = kmem_zalloc(npages, KM_SLEEP);
4783 
4784 				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);
4785 
4786 				for (pagenum = 0; pagenum < npages; pagenum++) {
4787 					if (parr[pagenum] & SEG_PAGE_INCORE)
4788 						mp->pr_rss++;
4789 					if (parr[pagenum] & SEG_PAGE_ANON)
4790 						mp->pr_anon++;
4791 					if (parr[pagenum] & SEG_PAGE_LOCKED)
4792 						mp->pr_locked++;
4793 				}
4794 				kmem_free(parr, npages);
4795 			}
4796 		}
4797 		ASSERT(tmp == NULL);
4798 	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);
4799 
4800 	return (0);
4801 }
4802 
4803 /*
4804  * Return the process's credentials.  We don't need a 32-bit equivalent of
4805  * this function because prcred_t and prcred32_t are actually the same.
4806  */
4807 void
4808 prgetcred(proc_t *p, prcred_t *pcrp)
4809 {
4810 	mutex_enter(&p->p_crlock);
4811 	cred2prcred(p->p_cred, pcrp);
4812 	mutex_exit(&p->p_crlock);
4813 }
4814 
/*
 * Copy the process's security-flag sets (lower, upper, effective and
 * inherit) into the caller-supplied prsecflags_t, tagging it with the
 * current structure version.
 */
void
prgetsecflags(proc_t *p, prsecflags_t *psfp)
{
	ASSERT(psfp != NULL);

	psfp->pr_version = PRSECFLAGS_VERSION_CURRENT;
	psfp->pr_lower = p->p_secflags.psf_lower;
	psfp->pr_upper = p->p_secflags.psf_upper;
	psfp->pr_effective = p->p_secflags.psf_effective;
	psfp->pr_inherit = p->p_secflags.psf_inherit;
}
4826 
4827 /*
4828  * Compute actual size of the prpriv_t structure.
4829  */
4830 
4831 size_t
4832 prgetprivsize(void)
4833 {
4834 	return (priv_prgetprivsize(NULL));
4835 }
4836 
4837 /*
4838  * Return the process's privileges.  We don't need a 32-bit equivalent of
4839  * this function because prpriv_t and prpriv32_t are actually the same.
4840  */
4841 void
4842 prgetpriv(proc_t *p, prpriv_t *pprp)
4843 {
4844 	mutex_enter(&p->p_crlock);
4845 	cred2prpriv(p->p_cred, pprp);
4846 	mutex_exit(&p->p_crlock);
4847 }
4848 
#ifdef _SYSCALL32_IMPL
/*
 * Return an array of structures with HAT memory map information.
 * We allocate here; the caller must deallocate.  32-bit counterpart of
 * prgetxmap(): one prxmap32_t per range of pages sharing both
 * protections and hat page size within each segment.
 */
int
prgetxmap32(proc_t *p, list_t *iolhead)
{
	struct as *as = p->p_as;
	prxmap32_t *mp;
	struct seg *seg;
	struct seg *brkseg, *stkseg;
	struct vnode *vp;
	struct vattr vattr;
	uint_t prot;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	/*
	 * Request an initial buffer size that doesn't waste memory
	 * if the address space has only a small number of segments.
	 */
	pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));

	if ((seg = AS_SEGFIRST(as)) == NULL)
		return (0);

	/* Identify the heap and stack segments so they can be flagged. */
	brkseg = break_seg(p);
	stkseg = as_segat(as, prgetstackbase(p));

	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr, baddr;
		void *tmp = NULL;
		ssize_t psz;
		char *parr;
		uint64_t npages;
		uint64_t pagenum;

		/* Skip hole segments. */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		/*
		 * Segment loop part one: iterate from the base of the segment
		 * to its end, pausing at each address boundary (baddr) between
		 * ranges that have different virtual memory protections.
		 */
		for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) {
			prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr);
			ASSERT(baddr >= saddr && baddr <= eaddr);

			/*
			 * Segment loop part two: iterate from the current
			 * position to the end of the protection boundary,
			 * pausing at each address boundary (naddr) between
			 * ranges that have different underlying page sizes.
			 */
			for (; saddr < baddr; saddr = naddr) {
				psz = pr_getpagesize(seg, saddr, &naddr, baddr);
				ASSERT(naddr >= saddr && naddr <= baddr);

				mp = pr_iol_newbuf(iolhead, sizeof (*mp));

				mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
				mp->pr_size = (size32_t)(naddr - saddr);
				mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
				mp->pr_mflags = 0;
				if (prot & PROT_READ)
					mp->pr_mflags |= MA_READ;
				if (prot & PROT_WRITE)
					mp->pr_mflags |= MA_WRITE;
				if (prot & PROT_EXEC)
					mp->pr_mflags |= MA_EXEC;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
					mp->pr_mflags |= MA_SHARED;
				if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
					mp->pr_mflags |= MA_NORESERVE;
				if (seg->s_ops == &segspt_shmops ||
				    (seg->s_ops == &segvn_ops &&
				    (SEGOP_GETVP(seg, saddr, &vp) != 0 ||
				    vp == NULL)))
					mp->pr_mflags |= MA_ANON;
				if (seg == brkseg)
					mp->pr_mflags |= MA_BREAK;
				else if (seg == stkseg)
					mp->pr_mflags |= MA_STACK;
				if (seg->s_ops == &segspt_shmops)
					mp->pr_mflags |= MA_ISM | MA_SHM;

				mp->pr_pagesize = PAGESIZE;
				/* psz == -1 means no hat mapping at saddr. */
				if (psz == -1) {
					mp->pr_hatpagesize = 0;
				} else {
					mp->pr_hatpagesize = psz;
				}

				/*
				 * Manufacture a filename for the "object" dir.
				 */
				mp->pr_dev = PRNODEV32;
				vattr.va_mask = AT_FSID|AT_NODEID;
				if (seg->s_ops == &segvn_ops &&
				    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
				    vp != NULL && vp->v_type == VREG &&
				    VOP_GETATTR(vp, &vattr, 0, CRED(),
				    NULL) == 0) {
					(void) cmpldev(&mp->pr_dev,
					    vattr.va_fsid);
					mp->pr_ino = vattr.va_nodeid;
					if (vp == p->p_exec)
						(void) strcpy(mp->pr_mapname,
						    "a.out");
					else
						pr_object_name(mp->pr_mapname,
						    vp, &vattr);
				}

				/*
				 * Get the SysV shared memory id, if any.
				 */
				if ((mp->pr_mflags & MA_SHARED) &&
				    p->p_segacct && (mp->pr_shmid = shmgetid(p,
				    seg->s_base)) != SHMID_NONE) {
					if (mp->pr_shmid == SHMID_FREE)
						mp->pr_shmid = -1;

					mp->pr_mflags |= MA_SHM;
				} else {
					mp->pr_shmid = -1;
				}

				/*
				 * Tally resident, anonymous and locked pages
				 * from the per-page incore information.
				 */
				npages = ((uintptr_t)(naddr - saddr)) >>
				    PAGESHIFT;
				parr = kmem_zalloc(npages, KM_SLEEP);

				SEGOP_INCORE(seg, saddr, naddr - saddr, parr);

				for (pagenum = 0; pagenum < npages; pagenum++) {
					if (parr[pagenum] & SEG_PAGE_INCORE)
						mp->pr_rss++;
					if (parr[pagenum] & SEG_PAGE_ANON)
						mp->pr_anon++;
					if (parr[pagenum] & SEG_PAGE_LOCKED)
						mp->pr_locked++;
				}
				kmem_free(parr, npages);
			}
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */
5004