1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright 2017, Joyent, Inc.
25 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
26 */
27
28 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
29 /* All Rights Reserved */
30
31 #include <sys/types.h>
32 #include <sys/t_lock.h>
33 #include <sys/param.h>
34 #include <sys/cmn_err.h>
35 #include <sys/cred.h>
36 #include <sys/priv.h>
37 #include <sys/debug.h>
38 #include <sys/errno.h>
39 #include <sys/inline.h>
40 #include <sys/kmem.h>
41 #include <sys/mman.h>
42 #include <sys/proc.h>
43 #include <sys/brand.h>
44 #include <sys/sobject.h>
45 #include <sys/sysmacros.h>
46 #include <sys/systm.h>
47 #include <sys/uio.h>
48 #include <sys/var.h>
49 #include <sys/vfs.h>
50 #include <sys/vnode.h>
51 #include <sys/session.h>
52 #include <sys/pcb.h>
53 #include <sys/signal.h>
54 #include <sys/user.h>
55 #include <sys/disp.h>
56 #include <sys/class.h>
57 #include <sys/ts.h>
58 #include <sys/bitmap.h>
59 #include <sys/poll.h>
60 #include <sys/shm_impl.h>
61 #include <sys/fault.h>
62 #include <sys/syscall.h>
63 #include <sys/procfs.h>
64 #include <sys/processor.h>
65 #include <sys/cpuvar.h>
66 #include <sys/copyops.h>
67 #include <sys/time.h>
68 #include <sys/msacct.h>
69 #include <sys/flock_impl.h>
70 #include <sys/stropts.h>
71 #include <sys/strsubr.h>
72 #include <sys/pathname.h>
73 #include <sys/mode.h>
74 #include <sys/socketvar.h>
75 #include <sys/autoconf.h>
76 #include <sys/dtrace.h>
77 #include <sys/timod.h>
78 #include <sys/fs/namenode.h>
79 #include <netinet/udp.h>
80 #include <netinet/tcp.h>
81 #include <inet/cc.h>
82 #include <vm/as.h>
83 #include <vm/rm.h>
84 #include <vm/seg.h>
85 #include <vm/seg_vn.h>
86 #include <vm/seg_dev.h>
87 #include <vm/seg_spt.h>
88 #include <vm/page.h>
89 #include <sys/vmparam.h>
90 #include <sys/swap.h>
91 #include <fs/proc/prdata.h>
92 #include <sys/task.h>
93 #include <sys/project.h>
94 #include <sys/contract_impl.h>
95 #include <sys/contract/process.h>
96 #include <sys/contract/process_impl.h>
97 #include <sys/schedctl.h>
98 #include <sys/pool.h>
99 #include <sys/zone.h>
100 #include <sys/atomic.h>
101 #include <sys/sdt.h>
102
103 #define MAX_ITERS_SPIN 5
104
105 typedef struct prpagev {
106 uint_t *pg_protv; /* vector of page permissions */
107 char *pg_incore; /* vector of incore flags */
108 size_t pg_npages; /* number of pages in protv and incore */
109 ulong_t pg_pnbase; /* pn within segment of first protv element */
110 } prpagev_t;
111
112 size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */
113
114 extern struct seg_ops segdev_ops; /* needs a header file */
115 extern struct seg_ops segspt_shmops; /* needs a header file */
116
117 static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t);
118 static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t);
119
120 /*
121 * Choose an lwp from the complete set of lwps for the process.
122 * This is called for any operation applied to the process
123 * file descriptor that requires an lwp to operate upon.
124 *
125 * Returns a pointer to the thread for the selected LWP,
126 * and with the dispatcher lock held for the thread.
127 *
128 * The algorithm for choosing an lwp is critical for /proc semantics;
129 * don't touch this code unless you know all of the implications.
130 */
131 kthread_t *
prchoose(proc_t * p)132 prchoose(proc_t *p)
133 {
134 kthread_t *t;
135 kthread_t *t_onproc = NULL; /* running on processor */
136 kthread_t *t_run = NULL; /* runnable, on disp queue */
137 kthread_t *t_sleep = NULL; /* sleeping */
138 kthread_t *t_hold = NULL; /* sleeping, performing hold */
139 kthread_t *t_susp = NULL; /* suspended stop */
140 kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */
141 kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */
142 kthread_t *t_req = NULL; /* requested stop */
143 kthread_t *t_istop = NULL; /* event-of-interest stop */
144 kthread_t *t_dtrace = NULL; /* DTrace stop */
145
146 ASSERT(MUTEX_HELD(&p->p_lock));
147
148 /*
149 * If the agent lwp exists, it takes precedence over all others.
150 */
151 if ((t = p->p_agenttp) != NULL) {
152 thread_lock(t);
153 return (t);
154 }
155
156 if ((t = p->p_tlist) == NULL) /* start at the head of the list */
157 return (t);
158 do { /* for eacn lwp in the process */
159 if (VSTOPPED(t)) { /* virtually stopped */
160 if (t_req == NULL)
161 t_req = t;
162 continue;
163 }
164
165 /* If this is a process kernel thread, ignore it. */
166 if ((t->t_proc_flag & TP_KTHREAD) != 0) {
167 continue;
168 }
169
170 thread_lock(t); /* make sure thread is in good state */
171 switch (t->t_state) {
172 default:
173 panic("prchoose: bad thread state %d, thread 0x%p",
174 t->t_state, (void *)t);
175 /*NOTREACHED*/
176 case TS_SLEEP:
177 /* this is filthy */
178 if (t->t_wchan == (caddr_t)&p->p_holdlwps &&
179 t->t_wchan0 == NULL) {
180 if (t_hold == NULL)
181 t_hold = t;
182 } else {
183 if (t_sleep == NULL)
184 t_sleep = t;
185 }
186 break;
187 case TS_RUN:
188 case TS_WAIT:
189 if (t_run == NULL)
190 t_run = t;
191 break;
192 case TS_ONPROC:
193 if (t_onproc == NULL)
194 t_onproc = t;
195 break;
196 case TS_ZOMB: /* last possible choice */
197 break;
198 case TS_STOPPED:
199 switch (t->t_whystop) {
200 case PR_SUSPENDED:
201 if (t_susp == NULL)
202 t_susp = t;
203 break;
204 case PR_JOBCONTROL:
205 if (t->t_proc_flag & TP_PRSTOP) {
206 if (t_jdstop == NULL)
207 t_jdstop = t;
208 } else {
209 if (t_jstop == NULL)
210 t_jstop = t;
211 }
212 break;
213 case PR_REQUESTED:
214 if (t->t_dtrace_stop && t_dtrace == NULL)
215 t_dtrace = t;
216 else if (t_req == NULL)
217 t_req = t;
218 break;
219 case PR_SYSENTRY:
220 case PR_SYSEXIT:
221 case PR_SIGNALLED:
222 case PR_FAULTED:
223 /*
224 * Make an lwp calling exit() be the
225 * last lwp seen in the process.
226 */
227 if (t_istop == NULL ||
228 (t_istop->t_whystop == PR_SYSENTRY &&
229 t_istop->t_whatstop == SYS_exit))
230 t_istop = t;
231 break;
232 case PR_CHECKPOINT: /* can't happen? */
233 break;
234 default:
235 panic("prchoose: bad t_whystop %d, thread 0x%p",
236 t->t_whystop, (void *)t);
237 /*NOTREACHED*/
238 }
239 break;
240 }
241 thread_unlock(t);
242 } while ((t = t->t_forw) != p->p_tlist);
243
244 if (t_onproc)
245 t = t_onproc;
246 else if (t_run)
247 t = t_run;
248 else if (t_sleep)
249 t = t_sleep;
250 else if (t_jstop)
251 t = t_jstop;
252 else if (t_jdstop)
253 t = t_jdstop;
254 else if (t_istop)
255 t = t_istop;
256 else if (t_dtrace)
257 t = t_dtrace;
258 else if (t_req)
259 t = t_req;
260 else if (t_hold)
261 t = t_hold;
262 else if (t_susp)
263 t = t_susp;
264 else /* TS_ZOMB */
265 t = p->p_tlist;
266
267 if (t != NULL)
268 thread_lock(t);
269 return (t);
270 }
271
272 /*
273 * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop.
274 * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI
275 * on the /proc file descriptor. Called from stop() when a traced
276 * process stops on an event of interest. Also called from exit()
277 * and prinvalidate() to indicate POLLHUP and POLLERR respectively.
278 */
279 void
prnotify(struct vnode * vp)280 prnotify(struct vnode *vp)
281 {
282 prcommon_t *pcp = VTOP(vp)->pr_common;
283
284 mutex_enter(&pcp->prc_mutex);
285 cv_broadcast(&pcp->prc_wait);
286 mutex_exit(&pcp->prc_mutex);
287 if (pcp->prc_flags & PRC_POLL) {
288 /*
289 * We call pollwakeup() with POLLHUP to ensure that
290 * the pollers are awakened even if they are polling
291 * for nothing (i.e., waiting for the process to exit).
292 * This enables the use of the PRC_POLL flag for optimization
293 * (we can turn off PRC_POLL only if we know no pollers remain).
294 */
295 pcp->prc_flags &= ~PRC_POLL;
296 pollwakeup(&pcp->prc_pollhead, POLLHUP);
297 }
298 }
299
300 /* called immediately below, in prfree() */
301 static void
prfreenotify(vnode_t * vp)302 prfreenotify(vnode_t *vp)
303 {
304 prnode_t *pnp;
305 prcommon_t *pcp;
306
307 while (vp != NULL) {
308 pnp = VTOP(vp);
309 pcp = pnp->pr_common;
310 ASSERT(pcp->prc_thread == NULL);
311 pcp->prc_proc = NULL;
312 /*
313 * We can't call prnotify() here because we are holding
314 * pidlock. We assert that there is no need to.
315 */
316 mutex_enter(&pcp->prc_mutex);
317 cv_broadcast(&pcp->prc_wait);
318 mutex_exit(&pcp->prc_mutex);
319 ASSERT(!(pcp->prc_flags & PRC_POLL));
320
321 vp = pnp->pr_next;
322 pnp->pr_next = NULL;
323 }
324 }
325
326 /*
327 * Called from a hook in freeproc() when a traced process is removed
328 * from the process table. The proc-table pointers of all associated
329 * /proc vnodes are cleared to indicate that the process has gone away.
330 */
331 void
prfree(proc_t * p)332 prfree(proc_t *p)
333 {
334 uint_t slot = p->p_slot;
335
336 ASSERT(MUTEX_HELD(&pidlock));
337
338 /*
339 * Block the process against /proc so it can be freed.
340 * It cannot be freed while locked by some controlling process.
341 * Lock ordering:
342 * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex
343 */
344 mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */
345 mutex_enter(&p->p_lock);
346 while (p->p_proc_flag & P_PR_LOCK) {
347 mutex_exit(&pr_pidlock);
348 cv_wait(&pr_pid_cv[slot], &p->p_lock);
349 mutex_exit(&p->p_lock);
350 mutex_enter(&pr_pidlock);
351 mutex_enter(&p->p_lock);
352 }
353
354 ASSERT(p->p_tlist == NULL);
355
356 prfreenotify(p->p_plist);
357 p->p_plist = NULL;
358
359 prfreenotify(p->p_trace);
360 p->p_trace = NULL;
361
362 /*
363 * We broadcast to wake up everyone waiting for this process.
364 * No one can reach this process from this point on.
365 */
366 cv_broadcast(&pr_pid_cv[slot]);
367
368 mutex_exit(&p->p_lock);
369 mutex_exit(&pr_pidlock);
370 }
371
372 /*
373 * Called from a hook in exit() when a traced process is becoming a zombie.
374 */
375 void
prexit(proc_t * p)376 prexit(proc_t *p)
377 {
378 ASSERT(MUTEX_HELD(&p->p_lock));
379
380 if (pr_watch_active(p)) {
381 pr_free_watchpoints(p);
382 watch_disable(curthread);
383 }
384 /* pr_free_watched_pages() is called in exit(), after dropping p_lock */
385 if (p->p_trace) {
386 VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY;
387 prnotify(p->p_trace);
388 }
389 cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */
390 }
391
392 /*
393 * Called when a thread calls lwp_exit().
394 */
395 void
prlwpexit(kthread_t * t)396 prlwpexit(kthread_t *t)
397 {
398 vnode_t *vp;
399 prnode_t *pnp;
400 prcommon_t *pcp;
401 proc_t *p = ttoproc(t);
402 lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry;
403
404 ASSERT(t == curthread);
405 ASSERT(MUTEX_HELD(&p->p_lock));
406
407 /*
408 * The process must be blocked against /proc to do this safely.
409 * The lwp must not disappear while the process is marked P_PR_LOCK.
410 * It is the caller's responsibility to have called prbarrier(p).
411 */
412 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
413
414 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
415 pnp = VTOP(vp);
416 pcp = pnp->pr_common;
417 if (pcp->prc_thread == t) {
418 pcp->prc_thread = NULL;
419 pcp->prc_flags |= PRC_DESTROY;
420 }
421 }
422
423 for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) {
424 pnp = VTOP(vp);
425 pcp = pnp->pr_common;
426 pcp->prc_thread = NULL;
427 pcp->prc_flags |= PRC_DESTROY;
428 prnotify(vp);
429 }
430
431 if (p->p_trace)
432 prnotify(p->p_trace);
433 }
434
435 /*
436 * Called when a zombie thread is joined or when a
437 * detached lwp exits. Called from lwp_hash_out().
438 */
439 void
prlwpfree(proc_t * p,lwpent_t * lep)440 prlwpfree(proc_t *p, lwpent_t *lep)
441 {
442 vnode_t *vp;
443 prnode_t *pnp;
444 prcommon_t *pcp;
445
446 ASSERT(MUTEX_HELD(&p->p_lock));
447
448 /*
449 * The process must be blocked against /proc to do this safely.
450 * The lwp must not disappear while the process is marked P_PR_LOCK.
451 * It is the caller's responsibility to have called prbarrier(p).
452 */
453 ASSERT(!(p->p_proc_flag & P_PR_LOCK));
454
455 vp = lep->le_trace;
456 lep->le_trace = NULL;
457 while (vp) {
458 prnotify(vp);
459 pnp = VTOP(vp);
460 pcp = pnp->pr_common;
461 ASSERT(pcp->prc_thread == NULL &&
462 (pcp->prc_flags & PRC_DESTROY));
463 pcp->prc_tslot = -1;
464 vp = pnp->pr_next;
465 pnp->pr_next = NULL;
466 }
467
468 if (p->p_trace)
469 prnotify(p->p_trace);
470 }
471
472 /*
473 * Called from a hook in exec() when a thread starts exec().
474 */
475 void
prexecstart(void)476 prexecstart(void)
477 {
478 proc_t *p = ttoproc(curthread);
479 klwp_t *lwp = ttolwp(curthread);
480
481 /*
482 * The P_PR_EXEC flag blocks /proc operations for
483 * the duration of the exec().
484 * We can't start exec() while the process is
485 * locked by /proc, so we call prbarrier().
486 * lwp_nostop keeps the process from being stopped
487 * via job control for the duration of the exec().
488 */
489
490 ASSERT(MUTEX_HELD(&p->p_lock));
491 prbarrier(p);
492 lwp->lwp_nostop++;
493 p->p_proc_flag |= P_PR_EXEC;
494 }
495
496 /*
497 * Called from a hook in exec() when a thread finishes exec().
498 * The thread may or may not have succeeded. Some other thread
499 * may have beat it to the punch.
500 */
501 void
prexecend(void)502 prexecend(void)
503 {
504 proc_t *p = ttoproc(curthread);
505 klwp_t *lwp = ttolwp(curthread);
506 vnode_t *vp;
507 prnode_t *pnp;
508 prcommon_t *pcp;
509 model_t model = p->p_model;
510 id_t tid = curthread->t_tid;
511 int tslot = curthread->t_dslot;
512
513 ASSERT(MUTEX_HELD(&p->p_lock));
514
515 lwp->lwp_nostop--;
516 if (p->p_flag & SEXITLWPS) {
517 /*
518 * We are on our way to exiting because some
519 * other thread beat us in the race to exec().
520 * Don't clear the P_PR_EXEC flag in this case.
521 */
522 return;
523 }
524
525 /*
526 * Wake up anyone waiting in /proc for the process to complete exec().
527 */
528 p->p_proc_flag &= ~P_PR_EXEC;
529 if ((vp = p->p_trace) != NULL) {
530 pcp = VTOP(vp)->pr_common;
531 mutex_enter(&pcp->prc_mutex);
532 cv_broadcast(&pcp->prc_wait);
533 mutex_exit(&pcp->prc_mutex);
534 for (; vp != NULL; vp = pnp->pr_next) {
535 pnp = VTOP(vp);
536 pnp->pr_common->prc_datamodel = model;
537 }
538 }
539 if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) {
540 /*
541 * We dealt with the process common above.
542 */
543 ASSERT(p->p_trace != NULL);
544 pcp = VTOP(vp)->pr_common;
545 mutex_enter(&pcp->prc_mutex);
546 cv_broadcast(&pcp->prc_wait);
547 mutex_exit(&pcp->prc_mutex);
548 for (; vp != NULL; vp = pnp->pr_next) {
549 pnp = VTOP(vp);
550 pcp = pnp->pr_common;
551 pcp->prc_datamodel = model;
552 pcp->prc_tid = tid;
553 pcp->prc_tslot = tslot;
554 }
555 }
556 }
557
558 /*
559 * Called from a hook in relvm() just before freeing the address space.
560 * We free all the watched areas now.
561 */
562 void
prrelvm(void)563 prrelvm(void)
564 {
565 proc_t *p = ttoproc(curthread);
566
567 mutex_enter(&p->p_lock);
568 prbarrier(p); /* block all other /proc operations */
569 if (pr_watch_active(p)) {
570 pr_free_watchpoints(p);
571 watch_disable(curthread);
572 }
573 mutex_exit(&p->p_lock);
574 pr_free_watched_pages(p);
575 }
576
577 /*
578 * Called from hooks in exec-related code when a traced process
579 * attempts to exec(2) a setuid/setgid program or an unreadable
580 * file. Rather than fail the exec we invalidate the associated
581 * /proc vnodes so that subsequent attempts to use them will fail.
582 *
583 * All /proc vnodes, except directory vnodes, are retained on a linked
584 * list (rooted at p_plist in the process structure) until last close.
585 *
586 * A controlling process must re-open the /proc files in order to
587 * regain control.
588 */
589 void
prinvalidate(struct user * up)590 prinvalidate(struct user *up)
591 {
592 kthread_t *t = curthread;
593 proc_t *p = ttoproc(t);
594 vnode_t *vp;
595 prnode_t *pnp;
596 int writers = 0;
597
598 mutex_enter(&p->p_lock);
599 prbarrier(p); /* block all other /proc operations */
600
601 /*
602 * At this moment, there can be only one lwp in the process.
603 */
604 ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0);
605
606 /*
607 * Invalidate any currently active /proc vnodes.
608 */
609 for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) {
610 pnp = VTOP(vp);
611 switch (pnp->pr_type) {
612 case PR_PSINFO: /* these files can read by anyone */
613 case PR_LPSINFO:
614 case PR_LWPSINFO:
615 case PR_LWPDIR:
616 case PR_LWPIDDIR:
617 case PR_USAGE:
618 case PR_LUSAGE:
619 case PR_LWPUSAGE:
620 break;
621 default:
622 pnp->pr_flags |= PR_INVAL;
623 break;
624 }
625 }
626 /*
627 * Wake up anyone waiting for the process or lwp.
628 * p->p_trace is guaranteed to be non-NULL if there
629 * are any open /proc files for this process.
630 */
631 if ((vp = p->p_trace) != NULL) {
632 prcommon_t *pcp = VTOP(vp)->pr_pcommon;
633
634 prnotify(vp);
635 /*
636 * Are there any writers?
637 */
638 if ((writers = pcp->prc_writers) != 0) {
639 /*
640 * Clear the exclusive open flag (old /proc interface).
641 * Set prc_selfopens equal to prc_writers so that
642 * the next O_EXCL|O_WRITE open will succeed
643 * even with existing (though invalid) writers.
644 * prclose() must decrement prc_selfopens when
645 * the invalid files are closed.
646 */
647 pcp->prc_flags &= ~PRC_EXCL;
648 ASSERT(pcp->prc_selfopens <= writers);
649 pcp->prc_selfopens = writers;
650 }
651 }
652 vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace;
653 while (vp != NULL) {
654 /*
655 * We should not invalidate the lwpiddir vnodes,
656 * but the necessities of maintaining the old
657 * ioctl()-based version of /proc require it.
658 */
659 pnp = VTOP(vp);
660 pnp->pr_flags |= PR_INVAL;
661 prnotify(vp);
662 vp = pnp->pr_next;
663 }
664
665 /*
666 * If any tracing flags are in effect and any vnodes are open for
667 * writing then set the requested-stop and run-on-last-close flags.
668 * Otherwise, clear all tracing flags.
669 */
670 t->t_proc_flag &= ~TP_PAUSE;
671 if ((p->p_proc_flag & P_PR_TRACE) && writers) {
672 t->t_proc_flag |= TP_PRSTOP;
673 aston(t); /* so ISSIG will see the flag */
674 p->p_proc_flag |= P_PR_RUNLCL;
675 } else {
676 premptyset(&up->u_entrymask); /* syscalls */
677 premptyset(&up->u_exitmask);
678 up->u_systrap = 0;
679 premptyset(&p->p_sigmask); /* signals */
680 premptyset(&p->p_fltmask); /* faults */
681 t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
682 p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE);
683 prnostep(ttolwp(t));
684 }
685
686 mutex_exit(&p->p_lock);
687 }
688
689 /*
690 * Acquire the controlled process's p_lock and mark it P_PR_LOCK.
691 * Return with pr_pidlock held in all cases.
692 * Return with p_lock held if the the process still exists.
693 * Return value is the process pointer if the process still exists, else NULL.
694 * If we lock the process, give ourself kernel priority to avoid deadlocks;
695 * this is undone in prunlock().
696 */
697 proc_t *
pr_p_lock(prnode_t * pnp)698 pr_p_lock(prnode_t *pnp)
699 {
700 proc_t *p;
701 prcommon_t *pcp;
702
703 mutex_enter(&pr_pidlock);
704 if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL)
705 return (NULL);
706 mutex_enter(&p->p_lock);
707 while (p->p_proc_flag & P_PR_LOCK) {
708 /*
709 * This cv/mutex pair is persistent even if
710 * the process disappears while we sleep.
711 */
712 kcondvar_t *cv = &pr_pid_cv[p->p_slot];
713 kmutex_t *mp = &p->p_lock;
714
715 mutex_exit(&pr_pidlock);
716 cv_wait(cv, mp);
717 mutex_exit(mp);
718 mutex_enter(&pr_pidlock);
719 if (pcp->prc_proc == NULL)
720 return (NULL);
721 ASSERT(p == pcp->prc_proc);
722 mutex_enter(&p->p_lock);
723 }
724 p->p_proc_flag |= P_PR_LOCK;
725 return (p);
726 }
727
728 /*
729 * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock.
730 * This prevents any lwp of the process from disappearing and
731 * blocks most operations that a process can perform on itself.
732 * Returns 0 on success, a non-zero error number on failure.
733 *
734 * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when
735 * the subject process is a zombie (ZYES) or fail for zombies (ZNO).
736 *
737 * error returns:
738 * ENOENT: process or lwp has disappeared or process is exiting
739 * (or has become a zombie and zdisp == ZNO).
740 * EAGAIN: procfs vnode has become invalid.
741 * EINTR: signal arrived while waiting for exec to complete.
742 */
743 int
prlock(prnode_t * pnp,int zdisp)744 prlock(prnode_t *pnp, int zdisp)
745 {
746 prcommon_t *pcp;
747 proc_t *p;
748
749 again:
750 pcp = pnp->pr_common;
751 p = pr_p_lock(pnp);
752 mutex_exit(&pr_pidlock);
753
754 /*
755 * Return ENOENT immediately if there is no process.
756 */
757 if (p == NULL)
758 return (ENOENT);
759
760 ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL);
761
762 /*
763 * Return ENOENT if process entered zombie state or is exiting
764 * and the 'zdisp' flag is set to ZNO indicating not to lock zombies.
765 */
766 if (zdisp == ZNO &&
767 ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) {
768 prunlock(pnp);
769 return (ENOENT);
770 }
771
772 /*
773 * If lwp-specific, check to see if lwp has disappeared.
774 */
775 if (pcp->prc_flags & PRC_LWP) {
776 if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) ||
777 pcp->prc_tslot == -1) {
778 prunlock(pnp);
779 return (ENOENT);
780 }
781 }
782
783 /*
784 * Return EAGAIN if we have encountered a security violation.
785 * (The process exec'd a set-id or unreadable executable file.)
786 */
787 if (pnp->pr_flags & PR_INVAL) {
788 prunlock(pnp);
789 return (EAGAIN);
790 }
791
792 /*
793 * If process is undergoing an exec(), wait for
794 * completion and then start all over again.
795 */
796 if (p->p_proc_flag & P_PR_EXEC) {
797 pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */
798 mutex_enter(&pcp->prc_mutex);
799 prunlock(pnp);
800 if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) {
801 mutex_exit(&pcp->prc_mutex);
802 return (EINTR);
803 }
804 mutex_exit(&pcp->prc_mutex);
805 goto again;
806 }
807
808 /*
809 * We return holding p->p_lock.
810 */
811 return (0);
812 }
813
814 /*
815 * Undo prlock() and pr_p_lock().
816 * p->p_lock is still held; pr_pidlock is no longer held.
817 *
818 * prunmark() drops the P_PR_LOCK flag and wakes up another thread,
819 * if any, waiting for the flag to be dropped; it retains p->p_lock.
820 *
821 * prunlock() calls prunmark() and then drops p->p_lock.
822 */
823 void
prunmark(proc_t * p)824 prunmark(proc_t *p)
825 {
826 ASSERT(p->p_proc_flag & P_PR_LOCK);
827 ASSERT(MUTEX_HELD(&p->p_lock));
828
829 cv_signal(&pr_pid_cv[p->p_slot]);
830 p->p_proc_flag &= ~P_PR_LOCK;
831 }
832
833 void
prunlock(prnode_t * pnp)834 prunlock(prnode_t *pnp)
835 {
836 prcommon_t *pcp = pnp->pr_common;
837 proc_t *p = pcp->prc_proc;
838
839 /*
840 * If we (or someone) gave it a SIGKILL, and it is not
841 * already a zombie, set it running unconditionally.
842 */
843 if ((p->p_flag & SKILLED) &&
844 !(p->p_flag & SEXITING) &&
845 !(pcp->prc_flags & PRC_DESTROY) &&
846 !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1))
847 (void) pr_setrun(pnp, 0);
848 prunmark(p);
849 mutex_exit(&p->p_lock);
850 }
851
852 /*
853 * Called while holding p->p_lock to delay until the process is unlocked.
854 * We enter holding p->p_lock; p->p_lock is dropped and reacquired.
855 * The process cannot become locked again until p->p_lock is dropped.
856 */
857 void
prbarrier(proc_t * p)858 prbarrier(proc_t *p)
859 {
860 ASSERT(MUTEX_HELD(&p->p_lock));
861
862 if (p->p_proc_flag & P_PR_LOCK) {
863 /* The process is locked; delay until not locked */
864 uint_t slot = p->p_slot;
865
866 while (p->p_proc_flag & P_PR_LOCK)
867 cv_wait(&pr_pid_cv[slot], &p->p_lock);
868 cv_signal(&pr_pid_cv[slot]);
869 }
870 }
871
872 /*
873 * Return process/lwp status.
874 * The u-block is mapped in by this routine and unmapped at the end.
875 */
876 void
prgetstatus(proc_t * p,pstatus_t * sp,zone_t * zp)877 prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp)
878 {
879 kthread_t *t;
880
881 ASSERT(MUTEX_HELD(&p->p_lock));
882
883 t = prchoose(p); /* returns locked thread */
884 ASSERT(t != NULL);
885 thread_unlock(t);
886
887 /* just bzero the process part, prgetlwpstatus() does the rest */
888 bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t));
889 sp->pr_nlwp = p->p_lwpcnt;
890 sp->pr_nzomb = p->p_zombcnt;
891 prassignset(&sp->pr_sigpend, &p->p_sig);
892 sp->pr_brkbase = (uintptr_t)p->p_brkbase;
893 sp->pr_brksize = p->p_brksize;
894 sp->pr_stkbase = (uintptr_t)prgetstackbase(p);
895 sp->pr_stksize = p->p_stksize;
896 sp->pr_pid = p->p_pid;
897 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
898 (p->p_flag & SZONETOP)) {
899 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
900 /*
901 * Inside local zones, fake zsched's pid as parent pids for
902 * processes which reference processes outside of the zone.
903 */
904 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
905 } else {
906 sp->pr_ppid = p->p_ppid;
907 }
908 sp->pr_pgid = p->p_pgrp;
909 sp->pr_sid = p->p_sessp->s_sid;
910 sp->pr_taskid = p->p_task->tk_tkid;
911 sp->pr_projid = p->p_task->tk_proj->kpj_id;
912 sp->pr_zoneid = p->p_zone->zone_id;
913 hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
914 hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
915 TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime);
916 TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime);
917 prassignset(&sp->pr_sigtrace, &p->p_sigmask);
918 prassignset(&sp->pr_flttrace, &p->p_fltmask);
919 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
920 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
921 switch (p->p_model) {
922 case DATAMODEL_ILP32:
923 sp->pr_dmodel = PR_MODEL_ILP32;
924 break;
925 case DATAMODEL_LP64:
926 sp->pr_dmodel = PR_MODEL_LP64;
927 break;
928 }
929 if (p->p_agenttp)
930 sp->pr_agentid = p->p_agenttp->t_tid;
931
932 /* get the chosen lwp's status */
933 prgetlwpstatus(t, &sp->pr_lwp, zp);
934
935 /* replicate the flags */
936 sp->pr_flags = sp->pr_lwp.pr_flags;
937 }
938
939 /*
940 * Query mask of held signals for a given thread.
941 *
942 * This makes use of schedctl_sigblock() to query if userspace has requested
943 * that all maskable signals be held. While it would be tempting to call
944 * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be
945 * done safely without the risk of racing with the thread under consideration.
946 */
947 void
prgethold(kthread_t * t,sigset_t * sp)948 prgethold(kthread_t *t, sigset_t *sp)
949 {
950 k_sigset_t set;
951
952 if (schedctl_sigblock(t)) {
953 set.__sigbits[0] = FILLSET0 & ~CANTMASK0;
954 set.__sigbits[1] = FILLSET1 & ~CANTMASK1;
955 set.__sigbits[2] = FILLSET2 & ~CANTMASK2;
956 } else {
957 set = t->t_hold;
958 }
959 sigktou(&set, sp);
960 }
961
962 #ifdef _SYSCALL32_IMPL
963 void
prgetlwpstatus32(kthread_t * t,lwpstatus32_t * sp,zone_t * zp)964 prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp)
965 {
966 proc_t *p = ttoproc(t);
967 klwp_t *lwp = ttolwp(t);
968 struct mstate *ms = &lwp->lwp_mstate;
969 hrtime_t usr, sys;
970 int flags;
971 ulong_t instr;
972
973 ASSERT(MUTEX_HELD(&p->p_lock));
974
975 bzero(sp, sizeof (*sp));
976 flags = 0L;
977 if (t->t_state == TS_STOPPED) {
978 flags |= PR_STOPPED;
979 if ((t->t_schedflag & TS_PSTART) == 0)
980 flags |= PR_ISTOP;
981 } else if (VSTOPPED(t)) {
982 flags |= PR_STOPPED|PR_ISTOP;
983 }
984 if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
985 flags |= PR_DSTOP;
986 if (lwp->lwp_asleep)
987 flags |= PR_ASLEEP;
988 if (t == p->p_agenttp)
989 flags |= PR_AGENT;
990 if (!(t->t_proc_flag & TP_TWAIT))
991 flags |= PR_DETACH;
992 if (t->t_proc_flag & TP_DAEMON)
993 flags |= PR_DAEMON;
994 if (p->p_proc_flag & P_PR_FORK)
995 flags |= PR_FORK;
996 if (p->p_proc_flag & P_PR_RUNLCL)
997 flags |= PR_RLC;
998 if (p->p_proc_flag & P_PR_KILLCL)
999 flags |= PR_KLC;
1000 if (p->p_proc_flag & P_PR_ASYNC)
1001 flags |= PR_ASYNC;
1002 if (p->p_proc_flag & P_PR_BPTADJ)
1003 flags |= PR_BPTADJ;
1004 if (p->p_proc_flag & P_PR_PTRACE)
1005 flags |= PR_PTRACE;
1006 if (p->p_flag & SMSACCT)
1007 flags |= PR_MSACCT;
1008 if (p->p_flag & SMSFORK)
1009 flags |= PR_MSFORK;
1010 if (p->p_flag & SVFWAIT)
1011 flags |= PR_VFORKP;
1012 sp->pr_flags = flags;
1013 if (VSTOPPED(t)) {
1014 sp->pr_why = PR_REQUESTED;
1015 sp->pr_what = 0;
1016 } else {
1017 sp->pr_why = t->t_whystop;
1018 sp->pr_what = t->t_whatstop;
1019 }
1020 sp->pr_lwpid = t->t_tid;
1021 sp->pr_cursig = lwp->lwp_cursig;
1022 prassignset(&sp->pr_lwppend, &t->t_sig);
1023 prgethold(t, &sp->pr_lwphold);
1024 if (t->t_whystop == PR_FAULTED) {
1025 siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info);
1026 if (t->t_whatstop == FLTPAGE)
1027 sp->pr_info.si_addr =
1028 (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr;
1029 } else if (lwp->lwp_curinfo)
1030 siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info);
1031 if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
1032 sp->pr_info.si_zoneid != zp->zone_id) {
1033 sp->pr_info.si_pid = zp->zone_zsched->p_pid;
1034 sp->pr_info.si_uid = 0;
1035 sp->pr_info.si_ctid = -1;
1036 sp->pr_info.si_zoneid = zp->zone_id;
1037 }
1038 sp->pr_altstack.ss_sp =
1039 (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp;
1040 sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size;
1041 sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags;
1042 prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
1043 sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext;
1044 sp->pr_ustack = (caddr32_t)lwp->lwp_ustack;
1045 (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
1046 sizeof (sp->pr_clname) - 1);
1047 if (flags & PR_STOPPED)
1048 hrt2ts32(t->t_stoptime, &sp->pr_tstamp);
1049 usr = ms->ms_acct[LMS_USER];
1050 sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
1051 scalehrtime(&usr);
1052 scalehrtime(&sys);
1053 hrt2ts32(usr, &sp->pr_utime);
1054 hrt2ts32(sys, &sp->pr_stime);
1055
1056 /*
1057 * Fetch the current instruction, if not a system process.
1058 * We don't attempt this unless the lwp is stopped.
1059 */
1060 if ((p->p_flag & SSYS) || p->p_as == &kas)
1061 sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
1062 else if (!(flags & PR_STOPPED))
1063 sp->pr_flags |= PR_PCINVAL;
1064 else if (!prfetchinstr(lwp, &instr))
1065 sp->pr_flags |= PR_PCINVAL;
1066 else
1067 sp->pr_instr = (uint32_t)instr;
1068
1069 /*
1070 * Drop p_lock while touching the lwp's stack.
1071 */
1072 mutex_exit(&p->p_lock);
1073 if (prisstep(lwp))
1074 sp->pr_flags |= PR_STEP;
1075 if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
1076 int i;
1077
1078 sp->pr_syscall = get_syscall32_args(lwp,
1079 (int *)sp->pr_sysarg, &i);
1080 sp->pr_nsysarg = (ushort_t)i;
1081 }
1082 if ((flags & PR_STOPPED) || t == curthread)
1083 prgetprregs32(lwp, sp->pr_reg);
1084 if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
1085 (flags & PR_VFORKP)) {
1086 long r1, r2;
1087 user_t *up;
1088 auxv_t *auxp;
1089 int i;
1090
1091 sp->pr_errno = prgetrvals(lwp, &r1, &r2);
1092 if (sp->pr_errno == 0) {
1093 sp->pr_rval1 = (int32_t)r1;
1094 sp->pr_rval2 = (int32_t)r2;
1095 sp->pr_errpriv = PRIV_NONE;
1096 } else
1097 sp->pr_errpriv = lwp->lwp_badpriv;
1098
1099 if (t->t_sysnum == SYS_execve) {
1100 up = PTOU(p);
1101 sp->pr_sysarg[0] = 0;
1102 sp->pr_sysarg[1] = (caddr32_t)up->u_argv;
1103 sp->pr_sysarg[2] = (caddr32_t)up->u_envp;
1104 for (i = 0, auxp = up->u_auxv;
1105 i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
1106 i++, auxp++) {
1107 if (auxp->a_type == AT_SUN_EXECNAME) {
1108 sp->pr_sysarg[0] =
1109 (caddr32_t)
1110 (uintptr_t)auxp->a_un.a_ptr;
1111 break;
1112 }
1113 }
1114 }
1115 }
1116 if (prhasfp())
1117 prgetprfpregs32(lwp, &sp->pr_fpreg);
1118 mutex_enter(&p->p_lock);
1119 }
1120
1121 void
prgetstatus32(proc_t * p,pstatus32_t * sp,zone_t * zp)1122 prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp)
1123 {
1124 kthread_t *t;
1125
1126 ASSERT(MUTEX_HELD(&p->p_lock));
1127
1128 t = prchoose(p); /* returns locked thread */
1129 ASSERT(t != NULL);
1130 thread_unlock(t);
1131
1132 /* just bzero the process part, prgetlwpstatus32() does the rest */
1133 bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t));
1134 sp->pr_nlwp = p->p_lwpcnt;
1135 sp->pr_nzomb = p->p_zombcnt;
1136 prassignset(&sp->pr_sigpend, &p->p_sig);
1137 sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase;
1138 sp->pr_brksize = (uint32_t)p->p_brksize;
1139 sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p);
1140 sp->pr_stksize = (uint32_t)p->p_stksize;
1141 sp->pr_pid = p->p_pid;
1142 if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
1143 (p->p_flag & SZONETOP)) {
1144 ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
1145 /*
1146 * Inside local zones, fake zsched's pid as parent pids for
1147 * processes which reference processes outside of the zone.
1148 */
1149 sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
1150 } else {
1151 sp->pr_ppid = p->p_ppid;
1152 }
1153 sp->pr_pgid = p->p_pgrp;
1154 sp->pr_sid = p->p_sessp->s_sid;
1155 sp->pr_taskid = p->p_task->tk_tkid;
1156 sp->pr_projid = p->p_task->tk_proj->kpj_id;
1157 sp->pr_zoneid = p->p_zone->zone_id;
1158 hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime);
1159 hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime);
1160 TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime);
1161 TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime);
1162 prassignset(&sp->pr_sigtrace, &p->p_sigmask);
1163 prassignset(&sp->pr_flttrace, &p->p_fltmask);
1164 prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask);
1165 prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask);
1166 switch (p->p_model) {
1167 case DATAMODEL_ILP32:
1168 sp->pr_dmodel = PR_MODEL_ILP32;
1169 break;
1170 case DATAMODEL_LP64:
1171 sp->pr_dmodel = PR_MODEL_LP64;
1172 break;
1173 }
1174 if (p->p_agenttp)
1175 sp->pr_agentid = p->p_agenttp->t_tid;
1176
1177 /* get the chosen lwp's status */
1178 prgetlwpstatus32(t, &sp->pr_lwp, zp);
1179
1180 /* replicate the flags */
1181 sp->pr_flags = sp->pr_lwp.pr_flags;
1182 }
1183 #endif /* _SYSCALL32_IMPL */
1184
/*
 * Return lwp status.
 *
 * Fills in *sp (which is zeroed first) with the status of the lwp
 * associated with thread 't'.  'zp' is the zone against which signal
 * sender information is checked; sender details that belong to a
 * different zone are masked (see below).
 *
 * The caller must hold p->p_lock.  The lock is dropped while the lwp's
 * registers, system call arguments and return values are fetched, and is
 * re-acquired before returning.
 */
void
prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp)
{
	proc_t *p = ttoproc(t);
	klwp_t *lwp = ttolwp(t);
	struct mstate *ms = &lwp->lwp_mstate;
	hrtime_t usr, sys;
	int flags;
	ulong_t instr;

	ASSERT(MUTEX_HELD(&p->p_lock));

	bzero(sp, sizeof (*sp));

	/*
	 * Build the PR_* flag word from the thread's and the process's
	 * state bits.
	 */
	flags = 0L;
	if (t->t_state == TS_STOPPED) {
		flags |= PR_STOPPED;
		if ((t->t_schedflag & TS_PSTART) == 0)
			flags |= PR_ISTOP;
	} else if (VSTOPPED(t)) {
		flags |= PR_STOPPED|PR_ISTOP;
	}
	/* PR_DSTOP is reported only when PR_ISTOP is not already set */
	if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP))
		flags |= PR_DSTOP;
	if (lwp->lwp_asleep)
		flags |= PR_ASLEEP;
	if (t == p->p_agenttp)
		flags |= PR_AGENT;
	if (!(t->t_proc_flag & TP_TWAIT))
		flags |= PR_DETACH;
	if (t->t_proc_flag & TP_DAEMON)
		flags |= PR_DAEMON;
	if (p->p_proc_flag & P_PR_FORK)
		flags |= PR_FORK;
	if (p->p_proc_flag & P_PR_RUNLCL)
		flags |= PR_RLC;
	if (p->p_proc_flag & P_PR_KILLCL)
		flags |= PR_KLC;
	if (p->p_proc_flag & P_PR_ASYNC)
		flags |= PR_ASYNC;
	if (p->p_proc_flag & P_PR_BPTADJ)
		flags |= PR_BPTADJ;
	if (p->p_proc_flag & P_PR_PTRACE)
		flags |= PR_PTRACE;
	if (p->p_flag & SMSACCT)
		flags |= PR_MSACCT;
	if (p->p_flag & SMSFORK)
		flags |= PR_MSFORK;
	if (p->p_flag & SVFWAIT)
		flags |= PR_VFORKP;
	if (p->p_pgidp->pid_pgorphaned)
		flags |= PR_ORPHAN;
	if (p->p_pidflag & CLDNOSIGCHLD)
		flags |= PR_NOSIGCHLD;
	if (p->p_pidflag & CLDWAITPID)
		flags |= PR_WAITPID;
	sp->pr_flags = flags;

	/* a VSTOPPED() lwp is always reported as stopped on PR_REQUESTED */
	if (VSTOPPED(t)) {
		sp->pr_why = PR_REQUESTED;
		sp->pr_what = 0;
	} else {
		sp->pr_why = t->t_whystop;
		sp->pr_what = t->t_whatstop;
	}
	sp->pr_lwpid = t->t_tid;
	sp->pr_cursig = lwp->lwp_cursig;
	prassignset(&sp->pr_lwppend, &t->t_sig);
	prgethold(t, &sp->pr_lwphold);

	/*
	 * Report the fault's siginfo when stopped on a fault; otherwise
	 * report the siginfo queued with the current signal, if there is
	 * one.  With neither, pr_info stays zeroed.
	 */
	if (t->t_whystop == PR_FAULTED)
		bcopy(&lwp->lwp_siginfo,
		    &sp->pr_info, sizeof (k_siginfo_t));
	else if (lwp->lwp_curinfo)
		bcopy(&lwp->lwp_curinfo->sq_info,
		    &sp->pr_info, sizeof (k_siginfo_t));

	/*
	 * For a user-originated siginfo whose zone differs from the
	 * non-global zone 'zp', replace the sender's identity with that of
	 * zp's zsched process and clear the uid and contract id.
	 */
	if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID &&
	    sp->pr_info.si_zoneid != zp->zone_id) {
		sp->pr_info.si_pid = zp->zone_zsched->p_pid;
		sp->pr_info.si_uid = 0;
		sp->pr_info.si_ctid = -1;
		sp->pr_info.si_zoneid = zp->zone_id;
	}
	sp->pr_altstack = lwp->lwp_sigaltstack;
	prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action);
	sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext;
	sp->pr_ustack = lwp->lwp_ustack;
	/*
	 * *sp was zeroed above, so copying at most size - 1 bytes leaves
	 * pr_clname NUL-terminated.
	 */
	(void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name,
	    sizeof (sp->pr_clname) - 1);
	if (flags & PR_STOPPED)
		hrt2ts(t->t_stoptime, &sp->pr_tstamp);

	/* user time, and system time which includes trap time */
	usr = ms->ms_acct[LMS_USER];
	sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP];
	scalehrtime(&usr);
	scalehrtime(&sys);
	hrt2ts(usr, &sp->pr_utime);
	hrt2ts(sys, &sp->pr_stime);

	/*
	 * Fetch the current instruction, if not a system process.
	 * We don't attempt this unless the lwp is stopped.
	 * PR_PCINVAL is set whenever no instruction is reported.
	 */
	if ((p->p_flag & SSYS) || p->p_as == &kas)
		sp->pr_flags |= (PR_ISSYS|PR_PCINVAL);
	else if (!(flags & PR_STOPPED))
		sp->pr_flags |= PR_PCINVAL;
	else if (!prfetchinstr(lwp, &instr))
		sp->pr_flags |= PR_PCINVAL;
	else
		sp->pr_instr = instr;

	/*
	 * Drop p_lock while touching the lwp's stack.
	 */
	mutex_exit(&p->p_lock);
	if (prisstep(lwp))
		sp->pr_flags |= PR_STEP;

	/* system call number and arguments, when stopped or asleep in one */
	if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) {
		int i;

		sp->pr_syscall = get_syscall_args(lwp,
		    (long *)sp->pr_sysarg, &i);
		sp->pr_nsysarg = (ushort_t)i;
	}

	/* registers are fetched only for a stopped lwp or the caller itself */
	if ((flags & PR_STOPPED) || t == curthread)
		prgetprregs(lwp, sp->pr_reg);

	/*
	 * Report system call return values when stopped on system call
	 * exit or when PR_VFORKP is set.
	 */
	if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) ||
	    (flags & PR_VFORKP)) {
		user_t *up;
		auxv_t *auxp;
		int i;

		sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2);
		if (sp->pr_errno == 0)
			sp->pr_errpriv = PRIV_NONE;
		else
			sp->pr_errpriv = lwp->lwp_badpriv;

		/*
		 * For execve, replace the argument values with the argv and
		 * envp saved in the u-area, and with the AT_SUN_EXECNAME aux
		 * vector value (0 when there is no such entry).
		 */
		if (t->t_sysnum == SYS_execve) {
			up = PTOU(p);
			sp->pr_sysarg[0] = 0;
			sp->pr_sysarg[1] = (uintptr_t)up->u_argv;
			sp->pr_sysarg[2] = (uintptr_t)up->u_envp;
			for (i = 0, auxp = up->u_auxv;
			    i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]);
			    i++, auxp++) {
				if (auxp->a_type == AT_SUN_EXECNAME) {
					sp->pr_sysarg[0] =
					    (uintptr_t)auxp->a_un.a_ptr;
					break;
				}
			}
		}
	}
	if (prhasfp())
		prgetprfpregs(lwp, &sp->pr_fpreg);

	/* re-acquire p_lock so it is held on return, as it was on entry */
	mutex_enter(&p->p_lock);
}
1343
1344 /*
1345 * Get the sigaction structure for the specified signal. The u-block
1346 * must already have been mapped in by the caller.
1347 */
1348 void
prgetaction(proc_t * p,user_t * up,uint_t sig,struct sigaction * sp)1349 prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp)
1350 {
1351 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1352
1353 bzero(sp, sizeof (*sp));
1354
1355 if (sig != 0 && (unsigned)sig < nsig) {
1356 sp->sa_handler = up->u_signal[sig-1];
1357 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1358 if (sigismember(&up->u_sigonstack, sig))
1359 sp->sa_flags |= SA_ONSTACK;
1360 if (sigismember(&up->u_sigresethand, sig))
1361 sp->sa_flags |= SA_RESETHAND;
1362 if (sigismember(&up->u_sigrestart, sig))
1363 sp->sa_flags |= SA_RESTART;
1364 if (sigismember(&p->p_siginfo, sig))
1365 sp->sa_flags |= SA_SIGINFO;
1366 if (sigismember(&up->u_signodefer, sig))
1367 sp->sa_flags |= SA_NODEFER;
1368 if (sig == SIGCLD) {
1369 if (p->p_flag & SNOWAIT)
1370 sp->sa_flags |= SA_NOCLDWAIT;
1371 if ((p->p_flag & SJCTL) == 0)
1372 sp->sa_flags |= SA_NOCLDSTOP;
1373 }
1374 }
1375 }
1376
1377 #ifdef _SYSCALL32_IMPL
1378 void
prgetaction32(proc_t * p,user_t * up,uint_t sig,struct sigaction32 * sp)1379 prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp)
1380 {
1381 int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1382
1383 bzero(sp, sizeof (*sp));
1384
1385 if (sig != 0 && (unsigned)sig < nsig) {
1386 sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1];
1387 prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]);
1388 if (sigismember(&up->u_sigonstack, sig))
1389 sp->sa_flags |= SA_ONSTACK;
1390 if (sigismember(&up->u_sigresethand, sig))
1391 sp->sa_flags |= SA_RESETHAND;
1392 if (sigismember(&up->u_sigrestart, sig))
1393 sp->sa_flags |= SA_RESTART;
1394 if (sigismember(&p->p_siginfo, sig))
1395 sp->sa_flags |= SA_SIGINFO;
1396 if (sigismember(&up->u_signodefer, sig))
1397 sp->sa_flags |= SA_NODEFER;
1398 if (sig == SIGCLD) {
1399 if (p->p_flag & SNOWAIT)
1400 sp->sa_flags |= SA_NOCLDWAIT;
1401 if ((p->p_flag & SJCTL) == 0)
1402 sp->sa_flags |= SA_NOCLDSTOP;
1403 }
1404 }
1405 }
1406 #endif /* _SYSCALL32_IMPL */
1407
1408 /*
1409 * Count the number of segments in this process's address space.
1410 */
1411 int
prnsegs(struct as * as,int reserved)1412 prnsegs(struct as *as, int reserved)
1413 {
1414 int n = 0;
1415 struct seg *seg;
1416
1417 ASSERT(as != &kas && AS_WRITE_HELD(as));
1418
1419 for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
1420 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1421 caddr_t saddr, naddr;
1422 void *tmp = NULL;
1423
1424 if ((seg->s_flags & S_HOLE) != 0) {
1425 continue;
1426 }
1427
1428 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1429 (void) pr_getprot(seg, reserved, &tmp,
1430 &saddr, &naddr, eaddr);
1431 if (saddr != naddr)
1432 n++;
1433 }
1434
1435 ASSERT(tmp == NULL);
1436 }
1437
1438 return (n);
1439 }
1440
/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 *
 * The digits are always written in full, whatever 'len' is, and no
 * terminating null is written unless 'len' asks for padding.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char *limit = s + len;
	uint32_t rest;
	int ndigits = 1;
	int i;

	/* count the digits first; zero still takes one digit */
	for (rest = n / 10; rest != 0; rest /= 10)
		ndigits++;

	/* fill the digits in place, least significant digit last */
	for (i = ndigits - 1; i >= 0; i--) {
		s[i] = (char)(n % 10 + '0');
		n /= 10;
	}

	/* optional pad */
	for (s += ndigits; s < limit; s++)
		*s = '\0';

	return (ndigits);
}
1469
/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 *
 * No terminating null character is written; the caller appends whatever
 * should follow the digits.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	uint64_t rest;
	int ndigits = 1;
	int i;

	/* count the digits first; zero still takes one digit */
	for (rest = n / 10; rest != 0; rest /= 10)
		ndigits++;

	/* fill the digits in place, least significant digit last */
	for (i = ndigits - 1; i >= 0; i--) {
		s[i] = (char)(n % 10 + '0');
		n /= 10;
	}

	return (ndigits);
}
1494
1495 file_t *
pr_getf(proc_t * p,uint_t fd,short * flag)1496 pr_getf(proc_t *p, uint_t fd, short *flag)
1497 {
1498 uf_entry_t *ufp;
1499 uf_info_t *fip;
1500 file_t *fp;
1501
1502 ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
1503
1504 fip = P_FINFO(p);
1505
1506 if (fd >= fip->fi_nfiles)
1507 return (NULL);
1508
1509 mutex_exit(&p->p_lock);
1510 mutex_enter(&fip->fi_lock);
1511 UF_ENTER(ufp, fip, fd);
1512 if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
1513 if (flag != NULL)
1514 *flag = ufp->uf_flag;
1515 ufp->uf_refcnt++;
1516 } else {
1517 fp = NULL;
1518 }
1519 UF_EXIT(ufp);
1520 mutex_exit(&fip->fi_lock);
1521 mutex_enter(&p->p_lock);
1522
1523 return (fp);
1524 }
1525
1526 void
pr_releasef(proc_t * p,uint_t fd)1527 pr_releasef(proc_t *p, uint_t fd)
1528 {
1529 uf_entry_t *ufp;
1530 uf_info_t *fip;
1531
1532 ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
1533
1534 fip = P_FINFO(p);
1535
1536 mutex_exit(&p->p_lock);
1537 mutex_enter(&fip->fi_lock);
1538 UF_ENTER(ufp, fip, fd);
1539 ASSERT3U(ufp->uf_refcnt, >, 0);
1540 ufp->uf_refcnt--;
1541 UF_EXIT(ufp);
1542 mutex_exit(&fip->fi_lock);
1543 mutex_enter(&p->p_lock);
1544 }
1545
1546 void
pr_object_name(char * name,vnode_t * vp,struct vattr * vattr)1547 pr_object_name(char *name, vnode_t *vp, struct vattr *vattr)
1548 {
1549 char *s = name;
1550 struct vfs *vfsp;
1551 struct vfssw *vfsswp;
1552
1553 if ((vfsp = vp->v_vfsp) != NULL &&
1554 ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) &&
1555 *vfsswp->vsw_name) {
1556 (void) strcpy(s, vfsswp->vsw_name);
1557 s += strlen(s);
1558 *s++ = '.';
1559 }
1560 s += pr_u32tos(getmajor(vattr->va_fsid), s, 0);
1561 *s++ = '.';
1562 s += pr_u32tos(getminor(vattr->va_fsid), s, 0);
1563 *s++ = '.';
1564 s += pr_u64tos(vattr->va_nodeid, s);
1565 *s++ = '\0';
1566 }
1567
1568 struct seg *
break_seg(proc_t * p)1569 break_seg(proc_t *p)
1570 {
1571 caddr_t addr = p->p_brkbase;
1572 struct seg *seg;
1573 struct vnode *vp;
1574
1575 if (p->p_brksize != 0)
1576 addr += p->p_brksize - 1;
1577 seg = as_segat(p->p_as, addr);
1578 if (seg != NULL && seg->s_ops == &segvn_ops &&
1579 (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL))
1580 return (seg);
1581 return (NULL);
1582 }
1583
/*
 * Implementation of service functions to handle procfs generic chained
 * copyout buffers.
 *
 * A chain is a list of kmem allocations.  Each allocation starts with a
 * piol_t header and the data area follows it directly; piol_size is the
 * size of the whole allocation and piol_usedsize is the number of data
 * bytes handed out from it so far.
 */
typedef struct pr_iobuf_list {
	list_node_t	piol_link;	/* buffer linkage */
	size_t		piol_size;	/* total size (header + data) */
	size_t		piol_usedsize;	/* amount to copy out from this buf */
} piol_t;

/*
 * MAPSIZE is the size of every buffer pr_iol_newbuf() adds to a chain
 * and the upper bound on the initial buffer of pr_iol_initlist().
 * PIOL_DATABUF() yields the address of a buffer's data area, i.e. the
 * first byte past its piol_t header.
 */
#define	MAPSIZE	(64 * 1024)
#define	PIOL_DATABUF(iol)	((void *)(&(iol)[1]))
1597 void
pr_iol_initlist(list_t * iolhead,size_t itemsize,int n)1598 pr_iol_initlist(list_t *iolhead, size_t itemsize, int n)
1599 {
1600 piol_t *iol;
1601 size_t initial_size = MIN(1, n) * itemsize;
1602
1603 list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link));
1604
1605 ASSERT(list_head(iolhead) == NULL);
1606 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1607 ASSERT(initial_size > 0);
1608
1609 /*
1610 * Someone creating chained copyout buffers may ask for less than
1611 * MAPSIZE if the amount of data to be buffered is known to be
1612 * smaller than that.
1613 * But in order to prevent involuntary self-denial of service,
1614 * the requested input size is clamped at MAPSIZE.
1615 */
1616 initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol));
1617 iol = kmem_alloc(initial_size, KM_SLEEP);
1618 list_insert_head(iolhead, iol);
1619 iol->piol_usedsize = 0;
1620 iol->piol_size = initial_size;
1621 }
1622
1623 void *
pr_iol_newbuf(list_t * iolhead,size_t itemsize)1624 pr_iol_newbuf(list_t *iolhead, size_t itemsize)
1625 {
1626 piol_t *iol;
1627 char *new;
1628
1629 ASSERT(itemsize < MAPSIZE - sizeof (*iol));
1630 ASSERT(list_head(iolhead) != NULL);
1631
1632 iol = (piol_t *)list_tail(iolhead);
1633
1634 if (iol->piol_size <
1635 iol->piol_usedsize + sizeof (*iol) + itemsize) {
1636 /*
1637 * Out of space in the current buffer. Allocate more.
1638 */
1639 piol_t *newiol;
1640
1641 newiol = kmem_alloc(MAPSIZE, KM_SLEEP);
1642 newiol->piol_size = MAPSIZE;
1643 newiol->piol_usedsize = 0;
1644
1645 list_insert_after(iolhead, iol, newiol);
1646 iol = list_next(iolhead, iol);
1647 ASSERT(iol == newiol);
1648 }
1649 new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize;
1650 iol->piol_usedsize += itemsize;
1651 bzero(new, itemsize);
1652 return (new);
1653 }
1654
1655 void
pr_iol_freelist(list_t * iolhead)1656 pr_iol_freelist(list_t *iolhead)
1657 {
1658 piol_t *iol;
1659
1660 while ((iol = list_head(iolhead)) != NULL) {
1661 list_remove(iolhead, iol);
1662 kmem_free(iol, iol->piol_size);
1663 }
1664 list_destroy(iolhead);
1665 }
1666
1667 int
pr_iol_copyout_and_free(list_t * iolhead,caddr_t * tgt,int errin)1668 pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin)
1669 {
1670 int error = errin;
1671 piol_t *iol;
1672
1673 while ((iol = list_head(iolhead)) != NULL) {
1674 list_remove(iolhead, iol);
1675 if (!error) {
1676 if (copyout(PIOL_DATABUF(iol), *tgt,
1677 iol->piol_usedsize))
1678 error = EFAULT;
1679 *tgt += iol->piol_usedsize;
1680 }
1681 kmem_free(iol, iol->piol_size);
1682 }
1683 list_destroy(iolhead);
1684
1685 return (error);
1686 }
1687
1688 int
pr_iol_uiomove_and_free(list_t * iolhead,uio_t * uiop,int errin)1689 pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin)
1690 {
1691 offset_t off = uiop->uio_offset;
1692 char *base;
1693 size_t size;
1694 piol_t *iol;
1695 int error = errin;
1696
1697 while ((iol = list_head(iolhead)) != NULL) {
1698 list_remove(iolhead, iol);
1699 base = PIOL_DATABUF(iol);
1700 size = iol->piol_usedsize;
1701 if (off <= size && error == 0 && uiop->uio_resid > 0)
1702 error = uiomove(base + off, size - off,
1703 UIO_READ, uiop);
1704 off = MAX(0, off - (offset_t)size);
1705 kmem_free(iol, iol->piol_size);
1706 }
1707 list_destroy(iolhead);
1708
1709 return (error);
1710 }
1711
1712 /*
1713 * Return an array of structures with memory map information.
1714 * We allocate here; the caller must deallocate.
1715 */
1716 int
prgetmap(proc_t * p,int reserved,list_t * iolhead)1717 prgetmap(proc_t *p, int reserved, list_t *iolhead)
1718 {
1719 struct as *as = p->p_as;
1720 prmap_t *mp;
1721 struct seg *seg;
1722 struct seg *brkseg, *stkseg;
1723 struct vnode *vp;
1724 struct vattr vattr;
1725 uint_t prot;
1726
1727 ASSERT(as != &kas && AS_WRITE_HELD(as));
1728
1729 /*
1730 * Request an initial buffer size that doesn't waste memory
1731 * if the address space has only a small number of segments.
1732 */
1733 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1734
1735 if ((seg = AS_SEGFIRST(as)) == NULL)
1736 return (0);
1737
1738 brkseg = break_seg(p);
1739 stkseg = as_segat(as, prgetstackbase(p));
1740
1741 do {
1742 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1743 caddr_t saddr, naddr;
1744 void *tmp = NULL;
1745
1746 if ((seg->s_flags & S_HOLE) != 0) {
1747 continue;
1748 }
1749
1750 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1751 prot = pr_getprot(seg, reserved, &tmp,
1752 &saddr, &naddr, eaddr);
1753 if (saddr == naddr)
1754 continue;
1755
1756 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1757
1758 mp->pr_vaddr = (uintptr_t)saddr;
1759 mp->pr_size = naddr - saddr;
1760 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1761 mp->pr_mflags = 0;
1762 if (prot & PROT_READ)
1763 mp->pr_mflags |= MA_READ;
1764 if (prot & PROT_WRITE)
1765 mp->pr_mflags |= MA_WRITE;
1766 if (prot & PROT_EXEC)
1767 mp->pr_mflags |= MA_EXEC;
1768 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1769 mp->pr_mflags |= MA_SHARED;
1770 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1771 mp->pr_mflags |= MA_NORESERVE;
1772 if (seg->s_ops == &segspt_shmops ||
1773 (seg->s_ops == &segvn_ops &&
1774 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1775 mp->pr_mflags |= MA_ANON;
1776 if (seg == brkseg)
1777 mp->pr_mflags |= MA_BREAK;
1778 else if (seg == stkseg) {
1779 mp->pr_mflags |= MA_STACK;
1780 if (reserved) {
1781 size_t maxstack =
1782 ((size_t)p->p_stk_ctl +
1783 PAGEOFFSET) & PAGEMASK;
1784 mp->pr_vaddr =
1785 (uintptr_t)prgetstackbase(p) +
1786 p->p_stksize - maxstack;
1787 mp->pr_size = (uintptr_t)naddr -
1788 mp->pr_vaddr;
1789 }
1790 }
1791 if (seg->s_ops == &segspt_shmops)
1792 mp->pr_mflags |= MA_ISM | MA_SHM;
1793 mp->pr_pagesize = PAGESIZE;
1794
1795 /*
1796 * Manufacture a filename for the "object" directory.
1797 */
1798 vattr.va_mask = AT_FSID|AT_NODEID;
1799 if (seg->s_ops == &segvn_ops &&
1800 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1801 vp != NULL && vp->v_type == VREG &&
1802 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1803 if (vp == p->p_exec)
1804 (void) strcpy(mp->pr_mapname, "a.out");
1805 else
1806 pr_object_name(mp->pr_mapname,
1807 vp, &vattr);
1808 }
1809
1810 /*
1811 * Get the SysV shared memory id, if any.
1812 */
1813 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1814 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1815 SHMID_NONE) {
1816 if (mp->pr_shmid == SHMID_FREE)
1817 mp->pr_shmid = -1;
1818
1819 mp->pr_mflags |= MA_SHM;
1820 } else {
1821 mp->pr_shmid = -1;
1822 }
1823 }
1824 ASSERT(tmp == NULL);
1825 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1826
1827 return (0);
1828 }
1829
1830 #ifdef _SYSCALL32_IMPL
1831 int
prgetmap32(proc_t * p,int reserved,list_t * iolhead)1832 prgetmap32(proc_t *p, int reserved, list_t *iolhead)
1833 {
1834 struct as *as = p->p_as;
1835 prmap32_t *mp;
1836 struct seg *seg;
1837 struct seg *brkseg, *stkseg;
1838 struct vnode *vp;
1839 struct vattr vattr;
1840 uint_t prot;
1841
1842 ASSERT(as != &kas && AS_WRITE_HELD(as));
1843
1844 /*
1845 * Request an initial buffer size that doesn't waste memory
1846 * if the address space has only a small number of segments.
1847 */
1848 pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree));
1849
1850 if ((seg = AS_SEGFIRST(as)) == NULL)
1851 return (0);
1852
1853 brkseg = break_seg(p);
1854 stkseg = as_segat(as, prgetstackbase(p));
1855
1856 do {
1857 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
1858 caddr_t saddr, naddr;
1859 void *tmp = NULL;
1860
1861 if ((seg->s_flags & S_HOLE) != 0) {
1862 continue;
1863 }
1864
1865 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1866 prot = pr_getprot(seg, reserved, &tmp,
1867 &saddr, &naddr, eaddr);
1868 if (saddr == naddr)
1869 continue;
1870
1871 mp = pr_iol_newbuf(iolhead, sizeof (*mp));
1872
1873 mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
1874 mp->pr_size = (size32_t)(naddr - saddr);
1875 mp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
1876 mp->pr_mflags = 0;
1877 if (prot & PROT_READ)
1878 mp->pr_mflags |= MA_READ;
1879 if (prot & PROT_WRITE)
1880 mp->pr_mflags |= MA_WRITE;
1881 if (prot & PROT_EXEC)
1882 mp->pr_mflags |= MA_EXEC;
1883 if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
1884 mp->pr_mflags |= MA_SHARED;
1885 if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
1886 mp->pr_mflags |= MA_NORESERVE;
1887 if (seg->s_ops == &segspt_shmops ||
1888 (seg->s_ops == &segvn_ops &&
1889 (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
1890 mp->pr_mflags |= MA_ANON;
1891 if (seg == brkseg)
1892 mp->pr_mflags |= MA_BREAK;
1893 else if (seg == stkseg) {
1894 mp->pr_mflags |= MA_STACK;
1895 if (reserved) {
1896 size_t maxstack =
1897 ((size_t)p->p_stk_ctl +
1898 PAGEOFFSET) & PAGEMASK;
1899 uintptr_t vaddr =
1900 (uintptr_t)prgetstackbase(p) +
1901 p->p_stksize - maxstack;
1902 mp->pr_vaddr = (caddr32_t)vaddr;
1903 mp->pr_size = (size32_t)
1904 ((uintptr_t)naddr - vaddr);
1905 }
1906 }
1907 if (seg->s_ops == &segspt_shmops)
1908 mp->pr_mflags |= MA_ISM | MA_SHM;
1909 mp->pr_pagesize = PAGESIZE;
1910
1911 /*
1912 * Manufacture a filename for the "object" directory.
1913 */
1914 vattr.va_mask = AT_FSID|AT_NODEID;
1915 if (seg->s_ops == &segvn_ops &&
1916 SEGOP_GETVP(seg, saddr, &vp) == 0 &&
1917 vp != NULL && vp->v_type == VREG &&
1918 VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
1919 if (vp == p->p_exec)
1920 (void) strcpy(mp->pr_mapname, "a.out");
1921 else
1922 pr_object_name(mp->pr_mapname,
1923 vp, &vattr);
1924 }
1925
1926 /*
1927 * Get the SysV shared memory id, if any.
1928 */
1929 if ((mp->pr_mflags & MA_SHARED) && p->p_segacct &&
1930 (mp->pr_shmid = shmgetid(p, seg->s_base)) !=
1931 SHMID_NONE) {
1932 if (mp->pr_shmid == SHMID_FREE)
1933 mp->pr_shmid = -1;
1934
1935 mp->pr_mflags |= MA_SHM;
1936 } else {
1937 mp->pr_shmid = -1;
1938 }
1939 }
1940 ASSERT(tmp == NULL);
1941 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1942
1943 return (0);
1944 }
1945 #endif /* _SYSCALL32_IMPL */
1946
1947 /*
1948 * Return the size of the /proc page data file.
1949 */
1950 size_t
prpdsize(struct as * as)1951 prpdsize(struct as *as)
1952 {
1953 struct seg *seg;
1954 size_t size;
1955
1956 ASSERT(as != &kas && AS_WRITE_HELD(as));
1957
1958 if ((seg = AS_SEGFIRST(as)) == NULL)
1959 return (0);
1960
1961 size = sizeof (prpageheader_t);
1962 do {
1963 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1964 caddr_t saddr, naddr;
1965 void *tmp = NULL;
1966 size_t npage;
1967
1968 if ((seg->s_flags & S_HOLE) != 0) {
1969 continue;
1970 }
1971
1972 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
1973 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
1974 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
1975 size += sizeof (prasmap_t) + round8(npage);
1976 }
1977 ASSERT(tmp == NULL);
1978 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
1979
1980 return (size);
1981 }
1982
1983 #ifdef _SYSCALL32_IMPL
1984 size_t
prpdsize32(struct as * as)1985 prpdsize32(struct as *as)
1986 {
1987 struct seg *seg;
1988 size_t size;
1989
1990 ASSERT(as != &kas && AS_WRITE_HELD(as));
1991
1992 if ((seg = AS_SEGFIRST(as)) == NULL)
1993 return (0);
1994
1995 size = sizeof (prpageheader32_t);
1996 do {
1997 caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
1998 caddr_t saddr, naddr;
1999 void *tmp = NULL;
2000 size_t npage;
2001
2002 if ((seg->s_flags & S_HOLE) != 0) {
2003 continue;
2004 }
2005
2006 for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
2007 (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
2008 if ((npage = (naddr - saddr) / PAGESIZE) != 0)
2009 size += sizeof (prasmap32_t) + round8(npage);
2010 }
2011 ASSERT(tmp == NULL);
2012 } while ((seg = AS_SEGNEXT(as, seg)) != NULL);
2013
2014 return (size);
2015 }
2016 #endif /* _SYSCALL32_IMPL */
2017
/*
 * Read page data information.
 *
 * Builds, in a temporary kernel buffer, a prpageheader_t followed by one
 * prasmap_t per mapping, each followed by that mapping's per-page data
 * from hat_getstat(), and moves the result to 'uiop'.
 *
 * Returns 0 (moving nothing) when the address space has no segments,
 * E2BIG when uiop has less room than prpdsize() computes, EINTR when a
 * retry is needed while a signal is pending, and otherwise the result
 * of uiomove().
 *
 * The address space lock is taken as writer here and is released before
 * returning.
 */
int
prpdread(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader_t *php;
	prasmap_t *pmp;
	struct seg *seg;
	int error;

again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	/* the caller must have room for the whole file in a single read */
	size = prpdsize(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	/* header first; the per-mapping entries follow it */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader_t *)buf;
	pmp = (prasmap_t *)(buf + sizeof (prpageheader_t));

	hrt2ts(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/* where the entry after this one would start */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry.  If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (uintptr_t)saddr;
			pmp->pr_npage = npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			/* protections and mapping type */
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 * pr_mapname is left as zeroed by kmem_zalloc() for
			 * mappings that are not regular files.
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * SHMID_FREE is reported as -1, but the mapping is
			 * still flagged MA_SHM.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* the per-page data goes right after this entry */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	/* move only what was filled in, which may be less than 'size' */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2170
2171 #ifdef _SYSCALL32_IMPL
/*
 * 32-bit (_SYSCALL32_IMPL) page-data read.
 *
 * Builds, in a temporary kernel buffer, a prpageheader32_t followed by one
 * prasmap32_t per mapping of the process address space; each prasmap32_t is
 * followed by the per-page data obtained from hat_getstat(), padded with
 * round8().  The assembled image is then copied out through 'uiop'.
 *
 *	p	process whose address space (p->p_as) is examined
 *	hatid	passed through unchanged to hat_getstat()
 *	uiop	destination of the copy-out
 *
 * Returns 0 if the address space has no segments (nothing is copied out),
 * E2BIG if uiop->uio_resid is smaller than the size computed by
 * prpdsize32(), EINTR if a signal is pending when a retry becomes necessary,
 * and otherwise the result of uiomove().
 */
int
prpdread32(proc_t *p, uint_t hatid, struct uio *uiop)
{
	struct as *as = p->p_as;
	caddr_t buf;
	size_t size;
	prpageheader32_t *php;
	prasmap32_t *pmp;
	struct seg *seg;
	int error;

	/* Restart point used when the buffer turns out to be too small. */
again:
	AS_LOCK_ENTER(as, RW_WRITER);

	if ((seg = AS_SEGFIRST(as)) == NULL) {
		AS_LOCK_EXIT(as);
		return (0);
	}
	size = prpdsize32(as);
	if (uiop->uio_resid < size) {
		AS_LOCK_EXIT(as);
		return (E2BIG);
	}

	/* The header sits at the start of buf; map entries follow it. */
	buf = kmem_zalloc(size, KM_SLEEP);
	php = (prpageheader32_t *)buf;
	pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t));

	hrt2ts32(gethrtime(), &php->pr_tstamp);
	php->pr_nmap = 0;
	php->pr_npage = 0;
	do {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		/*
		 * Segments flagged S_HOLE are not reported.  The 'continue'
		 * jumps to the loop condition, which advances to the next
		 * segment.
		 */
		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			struct vnode *vp;
			struct vattr vattr;
			size_t len;
			size_t npage;
			uint_t prot;
			uintptr_t next;

			prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr);
			if ((len = (size_t)(naddr - saddr)) == 0)
				continue;
			npage = len / PAGESIZE;
			/*
			 * Address just past this entry: the prasmap32_t plus
			 * one byte of page data per page, padded by round8().
			 */
			next = (uintptr_t)(pmp + 1) + round8(npage);
			/*
			 * It's possible that the address space can change
			 * subtly even though we're holding as->a_lock
			 * due to the nondeterminism of page_exists() in
			 * the presence of asynchronously flushed pages or
			 * mapped files whose sizes are changing.
			 * page_exists() may be called indirectly from
			 * pr_getprot() by a SEGOP_INCORE() routine.
			 * If this happens we need to make sure we don't
			 * overrun the buffer whose size we computed based
			 * on the initial iteration through the segments.
			 * Once we've detected an overflow, we need to clean
			 * up the temporary memory allocated in pr_getprot()
			 * and retry. If there's a pending signal, we return
			 * EINTR so that this thread can be dislodged if
			 * a latent bug causes us to spin indefinitely.
			 */
			if (next > (uintptr_t)buf + size) {
				pr_getprot_done(&tmp);
				AS_LOCK_EXIT(as);

				kmem_free(buf, size);

				if (ISSIG(curthread, JUSTLOOKING))
					return (EINTR);

				goto again;
			}

			php->pr_nmap++;
			php->pr_npage += npage;
			pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr;
			pmp->pr_npage = (size32_t)npage;
			pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr);
			/* Translate protections and mapping type to MA_* flags. */
			pmp->pr_mflags = 0;
			if (prot & PROT_READ)
				pmp->pr_mflags |= MA_READ;
			if (prot & PROT_WRITE)
				pmp->pr_mflags |= MA_WRITE;
			if (prot & PROT_EXEC)
				pmp->pr_mflags |= MA_EXEC;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED)
				pmp->pr_mflags |= MA_SHARED;
			if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE)
				pmp->pr_mflags |= MA_NORESERVE;
			/*
			 * MA_ANON: a segspt_shmops segment, or a segvn
			 * segment for which no vnode can be obtained.
			 */
			if (seg->s_ops == &segspt_shmops ||
			    (seg->s_ops == &segvn_ops &&
			    (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL)))
				pmp->pr_mflags |= MA_ANON;
			if (seg->s_ops == &segspt_shmops)
				pmp->pr_mflags |= MA_ISM | MA_SHM;
			pmp->pr_pagesize = PAGESIZE;
			/*
			 * Manufacture a filename for the "object" directory.
			 * Only regular files backing a segvn segment get a
			 * name; otherwise pr_mapname keeps the zero fill from
			 * kmem_zalloc().
			 */
			vattr.va_mask = AT_FSID|AT_NODEID;
			if (seg->s_ops == &segvn_ops &&
			    SEGOP_GETVP(seg, saddr, &vp) == 0 &&
			    vp != NULL && vp->v_type == VREG &&
			    VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) {
				if (vp == p->p_exec)
					(void) strcpy(pmp->pr_mapname, "a.out");
				else
					pr_object_name(pmp->pr_mapname,
					    vp, &vattr);
			}

			/*
			 * Get the SysV shared memory id, if any.
			 * SHMID_FREE is reported as -1 but the mapping is
			 * still flagged MA_SHM.
			 */
			if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct &&
			    (pmp->pr_shmid = shmgetid(p, seg->s_base)) !=
			    SHMID_NONE) {
				if (pmp->pr_shmid == SHMID_FREE)
					pmp->pr_shmid = -1;

				pmp->pr_mflags |= MA_SHM;
			} else {
				pmp->pr_shmid = -1;
			}

			/* Per-page data lands directly after the map entry. */
			hat_getstat(as, saddr, len, hatid,
			    (char *)(pmp + 1), HAT_SYNC_ZERORM);
			pmp = (prasmap32_t *)next;
		}
		ASSERT(tmp == NULL);
	} while ((seg = AS_SEGNEXT(as, seg)) != NULL);

	AS_LOCK_EXIT(as);

	/* Copy out only the portion of the buffer that was filled in. */
	ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size);
	error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop);
	kmem_free(buf, size);

	return (error);
}
2321 #endif /* _SYSCALL32_IMPL */
2322
2323 ushort_t
prgetpctcpu(uint64_t pct)2324 prgetpctcpu(uint64_t pct)
2325 {
2326 /*
2327 * The value returned will be relevant in the zone of the examiner,
2328 * which may not be the same as the zone which performed the procfs
2329 * mount.
2330 */
2331 int nonline = zone_ncpus_online_get(curproc->p_zone);
2332
2333 /*
2334 * Prorate over online cpus so we don't exceed 100%
2335 */
2336 if (nonline > 1)
2337 pct /= nonline;
2338 pct >>= 16; /* convert to 16-bit scaled integer */
2339 if (pct > 0x8000) /* might happen, due to rounding */
2340 pct = 0x8000;
2341 return ((ushort_t)pct);
2342 }
2343
/*
 * Return information used by ps(1).
 *
 * Fills in *psp for process 'p'.  The caller must hold p->p_lock (asserted
 * below).  Note that for a non-system process with a representative lwp,
 * p->p_lock is dropped and re-acquired while the address-space lock is held
 * to gather the size statistics.
 *
 * If prchoose() finds no lwp, the process is reported as a zombie: the
 * whole structure is zeroed and only the zombie-specific lwp fields are set.
 */
void
prgetpsinfo(proc_t *p, psinfo_t *psp)
{
	kthread_t *t;
	struct cred *cred;
	hrtime_t hrutime, hrstime;

	ASSERT(MUTEX_HELD(&p->p_lock));

	/*
	 * With a representative lwp, pr_lwp is filled in later by
	 * prgetlwpsinfo(), so only the part of the structure preceding it is
	 * zeroed here (assumes pr_lwp is the trailing member of psinfo_t --
	 * TODO confirm against the structure definition).
	 */
	if ((t = prchoose(p)) == NULL)	/* returns locked thread */
		bzero(psp, sizeof (*psp));
	else {
		thread_unlock(t);
		bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp));
	}

	/*
	 * only export SSYS and SMSACCT; everything else is off-limits to
	 * userland apps.
	 */
	psp->pr_flag = p->p_flag & (SSYS | SMSACCT);
	psp->pr_nlwp = p->p_lwpcnt;
	psp->pr_nzomb = p->p_zombcnt;
	/* Credentials are sampled under p_crlock. */
	mutex_enter(&p->p_crlock);
	cred = p->p_cred;
	psp->pr_uid = crgetruid(cred);
	psp->pr_euid = crgetuid(cred);
	psp->pr_gid = crgetrgid(cred);
	psp->pr_egid = crgetgid(cred);
	mutex_exit(&p->p_crlock);
	psp->pr_pid = p->p_pid;
	if (curproc->p_zone->zone_id != GLOBAL_ZONEID &&
	    (p->p_flag & SZONETOP)) {
		ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID);
		/*
		 * Inside local zones, fake zsched's pid as parent pids for
		 * processes which reference processes outside of the zone.
		 */
		psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid;
	} else {
		psp->pr_ppid = p->p_ppid;
	}
	psp->pr_pgid = p->p_pgrp;
	psp->pr_sid = p->p_sessp->s_sid;
	psp->pr_taskid = p->p_task->tk_tkid;
	psp->pr_projid = p->p_task->tk_proj->kpj_id;
	psp->pr_poolid = p->p_pool->pool_id;
	psp->pr_zoneid = p->p_zone->zone_id;
	/* A contract id of 0 is reported as -1. */
	if ((psp->pr_contract = PRCTID(p)) == 0)
		psp->pr_contract = -1;
	psp->pr_addr = (uintptr_t)prgetpsaddr(p);
	/* Any other data model leaves pr_dmodel at its zeroed value. */
	switch (p->p_model) {
	case DATAMODEL_ILP32:
		psp->pr_dmodel = PR_MODEL_ILP32;
		break;
	case DATAMODEL_LP64:
		psp->pr_dmodel = PR_MODEL_LP64;
		break;
	}
	/* pr_time is the sum of the aggregated user and system times. */
	hrutime = mstate_aggr_state(p, LMS_USER);
	hrstime = mstate_aggr_state(p, LMS_SYSTEM);
	hrt2ts((hrutime + hrstime), &psp->pr_time);
	TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime);

	if (t == NULL) {
		/* No lwp was found: report the process as a zombie. */
		int wcode = p->p_wcode;		/* must be atomic read */

		if (wcode)
			psp->pr_wstat = wstat(wcode, p->p_wdata);
		psp->pr_ttydev = PRNODEV;
		psp->pr_lwp.pr_state = SZOMB;
		psp->pr_lwp.pr_sname = 'Z';
		psp->pr_lwp.pr_bindpro = PBIND_NONE;
		psp->pr_lwp.pr_bindpset = PS_NONE;
	} else {
		user_t *up = PTOU(p);
		struct as *as;
		dev_t d;
		extern dev_t rwsconsdev, rconsdev, uconsdev;

		d = cttydev(p);
		/*
		 * If the controlling terminal is the real
		 * or workstation console device, map to what the
		 * user thinks is the console device. Handle case when
		 * rwsconsdev or rconsdev is set to NODEV for Starfire.
		 */
		if ((d == rwsconsdev || d == rconsdev) && d != NODEV)
			d = uconsdev;
		psp->pr_ttydev = (d == NODEV) ? PRNODEV : d;
		psp->pr_start = up->u_start;
		/*
		 * The copy lengths stop one byte short of the destination
		 * sizes, so the final byte keeps the zero written by the
		 * bzero() above.
		 */
		bcopy(up->u_comm, psp->pr_fname,
		    MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1));
		bcopy(up->u_psargs, psp->pr_psargs,
		    MIN(PRARGSZ-1, PSARGSZ));
		psp->pr_argc = up->u_argc;
		psp->pr_argv = up->u_argv;
		psp->pr_envp = up->u_envp;

		/* get the chosen lwp's lwpsinfo */
		prgetlwpsinfo(t, &psp->pr_lwp);

		/* compute %cpu for the process */
		if (p->p_lwpcnt == 1)
			psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu;
		else {
			uint64_t pct = 0;
			hrtime_t cur_time = gethrtime_unscaled();

			/*
			 * Sum the contribution of every thread on the
			 * circular p_tlist; 't' is reused as the cursor.
			 */
			t = p->p_tlist;
			do {
				pct += cpu_update_pct(t, cur_time);
			} while ((t = t->t_forw) != p->p_tlist);

			psp->pr_pctcpu = prgetpctcpu(pct);
		}
		if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) {
			/* System processes report no size or resident set. */
			psp->pr_size = 0;
			psp->pr_rssize = 0;
		} else {
			/*
			 * p_lock is released before the address-space lock
			 * is taken and re-acquired once it has been dropped.
			 * Sizes are page counts multiplied by the number of
			 * kilobytes per page.
			 */
			mutex_exit(&p->p_lock);
			AS_LOCK_ENTER(as, RW_READER);
			psp->pr_size = btopr(as->a_resvsize) *
			    (PAGESIZE / 1024);
			psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024);
			psp->pr_pctmem = rm_pctmemory(as);
			AS_LOCK_EXIT(as);
			mutex_enter(&p->p_lock);
		}
	}
}
2478
2479 static size_t
prfdinfomisc(list_t * data,uint_t type,const void * val,size_t vlen)2480 prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen)
2481 {
2482 pr_misc_header_t *misc;
2483 size_t len;
2484
2485 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2486
2487 if (data != NULL) {
2488 misc = pr_iol_newbuf(data, len);
2489 misc->pr_misc_type = type;
2490 misc->pr_misc_size = len;
2491 misc++;
2492 bcopy((char *)val, (char *)misc, vlen);
2493 }
2494
2495 return (len);
2496 }
2497
2498 /*
2499 * There's no elegant way to determine if a character device
2500 * supports TLI, so just check a hardcoded list of known TLI
2501 * devices.
2502 */
2503
2504 static boolean_t
pristli(vnode_t * vp)2505 pristli(vnode_t *vp)
2506 {
2507 static const char *tlidevs[] = {
2508 "udp", "udp6", "tcp", "tcp6"
2509 };
2510 char *devname;
2511 uint_t i;
2512
2513 ASSERT(vp != NULL);
2514
2515 if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0)
2516 return (B_FALSE);
2517
2518 if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL)
2519 return (B_FALSE);
2520
2521 for (i = 0; i < ARRAY_SIZE(tlidevs); i++) {
2522 if (strcmp(devname, tlidevs[i]) == 0)
2523 return (B_TRUE);
2524 }
2525
2526 return (B_FALSE);
2527 }
2528
2529 static size_t
prfdinfopath(proc_t * p,vnode_t * vp,list_t * data,cred_t * cred)2530 prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred)
2531 {
2532 char *pathname;
2533 size_t pathlen;
2534 size_t sz = 0;
2535
2536 /*
2537 * The global zone's path to a file in a non-global zone can exceed
2538 * MAXPATHLEN.
2539 */
2540 pathlen = MAXPATHLEN * 2 + 1;
2541 pathname = kmem_alloc(pathlen, KM_SLEEP);
2542
2543 if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) {
2544 sz += prfdinfomisc(data, PR_PATHNAME,
2545 pathname, strlen(pathname) + 1);
2546 }
2547
2548 kmem_free(pathname, pathlen);
2549
2550 return (sz);
2551 }
2552
2553 static size_t
prfdinfotlisockopt(vnode_t * vp,list_t * data,cred_t * cred)2554 prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred)
2555 {
2556 strcmd_t strcmd;
2557 int32_t rval;
2558 size_t sz = 0;
2559
2560 strcmd.sc_cmd = TI_GETMYNAME;
2561 strcmd.sc_timeout = 1;
2562 strcmd.sc_len = STRCMDBUFSIZE;
2563
2564 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2565 &rval, NULL) == 0 && strcmd.sc_len > 0) {
2566 sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf,
2567 strcmd.sc_len);
2568 }
2569
2570 strcmd.sc_cmd = TI_GETPEERNAME;
2571 strcmd.sc_timeout = 1;
2572 strcmd.sc_len = STRCMDBUFSIZE;
2573
2574 if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred,
2575 &rval, NULL) == 0 && strcmd.sc_len > 0) {
2576 sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf,
2577 strcmd.sc_len);
2578 }
2579
2580 return (sz);
2581 }
2582
2583 static size_t
prfdinfosockopt(vnode_t * vp,list_t * data,cred_t * cred)2584 prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred)
2585 {
2586 sonode_t *so;
2587 socklen_t vlen;
2588 size_t sz = 0;
2589 uint_t i;
2590
2591 if (vp->v_stream != NULL) {
2592 so = VTOSO(vp->v_stream->sd_vnode);
2593
2594 if (so->so_version == SOV_STREAM)
2595 so = NULL;
2596 } else {
2597 so = VTOSO(vp);
2598 }
2599
2600 if (so == NULL)
2601 return (0);
2602
2603 DTRACE_PROBE1(sonode, sonode_t *, so);
2604
2605 /* prmisc - PR_SOCKETNAME */
2606
2607 struct sockaddr_storage buf;
2608 struct sockaddr *name = (struct sockaddr *)&buf;
2609
2610 vlen = sizeof (buf);
2611 if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0)
2612 sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen);
2613
2614 /* prmisc - PR_PEERSOCKNAME */
2615
2616 vlen = sizeof (buf);
2617 if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0)
2618 sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen);
2619
2620 /* prmisc - PR_SOCKOPTS_BOOL_OPTS */
2621
2622 static struct boolopt {
2623 int level;
2624 int opt;
2625 int bopt;
2626 } boolopts[] = {
2627 { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG },
2628 { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR },
2629 #ifdef SO_REUSEPORT
2630 /* SmartOS and OmniOS have SO_REUSEPORT */
2631 { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT },
2632 #endif
2633 { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE },
2634 { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE },
2635 { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST },
2636 { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE },
2637 { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND },
2638 { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES },
2639 { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT },
2640 { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT },
2641 { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND },
2642 { SOL_SOCKET, SO_VRRP, PR_SO_VRRP },
2643 { IPPROTO_UDP, UDP_NAT_T_ENDPOINT,
2644 PR_UDP_NAT_T_ENDPOINT }
2645 };
2646 prsockopts_bool_opts_t opts;
2647 int val;
2648
2649 if (data != NULL) {
2650 opts.prsock_bool_opts = 0;
2651
2652 for (i = 0; i < ARRAY_SIZE(boolopts); i++) {
2653 vlen = sizeof (val);
2654 if (SOP_GETSOCKOPT(so, boolopts[i].level,
2655 boolopts[i].opt, &val, &vlen, 0, cred) == 0 &&
2656 val != 0) {
2657 opts.prsock_bool_opts |= boolopts[i].bopt;
2658 }
2659 }
2660 }
2661
2662 sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts));
2663
2664 /* prmisc - PR_SOCKOPT_LINGER */
2665
2666 struct linger l;
2667
2668 vlen = sizeof (l);
2669 if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen,
2670 0, cred) == 0 && vlen > 0) {
2671 sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen);
2672 }
2673
2674 /* prmisc - PR_SOCKOPT_* int types */
2675
2676 static struct sopt {
2677 int level;
2678 int opt;
2679 int bopt;
2680 } sopts[] = {
2681 { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE },
2682 { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF },
2683 { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF }
2684 };
2685
2686 for (i = 0; i < ARRAY_SIZE(sopts); i++) {
2687 vlen = sizeof (val);
2688 if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt,
2689 &val, &vlen, 0, cred) == 0 && vlen > 0) {
2690 sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen);
2691 }
2692 }
2693
2694 /* prmisc - PR_SOCKOPT_IP_NEXTHOP */
2695
2696 in_addr_t nexthop_val;
2697
2698 vlen = sizeof (nexthop_val);
2699 if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP,
2700 &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) {
2701 sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP,
2702 &nexthop_val, vlen);
2703 }
2704
2705 /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */
2706
2707 struct sockaddr_in6 nexthop6_val;
2708
2709 vlen = sizeof (nexthop6_val);
2710 if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP,
2711 &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) {
2712 sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP,
2713 &nexthop6_val, vlen);
2714 }
2715
2716 /* prmisc - PR_SOCKOPT_TCP_CONGESTION */
2717
2718 char cong[CC_ALGO_NAME_MAX];
2719
2720 vlen = sizeof (cong);
2721 if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION,
2722 &cong, &vlen, 0, cred) == 0 && vlen > 0) {
2723 sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen);
2724 }
2725
2726 /* prmisc - PR_SOCKFILTERS_PRIV */
2727
2728 struct fil_info fi;
2729
2730 vlen = sizeof (fi);
2731 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2732 &fi, &vlen, 0, cred) == 0 && vlen != 0) {
2733 pr_misc_header_t *misc;
2734 size_t len;
2735
2736 /*
2737 * We limit the number of returned filters to 32.
2738 * This is the maximum number that pfiles will print
2739 * anyway.
2740 */
2741 vlen = MIN(32, fi.fi_pos + 1);
2742 vlen *= sizeof (fi);
2743
2744 len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen);
2745 sz += len;
2746
2747 if (data != NULL) {
2748 /*
2749 * So that the filter list can be built incrementally,
2750 * prfdinfomisc() is not used here. Instead we
2751 * allocate a buffer directly on the copyout list using
2752 * pr_iol_newbuf()
2753 */
2754 misc = pr_iol_newbuf(data, len);
2755 misc->pr_misc_type = PR_SOCKFILTERS_PRIV;
2756 misc->pr_misc_size = len;
2757 misc++;
2758 len = vlen;
2759 if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST,
2760 misc, &vlen, 0, cred) == 0) {
2761 /*
2762 * In case the number of filters has reduced
2763 * since the first call, explicitly zero out
2764 * any unpopulated space.
2765 */
2766 if (vlen < len)
2767 bzero(misc + vlen, len - vlen);
2768 } else {
2769 /* Something went wrong, zero out the result */
2770 bzero(misc, vlen);
2771 }
2772 }
2773 }
2774
2775 return (sz);
2776 }
2777
/*
 * State handed to prfdinfo_nm_path() through the 'arg' pointer of
 * nm_walk_mounts().
 */
typedef struct prfdinfo_nm_path_cbdata {
	proc_t		*nmp_p;		/* process, passed to prfdinfopath() */
	u_offset_t	nmp_sz;		/* running total of record sizes */
	list_t		*nmp_data;	/* copy-out list, NULL when sizing */
} prfdinfo_nm_path_cbdata_t;
2783
2784 static int
prfdinfo_nm_path(const struct namenode * np,cred_t * cred,void * arg)2785 prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg)
2786 {
2787 prfdinfo_nm_path_cbdata_t *cb = arg;
2788
2789 cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred);
2790
2791 return (0);
2792 }
2793
2794 u_offset_t
prgetfdinfosize(proc_t * p,vnode_t * vp,cred_t * cred)2795 prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred)
2796 {
2797 u_offset_t sz;
2798
2799 /*
2800 * All fdinfo files will be at least this big -
2801 * sizeof fdinfo struct + zero length trailer
2802 */
2803 sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t);
2804
2805 /* Pathname */
2806 switch (vp->v_type) {
2807 case VDOOR: {
2808 prfdinfo_nm_path_cbdata_t cb = {
2809 .nmp_p = p,
2810 .nmp_data = NULL,
2811 .nmp_sz = 0
2812 };
2813
2814 (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb);
2815 sz += cb.nmp_sz;
2816 break;
2817 }
2818 case VSOCK:
2819 break;
2820 default:
2821 sz += prfdinfopath(p, vp, NULL, cred);
2822 }
2823
2824 /* Socket options */
2825 if (vp->v_type == VSOCK)
2826 sz += prfdinfosockopt(vp, NULL, cred);
2827
2828 /* TLI/XTI sockets */
2829 if (pristli(vp))
2830 sz += prfdinfotlisockopt(vp, NULL, cred);
2831
2832 return (sz);
2833 }
2834
2835 int
prgetfdinfo(proc_t * p,vnode_t * vp,prfdinfo_t * fdinfo,cred_t * cred,cred_t * file_cred,list_t * data)2836 prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred,
2837 cred_t *file_cred, list_t *data)
2838 {
2839 vattr_t vattr;
2840 int error;
2841
2842 /*
2843 * The buffer has been initialised to zero by pr_iol_newbuf().
2844 * Initialise defaults for any values that should not default to zero.
2845 */
2846 fdinfo->pr_uid = (uid_t)-1;
2847 fdinfo->pr_gid = (gid_t)-1;
2848 fdinfo->pr_size = -1;
2849 fdinfo->pr_locktype = F_UNLCK;
2850 fdinfo->pr_lockpid = -1;
2851 fdinfo->pr_locksysid = -1;
2852 fdinfo->pr_peerpid = -1;
2853
2854 /* Offset */
2855
2856 /*
2857 * pr_offset has already been set from the underlying file_t.
2858 * Check if it is plausible and reset to -1 if not.
2859 */
2860 if (fdinfo->pr_offset != -1 &&
2861 VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0)
2862 fdinfo->pr_offset = -1;
2863
2864 /*
2865 * Attributes
2866 *
2867 * We have two cred_t structures available here.
2868 * 'cred' is the caller's credential, and 'file_cred' is the credential
2869 * for the file being inspected.
2870 *
2871 * When looking up the file attributes, file_cred is used in order
2872 * that the correct ownership is set for doors and FIFOs. Since the
2873 * caller has permission to read the fdinfo file in proc, this does
2874 * not expose any additional information.
2875 */
2876 vattr.va_mask = AT_STAT;
2877 if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) {
2878 fdinfo->pr_major = getmajor(vattr.va_fsid);
2879 fdinfo->pr_minor = getminor(vattr.va_fsid);
2880 fdinfo->pr_rmajor = getmajor(vattr.va_rdev);
2881 fdinfo->pr_rminor = getminor(vattr.va_rdev);
2882 fdinfo->pr_ino = (ino64_t)vattr.va_nodeid;
2883 fdinfo->pr_size = (off64_t)vattr.va_size;
2884 fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
2885 fdinfo->pr_uid = vattr.va_uid;
2886 fdinfo->pr_gid = vattr.va_gid;
2887