/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright 2019 Joyent, Inc. * Copyright 2020 OmniOS Community Edition (OmniOSce) Association. * Copyright 2022 MNX Cloud, Inc. * Copyright 2022 Oxide Computer Company */ /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ /* All Rights Reserved */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #define MAX_ITERS_SPIN 5 typedef struct prpagev { uint_t *pg_protv; /* vector of page permissions */ char *pg_incore; /* vector of incore flags */ size_t pg_npages; /* number of pages in protv and incore */ ulong_t pg_pnbase; /* pn within segment of first protv element */ } prpagev_t; size_t pagev_lim = 256 * 1024; /* limit on number of pages in prpagev_t */ extern struct seg_ops segdev_ops; /* needs a header file */ extern struct seg_ops segspt_shmops; /* needs a header file */ static int set_watched_page(proc_t *, caddr_t, caddr_t, ulong_t, ulong_t); static void clear_watched_page(proc_t *, caddr_t, caddr_t, ulong_t); /* * Choose an lwp from the complete set of lwps for the process. * This is called for any operation applied to the process * file descriptor that requires an lwp to operate upon. * * Returns a pointer to the thread for the selected LWP, * and with the dispatcher lock held for the thread. * * The algorithm for choosing an lwp is critical for /proc semantics; * don't touch this code unless you know all of the implications. */ kthread_t * prchoose(proc_t *p) { kthread_t *t; kthread_t *t_onproc = NULL; /* running on processor */ kthread_t *t_run = NULL; /* runnable, on disp queue */ kthread_t *t_sleep = NULL; /* sleeping */ kthread_t *t_hold = NULL; /* sleeping, performing hold */ kthread_t *t_susp = NULL; /* suspended stop */ kthread_t *t_jstop = NULL; /* jobcontrol stop, w/o directed stop */ kthread_t *t_jdstop = NULL; /* jobcontrol stop with directed stop */ kthread_t *t_req = NULL; /* requested stop */ kthread_t *t_istop = NULL; /* event-of-interest stop */ kthread_t *t_dtrace = NULL; /* DTrace stop */ ASSERT(MUTEX_HELD(&p->p_lock)); /* * If the agent lwp exists, it takes precedence over all others. 
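 * Otherwise the preference order, implemented by the cascade at the
 * bottom of this function, is: an lwp running on a processor, then a
 * runnable one, then a sleeping one, then one stopped for job control
 * (without, then with, a directed stop), then one stopped on an event
 * of interest, a DTrace stop, a requested stop, a sleeping hold, a
 * suspended lwp, and finally a zombie.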
*/ if ((t = p->p_agenttp) != NULL) { thread_lock(t); return (t); } if ((t = p->p_tlist) == NULL) /* start at the head of the list */ return (t); do { /* for eacn lwp in the process */ if (VSTOPPED(t)) { /* virtually stopped */ if (t_req == NULL) t_req = t; continue; } /* If this is a process kernel thread, ignore it. */ if ((t->t_proc_flag & TP_KTHREAD) != 0) { continue; } thread_lock(t); /* make sure thread is in good state */ switch (t->t_state) { default: panic("prchoose: bad thread state %d, thread 0x%p", t->t_state, (void *)t); /*NOTREACHED*/ case TS_SLEEP: /* this is filthy */ if (t->t_wchan == (caddr_t)&p->p_holdlwps && t->t_wchan0 == NULL) { if (t_hold == NULL) t_hold = t; } else { if (t_sleep == NULL) t_sleep = t; } break; case TS_RUN: case TS_WAIT: if (t_run == NULL) t_run = t; break; case TS_ONPROC: if (t_onproc == NULL) t_onproc = t; break; case TS_ZOMB: /* last possible choice */ break; case TS_STOPPED: switch (t->t_whystop) { case PR_SUSPENDED: if (t_susp == NULL) t_susp = t; break; case PR_JOBCONTROL: if (t->t_proc_flag & TP_PRSTOP) { if (t_jdstop == NULL) t_jdstop = t; } else { if (t_jstop == NULL) t_jstop = t; } break; case PR_REQUESTED: if (t->t_dtrace_stop && t_dtrace == NULL) t_dtrace = t; else if (t_req == NULL) t_req = t; break; case PR_SYSENTRY: case PR_SYSEXIT: case PR_SIGNALLED: case PR_FAULTED: /* * Make an lwp calling exit() be the * last lwp seen in the process. */ if (t_istop == NULL || (t_istop->t_whystop == PR_SYSENTRY && t_istop->t_whatstop == SYS_exit)) t_istop = t; break; case PR_CHECKPOINT: /* can't happen? */ break; default: panic("prchoose: bad t_whystop %d, thread 0x%p", t->t_whystop, (void *)t); /*NOTREACHED*/ } break; } thread_unlock(t); } while ((t = t->t_forw) != p->p_tlist); if (t_onproc) t = t_onproc; else if (t_run) t = t_run; else if (t_sleep) t = t_sleep; else if (t_jstop) t = t_jstop; else if (t_jdstop) t = t_jdstop; else if (t_istop) t = t_istop; else if (t_dtrace) t = t_dtrace; else if (t_req) t = t_req; else if (t_hold) t = t_hold; else if (t_susp) t = t_susp; else /* TS_ZOMB */ t = p->p_tlist; if (t != NULL) thread_lock(t); return (t); } /* * Wakeup anyone sleeping on the /proc vnode for the process/lwp to stop. * Also call pollwakeup() if any lwps are waiting in poll() for POLLPRI * on the /proc file descriptor. Called from stop() when a traced * process stops on an event of interest. Also called from exit() * and prinvalidate() to indicate POLLHUP and POLLERR respectively. */ void prnotify(struct vnode *vp) { prcommon_t *pcp = VTOP(vp)->pr_common; mutex_enter(&pcp->prc_mutex); cv_broadcast(&pcp->prc_wait); mutex_exit(&pcp->prc_mutex); if (pcp->prc_flags & PRC_POLL) { /* * We call pollwakeup() with POLLHUP to ensure that * the pollers are awakened even if they are polling * for nothing (i.e., waiting for the process to exit). * This enables the use of the PRC_POLL flag for optimization * (we can turn off PRC_POLL only if we know no pollers remain). */ pcp->prc_flags &= ~PRC_POLL; pollwakeup(&pcp->prc_pollhead, POLLHUP); } } /* called immediately below, in prfree() */ static void prfreenotify(vnode_t *vp) { prnode_t *pnp; prcommon_t *pcp; while (vp != NULL) { pnp = VTOP(vp); pcp = pnp->pr_common; ASSERT(pcp->prc_thread == NULL); pcp->prc_proc = NULL; /* * We can't call prnotify() here because we are holding * pidlock. We assert that there is no need to. 
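		 * Instead, the wakeup that prnotify() would have done is
		 * performed inline below; the ASSERT that PRC_POLL is clear
		 * shows that no pollwakeup() call is being skipped.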
*/ mutex_enter(&pcp->prc_mutex); cv_broadcast(&pcp->prc_wait); mutex_exit(&pcp->prc_mutex); ASSERT(!(pcp->prc_flags & PRC_POLL)); vp = pnp->pr_next; pnp->pr_next = NULL; } } /* * Called from a hook in freeproc() when a traced process is removed * from the process table. The proc-table pointers of all associated * /proc vnodes are cleared to indicate that the process has gone away. */ void prfree(proc_t *p) { uint_t slot = p->p_slot; ASSERT(MUTEX_HELD(&pidlock)); /* * Block the process against /proc so it can be freed. * It cannot be freed while locked by some controlling process. * Lock ordering: * pidlock -> pr_pidlock -> p->p_lock -> pcp->prc_mutex */ mutex_enter(&pr_pidlock); /* protects pcp->prc_proc */ mutex_enter(&p->p_lock); while (p->p_proc_flag & P_PR_LOCK) { mutex_exit(&pr_pidlock); cv_wait(&pr_pid_cv[slot], &p->p_lock); mutex_exit(&p->p_lock); mutex_enter(&pr_pidlock); mutex_enter(&p->p_lock); } ASSERT(p->p_tlist == NULL); prfreenotify(p->p_plist); p->p_plist = NULL; prfreenotify(p->p_trace); p->p_trace = NULL; /* * We broadcast to wake up everyone waiting for this process. * No one can reach this process from this point on. */ cv_broadcast(&pr_pid_cv[slot]); mutex_exit(&p->p_lock); mutex_exit(&pr_pidlock); } /* * Called from a hook in exit() when a traced process is becoming a zombie. */ void prexit(proc_t *p) { ASSERT(MUTEX_HELD(&p->p_lock)); if (pr_watch_active(p)) { pr_free_watchpoints(p); watch_disable(curthread); } /* pr_free_watched_pages() is called in exit(), after dropping p_lock */ if (p->p_trace) { VTOP(p->p_trace)->pr_common->prc_flags |= PRC_DESTROY; prnotify(p->p_trace); } cv_broadcast(&pr_pid_cv[p->p_slot]); /* pauselwps() */ } /* * Called when a thread calls lwp_exit(). */ void prlwpexit(kthread_t *t) { vnode_t *vp; prnode_t *pnp; prcommon_t *pcp; proc_t *p = ttoproc(t); lwpent_t *lep = p->p_lwpdir[t->t_dslot].ld_entry; ASSERT(t == curthread); ASSERT(MUTEX_HELD(&p->p_lock)); /* * The process must be blocked against /proc to do this safely. * The lwp must not disappear while the process is marked P_PR_LOCK. * It is the caller's responsibility to have called prbarrier(p). */ ASSERT(!(p->p_proc_flag & P_PR_LOCK)); for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { pnp = VTOP(vp); pcp = pnp->pr_common; if (pcp->prc_thread == t) { pcp->prc_thread = NULL; pcp->prc_flags |= PRC_DESTROY; } } for (vp = lep->le_trace; vp != NULL; vp = pnp->pr_next) { pnp = VTOP(vp); pcp = pnp->pr_common; pcp->prc_thread = NULL; pcp->prc_flags |= PRC_DESTROY; prnotify(vp); } if (p->p_trace) prnotify(p->p_trace); } /* * Called when a zombie thread is joined or when a * detached lwp exits. Called from lwp_hash_out(). */ void prlwpfree(proc_t *p, lwpent_t *lep) { vnode_t *vp; prnode_t *pnp; prcommon_t *pcp; ASSERT(MUTEX_HELD(&p->p_lock)); /* * The process must be blocked against /proc to do this safely. * The lwp must not disappear while the process is marked P_PR_LOCK. * It is the caller's responsibility to have called prbarrier(p). */ ASSERT(!(p->p_proc_flag & P_PR_LOCK)); vp = lep->le_trace; lep->le_trace = NULL; while (vp) { prnotify(vp); pnp = VTOP(vp); pcp = pnp->pr_common; ASSERT(pcp->prc_thread == NULL && (pcp->prc_flags & PRC_DESTROY)); pcp->prc_tslot = -1; vp = pnp->pr_next; pnp->pr_next = NULL; } if (p->p_trace) prnotify(p->p_trace); } /* * Called from a hook in exec() when a thread starts exec(). */ void prexecstart(void) { proc_t *p = ttoproc(curthread); klwp_t *lwp = ttolwp(curthread); /* * The P_PR_EXEC flag blocks /proc operations for * the duration of the exec(). 
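	 * (prlock(), below, notices P_PR_EXEC, sleeps on prc_wait and
	 * retries, so controlling processes simply wait out the exec().)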
* We can't start exec() while the process is * locked by /proc, so we call prbarrier(). * lwp_nostop keeps the process from being stopped * via job control for the duration of the exec(). */ ASSERT(MUTEX_HELD(&p->p_lock)); prbarrier(p); lwp->lwp_nostop++; p->p_proc_flag |= P_PR_EXEC; } /* * Called from a hook in exec() when a thread finishes exec(). * The thread may or may not have succeeded. Some other thread * may have beat it to the punch. */ void prexecend(void) { proc_t *p = ttoproc(curthread); klwp_t *lwp = ttolwp(curthread); vnode_t *vp; prnode_t *pnp; prcommon_t *pcp; model_t model = p->p_model; id_t tid = curthread->t_tid; int tslot = curthread->t_dslot; ASSERT(MUTEX_HELD(&p->p_lock)); lwp->lwp_nostop--; if (p->p_flag & SEXITLWPS) { /* * We are on our way to exiting because some * other thread beat us in the race to exec(). * Don't clear the P_PR_EXEC flag in this case. */ return; } /* * Wake up anyone waiting in /proc for the process to complete exec(). */ p->p_proc_flag &= ~P_PR_EXEC; if ((vp = p->p_trace) != NULL) { pcp = VTOP(vp)->pr_common; mutex_enter(&pcp->prc_mutex); cv_broadcast(&pcp->prc_wait); mutex_exit(&pcp->prc_mutex); for (; vp != NULL; vp = pnp->pr_next) { pnp = VTOP(vp); pnp->pr_common->prc_datamodel = model; } } if ((vp = p->p_lwpdir[tslot].ld_entry->le_trace) != NULL) { /* * We dealt with the process common above. */ ASSERT(p->p_trace != NULL); pcp = VTOP(vp)->pr_common; mutex_enter(&pcp->prc_mutex); cv_broadcast(&pcp->prc_wait); mutex_exit(&pcp->prc_mutex); for (; vp != NULL; vp = pnp->pr_next) { pnp = VTOP(vp); pcp = pnp->pr_common; pcp->prc_datamodel = model; pcp->prc_tid = tid; pcp->prc_tslot = tslot; } } } /* * Called from a hook in relvm() just before freeing the address space. * We free all the watched areas now. */ void prrelvm(void) { proc_t *p = ttoproc(curthread); mutex_enter(&p->p_lock); prbarrier(p); /* block all other /proc operations */ if (pr_watch_active(p)) { pr_free_watchpoints(p); watch_disable(curthread); } mutex_exit(&p->p_lock); pr_free_watched_pages(p); } /* * Called from hooks in exec-related code when a traced process * attempts to exec(2) a setuid/setgid program or an unreadable * file. Rather than fail the exec we invalidate the associated * /proc vnodes so that subsequent attempts to use them will fail. * * All /proc vnodes, except directory vnodes, are retained on a linked * list (rooted at p_plist in the process structure) until last close. * * A controlling process must re-open the /proc files in order to * regain control. */ void prinvalidate(struct user *up) { kthread_t *t = curthread; proc_t *p = ttoproc(t); vnode_t *vp; prnode_t *pnp; int writers = 0; mutex_enter(&p->p_lock); prbarrier(p); /* block all other /proc operations */ /* * At this moment, there can be only one lwp in the process. */ ASSERT(p->p_lwpcnt == 1 && p->p_zombcnt == 0); /* * Invalidate any currently active /proc vnodes. */ for (vp = p->p_plist; vp != NULL; vp = pnp->pr_next) { pnp = VTOP(vp); switch (pnp->pr_type) { case PR_PSINFO: /* these files can read by anyone */ case PR_LPSINFO: case PR_LWPSINFO: case PR_LWPDIR: case PR_LWPIDDIR: case PR_USAGE: case PR_LUSAGE: case PR_LWPUSAGE: break; default: pnp->pr_flags |= PR_INVAL; break; } } /* * Wake up anyone waiting for the process or lwp. * p->p_trace is guaranteed to be non-NULL if there * are any open /proc files for this process. */ if ((vp = p->p_trace) != NULL) { prcommon_t *pcp = VTOP(vp)->pr_pcommon; prnotify(vp); /* * Are there any writers? 
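		 * If so, remember the count; it determines below whether the
		 * tracing flags are preserved or cleared.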
*/ if ((writers = pcp->prc_writers) != 0) { /* * Clear the exclusive open flag (old /proc interface). * Set prc_selfopens equal to prc_writers so that * the next O_EXCL|O_WRITE open will succeed * even with existing (though invalid) writers. * prclose() must decrement prc_selfopens when * the invalid files are closed. */ pcp->prc_flags &= ~PRC_EXCL; ASSERT(pcp->prc_selfopens <= writers); pcp->prc_selfopens = writers; } } vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace; while (vp != NULL) { /* * We should not invalidate the lwpiddir vnodes, * but the necessities of maintaining the old * ioctl()-based version of /proc require it. */ pnp = VTOP(vp); pnp->pr_flags |= PR_INVAL; prnotify(vp); vp = pnp->pr_next; } /* * If any tracing flags are in effect and any vnodes are open for * writing then set the requested-stop and run-on-last-close flags. * Otherwise, clear all tracing flags. */ t->t_proc_flag &= ~TP_PAUSE; if ((p->p_proc_flag & P_PR_TRACE) && writers) { t->t_proc_flag |= TP_PRSTOP; aston(t); /* so ISSIG will see the flag */ p->p_proc_flag |= P_PR_RUNLCL; } else { premptyset(&up->u_entrymask); /* syscalls */ premptyset(&up->u_exitmask); up->u_systrap = 0; premptyset(&p->p_sigmask); /* signals */ premptyset(&p->p_fltmask); /* faults */ t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING); p->p_proc_flag &= ~(P_PR_RUNLCL|P_PR_KILLCL|P_PR_TRACE); prnostep(ttolwp(t)); } mutex_exit(&p->p_lock); } /* * Acquire the controlled process's p_lock and mark it P_PR_LOCK. * Return with pr_pidlock held in all cases. * Return with p_lock held if the the process still exists. * Return value is the process pointer if the process still exists, else NULL. * If we lock the process, give ourself kernel priority to avoid deadlocks; * this is undone in prunlock(). */ proc_t * pr_p_lock(prnode_t *pnp) { proc_t *p; prcommon_t *pcp; mutex_enter(&pr_pidlock); if ((pcp = pnp->pr_pcommon) == NULL || (p = pcp->prc_proc) == NULL) return (NULL); mutex_enter(&p->p_lock); while (p->p_proc_flag & P_PR_LOCK) { /* * This cv/mutex pair is persistent even if * the process disappears while we sleep. */ kcondvar_t *cv = &pr_pid_cv[p->p_slot]; kmutex_t *mp = &p->p_lock; mutex_exit(&pr_pidlock); cv_wait(cv, mp); mutex_exit(mp); mutex_enter(&pr_pidlock); if (pcp->prc_proc == NULL) return (NULL); ASSERT(p == pcp->prc_proc); mutex_enter(&p->p_lock); } p->p_proc_flag |= P_PR_LOCK; return (p); } /* * Lock the target process by setting P_PR_LOCK and grabbing p->p_lock. * This prevents any lwp of the process from disappearing and * blocks most operations that a process can perform on itself. * Returns 0 on success, a non-zero error number on failure. * * 'zdisp' is ZYES or ZNO to indicate whether prlock() should succeed when * the subject process is a zombie (ZYES) or fail for zombies (ZNO). * * error returns: * ENOENT: process or lwp has disappeared or process is exiting * (or has become a zombie and zdisp == ZNO). * EAGAIN: procfs vnode has become invalid. * EINTR: signal arrived while waiting for exec to complete. */ int prlock(prnode_t *pnp, int zdisp) { prcommon_t *pcp; proc_t *p; again: pcp = pnp->pr_common; p = pr_p_lock(pnp); mutex_exit(&pr_pidlock); /* * Return ENOENT immediately if there is no process. */ if (p == NULL) return (ENOENT); ASSERT(p == pcp->prc_proc && p->p_stat != 0 && p->p_stat != SIDL); /* * Return ENOENT if process entered zombie state or is exiting * and the 'zdisp' flag is set to ZNO indicating not to lock zombies. 
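	 * (A caller passing ZYES is not turned away here; it returns
	 * holding p->p_lock even for a zombie, as described above.)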
*/ if (zdisp == ZNO && ((pcp->prc_flags & PRC_DESTROY) || (p->p_flag & SEXITING))) { prunlock(pnp); return (ENOENT); } /* * If lwp-specific, check to see if lwp has disappeared. */ if (pcp->prc_flags & PRC_LWP) { if ((zdisp == ZNO && (pcp->prc_flags & PRC_DESTROY)) || pcp->prc_tslot == -1) { prunlock(pnp); return (ENOENT); } } /* * Return EAGAIN if we have encountered a security violation. * (The process exec'd a set-id or unreadable executable file.) */ if (pnp->pr_flags & PR_INVAL) { prunlock(pnp); return (EAGAIN); } /* * If process is undergoing an exec(), wait for * completion and then start all over again. */ if (p->p_proc_flag & P_PR_EXEC) { pcp = pnp->pr_pcommon; /* Put on the correct sleep queue */ mutex_enter(&pcp->prc_mutex); prunlock(pnp); if (!cv_wait_sig(&pcp->prc_wait, &pcp->prc_mutex)) { mutex_exit(&pcp->prc_mutex); return (EINTR); } mutex_exit(&pcp->prc_mutex); goto again; } /* * We return holding p->p_lock. */ return (0); } /* * Undo prlock() and pr_p_lock(). * p->p_lock is still held; pr_pidlock is no longer held. * * prunmark() drops the P_PR_LOCK flag and wakes up another thread, * if any, waiting for the flag to be dropped; it retains p->p_lock. * * prunlock() calls prunmark() and then drops p->p_lock. */ void prunmark(proc_t *p) { ASSERT(p->p_proc_flag & P_PR_LOCK); ASSERT(MUTEX_HELD(&p->p_lock)); cv_signal(&pr_pid_cv[p->p_slot]); p->p_proc_flag &= ~P_PR_LOCK; } void prunlock(prnode_t *pnp) { prcommon_t *pcp = pnp->pr_common; proc_t *p = pcp->prc_proc; /* * If we (or someone) gave it a SIGKILL, and it is not * already a zombie, set it running unconditionally. */ if ((p->p_flag & SKILLED) && !(p->p_flag & SEXITING) && !(pcp->prc_flags & PRC_DESTROY) && !((pcp->prc_flags & PRC_LWP) && pcp->prc_tslot == -1)) (void) pr_setrun(pnp, 0); prunmark(p); mutex_exit(&p->p_lock); } /* * Called while holding p->p_lock to delay until the process is unlocked. * We enter holding p->p_lock; p->p_lock is dropped and reacquired. * The process cannot become locked again until p->p_lock is dropped. */ void prbarrier(proc_t *p) { ASSERT(MUTEX_HELD(&p->p_lock)); if (p->p_proc_flag & P_PR_LOCK) { /* The process is locked; delay until not locked */ uint_t slot = p->p_slot; while (p->p_proc_flag & P_PR_LOCK) cv_wait(&pr_pid_cv[slot], &p->p_lock); cv_signal(&pr_pid_cv[slot]); } } /* * Return process/lwp status. * The u-block is mapped in by this routine and unmapped at the end. */ void prgetstatus(proc_t *p, pstatus_t *sp, zone_t *zp) { kthread_t *t; ASSERT(MUTEX_HELD(&p->p_lock)); t = prchoose(p); /* returns locked thread */ ASSERT(t != NULL); thread_unlock(t); /* just bzero the process part, prgetlwpstatus() does the rest */ bzero(sp, sizeof (pstatus_t) - sizeof (lwpstatus_t)); sp->pr_nlwp = p->p_lwpcnt; sp->pr_nzomb = p->p_zombcnt; prassignset(&sp->pr_sigpend, &p->p_sig); sp->pr_brkbase = (uintptr_t)p->p_brkbase; sp->pr_brksize = p->p_brksize; sp->pr_stkbase = (uintptr_t)prgetstackbase(p); sp->pr_stksize = p->p_stksize; sp->pr_pid = p->p_pid; if (curproc->p_zone->zone_id != GLOBAL_ZONEID && (p->p_flag & SZONETOP)) { ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); /* * Inside local zones, fake zsched's pid as parent pids for * processes which reference processes outside of the zone. 
*/ sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; } else { sp->pr_ppid = p->p_ppid; } sp->pr_pgid = p->p_pgrp; sp->pr_sid = p->p_sessp->s_sid; sp->pr_taskid = p->p_task->tk_tkid; sp->pr_projid = p->p_task->tk_proj->kpj_id; sp->pr_zoneid = p->p_zone->zone_id; hrt2ts(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); hrt2ts(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); TICK_TO_TIMESTRUC(p->p_cutime, &sp->pr_cutime); TICK_TO_TIMESTRUC(p->p_cstime, &sp->pr_cstime); prassignset(&sp->pr_sigtrace, &p->p_sigmask); prassignset(&sp->pr_flttrace, &p->p_fltmask); prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); switch (p->p_model) { case DATAMODEL_ILP32: sp->pr_dmodel = PR_MODEL_ILP32; break; case DATAMODEL_LP64: sp->pr_dmodel = PR_MODEL_LP64; break; } if (p->p_agenttp) sp->pr_agentid = p->p_agenttp->t_tid; /* get the chosen lwp's status */ prgetlwpstatus(t, &sp->pr_lwp, zp); /* replicate the flags */ sp->pr_flags = sp->pr_lwp.pr_flags; } /* * Query mask of held signals for a given thread. * * This makes use of schedctl_sigblock() to query if userspace has requested * that all maskable signals be held. While it would be tempting to call * schedctl_finish_sigblock() and apply that update to t->t_hold, it cannot be * done safely without the risk of racing with the thread under consideration. */ void prgethold(kthread_t *t, sigset_t *sp) { k_sigset_t set; if (schedctl_sigblock(t)) { set.__sigbits[0] = FILLSET0 & ~CANTMASK0; set.__sigbits[1] = FILLSET1 & ~CANTMASK1; set.__sigbits[2] = FILLSET2 & ~CANTMASK2; } else { set = t->t_hold; } sigktou(&set, sp); } #ifdef _SYSCALL32_IMPL void prgetlwpstatus32(kthread_t *t, lwpstatus32_t *sp, zone_t *zp) { proc_t *p = ttoproc(t); klwp_t *lwp = ttolwp(t); struct mstate *ms = &lwp->lwp_mstate; hrtime_t usr, sys; int flags; ulong_t instr; ASSERT(MUTEX_HELD(&p->p_lock)); bzero(sp, sizeof (*sp)); flags = 0L; if (t->t_state == TS_STOPPED) { flags |= PR_STOPPED; if ((t->t_schedflag & TS_PSTART) == 0) flags |= PR_ISTOP; } else if (VSTOPPED(t)) { flags |= PR_STOPPED|PR_ISTOP; } if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) flags |= PR_DSTOP; if (lwp->lwp_asleep) flags |= PR_ASLEEP; if (t == p->p_agenttp) flags |= PR_AGENT; if (!(t->t_proc_flag & TP_TWAIT)) flags |= PR_DETACH; if (t->t_proc_flag & TP_DAEMON) flags |= PR_DAEMON; if (p->p_proc_flag & P_PR_FORK) flags |= PR_FORK; if (p->p_proc_flag & P_PR_RUNLCL) flags |= PR_RLC; if (p->p_proc_flag & P_PR_KILLCL) flags |= PR_KLC; if (p->p_proc_flag & P_PR_ASYNC) flags |= PR_ASYNC; if (p->p_proc_flag & P_PR_BPTADJ) flags |= PR_BPTADJ; if (p->p_proc_flag & P_PR_PTRACE) flags |= PR_PTRACE; if (p->p_flag & SMSACCT) flags |= PR_MSACCT; if (p->p_flag & SMSFORK) flags |= PR_MSFORK; if (p->p_flag & SVFWAIT) flags |= PR_VFORKP; sp->pr_flags = flags; if (VSTOPPED(t)) { sp->pr_why = PR_REQUESTED; sp->pr_what = 0; } else { sp->pr_why = t->t_whystop; sp->pr_what = t->t_whatstop; } sp->pr_lwpid = t->t_tid; sp->pr_cursig = lwp->lwp_cursig; prassignset(&sp->pr_lwppend, &t->t_sig); prgethold(t, &sp->pr_lwphold); if (t->t_whystop == PR_FAULTED) { siginfo_kto32(&lwp->lwp_siginfo, &sp->pr_info); if (t->t_whatstop == FLTPAGE) sp->pr_info.si_addr = (caddr32_t)(uintptr_t)lwp->lwp_siginfo.si_addr; } else if (lwp->lwp_curinfo) siginfo_kto32(&lwp->lwp_curinfo->sq_info, &sp->pr_info); if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && sp->pr_info.si_zoneid != zp->zone_id) { sp->pr_info.si_pid = zp->zone_zsched->p_pid; sp->pr_info.si_uid = 0; sp->pr_info.si_ctid = 
-1; sp->pr_info.si_zoneid = zp->zone_id; } sp->pr_altstack.ss_sp = (caddr32_t)(uintptr_t)lwp->lwp_sigaltstack.ss_sp; sp->pr_altstack.ss_size = (size32_t)lwp->lwp_sigaltstack.ss_size; sp->pr_altstack.ss_flags = (int32_t)lwp->lwp_sigaltstack.ss_flags; prgetaction32(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); sp->pr_oldcontext = (caddr32_t)lwp->lwp_oldcontext; sp->pr_ustack = (caddr32_t)lwp->lwp_ustack; (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, sizeof (sp->pr_clname) - 1); if (flags & PR_STOPPED) hrt2ts32(t->t_stoptime, &sp->pr_tstamp); usr = ms->ms_acct[LMS_USER]; sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; scalehrtime(&usr); scalehrtime(&sys); hrt2ts32(usr, &sp->pr_utime); hrt2ts32(sys, &sp->pr_stime); /* * Fetch the current instruction, if not a system process. * We don't attempt this unless the lwp is stopped. */ if ((p->p_flag & SSYS) || p->p_as == &kas) sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); else if (!(flags & PR_STOPPED)) sp->pr_flags |= PR_PCINVAL; else if (!prfetchinstr(lwp, &instr)) sp->pr_flags |= PR_PCINVAL; else sp->pr_instr = (uint32_t)instr; /* * Drop p_lock while touching the lwp's stack. */ mutex_exit(&p->p_lock); if (prisstep(lwp)) sp->pr_flags |= PR_STEP; if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { int i; sp->pr_syscall = get_syscall32_args(lwp, (int *)sp->pr_sysarg, &i); sp->pr_nsysarg = (ushort_t)i; } if ((flags & PR_STOPPED) || t == curthread) prgetprregs32(lwp, sp->pr_reg); if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || (flags & PR_VFORKP)) { long r1, r2; user_t *up; auxv_t *auxp; int i; sp->pr_errno = prgetrvals(lwp, &r1, &r2); if (sp->pr_errno == 0) { sp->pr_rval1 = (int32_t)r1; sp->pr_rval2 = (int32_t)r2; sp->pr_errpriv = PRIV_NONE; } else sp->pr_errpriv = lwp->lwp_badpriv; if (t->t_sysnum == SYS_execve) { up = PTOU(p); sp->pr_sysarg[0] = 0; sp->pr_sysarg[1] = (caddr32_t)up->u_argv; sp->pr_sysarg[2] = (caddr32_t)up->u_envp; sp->pr_sysarg[3] = 0; for (i = 0, auxp = up->u_auxv; i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); i++, auxp++) { if (auxp->a_type == AT_SUN_EXECNAME) { sp->pr_sysarg[0] = (caddr32_t) (uintptr_t)auxp->a_un.a_ptr; break; } } } } if (prhasfp()) prgetprfpregs32(lwp, &sp->pr_fpreg); mutex_enter(&p->p_lock); } void prgetstatus32(proc_t *p, pstatus32_t *sp, zone_t *zp) { kthread_t *t; ASSERT(MUTEX_HELD(&p->p_lock)); t = prchoose(p); /* returns locked thread */ ASSERT(t != NULL); thread_unlock(t); /* just bzero the process part, prgetlwpstatus32() does the rest */ bzero(sp, sizeof (pstatus32_t) - sizeof (lwpstatus32_t)); sp->pr_nlwp = p->p_lwpcnt; sp->pr_nzomb = p->p_zombcnt; prassignset(&sp->pr_sigpend, &p->p_sig); sp->pr_brkbase = (uint32_t)(uintptr_t)p->p_brkbase; sp->pr_brksize = (uint32_t)p->p_brksize; sp->pr_stkbase = (uint32_t)(uintptr_t)prgetstackbase(p); sp->pr_stksize = (uint32_t)p->p_stksize; sp->pr_pid = p->p_pid; if (curproc->p_zone->zone_id != GLOBAL_ZONEID && (p->p_flag & SZONETOP)) { ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); /* * Inside local zones, fake zsched's pid as parent pids for * processes which reference processes outside of the zone. 
*/ sp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; } else { sp->pr_ppid = p->p_ppid; } sp->pr_pgid = p->p_pgrp; sp->pr_sid = p->p_sessp->s_sid; sp->pr_taskid = p->p_task->tk_tkid; sp->pr_projid = p->p_task->tk_proj->kpj_id; sp->pr_zoneid = p->p_zone->zone_id; hrt2ts32(mstate_aggr_state(p, LMS_USER), &sp->pr_utime); hrt2ts32(mstate_aggr_state(p, LMS_SYSTEM), &sp->pr_stime); TICK_TO_TIMESTRUC32(p->p_cutime, &sp->pr_cutime); TICK_TO_TIMESTRUC32(p->p_cstime, &sp->pr_cstime); prassignset(&sp->pr_sigtrace, &p->p_sigmask); prassignset(&sp->pr_flttrace, &p->p_fltmask); prassignset(&sp->pr_sysentry, &PTOU(p)->u_entrymask); prassignset(&sp->pr_sysexit, &PTOU(p)->u_exitmask); switch (p->p_model) { case DATAMODEL_ILP32: sp->pr_dmodel = PR_MODEL_ILP32; break; case DATAMODEL_LP64: sp->pr_dmodel = PR_MODEL_LP64; break; } if (p->p_agenttp) sp->pr_agentid = p->p_agenttp->t_tid; /* get the chosen lwp's status */ prgetlwpstatus32(t, &sp->pr_lwp, zp); /* replicate the flags */ sp->pr_flags = sp->pr_lwp.pr_flags; } #endif /* _SYSCALL32_IMPL */ /* * Return lwp status. */ void prgetlwpstatus(kthread_t *t, lwpstatus_t *sp, zone_t *zp) { proc_t *p = ttoproc(t); klwp_t *lwp = ttolwp(t); struct mstate *ms = &lwp->lwp_mstate; hrtime_t usr, sys; int flags; ulong_t instr; ASSERT(MUTEX_HELD(&p->p_lock)); bzero(sp, sizeof (*sp)); flags = 0L; if (t->t_state == TS_STOPPED) { flags |= PR_STOPPED; if ((t->t_schedflag & TS_PSTART) == 0) flags |= PR_ISTOP; } else if (VSTOPPED(t)) { flags |= PR_STOPPED|PR_ISTOP; } if (!(flags & PR_ISTOP) && (t->t_proc_flag & TP_PRSTOP)) flags |= PR_DSTOP; if (lwp->lwp_asleep) flags |= PR_ASLEEP; if (t == p->p_agenttp) flags |= PR_AGENT; if (!(t->t_proc_flag & TP_TWAIT)) flags |= PR_DETACH; if (t->t_proc_flag & TP_DAEMON) flags |= PR_DAEMON; if (p->p_proc_flag & P_PR_FORK) flags |= PR_FORK; if (p->p_proc_flag & P_PR_RUNLCL) flags |= PR_RLC; if (p->p_proc_flag & P_PR_KILLCL) flags |= PR_KLC; if (p->p_proc_flag & P_PR_ASYNC) flags |= PR_ASYNC; if (p->p_proc_flag & P_PR_BPTADJ) flags |= PR_BPTADJ; if (p->p_proc_flag & P_PR_PTRACE) flags |= PR_PTRACE; if (p->p_flag & SMSACCT) flags |= PR_MSACCT; if (p->p_flag & SMSFORK) flags |= PR_MSFORK; if (p->p_flag & SVFWAIT) flags |= PR_VFORKP; if (p->p_pgidp->pid_pgorphaned) flags |= PR_ORPHAN; if (p->p_pidflag & CLDNOSIGCHLD) flags |= PR_NOSIGCHLD; if (p->p_pidflag & CLDWAITPID) flags |= PR_WAITPID; sp->pr_flags = flags; if (VSTOPPED(t)) { sp->pr_why = PR_REQUESTED; sp->pr_what = 0; } else { sp->pr_why = t->t_whystop; sp->pr_what = t->t_whatstop; } sp->pr_lwpid = t->t_tid; sp->pr_cursig = lwp->lwp_cursig; prassignset(&sp->pr_lwppend, &t->t_sig); prgethold(t, &sp->pr_lwphold); if (t->t_whystop == PR_FAULTED) bcopy(&lwp->lwp_siginfo, &sp->pr_info, sizeof (k_siginfo_t)); else if (lwp->lwp_curinfo) bcopy(&lwp->lwp_curinfo->sq_info, &sp->pr_info, sizeof (k_siginfo_t)); if (SI_FROMUSER(&lwp->lwp_siginfo) && zp->zone_id != GLOBAL_ZONEID && sp->pr_info.si_zoneid != zp->zone_id) { sp->pr_info.si_pid = zp->zone_zsched->p_pid; sp->pr_info.si_uid = 0; sp->pr_info.si_ctid = -1; sp->pr_info.si_zoneid = zp->zone_id; } sp->pr_altstack = lwp->lwp_sigaltstack; prgetaction(p, PTOU(p), lwp->lwp_cursig, &sp->pr_action); sp->pr_oldcontext = (uintptr_t)lwp->lwp_oldcontext; sp->pr_ustack = lwp->lwp_ustack; (void) strncpy(sp->pr_clname, sclass[t->t_cid].cl_name, sizeof (sp->pr_clname) - 1); if (flags & PR_STOPPED) hrt2ts(t->t_stoptime, &sp->pr_tstamp); usr = ms->ms_acct[LMS_USER]; sys = ms->ms_acct[LMS_SYSTEM] + ms->ms_acct[LMS_TRAP]; scalehrtime(&usr); scalehrtime(&sys); 
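	/*
	 * The microstate times accumulated in ms_acct[] are unscaled;
	 * the scalehrtime() calls above convert them to nanoseconds so
	 * that the hrt2ts() conversions below produce correct
	 * timestruc_t values.
	 */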
hrt2ts(usr, &sp->pr_utime); hrt2ts(sys, &sp->pr_stime); /* * Fetch the current instruction, if not a system process. * We don't attempt this unless the lwp is stopped. */ if ((p->p_flag & SSYS) || p->p_as == &kas) sp->pr_flags |= (PR_ISSYS|PR_PCINVAL); else if (!(flags & PR_STOPPED)) sp->pr_flags |= PR_PCINVAL; else if (!prfetchinstr(lwp, &instr)) sp->pr_flags |= PR_PCINVAL; else sp->pr_instr = instr; /* * Drop p_lock while touching the lwp's stack. */ mutex_exit(&p->p_lock); if (prisstep(lwp)) sp->pr_flags |= PR_STEP; if ((flags & (PR_STOPPED|PR_ASLEEP)) && t->t_sysnum) { int i; sp->pr_syscall = get_syscall_args(lwp, (long *)sp->pr_sysarg, &i); sp->pr_nsysarg = (ushort_t)i; } if ((flags & PR_STOPPED) || t == curthread) prgetprregs(lwp, sp->pr_reg); if ((t->t_state == TS_STOPPED && t->t_whystop == PR_SYSEXIT) || (flags & PR_VFORKP)) { user_t *up; auxv_t *auxp; int i; sp->pr_errno = prgetrvals(lwp, &sp->pr_rval1, &sp->pr_rval2); if (sp->pr_errno == 0) sp->pr_errpriv = PRIV_NONE; else sp->pr_errpriv = lwp->lwp_badpriv; if (t->t_sysnum == SYS_execve) { up = PTOU(p); sp->pr_sysarg[0] = 0; sp->pr_sysarg[1] = (uintptr_t)up->u_argv; sp->pr_sysarg[2] = (uintptr_t)up->u_envp; sp->pr_sysarg[3] = 0; for (i = 0, auxp = up->u_auxv; i < sizeof (up->u_auxv) / sizeof (up->u_auxv[0]); i++, auxp++) { if (auxp->a_type == AT_SUN_EXECNAME) { sp->pr_sysarg[0] = (uintptr_t)auxp->a_un.a_ptr; break; } } } } if (prhasfp()) prgetprfpregs(lwp, &sp->pr_fpreg); mutex_enter(&p->p_lock); } /* * Get the sigaction structure for the specified signal. The u-block * must already have been mapped in by the caller. */ void prgetaction(proc_t *p, user_t *up, uint_t sig, struct sigaction *sp) { int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG; bzero(sp, sizeof (*sp)); if (sig != 0 && (unsigned)sig < nsig) { sp->sa_handler = up->u_signal[sig-1]; prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); if (sigismember(&up->u_sigonstack, sig)) sp->sa_flags |= SA_ONSTACK; if (sigismember(&up->u_sigresethand, sig)) sp->sa_flags |= SA_RESETHAND; if (sigismember(&up->u_sigrestart, sig)) sp->sa_flags |= SA_RESTART; if (sigismember(&p->p_siginfo, sig)) sp->sa_flags |= SA_SIGINFO; if (sigismember(&up->u_signodefer, sig)) sp->sa_flags |= SA_NODEFER; if (sig == SIGCLD) { if (p->p_flag & SNOWAIT) sp->sa_flags |= SA_NOCLDWAIT; if ((p->p_flag & SJCTL) == 0) sp->sa_flags |= SA_NOCLDSTOP; } } } #ifdef _SYSCALL32_IMPL void prgetaction32(proc_t *p, user_t *up, uint_t sig, struct sigaction32 *sp) { int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG; bzero(sp, sizeof (*sp)); if (sig != 0 && (unsigned)sig < nsig) { sp->sa_handler = (caddr32_t)(uintptr_t)up->u_signal[sig-1]; prassignset(&sp->sa_mask, &up->u_sigmask[sig-1]); if (sigismember(&up->u_sigonstack, sig)) sp->sa_flags |= SA_ONSTACK; if (sigismember(&up->u_sigresethand, sig)) sp->sa_flags |= SA_RESETHAND; if (sigismember(&up->u_sigrestart, sig)) sp->sa_flags |= SA_RESTART; if (sigismember(&p->p_siginfo, sig)) sp->sa_flags |= SA_SIGINFO; if (sigismember(&up->u_signodefer, sig)) sp->sa_flags |= SA_NODEFER; if (sig == SIGCLD) { if (p->p_flag & SNOWAIT) sp->sa_flags |= SA_NOCLDWAIT; if ((p->p_flag & SJCTL) == 0) sp->sa_flags |= SA_NOCLDSTOP; } } } #endif /* _SYSCALL32_IMPL */ /* * Count the number of segments in this process's address space. 
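 * Each address space segment is subdivided by pr_getprot() into ranges
 * of uniform protections; every non-empty range counts as one "segment"
 * here, so the result can exceed the number of seg structures in the as.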
 */
uint_t
prnsegs(struct as *as, int reserved)
{
	uint_t n = 0;
	struct seg *seg;

	ASSERT(as != &kas && AS_WRITE_HELD(as));

	for (seg = AS_SEGFIRST(as); seg != NULL; seg = AS_SEGNEXT(as, seg)) {
		caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved);
		caddr_t saddr, naddr;
		void *tmp = NULL;

		if ((seg->s_flags & S_HOLE) != 0) {
			continue;
		}

		for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) {
			(void) pr_getprot(seg, reserved, &tmp,
			    &saddr, &naddr, eaddr);
			if (saddr != naddr) {
				n++;
				/*
				 * prnsegs() was formerly designated to return
				 * an 'int' despite having no ability or use
				 * for negative results.  As part of changing
				 * it to 'uint_t', keep the old effective limit
				 * of INT_MAX in place.
				 */
				if (n == INT_MAX) {
					pr_getprot_done(&tmp);
					ASSERT(tmp == NULL);
					return (n);
				}
			}
		}

		ASSERT(tmp == NULL);
	}

	return (n);
}

/*
 * Convert uint32_t to decimal string w/o leading zeros.
 * Add trailing null characters if 'len' is greater than string length.
 * Return the string length.
 */
int
pr_u32tos(uint32_t n, char *s, int len)
{
	char cbuf[11];		/* 32-bit unsigned integer fits in 10 digits */
	char *cp = cbuf;
	char *end = s + len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	while (s < end)		/* optional pad */
		*s++ = '\0';

	return (len);
}

/*
 * Convert uint64_t to decimal string w/o leading zeros.
 * Return the string length.
 */
static int
pr_u64tos(uint64_t n, char *s)
{
	char cbuf[21];		/* 64-bit unsigned integer fits in 20 digits */
	char *cp = cbuf;
	int len;

	do {
		*cp++ = (char)(n % 10 + '0');
		n /= 10;
	} while (n);

	len = (int)(cp - cbuf);

	do {
		*s++ = *--cp;
	} while (cp > cbuf);

	return (len);
}

/*
 * Similar to getf() / getf_gen(), but for the specified process.  On success,
 * returns the fp with fp->f_count incremented.  The caller MUST call
 * pr_releasef(fp) on the returned fp after completing any actions using that
 * fp.  We return a reference-held (fp->f_count bumped) file_t so no other
 * closef() can invoke destructive VOP_CLOSE actions while we're inspecting
 * the process's FD.
 *
 * Returns NULL for errors: either an empty process-table slot post-fi_lock
 * and UF_ENTER, or too many mutex_tryenter() failures on the file_t's
 * f_tlock.  Both failure modes have DTrace probes.
 *
 * The current design of the procfs "close" code path uses the following lock
 * order of:
 *
 * 1: (file_t) f_tlock
 * 2: (proc_t) p_lock AND setting p->p_proc_flag's P_PR_LOCK
 *
 * That happens because closef() holds f_tlock while calling fop_close(),
 * which can be prclose(), which currently waits on and sets P_PR_LOCK at its
 * beginning.
 *
 * That lock order creates a challenge for pr_getf, which needs to take those
 * locks in the opposite order when the fd points to a procfs file descriptor.
 * The solution chosen here is to use mutex_tryenter on f_tlock and retry some
 * (limited) number of times, failing if we don't get both locks.
 *
 * The cases where this can fail are rare, and all involve a procfs caller
 * asking for info (eg. FDINFO) on another procfs FD.  In these cases,
 * returning EBADF (which results from a NULL return from pr_getf()) is
 * acceptable.
 *
 * One can increase the number of tries in pr_getf_maxtries if one is worried
 * about the contentious case.
 */

uint64_t pr_getf_tryfails;	/* Bumped for statistic purposes. */
int pr_getf_maxtries = 3;	/* So you can tune it from /etc/system */

file_t *
pr_getf(proc_t *p, uint_t fd, short *flag)
{
	uf_entry_t *ufp;
	uf_info_t *fip;
	file_t *fp;
	int tries = 0;

	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));

retry:
	fip = P_FINFO(p);

	if (fd >= fip->fi_nfiles)
		return (NULL);

	mutex_exit(&p->p_lock);
	mutex_enter(&fip->fi_lock);
	UF_ENTER(ufp, fip, fd);
	if ((fp = ufp->uf_file) != NULL && fp->f_count > 0) {
		if (mutex_tryenter(&fp->f_tlock)) {
			ASSERT(fp->f_count > 0);
			fp->f_count++;
			mutex_exit(&fp->f_tlock);
			if (flag != NULL)
				*flag = ufp->uf_flag;
		} else {
			/*
			 * Note the number of mutex_tryenter() attempts.
			 *
			 * The exit path will catch this and try again if we
			 * are below the retry threshold (pr_getf_maxtries).
			 */
			tries++;
			pr_getf_tryfails++;
			/*
			 * If we hit pr_getf_maxtries, we'll return NULL.
			 * DTrace scripts looking for this sort of failure
			 * should check when arg1 is pr_getf_maxtries.
			 */
			DTRACE_PROBE2(pr_getf_tryfail, file_t *, fp, int,
			    tries);
			fp = NULL;
		}
	} else {
		fp = NULL;
		/* If we fail here, someone else closed this FD. */
		DTRACE_PROBE1(pr_getf_emptyslot, int, tries);
		tries = pr_getf_maxtries;	/* Don't bother retrying. */
	}
	UF_EXIT(ufp);
	mutex_exit(&fip->fi_lock);
	mutex_enter(&p->p_lock);

	/* Use goto instead of tail-recursion so we can keep "tries" around. */
	if (fp == NULL) {
		/* "tries" starts at 1. */
		if (tries < pr_getf_maxtries)
			goto retry;
	} else {
		/*
		 * Probes here will detect successes after arg1's number of
		 * mutex_tryenter() calls.
		 */
		DTRACE_PROBE2(pr_getf_trysuccess, file_t *, fp, int,
		    tries + 1);
	}

	return (fp);
}

/*
 * Just as pr_getf() is a little unusual in how it goes about making the file_t
 * safe for procfs consumers to access it, so too is pr_releasef() for safely
 * releasing that "hold".  The "hold" is unlike normal file descriptor activity
 * -- procfs is just an interloper here, wanting access to the vnode_t without
 * risk of a racing close() disrupting the state.  Just as pr_getf() avoids
 * some of the typical file_t behavior (such as auditing) when establishing
 * its hold, so too should pr_releasef().  It should not go through the
 * motions of closef() (since it is not a true close()) unless racing activity
 * causes it to be the last actor holding the refcount above zero.
 *
 * Under normal circumstances, we expect to find file_t`f_count > 1 after
 * the successful pr_getf() call.  We are, after all, accessing a resource
 * already held by the process in question.  We would also expect to rarely
 * race with a close() of the underlying fd, meaning that file_t`f_count > 1
 * would still hold at pr_releasef() time.  That would mean we only need to
 * decrement f_count, leaving it to the process to later close the fd (thus
 * triggering VOP_CLOSE(), etc).
 *
 * It is only when that process manages to close() the fd while we have it
 * "held" in procfs that we must make a trip through the traditional closef()
 * logic to ensure proper tear-down of the file_t.
 */
void
pr_releasef(file_t *fp)
{
	mutex_enter(&fp->f_tlock);
	if (fp->f_count > 1) {
		/*
		 * This is the most common case: The file is still held open
		 * by the process, and we simply need to release our hold by
		 * decrementing f_count
		 */
		fp->f_count--;
		mutex_exit(&fp->f_tlock);
	} else {
		/*
		 * A rare occasion: The process snuck a close() of this file
		 * while we were doing our business in procfs.  Given that
		 * f_count == 1, we are the only one with a reference to the
		 * file_t and need to take a trip through closef() to free it.
*/ mutex_exit(&fp->f_tlock); (void) closef(fp); } } void pr_object_name(char *name, vnode_t *vp, struct vattr *vattr) { char *s = name; struct vfs *vfsp; struct vfssw *vfsswp; if ((vfsp = vp->v_vfsp) != NULL && ((vfsswp = vfssw + vfsp->vfs_fstype), vfsswp->vsw_name) && *vfsswp->vsw_name) { (void) strcpy(s, vfsswp->vsw_name); s += strlen(s); *s++ = '.'; } s += pr_u32tos(getmajor(vattr->va_fsid), s, 0); *s++ = '.'; s += pr_u32tos(getminor(vattr->va_fsid), s, 0); *s++ = '.'; s += pr_u64tos(vattr->va_nodeid, s); *s++ = '\0'; } struct seg * break_seg(proc_t *p) { caddr_t addr = p->p_brkbase; struct seg *seg; struct vnode *vp; if (p->p_brksize != 0) addr += p->p_brksize - 1; seg = as_segat(p->p_as, addr); if (seg != NULL && seg->s_ops == &segvn_ops && (SEGOP_GETVP(seg, seg->s_base, &vp) != 0 || vp == NULL)) return (seg); return (NULL); } /* * Implementation of service functions to handle procfs generic chained * copyout buffers. */ typedef struct pr_iobuf_list { list_node_t piol_link; /* buffer linkage */ size_t piol_size; /* total size (header + data) */ size_t piol_usedsize; /* amount to copy out from this buf */ } piol_t; #define MAPSIZE (64 * 1024) #define PIOL_DATABUF(iol) ((void *)(&(iol)[1])) void pr_iol_initlist(list_t *iolhead, size_t itemsize, int n) { piol_t *iol; size_t initial_size = MIN(1, n) * itemsize; list_create(iolhead, sizeof (piol_t), offsetof(piol_t, piol_link)); ASSERT(list_head(iolhead) == NULL); ASSERT(itemsize < MAPSIZE - sizeof (*iol)); ASSERT(initial_size > 0); /* * Someone creating chained copyout buffers may ask for less than * MAPSIZE if the amount of data to be buffered is known to be * smaller than that. * But in order to prevent involuntary self-denial of service, * the requested input size is clamped at MAPSIZE. */ initial_size = MIN(MAPSIZE, initial_size + sizeof (*iol)); iol = kmem_alloc(initial_size, KM_SLEEP); list_insert_head(iolhead, iol); iol->piol_usedsize = 0; iol->piol_size = initial_size; } void * pr_iol_newbuf(list_t *iolhead, size_t itemsize) { piol_t *iol; char *new; ASSERT(itemsize < MAPSIZE - sizeof (*iol)); ASSERT(list_head(iolhead) != NULL); iol = (piol_t *)list_tail(iolhead); if (iol->piol_size < iol->piol_usedsize + sizeof (*iol) + itemsize) { /* * Out of space in the current buffer. Allocate more. 
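			 * Each additional buffer is a full MAPSIZE (64K)
			 * allocation appended at the tail of the list; the
			 * new item is then carved out of it below.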
*/ piol_t *newiol; newiol = kmem_alloc(MAPSIZE, KM_SLEEP); newiol->piol_size = MAPSIZE; newiol->piol_usedsize = 0; list_insert_after(iolhead, iol, newiol); iol = list_next(iolhead, iol); ASSERT(iol == newiol); } new = (char *)PIOL_DATABUF(iol) + iol->piol_usedsize; iol->piol_usedsize += itemsize; bzero(new, itemsize); return (new); } void pr_iol_freelist(list_t *iolhead) { piol_t *iol; while ((iol = list_head(iolhead)) != NULL) { list_remove(iolhead, iol); kmem_free(iol, iol->piol_size); } list_destroy(iolhead); } int pr_iol_copyout_and_free(list_t *iolhead, caddr_t *tgt, int errin) { int error = errin; piol_t *iol; while ((iol = list_head(iolhead)) != NULL) { list_remove(iolhead, iol); if (!error) { if (copyout(PIOL_DATABUF(iol), *tgt, iol->piol_usedsize)) error = EFAULT; *tgt += iol->piol_usedsize; } kmem_free(iol, iol->piol_size); } list_destroy(iolhead); return (error); } int pr_iol_uiomove_and_free(list_t *iolhead, uio_t *uiop, int errin) { offset_t off = uiop->uio_offset; char *base; size_t size; piol_t *iol; int error = errin; while ((iol = list_head(iolhead)) != NULL) { list_remove(iolhead, iol); base = PIOL_DATABUF(iol); size = iol->piol_usedsize; if (off <= size && error == 0 && uiop->uio_resid > 0) error = uiomove(base + off, size - off, UIO_READ, uiop); off = MAX(0, off - (offset_t)size); kmem_free(iol, iol->piol_size); } list_destroy(iolhead); return (error); } /* * Return an array of structures with memory map information. * We allocate here; the caller must deallocate. */ int prgetmap(proc_t *p, int reserved, list_t *iolhead) { struct as *as = p->p_as; prmap_t *mp; struct seg *seg; struct seg *brkseg, *stkseg; struct vnode *vp; struct vattr vattr; uint_t prot; ASSERT(as != &kas && AS_WRITE_HELD(as)); /* * Request an initial buffer size that doesn't waste memory * if the address space has only a small number of segments. */ pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); if ((seg = AS_SEGFIRST(as)) == NULL) return (0); brkseg = break_seg(p); stkseg = as_segat(as, prgetstackbase(p)); do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); caddr_t saddr, naddr; void *tmp = NULL; if ((seg->s_flags & S_HOLE) != 0) { continue; } for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { prot = pr_getprot(seg, reserved, &tmp, &saddr, &naddr, eaddr); if (saddr == naddr) continue; mp = pr_iol_newbuf(iolhead, sizeof (*mp)); mp->pr_vaddr = (uintptr_t)saddr; mp->pr_size = naddr - saddr; mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); mp->pr_mflags = 0; if (prot & PROT_READ) mp->pr_mflags |= MA_READ; if (prot & PROT_WRITE) mp->pr_mflags |= MA_WRITE; if (prot & PROT_EXEC) mp->pr_mflags |= MA_EXEC; if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) mp->pr_mflags |= MA_SHARED; if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) mp->pr_mflags |= MA_NORESERVE; if (seg->s_ops == &segspt_shmops || (seg->s_ops == &segvn_ops && (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) mp->pr_mflags |= MA_ANON; if (seg == brkseg) mp->pr_mflags |= MA_BREAK; else if (seg == stkseg) { mp->pr_mflags |= MA_STACK; if (reserved) { size_t maxstack = ((size_t)p->p_stk_ctl + PAGEOFFSET) & PAGEMASK; mp->pr_vaddr = (uintptr_t)prgetstackbase(p) + p->p_stksize - maxstack; mp->pr_size = (uintptr_t)naddr - mp->pr_vaddr; } } if (seg->s_ops == &segspt_shmops) mp->pr_mflags |= MA_ISM | MA_SHM; mp->pr_pagesize = PAGESIZE; /* * Manufacture a filename for the "object" directory. 
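			 * pr_object_name() builds the name from the
			 * filesystem type name, the major and minor numbers
			 * of va_fsid and the node id (i.e. a name of the
			 * form <fstype>.<major>.<minor>.<nodeid>); the
			 * executable itself is special-cased as "a.out".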
*/ vattr.va_mask = AT_FSID|AT_NODEID; if (seg->s_ops == &segvn_ops && SEGOP_GETVP(seg, saddr, &vp) == 0 && vp != NULL && vp->v_type == VREG && VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { if (vp == p->p_exec) (void) strcpy(mp->pr_mapname, "a.out"); else pr_object_name(mp->pr_mapname, vp, &vattr); } /* * Get the SysV shared memory id, if any. */ if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && (mp->pr_shmid = shmgetid(p, seg->s_base)) != SHMID_NONE) { if (mp->pr_shmid == SHMID_FREE) mp->pr_shmid = -1; mp->pr_mflags |= MA_SHM; } else { mp->pr_shmid = -1; } } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); return (0); } #ifdef _SYSCALL32_IMPL int prgetmap32(proc_t *p, int reserved, list_t *iolhead) { struct as *as = p->p_as; prmap32_t *mp; struct seg *seg; struct seg *brkseg, *stkseg; struct vnode *vp; struct vattr vattr; uint_t prot; ASSERT(as != &kas && AS_WRITE_HELD(as)); /* * Request an initial buffer size that doesn't waste memory * if the address space has only a small number of segments. */ pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); if ((seg = AS_SEGFIRST(as)) == NULL) return (0); brkseg = break_seg(p); stkseg = as_segat(as, prgetstackbase(p)); do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, reserved); caddr_t saddr, naddr; void *tmp = NULL; if ((seg->s_flags & S_HOLE) != 0) { continue; } for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { prot = pr_getprot(seg, reserved, &tmp, &saddr, &naddr, eaddr); if (saddr == naddr) continue; mp = pr_iol_newbuf(iolhead, sizeof (*mp)); mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; mp->pr_size = (size32_t)(naddr - saddr); mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); mp->pr_mflags = 0; if (prot & PROT_READ) mp->pr_mflags |= MA_READ; if (prot & PROT_WRITE) mp->pr_mflags |= MA_WRITE; if (prot & PROT_EXEC) mp->pr_mflags |= MA_EXEC; if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) mp->pr_mflags |= MA_SHARED; if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) mp->pr_mflags |= MA_NORESERVE; if (seg->s_ops == &segspt_shmops || (seg->s_ops == &segvn_ops && (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) mp->pr_mflags |= MA_ANON; if (seg == brkseg) mp->pr_mflags |= MA_BREAK; else if (seg == stkseg) { mp->pr_mflags |= MA_STACK; if (reserved) { size_t maxstack = ((size_t)p->p_stk_ctl + PAGEOFFSET) & PAGEMASK; uintptr_t vaddr = (uintptr_t)prgetstackbase(p) + p->p_stksize - maxstack; mp->pr_vaddr = (caddr32_t)vaddr; mp->pr_size = (size32_t) ((uintptr_t)naddr - vaddr); } } if (seg->s_ops == &segspt_shmops) mp->pr_mflags |= MA_ISM | MA_SHM; mp->pr_pagesize = PAGESIZE; /* * Manufacture a filename for the "object" directory. */ vattr.va_mask = AT_FSID|AT_NODEID; if (seg->s_ops == &segvn_ops && SEGOP_GETVP(seg, saddr, &vp) == 0 && vp != NULL && vp->v_type == VREG && VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { if (vp == p->p_exec) (void) strcpy(mp->pr_mapname, "a.out"); else pr_object_name(mp->pr_mapname, vp, &vattr); } /* * Get the SysV shared memory id, if any. */ if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && (mp->pr_shmid = shmgetid(p, seg->s_base)) != SHMID_NONE) { if (mp->pr_shmid == SHMID_FREE) mp->pr_shmid = -1; mp->pr_mflags |= MA_SHM; } else { mp->pr_shmid = -1; } } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); return (0); } #endif /* _SYSCALL32_IMPL */ /* * Return the size of the /proc page data file. 
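 * The size is a prpageheader_t plus, for each mapping with a non-zero
 * page count, a prasmap_t followed by one status byte per page, rounded
 * up to an eight-byte boundary; prpdread() below relies on this layout.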
*/ size_t prpdsize(struct as *as) { struct seg *seg; size_t size; ASSERT(as != &kas && AS_WRITE_HELD(as)); if ((seg = AS_SEGFIRST(as)) == NULL) return (0); size = sizeof (prpageheader_t); do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); caddr_t saddr, naddr; void *tmp = NULL; size_t npage; if ((seg->s_flags & S_HOLE) != 0) { continue; } for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if ((npage = (naddr - saddr) / PAGESIZE) != 0) size += sizeof (prasmap_t) + round8(npage); } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); return (size); } #ifdef _SYSCALL32_IMPL size_t prpdsize32(struct as *as) { struct seg *seg; size_t size; ASSERT(as != &kas && AS_WRITE_HELD(as)); if ((seg = AS_SEGFIRST(as)) == NULL) return (0); size = sizeof (prpageheader32_t); do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); caddr_t saddr, naddr; void *tmp = NULL; size_t npage; if ((seg->s_flags & S_HOLE) != 0) { continue; } for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { (void) pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if ((npage = (naddr - saddr) / PAGESIZE) != 0) size += sizeof (prasmap32_t) + round8(npage); } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); return (size); } #endif /* _SYSCALL32_IMPL */ /* * Read page data information. */ int prpdread(proc_t *p, uint_t hatid, struct uio *uiop) { struct as *as = p->p_as; caddr_t buf; size_t size; prpageheader_t *php; prasmap_t *pmp; struct seg *seg; int error; again: AS_LOCK_ENTER(as, RW_WRITER); if ((seg = AS_SEGFIRST(as)) == NULL) { AS_LOCK_EXIT(as); return (0); } size = prpdsize(as); if (uiop->uio_resid < size) { AS_LOCK_EXIT(as); return (E2BIG); } buf = kmem_zalloc(size, KM_SLEEP); php = (prpageheader_t *)buf; pmp = (prasmap_t *)(buf + sizeof (prpageheader_t)); hrt2ts(gethrtime(), &php->pr_tstamp); php->pr_nmap = 0; php->pr_npage = 0; do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); caddr_t saddr, naddr; void *tmp = NULL; if ((seg->s_flags & S_HOLE) != 0) { continue; } for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { struct vnode *vp; struct vattr vattr; size_t len; size_t npage; uint_t prot; uintptr_t next; prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if ((len = (size_t)(naddr - saddr)) == 0) continue; npage = len / PAGESIZE; next = (uintptr_t)(pmp + 1) + round8(npage); /* * It's possible that the address space can change * subtlely even though we're holding as->a_lock * due to the nondeterminism of page_exists() in * the presence of asychronously flushed pages or * mapped files whose sizes are changing. * page_exists() may be called indirectly from * pr_getprot() by a SEGOP_INCORE() routine. * If this happens we need to make sure we don't * overrun the buffer whose size we computed based * on the initial iteration through the segments. * Once we've detected an overflow, we need to clean * up the temporary memory allocated in pr_getprot() * and retry. If there's a pending signal, we return * EINTR so that this thread can be dislodged if * a latent bug causes us to spin indefinitely. 
*/ if (next > (uintptr_t)buf + size) { pr_getprot_done(&tmp); AS_LOCK_EXIT(as); kmem_free(buf, size); if (ISSIG(curthread, JUSTLOOKING)) return (EINTR); goto again; } php->pr_nmap++; php->pr_npage += npage; pmp->pr_vaddr = (uintptr_t)saddr; pmp->pr_npage = npage; pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); pmp->pr_mflags = 0; if (prot & PROT_READ) pmp->pr_mflags |= MA_READ; if (prot & PROT_WRITE) pmp->pr_mflags |= MA_WRITE; if (prot & PROT_EXEC) pmp->pr_mflags |= MA_EXEC; if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) pmp->pr_mflags |= MA_SHARED; if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) pmp->pr_mflags |= MA_NORESERVE; if (seg->s_ops == &segspt_shmops || (seg->s_ops == &segvn_ops && (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) pmp->pr_mflags |= MA_ANON; if (seg->s_ops == &segspt_shmops) pmp->pr_mflags |= MA_ISM | MA_SHM; pmp->pr_pagesize = PAGESIZE; /* * Manufacture a filename for the "object" directory. */ vattr.va_mask = AT_FSID|AT_NODEID; if (seg->s_ops == &segvn_ops && SEGOP_GETVP(seg, saddr, &vp) == 0 && vp != NULL && vp->v_type == VREG && VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { if (vp == p->p_exec) (void) strcpy(pmp->pr_mapname, "a.out"); else pr_object_name(pmp->pr_mapname, vp, &vattr); } /* * Get the SysV shared memory id, if any. */ if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && (pmp->pr_shmid = shmgetid(p, seg->s_base)) != SHMID_NONE) { if (pmp->pr_shmid == SHMID_FREE) pmp->pr_shmid = -1; pmp->pr_mflags |= MA_SHM; } else { pmp->pr_shmid = -1; } hat_getstat(as, saddr, len, hatid, (char *)(pmp + 1), HAT_SYNC_ZERORM); pmp = (prasmap_t *)next; } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); AS_LOCK_EXIT(as); ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); kmem_free(buf, size); return (error); } #ifdef _SYSCALL32_IMPL int prpdread32(proc_t *p, uint_t hatid, struct uio *uiop) { struct as *as = p->p_as; caddr_t buf; size_t size; prpageheader32_t *php; prasmap32_t *pmp; struct seg *seg; int error; again: AS_LOCK_ENTER(as, RW_WRITER); if ((seg = AS_SEGFIRST(as)) == NULL) { AS_LOCK_EXIT(as); return (0); } size = prpdsize32(as); if (uiop->uio_resid < size) { AS_LOCK_EXIT(as); return (E2BIG); } buf = kmem_zalloc(size, KM_SLEEP); php = (prpageheader32_t *)buf; pmp = (prasmap32_t *)(buf + sizeof (prpageheader32_t)); hrt2ts32(gethrtime(), &php->pr_tstamp); php->pr_nmap = 0; php->pr_npage = 0; do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); caddr_t saddr, naddr; void *tmp = NULL; if ((seg->s_flags & S_HOLE) != 0) { continue; } for (saddr = seg->s_base; saddr < eaddr; saddr = naddr) { struct vnode *vp; struct vattr vattr; size_t len; size_t npage; uint_t prot; uintptr_t next; prot = pr_getprot(seg, 0, &tmp, &saddr, &naddr, eaddr); if ((len = (size_t)(naddr - saddr)) == 0) continue; npage = len / PAGESIZE; next = (uintptr_t)(pmp + 1) + round8(npage); /* * It's possible that the address space can change * subtlely even though we're holding as->a_lock * due to the nondeterminism of page_exists() in * the presence of asychronously flushed pages or * mapped files whose sizes are changing. * page_exists() may be called indirectly from * pr_getprot() by a SEGOP_INCORE() routine. * If this happens we need to make sure we don't * overrun the buffer whose size we computed based * on the initial iteration through the segments. * Once we've detected an overflow, we need to clean * up the temporary memory allocated in pr_getprot() * and retry. 
If there's a pending signal, we return * EINTR so that this thread can be dislodged if * a latent bug causes us to spin indefinitely. */ if (next > (uintptr_t)buf + size) { pr_getprot_done(&tmp); AS_LOCK_EXIT(as); kmem_free(buf, size); if (ISSIG(curthread, JUSTLOOKING)) return (EINTR); goto again; } php->pr_nmap++; php->pr_npage += npage; pmp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; pmp->pr_npage = (size32_t)npage; pmp->pr_offset = SEGOP_GETOFFSET(seg, saddr); pmp->pr_mflags = 0; if (prot & PROT_READ) pmp->pr_mflags |= MA_READ; if (prot & PROT_WRITE) pmp->pr_mflags |= MA_WRITE; if (prot & PROT_EXEC) pmp->pr_mflags |= MA_EXEC; if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) pmp->pr_mflags |= MA_SHARED; if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) pmp->pr_mflags |= MA_NORESERVE; if (seg->s_ops == &segspt_shmops || (seg->s_ops == &segvn_ops && (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) pmp->pr_mflags |= MA_ANON; if (seg->s_ops == &segspt_shmops) pmp->pr_mflags |= MA_ISM | MA_SHM; pmp->pr_pagesize = PAGESIZE; /* * Manufacture a filename for the "object" directory. */ vattr.va_mask = AT_FSID|AT_NODEID; if (seg->s_ops == &segvn_ops && SEGOP_GETVP(seg, saddr, &vp) == 0 && vp != NULL && vp->v_type == VREG && VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { if (vp == p->p_exec) (void) strcpy(pmp->pr_mapname, "a.out"); else pr_object_name(pmp->pr_mapname, vp, &vattr); } /* * Get the SysV shared memory id, if any. */ if ((pmp->pr_mflags & MA_SHARED) && p->p_segacct && (pmp->pr_shmid = shmgetid(p, seg->s_base)) != SHMID_NONE) { if (pmp->pr_shmid == SHMID_FREE) pmp->pr_shmid = -1; pmp->pr_mflags |= MA_SHM; } else { pmp->pr_shmid = -1; } hat_getstat(as, saddr, len, hatid, (char *)(pmp + 1), HAT_SYNC_ZERORM); pmp = (prasmap32_t *)next; } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); AS_LOCK_EXIT(as); ASSERT((uintptr_t)pmp <= (uintptr_t)buf + size); error = uiomove(buf, (caddr_t)pmp - buf, UIO_READ, uiop); kmem_free(buf, size); return (error); } #endif /* _SYSCALL32_IMPL */ ushort_t prgetpctcpu(uint64_t pct) { /* * The value returned will be relevant in the zone of the examiner, * which may not be the same as the zone which performed the procfs * mount. */ int nonline = zone_ncpus_online_get(curproc->p_zone); /* * Prorate over online cpus so we don't exceed 100% */ if (nonline > 1) pct /= nonline; pct >>= 16; /* convert to 16-bit scaled integer */ if (pct > 0x8000) /* might happen, due to rounding */ pct = 0x8000; return ((ushort_t)pct); } /* * Return information used by ps(1). */ void prgetpsinfo(proc_t *p, psinfo_t *psp) { kthread_t *t; struct cred *cred; hrtime_t hrutime, hrstime; ASSERT(MUTEX_HELD(&p->p_lock)); if ((t = prchoose(p)) == NULL) /* returns locked thread */ bzero(psp, sizeof (*psp)); else { thread_unlock(t); bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); } /* * only export SSYS and SMSACCT; everything else is off-limits to * userland apps. */ psp->pr_flag = p->p_flag & (SSYS | SMSACCT); psp->pr_nlwp = p->p_lwpcnt; psp->pr_nzomb = p->p_zombcnt; mutex_enter(&p->p_crlock); cred = p->p_cred; psp->pr_uid = crgetruid(cred); psp->pr_euid = crgetuid(cred); psp->pr_gid = crgetrgid(cred); psp->pr_egid = crgetgid(cred); mutex_exit(&p->p_crlock); psp->pr_pid = p->p_pid; if (curproc->p_zone->zone_id != GLOBAL_ZONEID && (p->p_flag & SZONETOP)) { ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); /* * Inside local zones, fake zsched's pid as parent pids for * processes which reference processes outside of the zone. 
*/ psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; } else { psp->pr_ppid = p->p_ppid; } psp->pr_pgid = p->p_pgrp; psp->pr_sid = p->p_sessp->s_sid; psp->pr_taskid = p->p_task->tk_tkid; psp->pr_projid = p->p_task->tk_proj->kpj_id; psp->pr_poolid = p->p_pool->pool_id; psp->pr_zoneid = p->p_zone->zone_id; if ((psp->pr_contract = PRCTID(p)) == 0) psp->pr_contract = -1; psp->pr_addr = (uintptr_t)prgetpsaddr(p); switch (p->p_model) { case DATAMODEL_ILP32: psp->pr_dmodel = PR_MODEL_ILP32; break; case DATAMODEL_LP64: psp->pr_dmodel = PR_MODEL_LP64; break; } hrutime = mstate_aggr_state(p, LMS_USER); hrstime = mstate_aggr_state(p, LMS_SYSTEM); hrt2ts((hrutime + hrstime), &psp->pr_time); TICK_TO_TIMESTRUC(p->p_cutime + p->p_cstime, &psp->pr_ctime); if (t == NULL) { int wcode = p->p_wcode; /* must be atomic read */ if (wcode) psp->pr_wstat = wstat(wcode, p->p_wdata); psp->pr_ttydev = PRNODEV; psp->pr_lwp.pr_state = SZOMB; psp->pr_lwp.pr_sname = 'Z'; psp->pr_lwp.pr_bindpro = PBIND_NONE; psp->pr_lwp.pr_bindpset = PS_NONE; } else { user_t *up = PTOU(p); struct as *as; dev_t d; extern dev_t rwsconsdev, rconsdev, uconsdev; d = cttydev(p); /* * If the controlling terminal is the real * or workstation console device, map to what the * user thinks is the console device. Handle case when * rwsconsdev or rconsdev is set to NODEV for Starfire. */ if ((d == rwsconsdev || d == rconsdev) && d != NODEV) d = uconsdev; psp->pr_ttydev = (d == NODEV) ? PRNODEV : d; psp->pr_start = up->u_start; bcopy(up->u_comm, psp->pr_fname, MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); bcopy(up->u_psargs, psp->pr_psargs, MIN(PRARGSZ-1, PSARGSZ)); psp->pr_argc = up->u_argc; psp->pr_argv = up->u_argv; psp->pr_envp = up->u_envp; /* get the chosen lwp's lwpsinfo */ prgetlwpsinfo(t, &psp->pr_lwp); /* compute %cpu for the process */ if (p->p_lwpcnt == 1) psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; else { uint64_t pct = 0; hrtime_t cur_time = gethrtime_unscaled(); t = p->p_tlist; do { pct += cpu_update_pct(t, cur_time); } while ((t = t->t_forw) != p->p_tlist); psp->pr_pctcpu = prgetpctcpu(pct); } if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { psp->pr_size = 0; psp->pr_rssize = 0; } else { mutex_exit(&p->p_lock); AS_LOCK_ENTER(as, RW_READER); psp->pr_size = btopr(as->a_resvsize) * (PAGESIZE / 1024); psp->pr_rssize = rm_asrss(as) * (PAGESIZE / 1024); psp->pr_pctmem = rm_pctmemory(as); AS_LOCK_EXIT(as); mutex_enter(&p->p_lock); } } } static size_t prfdinfomisc(list_t *data, uint_t type, const void *val, size_t vlen) { pr_misc_header_t *misc; size_t len; len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); if (data != NULL) { misc = pr_iol_newbuf(data, len); misc->pr_misc_type = type; misc->pr_misc_size = len; misc++; bcopy((char *)val, (char *)misc, vlen); } return (len); } /* * There's no elegant way to determine if a character device * supports TLI, so just check a hardcoded list of known TLI * devices. 
*/ static boolean_t pristli(vnode_t *vp) { static const char *tlidevs[] = { "udp", "udp6", "tcp", "tcp6" }; char *devname; uint_t i; ASSERT(vp != NULL); if (vp->v_type != VCHR || vp->v_stream == NULL || vp->v_rdev == 0) return (B_FALSE); if ((devname = mod_major_to_name(getmajor(vp->v_rdev))) == NULL) return (B_FALSE); for (i = 0; i < ARRAY_SIZE(tlidevs); i++) { if (strcmp(devname, tlidevs[i]) == 0) return (B_TRUE); } return (B_FALSE); } static size_t prfdinfopath(proc_t *p, vnode_t *vp, list_t *data, cred_t *cred) { char *pathname; size_t pathlen; size_t sz = 0; /* * The global zone's path to a file in a non-global zone can exceed * MAXPATHLEN. */ pathlen = MAXPATHLEN * 2 + 1; pathname = kmem_alloc(pathlen, KM_SLEEP); if (vnodetopath(NULL, vp, pathname, pathlen, cred) == 0) { sz += prfdinfomisc(data, PR_PATHNAME, pathname, strlen(pathname) + 1); } kmem_free(pathname, pathlen); return (sz); } static size_t prfdinfotlisockopt(vnode_t *vp, list_t *data, cred_t *cred) { strcmd_t strcmd; int32_t rval; size_t sz = 0; strcmd.sc_cmd = TI_GETMYNAME; strcmd.sc_timeout = 1; strcmd.sc_len = STRCMDBUFSIZE; if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, &rval, NULL) == 0 && strcmd.sc_len > 0) { sz += prfdinfomisc(data, PR_SOCKETNAME, strcmd.sc_buf, strcmd.sc_len); } strcmd.sc_cmd = TI_GETPEERNAME; strcmd.sc_timeout = 1; strcmd.sc_len = STRCMDBUFSIZE; if (VOP_IOCTL(vp, _I_CMD, (intptr_t)&strcmd, FKIOCTL, cred, &rval, NULL) == 0 && strcmd.sc_len > 0) { sz += prfdinfomisc(data, PR_PEERSOCKNAME, strcmd.sc_buf, strcmd.sc_len); } return (sz); } static size_t prfdinfosockopt(vnode_t *vp, list_t *data, cred_t *cred) { sonode_t *so; socklen_t vlen; size_t sz = 0; uint_t i; if (vp->v_stream != NULL) { so = VTOSO(vp->v_stream->sd_vnode); if (so->so_version == SOV_STREAM) so = NULL; } else { so = VTOSO(vp); } if (so == NULL) return (0); DTRACE_PROBE1(sonode, sonode_t *, so); /* prmisc - PR_SOCKETNAME */ struct sockaddr_storage buf; struct sockaddr *name = (struct sockaddr *)&buf; vlen = sizeof (buf); if (SOP_GETSOCKNAME(so, name, &vlen, cred) == 0 && vlen > 0) sz += prfdinfomisc(data, PR_SOCKETNAME, name, vlen); /* prmisc - PR_PEERSOCKNAME */ vlen = sizeof (buf); if (SOP_GETPEERNAME(so, name, &vlen, B_FALSE, cred) == 0 && vlen > 0) sz += prfdinfomisc(data, PR_PEERSOCKNAME, name, vlen); /* prmisc - PR_SOCKOPTS_BOOL_OPTS */ static struct boolopt { int level; int opt; int bopt; } boolopts[] = { { SOL_SOCKET, SO_DEBUG, PR_SO_DEBUG }, { SOL_SOCKET, SO_REUSEADDR, PR_SO_REUSEADDR }, #ifdef SO_REUSEPORT /* SmartOS and OmniOS have SO_REUSEPORT */ { SOL_SOCKET, SO_REUSEPORT, PR_SO_REUSEPORT }, #endif { SOL_SOCKET, SO_KEEPALIVE, PR_SO_KEEPALIVE }, { SOL_SOCKET, SO_DONTROUTE, PR_SO_DONTROUTE }, { SOL_SOCKET, SO_BROADCAST, PR_SO_BROADCAST }, { SOL_SOCKET, SO_OOBINLINE, PR_SO_OOBINLINE }, { SOL_SOCKET, SO_DGRAM_ERRIND, PR_SO_DGRAM_ERRIND }, { SOL_SOCKET, SO_ALLZONES, PR_SO_ALLZONES }, { SOL_SOCKET, SO_MAC_EXEMPT, PR_SO_MAC_EXEMPT }, { SOL_SOCKET, SO_MAC_IMPLICIT, PR_SO_MAC_IMPLICIT }, { SOL_SOCKET, SO_EXCLBIND, PR_SO_EXCLBIND }, { SOL_SOCKET, SO_VRRP, PR_SO_VRRP }, { IPPROTO_UDP, UDP_NAT_T_ENDPOINT, PR_UDP_NAT_T_ENDPOINT } }; prsockopts_bool_opts_t opts; int val; if (data != NULL) { opts.prsock_bool_opts = 0; for (i = 0; i < ARRAY_SIZE(boolopts); i++) { vlen = sizeof (val); if (SOP_GETSOCKOPT(so, boolopts[i].level, boolopts[i].opt, &val, &vlen, 0, cred) == 0 && val != 0) { opts.prsock_bool_opts |= boolopts[i].bopt; } } } sz += prfdinfomisc(data, PR_SOCKOPTS_BOOL_OPTS, &opts, sizeof (opts)); /* prmisc - 
PR_SOCKOPT_LINGER */ struct linger l; vlen = sizeof (l); if (SOP_GETSOCKOPT(so, SOL_SOCKET, SO_LINGER, &l, &vlen, 0, cred) == 0 && vlen > 0) { sz += prfdinfomisc(data, PR_SOCKOPT_LINGER, &l, vlen); } /* prmisc - PR_SOCKOPT_* int types */ static struct sopt { int level; int opt; int bopt; } sopts[] = { { SOL_SOCKET, SO_TYPE, PR_SOCKOPT_TYPE }, { SOL_SOCKET, SO_SNDBUF, PR_SOCKOPT_SNDBUF }, { SOL_SOCKET, SO_RCVBUF, PR_SOCKOPT_RCVBUF } }; for (i = 0; i < ARRAY_SIZE(sopts); i++) { vlen = sizeof (val); if (SOP_GETSOCKOPT(so, sopts[i].level, sopts[i].opt, &val, &vlen, 0, cred) == 0 && vlen > 0) { sz += prfdinfomisc(data, sopts[i].bopt, &val, vlen); } } /* prmisc - PR_SOCKOPT_IP_NEXTHOP */ in_addr_t nexthop_val; vlen = sizeof (nexthop_val); if (SOP_GETSOCKOPT(so, IPPROTO_IP, IP_NEXTHOP, &nexthop_val, &vlen, 0, cred) == 0 && vlen > 0) { sz += prfdinfomisc(data, PR_SOCKOPT_IP_NEXTHOP, &nexthop_val, vlen); } /* prmisc - PR_SOCKOPT_IPV6_NEXTHOP */ struct sockaddr_in6 nexthop6_val; vlen = sizeof (nexthop6_val); if (SOP_GETSOCKOPT(so, IPPROTO_IPV6, IPV6_NEXTHOP, &nexthop6_val, &vlen, 0, cred) == 0 && vlen > 0) { sz += prfdinfomisc(data, PR_SOCKOPT_IPV6_NEXTHOP, &nexthop6_val, vlen); } /* prmisc - PR_SOCKOPT_TCP_CONGESTION */ char cong[CC_ALGO_NAME_MAX]; vlen = sizeof (cong); if (SOP_GETSOCKOPT(so, IPPROTO_TCP, TCP_CONGESTION, &cong, &vlen, 0, cred) == 0 && vlen > 0) { sz += prfdinfomisc(data, PR_SOCKOPT_TCP_CONGESTION, cong, vlen); } /* prmisc - PR_SOCKFILTERS_PRIV */ struct fil_info fi; vlen = sizeof (fi); if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, &fi, &vlen, 0, cred) == 0 && vlen != 0) { pr_misc_header_t *misc; size_t len; /* * We limit the number of returned filters to 32. * This is the maximum number that pfiles will print * anyway. */ vlen = MIN(32, fi.fi_pos + 1); vlen *= sizeof (fi); len = PRFDINFO_ROUNDUP(sizeof (*misc) + vlen); sz += len; if (data != NULL) { /* * So that the filter list can be built incrementally, * prfdinfomisc() is not used here. Instead we * allocate a buffer directly on the copyout list using * pr_iol_newbuf() */ misc = pr_iol_newbuf(data, len); misc->pr_misc_type = PR_SOCKFILTERS_PRIV; misc->pr_misc_size = len; misc++; len = vlen; if (SOP_GETSOCKOPT(so, SOL_FILTER, FIL_LIST, misc, &vlen, 0, cred) == 0) { /* * In case the number of filters has reduced * since the first call, explicitly zero out * any unpopulated space. 
*/ if (vlen < len) bzero(misc + vlen, len - vlen); } else { /* Something went wrong, zero out the result */ bzero(misc, vlen); } } } return (sz); } typedef struct prfdinfo_nm_path_cbdata { proc_t *nmp_p; u_offset_t nmp_sz; list_t *nmp_data; } prfdinfo_nm_path_cbdata_t; static int prfdinfo_nm_path(const struct namenode *np, cred_t *cred, void *arg) { prfdinfo_nm_path_cbdata_t *cb = arg; cb->nmp_sz += prfdinfopath(cb->nmp_p, np->nm_vnode, cb->nmp_data, cred); return (0); } u_offset_t prgetfdinfosize(proc_t *p, vnode_t *vp, cred_t *cred) { u_offset_t sz; /* * All fdinfo files will be at least this big - * sizeof fdinfo struct + zero length trailer */ sz = offsetof(prfdinfo_t, pr_misc) + sizeof (pr_misc_header_t); /* Pathname */ switch (vp->v_type) { case VDOOR: { prfdinfo_nm_path_cbdata_t cb = { .nmp_p = p, .nmp_data = NULL, .nmp_sz = 0 }; (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); sz += cb.nmp_sz; break; } case VSOCK: break; default: sz += prfdinfopath(p, vp, NULL, cred); } /* Socket options */ if (vp->v_type == VSOCK) sz += prfdinfosockopt(vp, NULL, cred); /* TLI/XTI sockets */ if (pristli(vp)) sz += prfdinfotlisockopt(vp, NULL, cred); return (sz); } int prgetfdinfo(proc_t *p, vnode_t *vp, prfdinfo_t *fdinfo, cred_t *cred, cred_t *file_cred, list_t *data) { vattr_t vattr; int error; /* * The buffer has been initialised to zero by pr_iol_newbuf(). * Initialise defaults for any values that should not default to zero. */ fdinfo->pr_uid = (uid_t)-1; fdinfo->pr_gid = (gid_t)-1; fdinfo->pr_size = -1; fdinfo->pr_locktype = F_UNLCK; fdinfo->pr_lockpid = -1; fdinfo->pr_locksysid = -1; fdinfo->pr_peerpid = -1; /* Offset */ /* * pr_offset has already been set from the underlying file_t. * Check if it is plausible and reset to -1 if not. */ if (fdinfo->pr_offset != -1 && VOP_SEEK(vp, 0, (offset_t *)&fdinfo->pr_offset, NULL) != 0) fdinfo->pr_offset = -1; /* * Attributes * * We have two cred_t structures available here. * 'cred' is the caller's credential, and 'file_cred' is the credential * for the file being inspected. * * When looking up the file attributes, file_cred is used in order * that the correct ownership is set for doors and FIFOs. Since the * caller has permission to read the fdinfo file in proc, this does * not expose any additional information. 
*/ vattr.va_mask = AT_STAT; if (VOP_GETATTR(vp, &vattr, 0, file_cred, NULL) == 0) { fdinfo->pr_major = getmajor(vattr.va_fsid); fdinfo->pr_minor = getminor(vattr.va_fsid); fdinfo->pr_rmajor = getmajor(vattr.va_rdev); fdinfo->pr_rminor = getminor(vattr.va_rdev); fdinfo->pr_ino = (ino64_t)vattr.va_nodeid; fdinfo->pr_size = (off64_t)vattr.va_size; fdinfo->pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode; fdinfo->pr_uid = vattr.va_uid; fdinfo->pr_gid = vattr.va_gid; if (vp->v_type == VSOCK) fdinfo->pr_fileflags |= sock_getfasync(vp); } /* locks */ flock64_t bf; bzero(&bf, sizeof (bf)); bf.l_type = F_WRLCK; if (VOP_FRLOCK(vp, F_GETLK, &bf, (uint16_t)(fdinfo->pr_fileflags & 0xffff), 0, NULL, cred, NULL) == 0 && bf.l_type != F_UNLCK) { fdinfo->pr_locktype = bf.l_type; fdinfo->pr_lockpid = bf.l_pid; fdinfo->pr_locksysid = bf.l_sysid; } /* peer cred */ k_peercred_t kpc; switch (vp->v_type) { case VFIFO: case VSOCK: { int32_t rval; error = VOP_IOCTL(vp, _I_GETPEERCRED, (intptr_t)&kpc, FKIOCTL, cred, &rval, NULL); break; } case VCHR: { struct strioctl strioc; int32_t rval; if (vp->v_stream == NULL) { error = ENOTSUP; break; } strioc.ic_cmd = _I_GETPEERCRED; strioc.ic_timout = INFTIM; strioc.ic_len = (int)sizeof (k_peercred_t); strioc.ic_dp = (char *)&kpc; error = strdoioctl(vp->v_stream, &strioc, FNATIVE | FKIOCTL, STR_NOSIG | K_TO_K, cred, &rval); break; } default: error = ENOTSUP; break; } if (error == 0 && kpc.pc_cr != NULL) { proc_t *peerp; fdinfo->pr_peerpid = kpc.pc_cpid; crfree(kpc.pc_cr); mutex_enter(&pidlock); if ((peerp = prfind(fdinfo->pr_peerpid)) != NULL) { user_t *up; mutex_enter(&peerp->p_lock); mutex_exit(&pidlock); up = PTOU(peerp); bcopy(up->u_comm, fdinfo->pr_peername, MIN(sizeof (up->u_comm), sizeof (fdinfo->pr_peername) - 1)); mutex_exit(&peerp->p_lock); } else { mutex_exit(&pidlock); } } /* pathname */ switch (vp->v_type) { case VDOOR: { prfdinfo_nm_path_cbdata_t cb = { .nmp_p = p, .nmp_data = data, .nmp_sz = 0 }; (void) nm_walk_mounts(vp, prfdinfo_nm_path, cred, &cb); break; } case VSOCK: /* * Don't attempt to determine the path for a socket as the * vnode has no associated v_path. It will cause a linear scan * of the dnlc table and result in no path being found. */ break; default: (void) prfdinfopath(p, vp, data, cred); } /* socket options */ if (vp->v_type == VSOCK) (void) prfdinfosockopt(vp, data, cred); /* TLI/XTI stream sockets */ if (pristli(vp)) (void) prfdinfotlisockopt(vp, data, cred); /* * Add a terminating header with a zero size. */ pr_misc_header_t *misc; misc = pr_iol_newbuf(data, sizeof (*misc)); misc->pr_misc_size = 0; misc->pr_misc_type = (uint_t)-1; return (0); } #ifdef _SYSCALL32_IMPL void prgetpsinfo32(proc_t *p, psinfo32_t *psp) { kthread_t *t; struct cred *cred; hrtime_t hrutime, hrstime; ASSERT(MUTEX_HELD(&p->p_lock)); if ((t = prchoose(p)) == NULL) /* returns locked thread */ bzero(psp, sizeof (*psp)); else { thread_unlock(t); bzero(psp, sizeof (*psp) - sizeof (psp->pr_lwp)); } /* * only export SSYS and SMSACCT; everything else is off-limits to * userland apps. 
*/ psp->pr_flag = p->p_flag & (SSYS | SMSACCT); psp->pr_nlwp = p->p_lwpcnt; psp->pr_nzomb = p->p_zombcnt; mutex_enter(&p->p_crlock); cred = p->p_cred; psp->pr_uid = crgetruid(cred); psp->pr_euid = crgetuid(cred); psp->pr_gid = crgetrgid(cred); psp->pr_egid = crgetgid(cred); mutex_exit(&p->p_crlock); psp->pr_pid = p->p_pid; if (curproc->p_zone->zone_id != GLOBAL_ZONEID && (p->p_flag & SZONETOP)) { ASSERT(p->p_zone->zone_id != GLOBAL_ZONEID); /* * Inside local zones, fake zsched's pid as parent pids for * processes which reference processes outside of the zone. */ psp->pr_ppid = curproc->p_zone->zone_zsched->p_pid; } else { psp->pr_ppid = p->p_ppid; } psp->pr_pgid = p->p_pgrp; psp->pr_sid = p->p_sessp->s_sid; psp->pr_taskid = p->p_task->tk_tkid; psp->pr_projid = p->p_task->tk_proj->kpj_id; psp->pr_poolid = p->p_pool->pool_id; psp->pr_zoneid = p->p_zone->zone_id; if ((psp->pr_contract = PRCTID(p)) == 0) psp->pr_contract = -1; psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ switch (p->p_model) { case DATAMODEL_ILP32: psp->pr_dmodel = PR_MODEL_ILP32; break; case DATAMODEL_LP64: psp->pr_dmodel = PR_MODEL_LP64; break; } hrutime = mstate_aggr_state(p, LMS_USER); hrstime = mstate_aggr_state(p, LMS_SYSTEM); hrt2ts32(hrutime + hrstime, &psp->pr_time); TICK_TO_TIMESTRUC32(p->p_cutime + p->p_cstime, &psp->pr_ctime); if (t == NULL) { extern int wstat(int, int); /* needs a header file */ int wcode = p->p_wcode; /* must be atomic read */ if (wcode) psp->pr_wstat = wstat(wcode, p->p_wdata); psp->pr_ttydev = PRNODEV32; psp->pr_lwp.pr_state = SZOMB; psp->pr_lwp.pr_sname = 'Z'; } else { user_t *up = PTOU(p); struct as *as; dev_t d; extern dev_t rwsconsdev, rconsdev, uconsdev; d = cttydev(p); /* * If the controlling terminal is the real * or workstation console device, map to what the * user thinks is the console device. Handle case when * rwsconsdev or rconsdev is set to NODEV for Starfire. */ if ((d == rwsconsdev || d == rconsdev) && d != NODEV) d = uconsdev; (void) cmpldev(&psp->pr_ttydev, d); TIMESPEC_TO_TIMESPEC32(&psp->pr_start, &up->u_start); bcopy(up->u_comm, psp->pr_fname, MIN(sizeof (up->u_comm), sizeof (psp->pr_fname)-1)); bcopy(up->u_psargs, psp->pr_psargs, MIN(PRARGSZ-1, PSARGSZ)); psp->pr_argc = up->u_argc; psp->pr_argv = (caddr32_t)up->u_argv; psp->pr_envp = (caddr32_t)up->u_envp; /* get the chosen lwp's lwpsinfo */ prgetlwpsinfo32(t, &psp->pr_lwp); /* compute %cpu for the process */ if (p->p_lwpcnt == 1) psp->pr_pctcpu = psp->pr_lwp.pr_pctcpu; else { uint64_t pct = 0; hrtime_t cur_time; t = p->p_tlist; cur_time = gethrtime_unscaled(); do { pct += cpu_update_pct(t, cur_time); } while ((t = t->t_forw) != p->p_tlist); psp->pr_pctcpu = prgetpctcpu(pct); } if ((p->p_flag & SSYS) || (as = p->p_as) == &kas) { psp->pr_size = 0; psp->pr_rssize = 0; } else { mutex_exit(&p->p_lock); AS_LOCK_ENTER(as, RW_READER); psp->pr_size = (size32_t) (btopr(as->a_resvsize) * (PAGESIZE / 1024)); psp->pr_rssize = (size32_t) (rm_asrss(as) * (PAGESIZE / 1024)); psp->pr_pctmem = rm_pctmemory(as); AS_LOCK_EXIT(as); mutex_enter(&p->p_lock); } } /* * If we are looking at an LP64 process, zero out * the fields that cannot be represented in ILP32. 
*/ if (p->p_model != DATAMODEL_ILP32) { psp->pr_size = 0; psp->pr_rssize = 0; psp->pr_argv = 0; psp->pr_envp = 0; } } #endif /* _SYSCALL32_IMPL */ void prgetlwpsinfo(kthread_t *t, lwpsinfo_t *psp) { klwp_t *lwp = ttolwp(t); sobj_ops_t *sobj; char c, state; uint64_t pct; int retval, niceval; hrtime_t hrutime, hrstime; ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); bzero(psp, sizeof (*psp)); psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ psp->pr_lwpid = t->t_tid; psp->pr_addr = (uintptr_t)t; psp->pr_wchan = (uintptr_t)t->t_wchan; /* map the thread state enum into a process state enum */ state = VSTOPPED(t) ? TS_STOPPED : t->t_state; switch (state) { case TS_SLEEP: state = SSLEEP; c = 'S'; break; case TS_RUN: state = SRUN; c = 'R'; break; case TS_ONPROC: state = SONPROC; c = 'O'; break; case TS_ZOMB: state = SZOMB; c = 'Z'; break; case TS_STOPPED: state = SSTOP; c = 'T'; break; case TS_WAIT: state = SWAIT; c = 'W'; break; default: state = 0; c = '?'; break; } psp->pr_state = state; psp->pr_sname = c; if ((sobj = t->t_sobj_ops) != NULL) psp->pr_stype = SOBJ_TYPE(sobj); retval = CL_DONICE(t, NULL, 0, &niceval); if (retval == 0) { psp->pr_oldpri = v.v_maxsyspri - t->t_pri; psp->pr_nice = niceval + NZERO; } psp->pr_syscall = t->t_sysnum; psp->pr_pri = t->t_pri; psp->pr_start.tv_sec = t->t_start; psp->pr_start.tv_nsec = 0L; hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; scalehrtime(&hrutime); hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + lwp->lwp_mstate.ms_acct[LMS_TRAP]; scalehrtime(&hrstime); hrt2ts(hrutime + hrstime, &psp->pr_time); /* compute %cpu for the lwp */ pct = cpu_update_pct(t, gethrtime_unscaled()); psp->pr_pctcpu = prgetpctcpu(pct); psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ if (psp->pr_cpu > 99) psp->pr_cpu = 99; (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, sizeof (psp->pr_clname) - 1); bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ psp->pr_onpro = t->t_cpu->cpu_id; psp->pr_bindpro = t->t_bind_cpu; psp->pr_bindpset = t->t_bind_pset; psp->pr_lgrp = t->t_lpl->lpl_lgrpid; } #ifdef _SYSCALL32_IMPL void prgetlwpsinfo32(kthread_t *t, lwpsinfo32_t *psp) { klwp_t *lwp = ttolwp(t); sobj_ops_t *sobj; char c, state; uint64_t pct; int retval, niceval; hrtime_t hrutime, hrstime; ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)); bzero(psp, sizeof (*psp)); psp->pr_flag = 0; /* lwpsinfo_t.pr_flag is deprecated */ psp->pr_lwpid = t->t_tid; psp->pr_addr = 0; /* cannot represent 64-bit addr in 32 bits */ psp->pr_wchan = 0; /* cannot represent 64-bit addr in 32 bits */ /* map the thread state enum into a process state enum */ state = VSTOPPED(t) ? 
TS_STOPPED : t->t_state; switch (state) { case TS_SLEEP: state = SSLEEP; c = 'S'; break; case TS_RUN: state = SRUN; c = 'R'; break; case TS_ONPROC: state = SONPROC; c = 'O'; break; case TS_ZOMB: state = SZOMB; c = 'Z'; break; case TS_STOPPED: state = SSTOP; c = 'T'; break; case TS_WAIT: state = SWAIT; c = 'W'; break; default: state = 0; c = '?'; break; } psp->pr_state = state; psp->pr_sname = c; if ((sobj = t->t_sobj_ops) != NULL) psp->pr_stype = SOBJ_TYPE(sobj); retval = CL_DONICE(t, NULL, 0, &niceval); if (retval == 0) { psp->pr_oldpri = v.v_maxsyspri - t->t_pri; psp->pr_nice = niceval + NZERO; } else { psp->pr_oldpri = 0; psp->pr_nice = 0; } psp->pr_syscall = t->t_sysnum; psp->pr_pri = t->t_pri; psp->pr_start.tv_sec = (time32_t)t->t_start; psp->pr_start.tv_nsec = 0L; hrutime = lwp->lwp_mstate.ms_acct[LMS_USER]; scalehrtime(&hrutime); hrstime = lwp->lwp_mstate.ms_acct[LMS_SYSTEM] + lwp->lwp_mstate.ms_acct[LMS_TRAP]; scalehrtime(&hrstime); hrt2ts32(hrutime + hrstime, &psp->pr_time); /* compute %cpu for the lwp */ pct = cpu_update_pct(t, gethrtime_unscaled()); psp->pr_pctcpu = prgetpctcpu(pct); psp->pr_cpu = (psp->pr_pctcpu*100 + 0x6000) >> 15; /* [0..99] */ if (psp->pr_cpu > 99) psp->pr_cpu = 99; (void) strncpy(psp->pr_clname, sclass[t->t_cid].cl_name, sizeof (psp->pr_clname) - 1); bzero(psp->pr_name, sizeof (psp->pr_name)); /* XXX ??? */ psp->pr_onpro = t->t_cpu->cpu_id; psp->pr_bindpro = t->t_bind_cpu; psp->pr_bindpset = t->t_bind_pset; psp->pr_lgrp = t->t_lpl->lpl_lgrpid; } #endif /* _SYSCALL32_IMPL */ #ifdef _SYSCALL32_IMPL #define PR_COPY_FIELD(s, d, field) d->field = s->field #define PR_COPY_FIELD_ILP32(s, d, field) \ if (s->pr_dmodel == PR_MODEL_ILP32) { \ d->field = s->field; \ } #define PR_COPY_TIMESPEC(s, d, field) \ TIMESPEC_TO_TIMESPEC32(&d->field, &s->field); #define PR_COPY_BUF(s, d, field) \ bcopy(s->field, d->field, sizeof (d->field)); #define PR_IGNORE_FIELD(s, d, field) void lwpsinfo_kto32(const struct lwpsinfo *src, struct lwpsinfo32 *dest) { bzero(dest, sizeof (*dest)); PR_COPY_FIELD(src, dest, pr_flag); PR_COPY_FIELD(src, dest, pr_lwpid); PR_IGNORE_FIELD(src, dest, pr_addr); PR_IGNORE_FIELD(src, dest, pr_wchan); PR_COPY_FIELD(src, dest, pr_stype); PR_COPY_FIELD(src, dest, pr_state); PR_COPY_FIELD(src, dest, pr_sname); PR_COPY_FIELD(src, dest, pr_nice); PR_COPY_FIELD(src, dest, pr_syscall); PR_COPY_FIELD(src, dest, pr_oldpri); PR_COPY_FIELD(src, dest, pr_cpu); PR_COPY_FIELD(src, dest, pr_pri); PR_COPY_FIELD(src, dest, pr_pctcpu); PR_COPY_TIMESPEC(src, dest, pr_start); PR_COPY_BUF(src, dest, pr_clname); PR_COPY_BUF(src, dest, pr_name); PR_COPY_FIELD(src, dest, pr_onpro); PR_COPY_FIELD(src, dest, pr_bindpro); PR_COPY_FIELD(src, dest, pr_bindpset); PR_COPY_FIELD(src, dest, pr_lgrp); } void psinfo_kto32(const struct psinfo *src, struct psinfo32 *dest) { bzero(dest, sizeof (*dest)); PR_COPY_FIELD(src, dest, pr_flag); PR_COPY_FIELD(src, dest, pr_nlwp); PR_COPY_FIELD(src, dest, pr_pid); PR_COPY_FIELD(src, dest, pr_ppid); PR_COPY_FIELD(src, dest, pr_pgid); PR_COPY_FIELD(src, dest, pr_sid); PR_COPY_FIELD(src, dest, pr_uid); PR_COPY_FIELD(src, dest, pr_euid); PR_COPY_FIELD(src, dest, pr_gid); PR_COPY_FIELD(src, dest, pr_egid); PR_IGNORE_FIELD(src, dest, pr_addr); PR_COPY_FIELD_ILP32(src, dest, pr_size); PR_COPY_FIELD_ILP32(src, dest, pr_rssize); PR_COPY_FIELD(src, dest, pr_ttydev); PR_COPY_FIELD(src, dest, pr_pctcpu); PR_COPY_FIELD(src, dest, pr_pctmem); PR_COPY_TIMESPEC(src, dest, pr_start); PR_COPY_TIMESPEC(src, dest, pr_time); PR_COPY_TIMESPEC(src, dest, pr_ctime); 
PR_COPY_BUF(src, dest, pr_fname); PR_COPY_BUF(src, dest, pr_psargs); PR_COPY_FIELD(src, dest, pr_wstat); PR_COPY_FIELD(src, dest, pr_argc); PR_COPY_FIELD_ILP32(src, dest, pr_argv); PR_COPY_FIELD_ILP32(src, dest, pr_envp); PR_COPY_FIELD(src, dest, pr_dmodel); PR_COPY_FIELD(src, dest, pr_taskid); PR_COPY_FIELD(src, dest, pr_projid); PR_COPY_FIELD(src, dest, pr_nzomb); PR_COPY_FIELD(src, dest, pr_poolid); PR_COPY_FIELD(src, dest, pr_zoneid); PR_COPY_FIELD(src, dest, pr_contract); lwpsinfo_kto32(&src->pr_lwp, &dest->pr_lwp); } #undef PR_COPY_FIELD #undef PR_COPY_FIELD_ILP32 #undef PR_COPY_TIMESPEC #undef PR_COPY_BUF #undef PR_IGNORE_FIELD #endif /* _SYSCALL32_IMPL */ /* * This used to get called when microstate accounting was disabled but * microstate information was requested. Since microstate accounting is on * regardless of the proc flags, this simply makes it appear to procfs that * microstate accounting is on. This is relatively meaningless since you * can't turn it off, but this is here for the sake of appearances. */ /*ARGSUSED*/ void estimate_msacct(kthread_t *t, hrtime_t curtime) { proc_t *p; if (t == NULL) return; p = ttoproc(t); ASSERT(MUTEX_HELD(&p->p_lock)); /* * A system process (p0) could be referenced if the thread is * in the process of exiting. Don't turn on microstate accounting * in that case. */ if (p->p_flag & SSYS) return; /* * Loop through all the LWPs (kernel threads) in the process. */ t = p->p_tlist; do { t->t_proc_flag |= TP_MSACCT; } while ((t = t->t_forw) != p->p_tlist); p->p_flag |= SMSACCT; /* set process-wide MSACCT */ } /* * It's not really possible to disable microstate accounting anymore. * However, this routine simply turns off the ms accounting flags in a process. * This way procfs can still pretend to turn microstate accounting on and * off for a process, but it actually doesn't do anything. This is * a neutered form of preemptive idiot-proofing. */ void disable_msacct(proc_t *p) { kthread_t *t; ASSERT(MUTEX_HELD(&p->p_lock)); p->p_flag &= ~SMSACCT; /* clear process-wide MSACCT */ /* * Loop through all the LWPs (kernel threads) in the process. */ if ((t = p->p_tlist) != NULL) { do { /* clear per-thread flag */ t->t_proc_flag &= ~TP_MSACCT; } while ((t = t->t_forw) != p->p_tlist); } } /* * Return resource usage information. */ void prgetusage(kthread_t *t, prhusage_t *pup) { klwp_t *lwp = ttolwp(t); hrtime_t *mstimep; struct mstate *ms = &lwp->lwp_mstate; int state; int i; hrtime_t curtime; hrtime_t waitrq; hrtime_t tmp1; curtime = gethrtime_unscaled(); pup->pr_lwpid = t->t_tid; pup->pr_count = 1; pup->pr_create = ms->ms_start; pup->pr_term = ms->ms_term; scalehrtime(&pup->pr_create); scalehrtime(&pup->pr_term); if (ms->ms_term == 0) { pup->pr_rtime = curtime - ms->ms_start; scalehrtime(&pup->pr_rtime); } else { pup->pr_rtime = ms->ms_term - ms->ms_start; scalehrtime(&pup->pr_rtime); } pup->pr_utime = ms->ms_acct[LMS_USER]; pup->pr_stime = ms->ms_acct[LMS_SYSTEM]; pup->pr_ttime = ms->ms_acct[LMS_TRAP]; pup->pr_tftime = ms->ms_acct[LMS_TFAULT]; pup->pr_dftime = ms->ms_acct[LMS_DFAULT]; pup->pr_kftime = ms->ms_acct[LMS_KFAULT]; pup->pr_ltime = ms->ms_acct[LMS_USER_LOCK]; pup->pr_slptime = ms->ms_acct[LMS_SLEEP]; pup->pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; pup->pr_stoptime = ms->ms_acct[LMS_STOPPED]; prscaleusage(pup); /* * Adjust for time waiting in the dispatcher queue.
*/ waitrq = t->t_waitrq; /* hopefully atomic */ if (waitrq != 0) { if (waitrq > curtime) { curtime = gethrtime_unscaled(); } tmp1 = curtime - waitrq; scalehrtime(&tmp1); pup->pr_wtime += tmp1; curtime = waitrq; } /* * Adjust for time spent in current microstate. */ if (ms->ms_state_start > curtime) { curtime = gethrtime_unscaled(); } i = 0; do { switch (state = t->t_mstate) { case LMS_SLEEP: /* * Update the timer for the current sleep state. */ switch (state = ms->ms_prev) { case LMS_TFAULT: case LMS_DFAULT: case LMS_KFAULT: case LMS_USER_LOCK: break; default: state = LMS_SLEEP; break; } break; case LMS_TFAULT: case LMS_DFAULT: case LMS_KFAULT: case LMS_USER_LOCK: state = LMS_SYSTEM; break; } switch (state) { case LMS_USER: mstimep = &pup->pr_utime; break; case LMS_SYSTEM: mstimep = &pup->pr_stime; break; case LMS_TRAP: mstimep = &pup->pr_ttime; break; case LMS_TFAULT: mstimep = &pup->pr_tftime; break; case LMS_DFAULT: mstimep = &pup->pr_dftime; break; case LMS_KFAULT: mstimep = &pup->pr_kftime; break; case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; case LMS_SLEEP: mstimep = &pup->pr_slptime; break; case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; default: panic("prgetusage: unknown microstate"); } tmp1 = curtime - ms->ms_state_start; if (tmp1 < 0) { curtime = gethrtime_unscaled(); i++; continue; } scalehrtime(&tmp1); } while (tmp1 < 0 && i < MAX_ITERS_SPIN); *mstimep += tmp1; /* update pup timestamp */ pup->pr_tstamp = curtime; scalehrtime(&pup->pr_tstamp); /* * Resource usage counters. */ pup->pr_minf = lwp->lwp_ru.minflt; pup->pr_majf = lwp->lwp_ru.majflt; pup->pr_nswap = lwp->lwp_ru.nswap; pup->pr_inblk = lwp->lwp_ru.inblock; pup->pr_oublk = lwp->lwp_ru.oublock; pup->pr_msnd = lwp->lwp_ru.msgsnd; pup->pr_mrcv = lwp->lwp_ru.msgrcv; pup->pr_sigs = lwp->lwp_ru.nsignals; pup->pr_vctx = lwp->lwp_ru.nvcsw; pup->pr_ictx = lwp->lwp_ru.nivcsw; pup->pr_sysc = lwp->lwp_ru.sysc; pup->pr_ioch = lwp->lwp_ru.ioch; } /* * Convert ms_acct stats from unscaled high-res time to nanoseconds */ void prscaleusage(prhusage_t *usg) { scalehrtime(&usg->pr_utime); scalehrtime(&usg->pr_stime); scalehrtime(&usg->pr_ttime); scalehrtime(&usg->pr_tftime); scalehrtime(&usg->pr_dftime); scalehrtime(&usg->pr_kftime); scalehrtime(&usg->pr_ltime); scalehrtime(&usg->pr_slptime); scalehrtime(&usg->pr_wtime); scalehrtime(&usg->pr_stoptime); } /* * Sum resource usage information. 
*/ void praddusage(kthread_t *t, prhusage_t *pup) { klwp_t *lwp = ttolwp(t); hrtime_t *mstimep; struct mstate *ms = &lwp->lwp_mstate; int state; int i; hrtime_t curtime; hrtime_t waitrq; hrtime_t tmp; prhusage_t conv; curtime = gethrtime_unscaled(); if (ms->ms_term == 0) { tmp = curtime - ms->ms_start; scalehrtime(&tmp); pup->pr_rtime += tmp; } else { tmp = ms->ms_term - ms->ms_start; scalehrtime(&tmp); pup->pr_rtime += tmp; } conv.pr_utime = ms->ms_acct[LMS_USER]; conv.pr_stime = ms->ms_acct[LMS_SYSTEM]; conv.pr_ttime = ms->ms_acct[LMS_TRAP]; conv.pr_tftime = ms->ms_acct[LMS_TFAULT]; conv.pr_dftime = ms->ms_acct[LMS_DFAULT]; conv.pr_kftime = ms->ms_acct[LMS_KFAULT]; conv.pr_ltime = ms->ms_acct[LMS_USER_LOCK]; conv.pr_slptime = ms->ms_acct[LMS_SLEEP]; conv.pr_wtime = ms->ms_acct[LMS_WAIT_CPU]; conv.pr_stoptime = ms->ms_acct[LMS_STOPPED]; prscaleusage(&conv); pup->pr_utime += conv.pr_utime; pup->pr_stime += conv.pr_stime; pup->pr_ttime += conv.pr_ttime; pup->pr_tftime += conv.pr_tftime; pup->pr_dftime += conv.pr_dftime; pup->pr_kftime += conv.pr_kftime; pup->pr_ltime += conv.pr_ltime; pup->pr_slptime += conv.pr_slptime; pup->pr_wtime += conv.pr_wtime; pup->pr_stoptime += conv.pr_stoptime; /* * Adjust for time waiting in the dispatcher queue. */ waitrq = t->t_waitrq; /* hopefully atomic */ if (waitrq != 0) { if (waitrq > curtime) { curtime = gethrtime_unscaled(); } tmp = curtime - waitrq; scalehrtime(&tmp); pup->pr_wtime += tmp; curtime = waitrq; } /* * Adjust for time spent in current microstate. */ if (ms->ms_state_start > curtime) { curtime = gethrtime_unscaled(); } i = 0; do { switch (state = t->t_mstate) { case LMS_SLEEP: /* * Update the timer for the current sleep state. */ switch (state = ms->ms_prev) { case LMS_TFAULT: case LMS_DFAULT: case LMS_KFAULT: case LMS_USER_LOCK: break; default: state = LMS_SLEEP; break; } break; case LMS_TFAULT: case LMS_DFAULT: case LMS_KFAULT: case LMS_USER_LOCK: state = LMS_SYSTEM; break; } switch (state) { case LMS_USER: mstimep = &pup->pr_utime; break; case LMS_SYSTEM: mstimep = &pup->pr_stime; break; case LMS_TRAP: mstimep = &pup->pr_ttime; break; case LMS_TFAULT: mstimep = &pup->pr_tftime; break; case LMS_DFAULT: mstimep = &pup->pr_dftime; break; case LMS_KFAULT: mstimep = &pup->pr_kftime; break; case LMS_USER_LOCK: mstimep = &pup->pr_ltime; break; case LMS_SLEEP: mstimep = &pup->pr_slptime; break; case LMS_WAIT_CPU: mstimep = &pup->pr_wtime; break; case LMS_STOPPED: mstimep = &pup->pr_stoptime; break; default: panic("praddusage: unknown microstate"); } tmp = curtime - ms->ms_state_start; if (tmp < 0) { curtime = gethrtime_unscaled(); i++; continue; } scalehrtime(&tmp); } while (tmp < 0 && i < MAX_ITERS_SPIN); *mstimep += tmp; /* update pup timestamp */ pup->pr_tstamp = curtime; scalehrtime(&pup->pr_tstamp); /* * Resource usage counters. */ pup->pr_minf += lwp->lwp_ru.minflt; pup->pr_majf += lwp->lwp_ru.majflt; pup->pr_nswap += lwp->lwp_ru.nswap; pup->pr_inblk += lwp->lwp_ru.inblock; pup->pr_oublk += lwp->lwp_ru.oublock; pup->pr_msnd += lwp->lwp_ru.msgsnd; pup->pr_mrcv += lwp->lwp_ru.msgrcv; pup->pr_sigs += lwp->lwp_ru.nsignals; pup->pr_vctx += lwp->lwp_ru.nvcsw; pup->pr_ictx += lwp->lwp_ru.nivcsw; pup->pr_sysc += lwp->lwp_ru.sysc; pup->pr_ioch += lwp->lwp_ru.ioch; } /* * Convert a prhusage_t to a prusage_t. * This means convert each hrtime_t to a timestruc_t * and copy the count fields uint64_t => ulong_t. 
*/ void prcvtusage(prhusage_t *pup, prusage_t *upup) { uint64_t *ullp; ulong_t *ulp; int i; upup->pr_lwpid = pup->pr_lwpid; upup->pr_count = pup->pr_count; hrt2ts(pup->pr_tstamp, &upup->pr_tstamp); hrt2ts(pup->pr_create, &upup->pr_create); hrt2ts(pup->pr_term, &upup->pr_term); hrt2ts(pup->pr_rtime, &upup->pr_rtime); hrt2ts(pup->pr_utime, &upup->pr_utime); hrt2ts(pup->pr_stime, &upup->pr_stime); hrt2ts(pup->pr_ttime, &upup->pr_ttime); hrt2ts(pup->pr_tftime, &upup->pr_tftime); hrt2ts(pup->pr_dftime, &upup->pr_dftime); hrt2ts(pup->pr_kftime, &upup->pr_kftime); hrt2ts(pup->pr_ltime, &upup->pr_ltime); hrt2ts(pup->pr_slptime, &upup->pr_slptime); hrt2ts(pup->pr_wtime, &upup->pr_wtime); hrt2ts(pup->pr_stoptime, &upup->pr_stoptime); bzero(upup->filltime, sizeof (upup->filltime)); ullp = &pup->pr_minf; ulp = &upup->pr_minf; for (i = 0; i < 22; i++) *ulp++ = (ulong_t)*ullp++; } #ifdef _SYSCALL32_IMPL void prcvtusage32(prhusage_t *pup, prusage32_t *upup) { uint64_t *ullp; uint32_t *ulp; int i; upup->pr_lwpid = pup->pr_lwpid; upup->pr_count = pup->pr_count; hrt2ts32(pup->pr_tstamp, &upup->pr_tstamp); hrt2ts32(pup->pr_create, &upup->pr_create); hrt2ts32(pup->pr_term, &upup->pr_term); hrt2ts32(pup->pr_rtime, &upup->pr_rtime); hrt2ts32(pup->pr_utime, &upup->pr_utime); hrt2ts32(pup->pr_stime, &upup->pr_stime); hrt2ts32(pup->pr_ttime, &upup->pr_ttime); hrt2ts32(pup->pr_tftime, &upup->pr_tftime); hrt2ts32(pup->pr_dftime, &upup->pr_dftime); hrt2ts32(pup->pr_kftime, &upup->pr_kftime); hrt2ts32(pup->pr_ltime, &upup->pr_ltime); hrt2ts32(pup->pr_slptime, &upup->pr_slptime); hrt2ts32(pup->pr_wtime, &upup->pr_wtime); hrt2ts32(pup->pr_stoptime, &upup->pr_stoptime); bzero(upup->filltime, sizeof (upup->filltime)); ullp = &pup->pr_minf; ulp = &upup->pr_minf; for (i = 0; i < 22; i++) *ulp++ = (uint32_t)*ullp++; } #endif /* _SYSCALL32_IMPL */ /* * Determine whether a set is empty. */ int setisempty(uint32_t *sp, uint_t n) { while (n--) if (*sp++) return (0); return (1); } /* * Utility routine for establishing a watched area in the process. * Keep the list of watched areas sorted by virtual address. */ int set_watched_area(proc_t *p, struct watched_area *pwa) { caddr_t vaddr = pwa->wa_vaddr; caddr_t eaddr = pwa->wa_eaddr; ulong_t flags = pwa->wa_flags; struct watched_area *target; avl_index_t where; int error = 0; /* we must not be holding p->p_lock, but the process must be locked */ ASSERT(MUTEX_NOT_HELD(&p->p_lock)); ASSERT(p->p_proc_flag & P_PR_LOCK); /* * If this is our first watchpoint, enable watchpoints for the process. */ if (!pr_watch_active(p)) { kthread_t *t; mutex_enter(&p->p_lock); if ((t = p->p_tlist) != NULL) { do { watch_enable(t); } while ((t = t->t_forw) != p->p_tlist); } mutex_exit(&p->p_lock); } target = pr_find_watched_area(p, pwa, &where); if (target != NULL) { /* * We discovered an existing, overlapping watched area. * Allow it only if it is an exact match. */ if (target->wa_vaddr != vaddr || target->wa_eaddr != eaddr) error = EINVAL; else if (target->wa_flags != flags) { error = set_watched_page(p, vaddr, eaddr, flags, target->wa_flags); target->wa_flags = flags; } kmem_free(pwa, sizeof (struct watched_area)); } else { avl_insert(&p->p_warea, pwa, where); error = set_watched_page(p, vaddr, eaddr, flags, 0); } return (error); } /* * Utility routine for clearing a watched area in the process. * Must be an exact match of the virtual address. * size and flags don't matter. 
*/ int clear_watched_area(proc_t *p, struct watched_area *pwa) { struct watched_area *found; /* we must not be holding p->p_lock, but the process must be locked */ ASSERT(MUTEX_NOT_HELD(&p->p_lock)); ASSERT(p->p_proc_flag & P_PR_LOCK); if (!pr_watch_active(p)) { kmem_free(pwa, sizeof (struct watched_area)); return (0); } /* * Look for a matching address in the watched areas. If a match is * found, clear the old watched area and adjust the watched page(s). It * is not an error if there is no match. */ if ((found = pr_find_watched_area(p, pwa, NULL)) != NULL && found->wa_vaddr == pwa->wa_vaddr) { clear_watched_page(p, found->wa_vaddr, found->wa_eaddr, found->wa_flags); avl_remove(&p->p_warea, found); kmem_free(found, sizeof (struct watched_area)); } kmem_free(pwa, sizeof (struct watched_area)); /* * If we removed the last watched area from the process, disable * watchpoints. */ if (!pr_watch_active(p)) { kthread_t *t; mutex_enter(&p->p_lock); if ((t = p->p_tlist) != NULL) { do { watch_disable(t); } while ((t = t->t_forw) != p->p_tlist); } mutex_exit(&p->p_lock); } return (0); } /* * Frees all the watched_area structures */ void pr_free_watchpoints(proc_t *p) { struct watched_area *delp; void *cookie; cookie = NULL; while ((delp = avl_destroy_nodes(&p->p_warea, &cookie)) != NULL) kmem_free(delp, sizeof (struct watched_area)); avl_destroy(&p->p_warea); } /* * This one is called by the traced process to unwatch all the * pages while deallocating the list of watched_page structs. */ void pr_free_watched_pages(proc_t *p) { struct as *as = p->p_as; struct watched_page *pwp; uint_t prot; int retrycnt, err; void *cookie; if (as == NULL || avl_numnodes(&as->a_wpage) == 0) return; ASSERT(MUTEX_NOT_HELD(&curproc->p_lock)); AS_LOCK_ENTER(as, RW_WRITER); pwp = avl_first(&as->a_wpage); cookie = NULL; while ((pwp = avl_destroy_nodes(&as->a_wpage, &cookie)) != NULL) { retrycnt = 0; if ((prot = pwp->wp_oprot) != 0) { caddr_t addr = pwp->wp_vaddr; struct seg *seg; retry: if ((pwp->wp_prot != prot || (pwp->wp_flags & WP_NOWATCH)) && (seg = as_segat(as, addr)) != NULL) { err = SEGOP_SETPROT(seg, addr, PAGESIZE, prot); if (err == IE_RETRY) { ASSERT(retrycnt == 0); retrycnt++; goto retry; } } } kmem_free(pwp, sizeof (struct watched_page)); } avl_destroy(&as->a_wpage); p->p_wprot = NULL; AS_LOCK_EXIT(as); } /* * Insert a watched area into the list of watched pages. * If oflags is zero then we are adding a new watched area. * Otherwise we are changing the flags of an existing watched area. */ static int set_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags, ulong_t oflags) { struct as *as = p->p_as; avl_tree_t *pwp_tree; struct watched_page *pwp, *newpwp; struct watched_page tpw; avl_index_t where; struct seg *seg; uint_t prot; caddr_t addr; /* * We need to pre-allocate a list of structures before we grab the * address space lock to avoid calling kmem_alloc(KM_SLEEP) with locks * held. */ newpwp = NULL; for (addr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); addr < eaddr; addr += PAGESIZE) { pwp = kmem_zalloc(sizeof (struct watched_page), KM_SLEEP); pwp->wp_list = newpwp; newpwp = pwp; } AS_LOCK_ENTER(as, RW_WRITER); /* * Search for an existing watched page to contain the watched area. * If none is found, grab a new one from the available list * and insert it in the active list, keeping the list sorted * by user-level virtual address. 
*/ if (p->p_flag & SVFWAIT) pwp_tree = &p->p_wpage; else pwp_tree = &as->a_wpage; again: if (avl_numnodes(pwp_tree) > prnwatch) { AS_LOCK_EXIT(as); while (newpwp != NULL) { pwp = newpwp->wp_list; kmem_free(newpwp, sizeof (struct watched_page)); newpwp = pwp; } return (E2BIG); } tpw.wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); if ((pwp = avl_find(pwp_tree, &tpw, &where)) == NULL) { pwp = newpwp; newpwp = newpwp->wp_list; pwp->wp_list = NULL; pwp->wp_vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); avl_insert(pwp_tree, pwp, where); } ASSERT(vaddr >= pwp->wp_vaddr && vaddr < pwp->wp_vaddr + PAGESIZE); if (oflags & WA_READ) pwp->wp_read--; if (oflags & WA_WRITE) pwp->wp_write--; if (oflags & WA_EXEC) pwp->wp_exec--; ASSERT(pwp->wp_read >= 0); ASSERT(pwp->wp_write >= 0); ASSERT(pwp->wp_exec >= 0); if (flags & WA_READ) pwp->wp_read++; if (flags & WA_WRITE) pwp->wp_write++; if (flags & WA_EXEC) pwp->wp_exec++; if (!(p->p_flag & SVFWAIT)) { vaddr = pwp->wp_vaddr; if (pwp->wp_oprot == 0 && (seg = as_segat(as, vaddr)) != NULL) { SEGOP_GETPROT(seg, vaddr, 0, &prot); pwp->wp_oprot = (uchar_t)prot; pwp->wp_prot = (uchar_t)prot; } if (pwp->wp_oprot != 0) { prot = pwp->wp_oprot; if (pwp->wp_read) prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); if (pwp->wp_write) prot &= ~PROT_WRITE; if (pwp->wp_exec) prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); if (!(pwp->wp_flags & WP_NOWATCH) && pwp->wp_prot != prot && (pwp->wp_flags & WP_SETPROT) == 0) { pwp->wp_flags |= WP_SETPROT; pwp->wp_list = p->p_wprot; p->p_wprot = pwp; } pwp->wp_prot = (uchar_t)prot; } } /* * If the watched area extends into the next page then do * it over again with the virtual address of the next page. */ if ((vaddr = pwp->wp_vaddr + PAGESIZE) < eaddr) goto again; AS_LOCK_EXIT(as); /* * Free any pages we may have over-allocated */ while (newpwp != NULL) { pwp = newpwp->wp_list; kmem_free(newpwp, sizeof (struct watched_page)); newpwp = pwp; } return (0); } /* * Remove a watched area from the list of watched pages. * A watched area may extend over more than one page. */ static void clear_watched_page(proc_t *p, caddr_t vaddr, caddr_t eaddr, ulong_t flags) { struct as *as = p->p_as; struct watched_page *pwp; struct watched_page tpw; avl_tree_t *tree; avl_index_t where; AS_LOCK_ENTER(as, RW_WRITER); if (p->p_flag & SVFWAIT) tree = &p->p_wpage; else tree = &as->a_wpage; tpw.wp_vaddr = vaddr = (caddr_t)((uintptr_t)vaddr & (uintptr_t)PAGEMASK); pwp = avl_find(tree, &tpw, &where); if (pwp == NULL) pwp = avl_nearest(tree, where, AVL_AFTER); while (pwp != NULL && pwp->wp_vaddr < eaddr) { ASSERT(vaddr <= pwp->wp_vaddr); if (flags & WA_READ) pwp->wp_read--; if (flags & WA_WRITE) pwp->wp_write--; if (flags & WA_EXEC) pwp->wp_exec--; if (pwp->wp_read + pwp->wp_write + pwp->wp_exec != 0) { /* * Reset the hat layer's protections on this page. */ if (pwp->wp_oprot != 0) { uint_t prot = pwp->wp_oprot; if (pwp->wp_read) prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); if (pwp->wp_write) prot &= ~PROT_WRITE; if (pwp->wp_exec) prot &= ~(PROT_READ|PROT_WRITE|PROT_EXEC); if (!(pwp->wp_flags & WP_NOWATCH) && pwp->wp_prot != prot && (pwp->wp_flags & WP_SETPROT) == 0) { pwp->wp_flags |= WP_SETPROT; pwp->wp_list = p->p_wprot; p->p_wprot = pwp; } pwp->wp_prot = (uchar_t)prot; } } else { /* * No watched areas remain in this page. * Reset everything to normal. 
*/ if (pwp->wp_oprot != 0) { pwp->wp_prot = pwp->wp_oprot; if ((pwp->wp_flags & WP_SETPROT) == 0) { pwp->wp_flags |= WP_SETPROT; pwp->wp_list = p->p_wprot; p->p_wprot = pwp; } } } pwp = AVL_NEXT(tree, pwp); } AS_LOCK_EXIT(as); } /* * Return the original protections for the specified page. */ static void getwatchprot(struct as *as, caddr_t addr, uint_t *prot) { struct watched_page *pwp; struct watched_page tpw; ASSERT(AS_LOCK_HELD(as)); tpw.wp_vaddr = (caddr_t)((uintptr_t)addr & (uintptr_t)PAGEMASK); if ((pwp = avl_find(&as->a_wpage, &tpw, NULL)) != NULL) *prot = pwp->wp_oprot; } static prpagev_t * pr_pagev_create(struct seg *seg, int check_noreserve) { prpagev_t *pagev = kmem_alloc(sizeof (prpagev_t), KM_SLEEP); size_t total_pages = seg_pages(seg); /* * Limit the size of our vectors to pagev_lim pages at a time. We need * 4 or 5 bytes of storage per page, so this means we limit ourself * to about a megabyte of kernel heap by default. */ pagev->pg_npages = MIN(total_pages, pagev_lim); pagev->pg_pnbase = 0; pagev->pg_protv = kmem_alloc(pagev->pg_npages * sizeof (uint_t), KM_SLEEP); if (check_noreserve) pagev->pg_incore = kmem_alloc(pagev->pg_npages * sizeof (char), KM_SLEEP); else pagev->pg_incore = NULL; return (pagev); } static void pr_pagev_destroy(prpagev_t *pagev) { if (pagev->pg_incore != NULL) kmem_free(pagev->pg_incore, pagev->pg_npages * sizeof (char)); kmem_free(pagev->pg_protv, pagev->pg_npages * sizeof (uint_t)); kmem_free(pagev, sizeof (prpagev_t)); } static caddr_t pr_pagev_fill(prpagev_t *pagev, struct seg *seg, caddr_t addr, caddr_t eaddr) { ulong_t lastpg = seg_page(seg, eaddr - 1); ulong_t pn, pnlim; caddr_t saddr; size_t len; ASSERT(addr >= seg->s_base && addr <= eaddr); if (addr == eaddr) return (eaddr); refill: ASSERT(addr < eaddr); pagev->pg_pnbase = seg_page(seg, addr); pnlim = pagev->pg_pnbase + pagev->pg_npages; saddr = addr; if (lastpg < pnlim) len = (size_t)(eaddr - addr); else len = pagev->pg_npages * PAGESIZE; if (pagev->pg_incore != NULL) { /* * INCORE cleverly has different semantics than GETPROT: * it returns info on pages up to but NOT including addr + len. */ SEGOP_INCORE(seg, addr, len, pagev->pg_incore); pn = pagev->pg_pnbase; do { /* * Guilty knowledge here: We know that segvn_incore * returns more than just the low-order bit that * indicates the page is actually in memory. If any * bits are set, then the page has backing store. */ if (pagev->pg_incore[pn++ - pagev->pg_pnbase]) goto out; } while ((addr += PAGESIZE) < eaddr && pn < pnlim); /* * If we examined all the pages in the vector but we're not * at the end of the segment, take another lap. */ if (addr < eaddr) goto refill; } /* * Need to take len - 1 because addr + len is the address of the * first byte of the page just past the end of what we want. */ out: SEGOP_GETPROT(seg, saddr, len - 1, pagev->pg_protv); return (addr); } static caddr_t pr_pagev_nextprot(prpagev_t *pagev, struct seg *seg, caddr_t *saddrp, caddr_t eaddr, uint_t *protp) { /* * Our starting address is either the specified address, or the base * address from the start of the pagev. If the latter is greater, * this means a previous call to pr_pagev_fill has already scanned * further than the end of the previous mapping. */ caddr_t base = seg->s_base + pagev->pg_pnbase * PAGESIZE; caddr_t addr = MAX(*saddrp, base); ulong_t pn = seg_page(seg, addr); uint_t prot, nprot; /* * If we're dealing with noreserve pages, then advance addr to * the address of the next page which has backing store. 
*/ if (pagev->pg_incore != NULL) { while (pagev->pg_incore[pn - pagev->pg_pnbase] == 0) { if ((addr += PAGESIZE) == eaddr) { *saddrp = addr; prot = 0; goto out; } if (++pn == pagev->pg_pnbase + pagev->pg_npages) { addr = pr_pagev_fill(pagev, seg, addr, eaddr); if (addr == eaddr) { *saddrp = addr; prot = 0; goto out; } pn = seg_page(seg, addr); } } } /* * Get the protections on the page corresponding to addr. */ pn = seg_page(seg, addr); ASSERT(pn >= pagev->pg_pnbase); ASSERT(pn < (pagev->pg_pnbase + pagev->pg_npages)); prot = pagev->pg_protv[pn - pagev->pg_pnbase]; getwatchprot(seg->s_as, addr, &prot); *saddrp = addr; /* * Now loop until we find a backed page with different protections * or we reach the end of this segment. */ while ((addr += PAGESIZE) < eaddr) { /* * If pn has advanced to the page number following what we * have information on, refill the page vector and reset * addr and pn. If pr_pagev_fill does not return the * address of the next page, we have a discontiguity and * thus have reached the end of the current mapping. */ if (++pn == pagev->pg_pnbase + pagev->pg_npages) { caddr_t naddr = pr_pagev_fill(pagev, seg, addr, eaddr); if (naddr != addr) goto out; pn = seg_page(seg, addr); } /* * The previous page's protections are in prot, and it has * backing. If this page is MAP_NORESERVE and has no backing, * then end this mapping and return the previous protections. */ if (pagev->pg_incore != NULL && pagev->pg_incore[pn - pagev->pg_pnbase] == 0) break; /* * Otherwise end the mapping if this page's protections (nprot) * are different than those in the previous page (prot). */ nprot = pagev->pg_protv[pn - pagev->pg_pnbase]; getwatchprot(seg->s_as, addr, &nprot); if (nprot != prot) break; } out: *protp = prot; return (addr); } size_t pr_getsegsize(struct seg *seg, int reserved) { size_t size = seg->s_size; /* * If we're interested in the reserved space, return the size of the * segment itself. Everything else in this function is a special case * to determine the actual underlying size of various segment types. */ if (reserved) return (size); /* * If this is a segvn mapping of a regular file, return the smaller * of the segment size and the remaining size of the file beyond * the file offset corresponding to seg->s_base. */ if (seg->s_ops == &segvn_ops) { vattr_t vattr; vnode_t *vp; vattr.va_mask = AT_SIZE; if (SEGOP_GETVP(seg, seg->s_base, &vp) == 0 && vp != NULL && vp->v_type == VREG && VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { u_offset_t fsize = vattr.va_size; u_offset_t offset = SEGOP_GETOFFSET(seg, seg->s_base); if (fsize < offset) fsize = 0; else fsize -= offset; fsize = roundup(fsize, (u_offset_t)PAGESIZE); if (fsize < (u_offset_t)size) size = (size_t)fsize; } return (size); } /* * If this is an ISM shared segment, don't include pages that are * beyond the real size of the spt segment that backs it. */ if (seg->s_ops == &segspt_shmops) return (MIN(spt_realsize(seg), size)); /* * If this is segment is a mapping from /dev/null, then this is a * reservation of virtual address space and has no actual size. * Such segments are backed by segdev and have type set to neither * MAP_SHARED nor MAP_PRIVATE. */ if (seg->s_ops == &segdev_ops && ((SEGOP_GETTYPE(seg, seg->s_base) & (MAP_SHARED | MAP_PRIVATE)) == 0)) return (0); /* * If this segment doesn't match one of the special types we handle, * just return the size of the segment itself. 
*/ return (size); } uint_t pr_getprot(struct seg *seg, int reserved, void **tmp, caddr_t *saddrp, caddr_t *naddrp, caddr_t eaddr) { struct as *as = seg->s_as; caddr_t saddr = *saddrp; caddr_t naddr; int check_noreserve; uint_t prot; union { struct segvn_data *svd; struct segdev_data *sdp; void *data; } s; s.data = seg->s_data; ASSERT(AS_WRITE_HELD(as)); ASSERT(saddr >= seg->s_base && saddr < eaddr); ASSERT(eaddr <= seg->s_base + seg->s_size); /* * Don't include MAP_NORESERVE pages in the address range * unless their mappings have actually materialized. * We cheat by knowing that segvn is the only segment * driver that supports MAP_NORESERVE. */ check_noreserve = (!reserved && seg->s_ops == &segvn_ops && s.svd != NULL && (s.svd->vp == NULL || s.svd->vp->v_type != VREG) && (s.svd->flags & MAP_NORESERVE)); /* * Examine every page only as a last resort. We use guilty knowledge * of segvn and segdev to avoid this: if there are no per-page * protections present in the segment and we don't care about * MAP_NORESERVE, then s_data->prot is the prot for the whole segment. */ if (!check_noreserve && saddr == seg->s_base && seg->s_ops == &segvn_ops && s.svd != NULL && s.svd->pageprot == 0) { prot = s.svd->prot; getwatchprot(as, saddr, &prot); naddr = eaddr; } else if (saddr == seg->s_base && seg->s_ops == &segdev_ops && s.sdp != NULL && s.sdp->pageprot == 0) { prot = s.sdp->prot; getwatchprot(as, saddr, &prot); naddr = eaddr; } else { prpagev_t *pagev; /* * If addr is sitting at the start of the segment, then * create a page vector to store protection and incore * information for pages in the segment, and fill it. * Otherwise, we expect *tmp to address the prpagev_t * allocated by a previous call to this function. */ if (saddr == seg->s_base) { pagev = pr_pagev_create(seg, check_noreserve); saddr = pr_pagev_fill(pagev, seg, saddr, eaddr); ASSERT(*tmp == NULL); *tmp = pagev; ASSERT(saddr <= eaddr); *saddrp = saddr; if (saddr == eaddr) { naddr = saddr; prot = 0; goto out; } } else { ASSERT(*tmp != NULL); pagev = (prpagev_t *)*tmp; } naddr = pr_pagev_nextprot(pagev, seg, saddrp, eaddr, &prot); ASSERT(naddr <= eaddr); } out: if (naddr == eaddr) pr_getprot_done(tmp); *naddrp = naddr; return (prot); } void pr_getprot_done(void **tmp) { if (*tmp != NULL) { pr_pagev_destroy((prpagev_t *)*tmp); *tmp = NULL; } } /* * Return true iff the vnode is a /proc file from the object directory. */ int pr_isobject(vnode_t *vp) { return (vn_matchops(vp, prvnodeops) && VTOP(vp)->pr_type == PR_OBJECT); } /* * Return true iff the vnode is a /proc file opened by the process itself. */ int pr_isself(vnode_t *vp) { /* * XXX: To retain binary compatibility with the old * ioctl()-based version of /proc, we exempt self-opens * of /proc/ from being marked close-on-exec. 
*/ return (vn_matchops(vp, prvnodeops) && (VTOP(vp)->pr_flags & PR_ISSELF) && VTOP(vp)->pr_type != PR_PIDDIR); } static ssize_t pr_getpagesize(struct seg *seg, caddr_t saddr, caddr_t *naddrp, caddr_t eaddr) { ssize_t pagesize, hatsize; ASSERT(AS_WRITE_HELD(seg->s_as)); ASSERT(IS_P2ALIGNED(saddr, PAGESIZE)); ASSERT(IS_P2ALIGNED(eaddr, PAGESIZE)); ASSERT(saddr < eaddr); pagesize = hatsize = hat_getpagesize(seg->s_as->a_hat, saddr); ASSERT(pagesize == -1 || IS_P2ALIGNED(pagesize, pagesize)); ASSERT(pagesize != 0); if (pagesize == -1) pagesize = PAGESIZE; saddr += P2NPHASE((uintptr_t)saddr, pagesize); while (saddr < eaddr) { if (hatsize != hat_getpagesize(seg->s_as->a_hat, saddr)) break; ASSERT(IS_P2ALIGNED(saddr, pagesize)); saddr += pagesize; } *naddrp = ((saddr < eaddr) ? saddr : eaddr); return (hatsize); } /* * Return an array of structures with extended memory map information. * We allocate here; the caller must deallocate. */ int prgetxmap(proc_t *p, list_t *iolhead) { struct as *as = p->p_as; prxmap_t *mp; struct seg *seg; struct seg *brkseg, *stkseg; struct vnode *vp; struct vattr vattr; uint_t prot; ASSERT(as != &kas && AS_WRITE_HELD(as)); /* * Request an initial buffer size that doesn't waste memory * if the address space has only a small number of segments. */ pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); if ((seg = AS_SEGFIRST(as)) == NULL) return (0); brkseg = break_seg(p); stkseg = as_segat(as, prgetstackbase(p)); do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); caddr_t saddr, naddr, baddr; void *tmp = NULL; ssize_t psz; char *parr; uint64_t npages; uint64_t pagenum; if ((seg->s_flags & S_HOLE) != 0) { continue; } /* * Segment loop part one: iterate from the base of the segment * to its end, pausing at each address boundary (baddr) between * ranges that have different virtual memory protections. */ for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); ASSERT(baddr >= saddr && baddr <= eaddr); /* * Segment loop part two: iterate from the current * position to the end of the protection boundary, * pausing at each address boundary (naddr) between * ranges that have different underlying page sizes. */ for (; saddr < baddr; saddr = naddr) { psz = pr_getpagesize(seg, saddr, &naddr, baddr); ASSERT(naddr >= saddr && naddr <= baddr); mp = pr_iol_newbuf(iolhead, sizeof (*mp)); mp->pr_vaddr = (uintptr_t)saddr; mp->pr_size = naddr - saddr; mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); mp->pr_mflags = 0; if (prot & PROT_READ) mp->pr_mflags |= MA_READ; if (prot & PROT_WRITE) mp->pr_mflags |= MA_WRITE; if (prot & PROT_EXEC) mp->pr_mflags |= MA_EXEC; if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) mp->pr_mflags |= MA_SHARED; if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) mp->pr_mflags |= MA_NORESERVE; if (seg->s_ops == &segspt_shmops || (seg->s_ops == &segvn_ops && (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) mp->pr_mflags |= MA_ANON; if (seg == brkseg) mp->pr_mflags |= MA_BREAK; else if (seg == stkseg) mp->pr_mflags |= MA_STACK; if (seg->s_ops == &segspt_shmops) mp->pr_mflags |= MA_ISM | MA_SHM; mp->pr_pagesize = PAGESIZE; if (psz == -1) { mp->pr_hatpagesize = 0; } else { mp->pr_hatpagesize = psz; } /* * Manufacture a filename for the "object" dir. 
*/ mp->pr_dev = PRNODEV; vattr.va_mask = AT_FSID|AT_NODEID; if (seg->s_ops == &segvn_ops && SEGOP_GETVP(seg, saddr, &vp) == 0 && vp != NULL && vp->v_type == VREG && VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { mp->pr_dev = vattr.va_fsid; mp->pr_ino = vattr.va_nodeid; if (vp == p->p_exec) (void) strcpy(mp->pr_mapname, "a.out"); else pr_object_name(mp->pr_mapname, vp, &vattr); } /* * Get the SysV shared memory id, if any. */ if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && (mp->pr_shmid = shmgetid(p, seg->s_base)) != SHMID_NONE) { if (mp->pr_shmid == SHMID_FREE) mp->pr_shmid = -1; mp->pr_mflags |= MA_SHM; } else { mp->pr_shmid = -1; } npages = ((uintptr_t)(naddr - saddr)) >> PAGESHIFT; parr = kmem_zalloc(npages, KM_SLEEP); SEGOP_INCORE(seg, saddr, naddr - saddr, parr); for (pagenum = 0; pagenum < npages; pagenum++) { if (parr[pagenum] & SEG_PAGE_INCORE) mp->pr_rss++; if (parr[pagenum] & SEG_PAGE_ANON) mp->pr_anon++; if (parr[pagenum] & SEG_PAGE_LOCKED) mp->pr_locked++; } kmem_free(parr, npages); } } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); return (0); } /* * Return the process's credentials. We don't need a 32-bit equivalent of * this function because prcred_t and prcred32_t are actually the same. */ void prgetcred(proc_t *p, prcred_t *pcrp) { mutex_enter(&p->p_crlock); cred2prcred(p->p_cred, pcrp); mutex_exit(&p->p_crlock); } void prgetsecflags(proc_t *p, prsecflags_t *psfp) { ASSERT(psfp != NULL); bzero(psfp, sizeof (*psfp)); psfp->pr_version = PRSECFLAGS_VERSION_CURRENT; psfp->pr_lower = p->p_secflags.psf_lower; psfp->pr_upper = p->p_secflags.psf_upper; psfp->pr_effective = p->p_secflags.psf_effective; psfp->pr_inherit = p->p_secflags.psf_inherit; } /* * Compute actual size of the prpriv_t structure. */ size_t prgetprivsize(void) { return (priv_prgetprivsize(NULL)); } /* * Return the process's privileges. We don't need a 32-bit equivalent of * this function because prpriv_t and prpriv32_t are actually the same. */ void prgetpriv(proc_t *p, prpriv_t *pprp) { mutex_enter(&p->p_crlock); cred2prpriv(p->p_cred, pprp); mutex_exit(&p->p_crlock); } #ifdef _SYSCALL32_IMPL /* * Return an array of structures with HAT memory map information. * We allocate here; the caller must deallocate. */ int prgetxmap32(proc_t *p, list_t *iolhead) { struct as *as = p->p_as; prxmap32_t *mp; struct seg *seg; struct seg *brkseg, *stkseg; struct vnode *vp; struct vattr vattr; uint_t prot; ASSERT(as != &kas && AS_WRITE_HELD(as)); /* * Request an initial buffer size that doesn't waste memory * if the address space has only a small number of segments. */ pr_iol_initlist(iolhead, sizeof (*mp), avl_numnodes(&as->a_segtree)); if ((seg = AS_SEGFIRST(as)) == NULL) return (0); brkseg = break_seg(p); stkseg = as_segat(as, prgetstackbase(p)); do { caddr_t eaddr = seg->s_base + pr_getsegsize(seg, 0); caddr_t saddr, naddr, baddr; void *tmp = NULL; ssize_t psz; char *parr; uint64_t npages; uint64_t pagenum; if ((seg->s_flags & S_HOLE) != 0) { continue; } /* * Segment loop part one: iterate from the base of the segment * to its end, pausing at each address boundary (baddr) between * ranges that have different virtual memory protections. */ for (saddr = seg->s_base; saddr < eaddr; saddr = baddr) { prot = pr_getprot(seg, 0, &tmp, &saddr, &baddr, eaddr); ASSERT(baddr >= saddr && baddr <= eaddr); /* * Segment loop part two: iterate from the current * position to the end of the protection boundary, * pausing at each address boundary (naddr) between * ranges that have different underlying page sizes. 
*/ for (; saddr < baddr; saddr = naddr) { psz = pr_getpagesize(seg, saddr, &naddr, baddr); ASSERT(naddr >= saddr && naddr <= baddr); mp = pr_iol_newbuf(iolhead, sizeof (*mp)); mp->pr_vaddr = (caddr32_t)(uintptr_t)saddr; mp->pr_size = (size32_t)(naddr - saddr); mp->pr_offset = SEGOP_GETOFFSET(seg, saddr); mp->pr_mflags = 0; if (prot & PROT_READ) mp->pr_mflags |= MA_READ; if (prot & PROT_WRITE) mp->pr_mflags |= MA_WRITE; if (prot & PROT_EXEC) mp->pr_mflags |= MA_EXEC; if (SEGOP_GETTYPE(seg, saddr) & MAP_SHARED) mp->pr_mflags |= MA_SHARED; if (SEGOP_GETTYPE(seg, saddr) & MAP_NORESERVE) mp->pr_mflags |= MA_NORESERVE; if (seg->s_ops == &segspt_shmops || (seg->s_ops == &segvn_ops && (SEGOP_GETVP(seg, saddr, &vp) != 0 || vp == NULL))) mp->pr_mflags |= MA_ANON; if (seg == brkseg) mp->pr_mflags |= MA_BREAK; else if (seg == stkseg) mp->pr_mflags |= MA_STACK; if (seg->s_ops == &segspt_shmops) mp->pr_mflags |= MA_ISM | MA_SHM; mp->pr_pagesize = PAGESIZE; if (psz == -1) { mp->pr_hatpagesize = 0; } else { mp->pr_hatpagesize = psz; } /* * Manufacture a filename for the "object" dir. */ mp->pr_dev = PRNODEV32; vattr.va_mask = AT_FSID|AT_NODEID; if (seg->s_ops == &segvn_ops && SEGOP_GETVP(seg, saddr, &vp) == 0 && vp != NULL && vp->v_type == VREG && VOP_GETATTR(vp, &vattr, 0, CRED(), NULL) == 0) { (void) cmpldev(&mp->pr_dev, vattr.va_fsid); mp->pr_ino = vattr.va_nodeid; if (vp == p->p_exec) (void) strcpy(mp->pr_mapname, "a.out"); else pr_object_name(mp->pr_mapname, vp, &vattr); } /* * Get the SysV shared memory id, if any. */ if ((mp->pr_mflags & MA_SHARED) && p->p_segacct && (mp->pr_shmid = shmgetid(p, seg->s_base)) != SHMID_NONE) { if (mp->pr_shmid == SHMID_FREE) mp->pr_shmid = -1; mp->pr_mflags |= MA_SHM; } else { mp->pr_shmid = -1; } npages = ((uintptr_t)(naddr - saddr)) >> PAGESHIFT; parr = kmem_zalloc(npages, KM_SLEEP); SEGOP_INCORE(seg, saddr, naddr - saddr, parr); for (pagenum = 0; pagenum < npages; pagenum++) { if (parr[pagenum] & SEG_PAGE_INCORE) mp->pr_rss++; if (parr[pagenum] & SEG_PAGE_ANON) mp->pr_anon++; if (parr[pagenum] & SEG_PAGE_LOCKED) mp->pr_locked++; } kmem_free(parr, npages); } } ASSERT(tmp == NULL); } while ((seg = AS_SEGNEXT(as, seg)) != NULL); return (0); } #endif /* _SYSCALL32_IMPL */
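/*
 * An illustrative userland sketch (not part of this file's kernel
 * interfaces): pr_pctcpu, as produced by prgetpctcpu(), prgetpsinfo() and
 * prgetlwpsinfo() above, is a 16-bit binary fraction with the binary point
 * to the right of the high-order bit, so 0x8000 represents 100% and, for
 * example, 0x2000 represents 25%.  A minimal sketch of how a consumer of
 * /proc/<pid>/psinfo might convert the value to a percentage follows; the
 * function name print_pctcpu is hypothetical and error handling is
 * abbreviated.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *	#include <stdio.h>
 *	#include <procfs.h>
 *
 *	static int
 *	print_pctcpu(const char *path)
 *	{
 *		psinfo_t ps;
 *		int fd;
 *
 *		if ((fd = open(path, O_RDONLY)) < 0)
 *			return (-1);
 *		if (read(fd, &ps, sizeof (ps)) != sizeof (ps)) {
 *			(void) close(fd);
 *			return (-1);
 *		}
 *		(void) close(fd);
 *		(void) printf("%.1f%%\n", ps.pr_pctcpu * 100.0 / 0x8000);
 *		return (0);
 *	}
 *
 * Called as print_pctcpu("/proc/1234/psinfo"), this prints the recent CPU
 * usage of the process, already prorated over online CPUs by prgetpctcpu().
 */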