/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1989, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2019 Joyent, Inc.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

#include <sys/types.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/proc.h>
#include <sys/kmem.h>
#include <sys/tuneable.h>
#include <sys/var.h>
#include <sys/cred.h>
#include <sys/systm.h>
#include <sys/prsystm.h>
#include <sys/vnode.h>
#include <sys/session.h>
#include <sys/cpuvar.h>
#include <sys/cmn_err.h>
#include <sys/bitmap.h>
#include <sys/debug.h>
#include <c2/audit.h>
#include <sys/project.h>
#include <sys/task.h>
#include <sys/zone.h>

/* directory entries for /proc */
union procent {
	proc_t *pe_proc;
	union procent *pe_next;
};

struct pid pid0 = {
	0,		/* pid_prinactive */
	1,		/* pid_pgorphaned */
	0,		/* pid_padding */
	0,		/* pid_prslot */
	0,		/* pid_id */
	NULL,		/* pid_pglink */
	NULL,		/* pid_pgtail */
	NULL,		/* pid_link */
	3		/* pid_ref */
};

static int pid_hashlen = 4;	/* desired average hash chain length */
static int pid_hashsz;		/* number of buckets in the hash table */

#define	HASHPID(pid)	(pidhash[((pid)&(pid_hashsz-1))])

extern uint_t nproc;
extern struct kmem_cache *process_cache;
static void upcount_init(void);

kmutex_t	pidlock;	/* global process lock */
kmutex_t	pr_pidlock;	/* /proc global process lock */
kcondvar_t	*pr_pid_cv;	/* for /proc, one per process slot */
struct plock	*proc_lock;	/* persistent array of p_lock's */

/*
 * See the comment above pid_getlockslot() for a detailed explanation of this
 * constant.  Note that a PLOCK_SHIFT of 3 implies 64-byte coherence
 * granularity; if the coherence granularity is ever changed, this constant
 * should be modified to reflect the change to minimize proc_lock false
 * sharing (correctness, however, is guaranteed regardless of the coherence
 * granularity).
 */
#define	PLOCK_SHIFT	3

static kmutex_t	pidlinklock;
static struct pid **pidhash;
static pid_t minpid;
static pid_t mpid = FAMOUS_PIDS;	/* one more than the last famous pid */
static union procent *procdir;
static union procent *procentfree;

static struct pid *
pid_lookup(pid_t pid)
{
	struct pid *pidp;

	ASSERT(MUTEX_HELD(&pidlinklock));

	for (pidp = HASHPID(pid); pidp; pidp = pidp->pid_link) {
		if (pidp->pid_id == pid) {
			ASSERT(pidp->pid_ref > 0);
			break;
		}
	}
	return (pidp);
}

void
pid_setmin(void)
{
	if (jump_pid && jump_pid > mpid)
		minpid = mpid = jump_pid;
	else
		minpid = mpid;
}

/*
 * When prslots are simply used as an index to determine a process' p_lock,
 * adjacent prslots share adjacent p_locks.  On machines where the size
 * of a mutex is smaller than that of a cache line (which, as of this writing,
 * is true for all machines on which Solaris runs), this can potentially
 * induce false sharing.  The standard solution for false sharing is to pad
 * out one's data structures (in this case, struct plock).  However,
 * given the size and (generally) sparse use of the proc_lock array, this
 * is suboptimal.  We therefore stride through the proc_lock array with
 * a stride of 2^PLOCK_SHIFT.  PLOCK_SHIFT should be defined as:
 *
 *   log_2 (coherence_granularity / sizeof (kmutex_t))
 *
 * Under this scheme, false sharing is still possible -- but only when
 * the number of active processes is very large.  Note that the one-to-one
 * mapping between prslots and lockslots is maintained.
 */
static int
pid_getlockslot(int prslot)
{
	int even = (v.v_proc >> PLOCK_SHIFT) << PLOCK_SHIFT;
	int perlap = even >> PLOCK_SHIFT;

	if (prslot >= even)
		return (prslot);

	return (((prslot % perlap) << PLOCK_SHIFT) + (prslot / perlap));
}
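/*
 * Worked example (illustrative; not part of the original comments): with
 * PLOCK_SHIFT == 3 and v.v_proc == 1000, even == 1000 and perlap == 125,
 * so prslot 0 maps to lockslot 0, prslot 1 to lockslot 8, prslot 2 to
 * lockslot 16, and so on; prslot 125 wraps around to lockslot 1.  Adjacent
 * prslots -- the common case for processes created close together in
 * time -- thus land a full coherence granule apart:
 *
 *	ASSERT3S(pid_getlockslot(0), ==, 0);
 *	ASSERT3S(pid_getlockslot(1), ==, 8);
 *	ASSERT3S(pid_getlockslot(125), ==, 1);
 */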
/*
 * This function allocates a pid structure, a free pid, and optionally a
 * slot in the proc table for it.
 *
 * pid_allocate() returns the new pid on success, -1 on failure.
 */
pid_t
pid_allocate(proc_t *prp, pid_t pid, int flags)
{
	struct pid *pidp;
	union procent *pep;
	pid_t newpid, startpid;

	pidp = kmem_zalloc(sizeof (struct pid), KM_SLEEP);

	mutex_enter(&pidlinklock);
	pep = procentfree;
	if ((flags & PID_ALLOC_PROC) && pep == NULL) {
		/*
		 * ran out of /proc directory entries
		 */
		goto failed;
	}

	if (pid != 0) {
		VERIFY(minpid == 0);
		VERIFY3P(pid, <, mpid);
		VERIFY3P(pid_lookup(pid), ==, NULL);
		newpid = pid;
	} else {
		/*
		 * Allocate a pid
		 */
		ASSERT(minpid <= mpid && mpid < maxpid);

		startpid = mpid;
		for (;;) {
			newpid = mpid;
			if (++mpid == maxpid)
				mpid = minpid;

			if (pid_lookup(newpid) == NULL)
				break;

			if (mpid == startpid)
				goto failed;
		}
	}

	/*
	 * Put pid into the pid hash table.
	 */
	pidp->pid_link = HASHPID(newpid);
	HASHPID(newpid) = pidp;
	pidp->pid_ref = 1;
	pidp->pid_id = newpid;

	if (flags & PID_ALLOC_PROC) {
		procentfree = pep->pe_next;
		pidp->pid_prslot = pep - procdir;
		pep->pe_proc = prp;
		prp->p_pidp = pidp;
		prp->p_lockp = &proc_lock[pid_getlockslot(pidp->pid_prslot)];
	} else {
		pidp->pid_prslot = 0;
	}

	mutex_exit(&pidlinklock);

	return (newpid);

failed:
	mutex_exit(&pidlinklock);
	kmem_free(pidp, sizeof (struct pid));
	return (-1);
}

/*
 * decrement the reference count for pid
 */
int
pid_rele(struct pid *pidp)
{
	struct pid **pidpp;

	mutex_enter(&pidlinklock);
	ASSERT(pidp != &pid0);

	pidpp = &HASHPID(pidp->pid_id);
	for (;;) {
		ASSERT(*pidpp != NULL);
		if (*pidpp == pidp)
			break;
		pidpp = &(*pidpp)->pid_link;
	}

	*pidpp = pidp->pid_link;
	mutex_exit(&pidlinklock);

	kmem_free(pidp, sizeof (*pidp));
	return (0);
}

void
proc_entry_free(struct pid *pidp)
{
	mutex_enter(&pidlinklock);
	pidp->pid_prinactive = 1;
	procdir[pidp->pid_prslot].pe_next = procentfree;
	procentfree = &procdir[pidp->pid_prslot];
	mutex_exit(&pidlinklock);
}
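/*
 * Note on the procdir encoding (an illustrative aside, not part of the
 * original comments): each union procent is either in use, in which case
 * pe_proc points at a proc_t, or free, in which case pe_next chains it
 * onto procentfree.  Because the two states share storage, a slot can be
 * recognized as free by checking whether its pointer lands back inside
 * the procdir array itself, which only a pe_next link can do:
 *
 *	union procent *pep = procdir[slot].pe_next;
 *	if (pep >= procdir && pep < &procdir[v.v_proc])
 *		... slot is on the free list; no process here ...
 *
 * This is exactly the test pid_entry() applies below.
 */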
/*
 * The original task needs to be passed in since the process has already been
 * detached from the task at this point in time.
 */
void
pid_exit(proc_t *prp, struct task *tk)
{
	struct pid *pidp;
	zone_t *zone = prp->p_zone;

	ASSERT(MUTEX_HELD(&pidlock));

	/*
	 * Exit process group.  If it is NULL, it's because fork failed
	 * before calling pgjoin().
	 */
	ASSERT(prp->p_pgidp != NULL || prp->p_stat == SIDL);
	if (prp->p_pgidp != NULL)
		pgexit(prp);

	sess_rele(prp->p_sessp, B_TRUE);

	pidp = prp->p_pidp;

	proc_entry_free(pidp);

	if (audit_active)
		audit_pfree(prp);

	if (practive == prp) {
		practive = prp->p_next;
	}

	if (prp->p_next) {
		prp->p_next->p_prev = prp->p_prev;
	}
	if (prp->p_prev) {
		prp->p_prev->p_next = prp->p_next;
	}

	PID_RELE(pidp);

	mutex_destroy(&prp->p_crlock);
	kmem_cache_free(process_cache, prp);
	nproc--;

	/*
	 * Decrement the process counts of the original task, project
	 * and zone.
	 */
	mutex_enter(&zone->zone_nlwps_lock);
	tk->tk_nprocs--;
	tk->tk_proj->kpj_nprocs--;
	zone->zone_nprocs--;
	mutex_exit(&zone->zone_nlwps_lock);
}

/*
 * Find a process visible from the specified zone given its process ID.
 */
proc_t *
prfind_zone(pid_t pid, zoneid_t zoneid)
{
	struct pid *pidp;
	proc_t *p;

	ASSERT(MUTEX_HELD(&pidlock));

	mutex_enter(&pidlinklock);
	pidp = pid_lookup(pid);
	mutex_exit(&pidlinklock);
	if (pidp != NULL && pidp->pid_prinactive == 0) {
		p = procdir[pidp->pid_prslot].pe_proc;
		if (zoneid == ALL_ZONES || p->p_zone->zone_id == zoneid)
			return (p);
	}
	return (NULL);
}

/*
 * Find a process given its process ID.  This obeys zone restrictions,
 * so if the caller is in a non-global zone it won't find processes
 * associated with other zones.  Use prfind_zone(pid, ALL_ZONES) to
 * bypass this restriction.
 */
proc_t *
prfind(pid_t pid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (prfind_zone(pid, zoneid));
}

proc_t *
pgfind_zone(pid_t pgid, zoneid_t zoneid)
{
	struct pid *pidp;

	ASSERT(MUTEX_HELD(&pidlock));

	mutex_enter(&pidlinklock);
	pidp = pid_lookup(pgid);
	mutex_exit(&pidlinklock);
	if (pidp != NULL) {
		proc_t *p = pidp->pid_pglink;

		if (zoneid == ALL_ZONES || pgid == 0 || p == NULL ||
		    p->p_zone->zone_id == zoneid)
			return (p);
	}
	return (NULL);
}

/*
 * return the head of the list of processes whose process group ID is 'pgid',
 * or NULL, if no such process group
 */
proc_t *
pgfind(pid_t pgid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (pgfind_zone(pgid, zoneid));
}
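/*
 * Typical caller pattern (an illustrative sketch, not a new interface):
 * a pointer returned by prfind() is only valid while pidlock is held, so
 * a caller that wants to operate on the process takes p_lock before
 * dropping pidlock.  This handoff keeps the proc_t from disappearing out
 * from under the caller:
 *
 *	proc_t *p;
 *
 *	mutex_enter(&pidlock);
 *	if ((p = prfind(pid)) == NULL) {
 *		mutex_exit(&pidlock);
 *		return (ESRCH);
 *	}
 *	mutex_enter(&p->p_lock);
 *	mutex_exit(&pidlock);
 *	... operate on p under p_lock ...
 *	mutex_exit(&p->p_lock);
 *
 * sprlock_zone() below follows this same shape, adding P_PR_LOCK on top.
 */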
/*
 * Sets P_PR_LOCK on a non-system process.  Process must be fully created
 * and not exiting to succeed.
 *
 * Returns 0 on success.
 * Returns 1 if P_PR_LOCK is set.
 * Returns -1 if proc is in invalid state.
 */
int
sprtrylock_proc(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	/* skip system and incomplete processes */
	if (p->p_stat == SIDL || p->p_stat == SZOMB ||
	    (p->p_flag & (SSYS | SEXITING | SEXITLWPS))) {
		return (-1);
	}

	if (p->p_proc_flag & P_PR_LOCK)
		return (1);

	p->p_proc_flag |= P_PR_LOCK;

	return (0);
}

/*
 * Wait for P_PR_LOCK to become clear.  Returns with p_lock dropped,
 * and the proc pointer no longer valid, as the proc may have exited.
 */
void
sprwaitlock_proc(proc_t *p)
{
	kmutex_t *mp;

	ASSERT(MUTEX_HELD(&p->p_lock));
	ASSERT(p->p_proc_flag & P_PR_LOCK);

	/*
	 * p_lock is persistent, but p itself is not -- it could
	 * vanish during cv_wait().  Load p->p_lock now so we can
	 * drop it after cv_wait() without referencing p.
	 */
	mp = &p->p_lock;
	cv_wait(&pr_pid_cv[p->p_slot], mp);
	mutex_exit(mp);
}

/*
 * If pid exists, find its proc, acquire its p_lock and mark it P_PR_LOCK.
 * Returns the proc pointer on success, NULL on failure.  sprlock() is
 * really just a stripped-down version of pr_p_lock() to allow practive
 * walkers like dofusers() and dumpsys() to synchronize with /proc.
 */
proc_t *
sprlock_zone(pid_t pid, zoneid_t zoneid)
{
	proc_t *p;
	int ret;

	for (;;) {
		mutex_enter(&pidlock);
		if ((p = prfind_zone(pid, zoneid)) == NULL) {
			mutex_exit(&pidlock);
			return (NULL);
		}
		mutex_enter(&p->p_lock);
		mutex_exit(&pidlock);

		if (panicstr)
			return (p);

		ret = sprtrylock_proc(p);
		if (ret == -1) {
			mutex_exit(&p->p_lock);
			return (NULL);
		} else if (ret == 0) {
			break;
		}
		sprwaitlock_proc(p);
	}
	return (p);
}

proc_t *
sprlock(pid_t pid)
{
	zoneid_t zoneid;

	if (INGLOBALZONE(curproc))
		zoneid = ALL_ZONES;
	else
		zoneid = getzoneid();
	return (sprlock_zone(pid, zoneid));
}

void
sprlock_proc(proc_t *p)
{
	ASSERT(MUTEX_HELD(&p->p_lock));

	while (p->p_proc_flag & P_PR_LOCK) {
		cv_wait(&pr_pid_cv[p->p_slot], &p->p_lock);
	}

	p->p_proc_flag |= P_PR_LOCK;
}

void
sprunlock(proc_t *p)
{
	if (panicstr) {
		mutex_exit(&p->p_lock);
		return;
	}

	ASSERT(p->p_proc_flag & P_PR_LOCK);
	ASSERT(MUTEX_HELD(&p->p_lock));

	cv_signal(&pr_pid_cv[p->p_slot]);
	p->p_proc_flag &= ~P_PR_LOCK;
	mutex_exit(&p->p_lock);
}

void
pid_init(void)
{
	int i;

	pid_hashsz = 1 << highbit(v.v_proc / pid_hashlen);

	pidhash = kmem_zalloc(sizeof (struct pid *) * pid_hashsz, KM_SLEEP);
	procdir = kmem_alloc(sizeof (union procent) * v.v_proc, KM_SLEEP);
	pr_pid_cv = kmem_zalloc(sizeof (kcondvar_t) * v.v_proc, KM_SLEEP);
	proc_lock = kmem_zalloc(sizeof (struct plock) * v.v_proc, KM_SLEEP);

	nproc = 1;
	practive = proc_sched;
	proc_sched->p_next = NULL;
	procdir[0].pe_proc = proc_sched;

	procentfree = &procdir[1];
	for (i = 1; i < v.v_proc - 1; i++)
		procdir[i].pe_next = &procdir[i+1];
	procdir[i].pe_next = NULL;

	HASHPID(0) = &pid0;

	upcount_init();
}

proc_t *
pid_entry(int slot)
{
	union procent *pep;
	proc_t *prp;

	ASSERT(MUTEX_HELD(&pidlock));
	ASSERT(slot >= 0 && slot < v.v_proc);

	pep = procdir[slot].pe_next;
	if (pep >= procdir && pep < &procdir[v.v_proc])
		return (NULL);
	prp = procdir[slot].pe_proc;
	if (prp != 0 && prp->p_stat == SIDL)
		return (NULL);
	return (prp);
}

/*
 * Send the specified signal to all processes whose process group ID is
 * equal to 'pgid'
 */
void
signal(pid_t pgid, int sig)
{
	struct pid *pidp;
	proc_t *prp;

	mutex_enter(&pidlock);
	mutex_enter(&pidlinklock);
	if (pgid == 0 || (pidp = pid_lookup(pgid)) == NULL) {
		mutex_exit(&pidlinklock);
		mutex_exit(&pidlock);
		return;
	}
	mutex_exit(&pidlinklock);
	for (prp = pidp->pid_pglink; prp; prp = prp->p_pglink) {
		mutex_enter(&prp->p_lock);
		sigtoproc(prp, NULL, sig);
		mutex_exit(&prp->p_lock);
	}
	mutex_exit(&pidlock);
}

/*
 * Send the specified signal to the specified process
 */
void
prsignal(struct pid *pidp, int sig)
{
	if (!(pidp->pid_prinactive))
		psignal(procdir[pidp->pid_prslot].pe_proc, sig);
}

#include <sys/sunddi.h>

/*
 * DDI/DKI interfaces for drivers to send signals to processes
 */

/*
 * obtain an opaque reference to a process for signaling
 */
void *
proc_ref(void)
{
	struct pid *pidp;

	mutex_enter(&pidlock);
	pidp = curproc->p_pidp;
	PID_HOLD(pidp);
	mutex_exit(&pidlock);

	return (pidp);
}

/*
 * release a reference to a process
 * - a process can exit even if a driver has a reference to it
 * - one proc_unref for every proc_ref
 */
void
proc_unref(void *pref)
{
	mutex_enter(&pidlock);
	PID_RELE((struct pid *)pref);
	mutex_exit(&pidlock);
}

/*
 * send a signal to a process
 *
 * - send the process the signal
 * - if the process went away, return a -1
 * - if the process is still there return 0
 */
int
proc_signal(void *pref, int sig)
{
	struct pid *pidp = pref;

	prsignal(pidp, sig);
	return (pidp->pid_prinactive ? -1 : 0);
}
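/*
 * Usage sketch for the DDI/DKI interfaces above (illustrative only; see
 * proc_ref(9F), proc_signal(9F) and proc_unref(9F)).  A driver typically
 * captures a reference to the current process at open(9E) time, in user
 * context, and signals it later, e.g. from an interrupt-driven event:
 *
 *	void *sigref;
 *
 *	sigref = proc_ref();		(at open, in user context)
 *	...
 *	if (proc_signal(sigref, SIGPOLL) == -1)
 *		... process has exited; the signal was not delivered ...
 *	...
 *	proc_unref(sigref);		(at close: one unref per ref)
 *
 * The reference keeps the struct pid, not the process, alive; hence the
 * -1 return from proc_signal() when the process has gone away.
 */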
static struct upcount	**upc_hash;	/* a boot time allocated array */
static ulong_t		upc_hashmask;
#define	UPC_HASH(x, y)	((ulong_t)(x ^ y) & upc_hashmask)

/*
 * Get us off the ground.  Called once at boot.
 */
void
upcount_init(void)
{
	ulong_t	upc_hashsize;

	/*
	 * An entry per MB of memory is our current guess
	 */
	/*
	 * 2^20 is a meg, so shifting right by (20 - PAGESHIFT)
	 * converts pages to megs (without overflowing a u_int
	 * if you have more than 4G of memory, like ptob(physmem)/1M
	 * would).
	 */
	upc_hashsize = (1 << highbit(physmem >> (20 - PAGESHIFT)));
	upc_hashmask = upc_hashsize - 1;
	upc_hash = kmem_zalloc(upc_hashsize * sizeof (struct upcount *),
	    KM_SLEEP);
}

/*
 * Increment the number of processes associated with a given uid and zoneid.
 */
void
upcount_inc(uid_t uid, zoneid_t zoneid)
{
	struct upcount	**upc, **hupc;
	struct upcount	*new;

	ASSERT(MUTEX_HELD(&pidlock));
	new = NULL;

	hupc = &upc_hash[UPC_HASH(uid, zoneid)];
top:
	upc = hupc;
	while ((*upc) != NULL) {
		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
			(*upc)->up_count++;
			if (new) {
				/*
				 * did not need `new' after all.
				 */
				kmem_free(new, sizeof (*new));
			}
			return;
		}
		upc = &(*upc)->up_next;
	}

	/*
	 * There is no entry for this <uid, zoneid> pair.
	 * Allocate one.  If we have to drop pidlock, check
	 * again.
	 */
	if (new == NULL) {
		new = (struct upcount *)kmem_alloc(sizeof (*new), KM_NOSLEEP);
		if (new == NULL) {
			mutex_exit(&pidlock);
			new = (struct upcount *)kmem_alloc(sizeof (*new),
			    KM_SLEEP);
			mutex_enter(&pidlock);
			goto top;
		}
	}

	/*
	 * On the assumption that a new user is going to do some
	 * more forks, put the new upcount structure on the front.
	 */
	upc = hupc;

	new->up_uid = uid;
	new->up_zoneid = zoneid;
	new->up_count = 1;
	new->up_next = *upc;

	*upc = new;
}

/*
 * Decrement the number of processes a given uid and zoneid has.
 */
void
upcount_dec(uid_t uid, zoneid_t zoneid)
{
	struct upcount	**upc;
	struct upcount	*done;

	ASSERT(MUTEX_HELD(&pidlock));

	upc = &upc_hash[UPC_HASH(uid, zoneid)];
	while ((*upc) != NULL) {
		if ((*upc)->up_uid == uid && (*upc)->up_zoneid == zoneid) {
			(*upc)->up_count--;
			if ((*upc)->up_count == 0) {
				done = *upc;
				*upc = (*upc)->up_next;
				kmem_free(done, sizeof (*done));
			}
			return;
		}
		upc = &(*upc)->up_next;
	}
	cmn_err(CE_PANIC, "decr_upcount-off the end");
}

/*
 * Returns the number of processes a uid has.
 * Non-existent uids are assumed to have no processes.
 */
int
upcount_get(uid_t uid, zoneid_t zoneid)
{
	struct upcount	*upc;

	ASSERT(MUTEX_HELD(&pidlock));

	upc = upc_hash[UPC_HASH(uid, zoneid)];
	while (upc != NULL) {
		if (upc->up_uid == uid && upc->up_zoneid == zoneid) {
			return (upc->up_count);
		}
		upc = upc->up_next;
	}
	return (0);
}
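/*
 * Usage sketch (illustrative; the real consumer is the fork path): the
 * upcount hash exists so that fork can cheaply enforce the per-user
 * process limit.  A check in the spirit of the one fork performs,
 * assuming a cred_t *cr for the forking process:
 *
 *	mutex_enter(&pidlock);
 *	if (upcount_get(crgetruid(cr), getzoneid()) >= v.v_maxup) {
 *		mutex_exit(&pidlock);
 *		return (EAGAIN);	(too many procs for this user)
 *	}
 *	upcount_inc(crgetruid(cr), getzoneid());
 *	mutex_exit(&pidlock);
 *
 * with a matching upcount_dec() when the process exits.
 */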