1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Copyright (c) 2013, Joyent, Inc.  All rights reserved.
29 */
30
31#include <sys/types.h>
32#include <sys/uio.h>
33#include <sys/param.h>
34#include <sys/cmn_err.h>
35#include <sys/cred.h>
36#include <sys/policy.h>
37#include <sys/debug.h>
38#include <sys/errno.h>
39#include <sys/file.h>
40#include <sys/inline.h>
41#include <sys/kmem.h>
42#include <sys/proc.h>
43#include <sys/brand.h>
44#include <sys/regset.h>
45#include <sys/sysmacros.h>
46#include <sys/systm.h>
47#include <sys/vfs.h>
48#include <sys/vnode.h>
49#include <sys/signal.h>
50#include <sys/auxv.h>
51#include <sys/user.h>
52#include <sys/class.h>
53#include <sys/fault.h>
54#include <sys/syscall.h>
55#include <sys/procfs.h>
56#include <sys/zone.h>
57#include <sys/copyops.h>
58#include <sys/schedctl.h>
59#include <vm/as.h>
60#include <vm/seg.h>
61#include <fs/proc/prdata.h>
62#include <sys/contract/process_impl.h>
63
/*
 * Forward declarations of file-local (static) helpers.  Several of them
 * are called from pr_control() and pr_control32() below.
 */
static	void	pr_settrace(proc_t *, sigset_t *);
static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
#if defined(__sparc)
static	int	pr_setxregs(prnode_t *, prxregset_t *);
static	int	pr_setasrs(prnode_t *, asrset_t);
#endif
static	int	pr_setvaddr(prnode_t *, caddr_t);
static	int	pr_clearsig(prnode_t *);
static	int	pr_clearflt(prnode_t *);
static	int	pr_watch(prnode_t *, prwatch_t *, int *);
static	int	pr_agent(prnode_t *, prgregset_t, int *);
static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
static	void	pauselwps(proc_t *);
static	void	unpauselwps(proc_t *);
81
/*
 * Operand of a native control message.  Each member is the operand layout
 * for the command(s) named in its trailing comment.  prwritectl() points
 * an arg_t at the word immediately following the command word in its
 * buffer and hands that pointer to ctlsize() and pr_control().
 */
typedef union {
	long		sig;		/* PCKILL, PCUNKILL */
	long		nice;		/* PCNICE */
	long		timeo;		/* PCTWSTOP */
	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
	caddr_t		vaddr;		/* PCSVADDR */
	siginfo_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset_t	prfpregset;	/* PCSFPREG */
#if defined(__sparc)
	prxregset_t	prxregset;	/* PCSXREG */
	asrset_t	asrset;		/* PCSASRS */
#endif
	prwatch_t	prwatch;	/* PCWATCH */
	priovec_t	priovec;	/* PCREAD, PCWRITE */
	prcred_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	long		przoneid;	/* PCSZONE */
} arg_t;

/* Executes a single native control message; defined below. */
static	int	pr_control(long, arg_t *, prnode_t *, cred_t *);
106
107static size_t
108ctlsize(long cmd, size_t resid, arg_t *argp)
109{
110	size_t size = sizeof (long);
111	size_t rnd;
112	int ngrp;
113
114	switch (cmd) {
115	case PCNULL:
116	case PCSTOP:
117	case PCDSTOP:
118	case PCWSTOP:
119	case PCCSIG:
120	case PCCFAULT:
121		break;
122	case PCSSIG:
123		size += sizeof (siginfo_t);
124		break;
125	case PCTWSTOP:
126		size += sizeof (long);
127		break;
128	case PCKILL:
129	case PCUNKILL:
130	case PCNICE:
131		size += sizeof (long);
132		break;
133	case PCRUN:
134	case PCSET:
135	case PCUNSET:
136		size += sizeof (ulong_t);
137		break;
138	case PCSVADDR:
139		size += sizeof (caddr_t);
140		break;
141	case PCSTRACE:
142	case PCSHOLD:
143		size += sizeof (sigset_t);
144		break;
145	case PCSFAULT:
146		size += sizeof (fltset_t);
147		break;
148	case PCSENTRY:
149	case PCSEXIT:
150		size += sizeof (sysset_t);
151		break;
152	case PCSREG:
153	case PCAGENT:
154		size += sizeof (prgregset_t);
155		break;
156	case PCSFPREG:
157		size += sizeof (prfpregset_t);
158		break;
159#if defined(__sparc)
160	case PCSXREG:
161		size += sizeof (prxregset_t);
162		break;
163	case PCSASRS:
164		size += sizeof (asrset_t);
165		break;
166#endif
167	case PCWATCH:
168		size += sizeof (prwatch_t);
169		break;
170	case PCREAD:
171	case PCWRITE:
172		size += sizeof (priovec_t);
173		break;
174	case PCSCRED:
175		size += sizeof (prcred_t);
176		break;
177	case PCSCREDX:
178		/*
179		 * We cannot derefence the pr_ngroups fields if it
180		 * we don't have enough data.
181		 */
182		if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
183			return (0);
184		ngrp = argp->prcred.pr_ngroups;
185		if (ngrp < 0 || ngrp > ngroups_max)
186			return (0);
187
188		/* The result can be smaller than sizeof (prcred_t) */
189		size += sizeof (prcred_t) - sizeof (gid_t);
190		size += ngrp * sizeof (gid_t);
191		break;
192	case PCSPRIV:
193		if (resid >= size + sizeof (prpriv_t))
194			size += priv_prgetprivsize(&argp->prpriv);
195		else
196			return (0);
197		break;
198	case PCSZONE:
199		size += sizeof (long);
200		break;
201	default:
202		return (0);
203	}
204
205	/* Round up to a multiple of long, unless exact amount written */
206	if (size < resid) {
207		rnd = size & (sizeof (long) - 1);
208
209		if (rnd != 0)
210			size += sizeof (long) - rnd;
211	}
212
213	if (size > resid)
214		return (0);
215	return (size);
216}
217
/*
 * Control operations (lots).
 *
 * Consume the data described by 'uiop' as a sequence of native control
 * messages (a long command word followed by its operand, see arg_t) and
 * execute each one against the node behind 'vp' via pr_control().
 *
 * Data is copied into a local buffer with uiomove(), possibly several
 * messages at a time.  A message that is cut off at the end of the buffer
 * is moved to the front and completed by the next uiomove().
 *
 * Returns 0 if all data was consumed; EINVAL if an incomplete or
 * unrecognized message is left over; otherwise the error returned by
 * uiomove(), prlock() or pr_control().
 */
int
prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
{
#define	MY_BUFFER_SIZE \
		100 > 1 + sizeof (arg_t) / sizeof (long) ? \
		100 : 1 + sizeof (arg_t) / sizeof (long)
	long buf[MY_BUFFER_SIZE];
	long *bufp;
	size_t resid = 0;	/* bytes in buf not yet consumed */
	size_t size;
	prnode_t *pnp = VTOP(vp);
	int error;
	int locked = 0;		/* non-zero while we hold the prlock() lock */

	while (uiop->uio_resid) {
		/*
		 * Read several commands in one gulp.
		 */
		bufp = buf;
		if (resid) {	/* move incomplete command to front of buffer */
			long *tail;

			/*
			 * Nothing in the buffer could be parsed, so reading
			 * more cannot help; give up (EINVAL below).
			 */
			if (resid >= sizeof (buf))
				break;
			/*
			 * NOTE(review): the leftover bytes are taken from the
			 * end of buf, which relies on the previous pass having
			 * filled buf completely, and the copy loop relies on
			 * resid being a multiple of sizeof (long) here --
			 * confirm against ctlsize()'s rounding.
			 */
			tail = (long *)((char *)buf + sizeof (buf) - resid);
			do {
				*bufp++ = *tail++;
			} while ((resid -= sizeof (long)) != 0);
		}
		/* Fill the rest of the buffer, or take whatever is left. */
		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
		if (resid > uiop->uio_resid)
			resid = uiop->uio_resid;
		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
			return (error);
		/* Count the carried-over bytes again and restart at the front. */
		resid += (char *)bufp - (char *)buf;
		bufp = buf;

		do {		/* loop over commands in buffer */
			long cmd = bufp[0];
			arg_t *argp = (arg_t *)&bufp[1];

			size = ctlsize(cmd, resid, argp);
			if (size == 0)	/* incomplete or invalid command */
				break;
			/*
			 * Perform the specified control operation.
			 */
			if (!locked) {
				if ((error = prlock(pnp, ZNO)) != 0)
					return (error);
				locked = 1;
			}
			/*
			 * pr_control() drops the lock before returning any
			 * non-zero value, so no prunlock() is needed on
			 * these paths.  After -1 we carry on with the next
			 * command and re-lock on demand.
			 */
			if (error = pr_control(cmd, argp, pnp, cr)) {
				if (error == -1)	/* -1 is timeout */
					locked = 0;
				else
					return (error);
			}
			bufp = (long *)((char *)bufp + size);
		} while ((resid -= size) != 0);

		if (locked) {
			prunlock(pnp);
			locked = 0;
		}
	}
	return (resid? EINVAL : 0);
}
289
/*
 * Execute one native control message against the process or lwp behind
 * 'pnp'.  'cmd' is the command word and 'argp' points at its operand,
 * which is interpreted according to 'cmd' (see arg_t).  'cr' is passed
 * through to the helpers that take a credential.
 *
 * Called from prwritectl() after a successful prlock(pnp, ZNO).
 *
 * Returns 0 on success with the lock still held.  On a non-zero return
 * the lock is no longer held: it is dropped here with prunlock(), or has
 * already been dropped on the path that reported the error.  A return
 * of -1 is not an errno; prwritectl() treats it as "lock dropped, keep
 * processing".
 */
static int
pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;
	int error = 0;

	/* PCNULL is a no-op and does not even look at the target. */
	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	/* System processes defy control. */
	if (p->p_flag & SSYS) {
		prunlock(pnp);
		return (EBUSY);
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
		{
			time_t timeo;

			/*
			 * Can't apply to a system process.
			 */
			if (p->p_as == &kas) {
				error = EBUSY;
				break;
			}

			if (cmd == PCSTOP || cmd == PCDSTOP)
				pr_stop(pnp);

			if (cmd == PCDSTOP)
				break;

			/*
			 * If an lwp is waiting for itself or its process,
			 * don't wait. The stopped lwp would never see the
			 * fact that it is stopped.
			 */
			if ((pcp->prc_flags & PRC_LWP)?
			    (pcp->prc_thread == curthread) : (p == curproc)) {
				if (cmd == PCWSTOP || cmd == PCTWSTOP)
					error = EBUSY;
				break;
			}

			/*
			 * pr_wait_stop() only returns non-zero after the
			 * lock has been dropped, so return directly rather
			 * than going through the prunlock() at the bottom.
			 */
			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
			if ((error = pr_wait_stop(pnp, timeo)) != 0)
				return (error);

			break;
		}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p,  &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		error = pr_setsig(pnp, &argp->siginfo);
		/* After a successful SIGKILL, drop the lock and wait. */
		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		/* After a successful SIGKILL, drop the lock and wait. */
		if (error == 0 && argp->sig == SIGKILL) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, argp->flags);
		break;

	case PCSREG:	/* set general registers */
		{
			/* pr_thread() returns with the thread locked. */
			kthread_t *t = pr_thread(pnp);

			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				thread_unlock(t);
				error = EBUSY;
			} else {
				/* p_lock is dropped around prsetprregs(). */
				thread_unlock(t);
				mutex_exit(&p->p_lock);
				prsetprregs(ttolwp(t), argp->prgregset, 0);
				mutex_enter(&p->p_lock);
			}
			break;
		}

	case PCSFPREG:	/* set floating-point registers */
		error = pr_setfpregs(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
#if defined(__sparc)
		error = pr_setxregs(pnp, &argp->prxregset);
#else
		error = EINVAL;
#endif
		break;

#if defined(__sparc)
	case PCSASRS:	/* set ancillary state registers */
		error = pr_setasrs(pnp, argp->asrset);
		break;
#endif

	case PCSVADDR:	/* set virtual address at which to resume */
		error = pr_setvaddr(pnp, argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		/*
		 * NOTE(review): 'unlocked' is presumably set by pr_watch()
		 * when it has already dropped the lock itself -- confirm
		 * against pr_watch().
		 */
		error = pr_watch(pnp, &argp->prwatch, &unlocked);
		if (error && unlocked)
			return (error);
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		/* NOTE(review): 'unlocked' as for PCWATCH above -- confirm. */
		error = pr_agent(pnp, argp->prgregset, &unlocked);
		if (error && unlocked)
			return (error);
		break;

	case PCREAD:	/* read from the address space */
		error = pr_rdwr(p, UIO_READ, &argp->priovec);
		break;

	case PCWRITE:	/* write to the address space */
		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
		break;

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;
	case PCSZONE:	/* set the process's zoneid credentials */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* Common error exit: the caller expects the lock to be dropped. */
	if (error)
		prunlock(pnp);
	return (error);
}
495
496#ifdef _SYSCALL32_IMPL
497
/*
 * Operand of a 32-bit control message.  Each member is the operand layout
 * for the command(s) named in its trailing comment, using the 32-bit
 * variants of the types where one is used by arg_t's counterpart.
 * prwritectl32() copies the operand into an arg32_t before handing it to
 * pr_control32().
 */
typedef union {
	int32_t		sig;		/* PCKILL, PCUNKILL */
	int32_t		nice;		/* PCNICE */
	int32_t		timeo;		/* PCTWSTOP */
	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
	caddr32_t	vaddr;		/* PCSVADDR */
	siginfo32_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset32_t	prfpregset;	/* PCSFPREG */
#if defined(__sparc)
	prxregset_t	prxregset;	/* PCSXREG */
#endif
	prwatch32_t	prwatch;	/* PCWATCH */
	priovec32_t	priovec;	/* PCREAD, PCWRITE */
	prcred32_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	int32_t		przoneid;	/* PCSZONE */
} arg32_t;

/* 32-bit counterparts of pr_control() and pr_setfpregs(). */
static	int	pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
522
523/*
524 * Note that while ctlsize32() can use argp, it must do so only in a way
525 * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
526 * to an array of 32-bit values and only 32-bit alignment is ensured.
527 */
528static size_t
529ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
530{
531	size_t size = sizeof (int32_t);
532	size_t rnd;
533	int ngrp;
534
535	switch (cmd) {
536	case PCNULL:
537	case PCSTOP:
538	case PCDSTOP:
539	case PCWSTOP:
540	case PCCSIG:
541	case PCCFAULT:
542		break;
543	case PCSSIG:
544		size += sizeof (siginfo32_t);
545		break;
546	case PCTWSTOP:
547		size += sizeof (int32_t);
548		break;
549	case PCKILL:
550	case PCUNKILL:
551	case PCNICE:
552		size += sizeof (int32_t);
553		break;
554	case PCRUN:
555	case PCSET:
556	case PCUNSET:
557		size += sizeof (uint32_t);
558		break;
559	case PCSVADDR:
560		size += sizeof (caddr32_t);
561		break;
562	case PCSTRACE:
563	case PCSHOLD:
564		size += sizeof (sigset_t);
565		break;
566	case PCSFAULT:
567		size += sizeof (fltset_t);
568		break;
569	case PCSENTRY:
570	case PCSEXIT:
571		size += sizeof (sysset_t);
572		break;
573	case PCSREG:
574	case PCAGENT:
575		size += sizeof (prgregset32_t);
576		break;
577	case PCSFPREG:
578		size += sizeof (prfpregset32_t);
579		break;
580#if defined(__sparc)
581	case PCSXREG:
582		size += sizeof (prxregset_t);
583		break;
584#endif
585	case PCWATCH:
586		size += sizeof (prwatch32_t);
587		break;
588	case PCREAD:
589	case PCWRITE:
590		size += sizeof (priovec32_t);
591		break;
592	case PCSCRED:
593		size += sizeof (prcred32_t);
594		break;
595	case PCSCREDX:
596		/*
597		 * We cannot derefence the pr_ngroups fields if it
598		 * we don't have enough data.
599		 */
600		if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
601			return (0);
602		ngrp = argp->prcred.pr_ngroups;
603		if (ngrp < 0 || ngrp > ngroups_max)
604			return (0);
605
606		/* The result can be smaller than sizeof (prcred32_t) */
607		size += sizeof (prcred32_t) - sizeof (gid32_t);
608		size += ngrp * sizeof (gid32_t);
609		break;
610	case PCSPRIV:
611		if (resid >= size + sizeof (prpriv_t))
612			size += priv_prgetprivsize(&argp->prpriv);
613		else
614			return (0);
615		break;
616	case PCSZONE:
617		size += sizeof (int32_t);
618		break;
619	default:
620		return (0);
621	}
622
623	/* Round up to a multiple of int32_t */
624	rnd = size & (sizeof (int32_t) - 1);
625
626	if (rnd != 0)
627		size += sizeof (int32_t) - rnd;
628
629	if (size > resid)
630		return (0);
631	return (size);
632}
633
634/*
635 * Control operations (lots).
636 */
637int
638prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
639{
640#define	MY_BUFFER_SIZE32 \
641		100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
642		100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
643	int32_t buf[MY_BUFFER_SIZE32];
644	int32_t *bufp;
645	arg32_t arg;
646	size_t resid = 0;
647	size_t size;
648	prnode_t *pnp = VTOP(vp);
649	int error;
650	int locked = 0;
651
652	while (uiop->uio_resid) {
653		/*
654		 * Read several commands in one gulp.
655		 */
656		bufp = buf;
657		if (resid) {	/* move incomplete command to front of buffer */
658			int32_t *tail;
659
660			if (resid >= sizeof (buf))
661				break;
662			tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
663			do {
664				*bufp++ = *tail++;
665			} while ((resid -= sizeof (int32_t)) != 0);
666		}
667		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
668		if (resid > uiop->uio_resid)
669			resid = uiop->uio_resid;
670		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
671			return (error);
672		resid += (char *)bufp - (char *)buf;
673		bufp = buf;
674
675		do {		/* loop over commands in buffer */
676			int32_t cmd = bufp[0];
677			arg32_t *argp = (arg32_t *)&bufp[1];
678
679			size = ctlsize32(cmd, resid, argp);
680			if (size == 0)	/* incomplete or invalid command */
681				break;
682			/*
683			 * Perform the specified control operation.
684			 */
685			if (!locked) {
686				if ((error = prlock(pnp, ZNO)) != 0)
687					return (error);
688				locked = 1;
689			}
690
691			/*
692			 * Since some members of the arg32_t union contain
693			 * 64-bit values (which must be 64-bit aligned), we
694			 * can't simply pass a pointer to the structure as
695			 * it may be unaligned. Note that we do pass the
696			 * potentially unaligned structure to ctlsize32()
697			 * above, but that uses it a way that makes no
698			 * assumptions about alignment.
699			 */
700			ASSERT(size - sizeof (cmd) <= sizeof (arg));
701			bcopy(argp, &arg, size - sizeof (cmd));
702
703			if (error = pr_control32(cmd, &arg, pnp, cr)) {
704				if (error == -1)	/* -1 is timeout */
705					locked = 0;
706				else
707					return (error);
708			}
709			bufp = (int32_t *)((char *)bufp + size);
710		} while ((resid -= size) != 0);
711
712		if (locked) {
713			prunlock(pnp);
714			locked = 0;
715		}
716	}
717	return (resid? EINVAL : 0);
718}
719
/*
 * Execute one 32-bit control message against the process or lwp behind
 * 'pnp'.  'cmd' is the command word and 'argp' points at an aligned copy
 * of its operand, interpreted according to 'cmd' (see arg32_t).  'cr' is
 * passed through to the helpers that take a credential.
 *
 * Operands whose 32-bit layout differs from the native one are converted
 * to the native type before the shared helper is called.  Commands that
 * carry addresses or register state fail with EOVERFLOW when
 * PROCESS_NOT_32BIT(p) is true.
 *
 * Called from prwritectl32() after a successful prlock(pnp, ZNO).
 *
 * Returns 0 on success with the lock still held.  On a non-zero return
 * the lock is no longer held: it is dropped here with prunlock(), or has
 * already been dropped on the path that reported the error.  A return
 * of -1 is not an errno; prwritectl32() treats it as "lock dropped, keep
 * processing".
 */
static int
pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;
	int error = 0;

	/* PCNULL is a no-op and does not even look at the target. */
	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	/* Refuse to control processes flagged SSYS. */
	if (p->p_flag & SSYS) {
		prunlock(pnp);
		return (EBUSY);
	}

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
		{
			time_t timeo;

			/*
			 * Can't apply to a system process.
			 */
			if (p->p_as == &kas) {
				error = EBUSY;
				break;
			}

			if (cmd == PCSTOP || cmd == PCDSTOP)
				pr_stop(pnp);

			if (cmd == PCDSTOP)
				break;

			/*
			 * If an lwp is waiting for itself or its process,
			 * don't wait. The lwp will never see the fact that
			 * itself is stopped.
			 */
			if ((pcp->prc_flags & PRC_LWP)?
			    (pcp->prc_thread == curthread) : (p == curproc)) {
				if (cmd == PCWSTOP || cmd == PCTWSTOP)
					error = EBUSY;
				break;
			}

			/*
			 * pr_wait_stop() only returns non-zero after the
			 * lock has been dropped, so return directly rather
			 * than going through the prunlock() at the bottom.
			 */
			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
			if ((error = pr_wait_stop(pnp, timeo)) != 0)
				return (error);

			break;
		}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, (ulong_t)argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p,  &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			int sig = (int)argp->siginfo.si_signo;
			siginfo_t siginfo;

			/* Convert the 32-bit siginfo to the native form. */
			bzero(&siginfo, sizeof (siginfo));
			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
			error = pr_setsig(pnp, &siginfo);
			/* After a successful SIGKILL, drop the lock and wait. */
			if (sig == SIGKILL && error == 0) {
				prunlock(pnp);
				pr_wait_die(pnp);
				return (-1);
			}
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		/* After a successful SIGKILL, drop the lock and wait. */
		if (error == 0 && argp->sig == SIGKILL) {
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, (long)argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, (long)argp->flags);
		break;

	case PCSREG:	/* set general registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			/* pr_thread() returns with the thread locked. */
			kthread_t *t = pr_thread(pnp);

			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				thread_unlock(t);
				error = EBUSY;
			} else {
				prgregset_t prgregset;
				klwp_t *lwp = ttolwp(t);

				/*
				 * p_lock is dropped while the registers are
				 * converted and set.
				 */
				thread_unlock(t);
				mutex_exit(&p->p_lock);
				prgregset_32ton(lwp, argp->prgregset,
				    prgregset);
				prsetprregs(lwp, prgregset, 0);
				mutex_enter(&p->p_lock);
			}
		}
		break;

	case PCSFPREG:	/* set floating-point registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setfpregs32(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
#if defined(__sparc)
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setxregs(pnp, &argp->prxregset);
#else
		error = EINVAL;
#endif
		break;

	case PCSVADDR:	/* set virtual address at which to resume */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setvaddr(pnp,
			    (caddr_t)(uintptr_t)argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prwatch_t prwatch;

			/* Widen the 32-bit prwatch field by field. */
			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
			prwatch.pr_size = argp->prwatch.pr_size;
			prwatch.pr_wflags = argp->prwatch.pr_wflags;
			prwatch.pr_pad = argp->prwatch.pr_pad;
			/*
			 * NOTE(review): 'unlocked' is presumably set by
			 * pr_watch() when it has already dropped the lock
			 * itself -- confirm against pr_watch().
			 */
			error = pr_watch(pnp, &prwatch, &unlocked);
			if (error && unlocked)
				return (error);
		}
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prgregset_t prgregset;
			kthread_t *t = pr_thread(pnp);
			klwp_t *lwp = ttolwp(t);
			/* p_lock is dropped while converting the registers. */
			thread_unlock(t);
			mutex_exit(&p->p_lock);
			prgregset_32ton(lwp, argp->prgregset, prgregset);
			mutex_enter(&p->p_lock);
			/* NOTE(review): 'unlocked' as for PCWATCH -- confirm. */
			error = pr_agent(pnp, prgregset, &unlocked);
			if (error && unlocked)
				return (error);
		}
		break;

	case PCREAD:	/* read from the address space */
	case PCWRITE:	/* write to the address space */
		if (PROCESS_NOT_32BIT(p) || (pnp->pr_flags & PR_OFFMAX))
			error = EOVERFLOW;
		else {
			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
			priovec_t priovec;

			/*
			 * Widen the 32-bit priovec; the offset goes through
			 * uint32_t so that it is not sign-extended.
			 */
			priovec.pio_base =
			    (void *)(uintptr_t)argp->priovec.pio_base;
			priovec.pio_len = (size_t)argp->priovec.pio_len;
			priovec.pio_offset = (off_t)
			    (uint32_t)argp->priovec.pio_offset;
			error = pr_rdwr(p, rw, &priovec);
		}
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		{
			/*
			 * All the fields in these structures are exactly the
			 * same and so the structures are compatible.  In case
			 * this ever changes, we catch this with the ASSERT
			 * below.
			 */
			prcred_t *prcred = (prcred_t *)&argp->prcred;

#ifndef __lint
			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
#endif

			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
			break;
		}

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;

	case PCSZONE:	/* set the process's zoneid */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* Common error exit: the caller expects the lock to be dropped. */
	if (error)
		prunlock(pnp);
	return (error);
}
989
990#endif	/* _SYSCALL32_IMPL */
991
992/*
993 * Return the specific or chosen thread/lwp for a control operation.
994 * Returns with the thread locked via thread_lock(t).
995 */
996kthread_t *
997pr_thread(prnode_t *pnp)
998{
999	prcommon_t *pcp = pnp->pr_common;
1000	kthread_t *t;
1001
1002	if (pcp->prc_flags & PRC_LWP) {
1003		t = pcp->prc_thread;
1004		ASSERT(t != NULL);
1005		thread_lock(t);
1006	} else {
1007		proc_t *p = pcp->prc_proc;
1008		t = prchoose(p);	/* returns locked thread */
1009		ASSERT(t != NULL);
1010	}
1011
1012	return (t);
1013}
1014
/*
 * Direct the process or lwp to stop.
 *
 * For an lwp node (PRC_LWP set) only that node's thread is visited; for
 * a process node every thread on p->p_tlist is visited.  This function
 * only requests the stop; it does not wait for it (see pr_wait_stop()).
 */
void
pr_stop(prnode_t *pnp)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	kthread_t *t;
	vnode_t *vp;

	/*
	 * If already stopped, do nothing; otherwise flag
	 * it to be stopped the next time it tries to run.
	 * If sleeping at interruptible priority, set it
	 * running so it will stop within cv_wait_sig().
	 *
	 * Take care to cooperate with jobcontrol: if an lwp
	 * is stopped due to the default action of a jobcontrol
	 * stop signal, flag it to be stopped the next time it
	 * starts due to a SIGCONT signal.
	 */
	if (pcp->prc_flags & PRC_LWP)
		t = pcp->prc_thread;
	else
		t = p->p_tlist;
	ASSERT(t != NULL);

	do {
		int notify;	/* set when t was just marked TP_PRVSTOP */

		notify = 0;
		thread_lock(t);
		if (!ISTOPPED(t)) {
			t->t_proc_flag |= TP_PRSTOP;
			t->t_sig_check = 1;	/* do ISSIG */
		}

		/* Move the thread from wait queue to run queue */
		if (ISWAITING(t))
			setrun_locked(t);

		if (ISWAKEABLE(t)) {
			if (t->t_wchan0 == NULL)
				setrun_locked(t);
			else if (!VSTOPPED(t)) {
				/*
				 * Mark it virtually stopped.
				 */
				t->t_proc_flag |= TP_PRVSTOP;
				notify = 1;
			}
		}
		/*
		 * force the thread into the kernel
		 * if it is not already there.
		 */
		prpokethread(t);
		thread_unlock(t);
		/*
		 * prnotify() is called only after the thread lock has been
		 * dropped, and only if the lwp directory entry has a trace
		 * vnode.
		 */
		if (notify &&
		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
			prnotify(vp);
		if (pcp->prc_flags & PRC_LWP)
			break;
	} while ((t = t->t_forw) != p->p_tlist);

	/*
	 * We do this just in case the thread we asked
	 * to stop is in holdlwps() (called from cfork()).
	 */
	cv_broadcast(&p->p_holdlwps);
}
1087
/*
 * Sleep until the lwp stops, but cooperate with
 * jobcontrol:  Don't wake up if the lwp is stopped
 * due to the default action of a jobcontrol stop signal.
 * If this is the process file descriptor, sleep
 * until all of the process's lwps stop.
 *
 * 'timeo' is a timeout in milliseconds.  If it is not positive, no
 * deadline is computed and pr_wait() is called with a NULL time pointer.
 *
 * Each pass of the wait loop drops the lock with prunlock(), sleeps in
 * pr_wait() and then re-takes the lock with prlock(pnp, ZNO).
 *
 * Returns 0 with the lock held once the target is stopped.  A non-zero
 * return comes from pr_wait() (-1 is timeout) or from prlock(); in both
 * cases prunlock() has already been called and the lock is not held.
 */
int
pr_wait_stop(prnode_t *pnp, time_t timeo)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	timestruc_t rqtime;
	timestruc_t *rqtp = NULL;
	int timecheck = 0;
	kthread_t *t;
	int error;

	if (timeo > 0) {	/* millisecond timeout */
		/*
		 * Determine the precise future time of the requested timeout.
		 */
		timestruc_t now;

		timecheck = timechanged;
		gethrestime(&now);
		rqtp = &rqtime;
		/* Split the milliseconds into seconds and nanoseconds. */
		rqtp->tv_sec = timeo / MILLISEC;
		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
		timespecadd(rqtp, &now);
	}

	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
		t = pcp->prc_thread;
		ASSERT(t != NULL);
		thread_lock(t);
		while (!ISTOPPED(t) && !VSTOPPED(t)) {
			thread_unlock(t);
			/* prc_mutex is taken before the lock is dropped. */
			mutex_enter(&pcp->prc_mutex);
			prunlock(pnp);
			error = pr_wait(pcp, rqtp, timecheck);
			if (error)	/* -1 is timeout */
				return (error);
			if ((error = prlock(pnp, ZNO)) != 0)
				return (error);
			ASSERT(p == pcp->prc_proc);
			ASSERT(t == pcp->prc_thread);
			thread_lock(t);
		}
		thread_unlock(t);
	} else {			/* process file descriptor */
		t = prchoose(p);	/* returns locked thread */
		ASSERT(t != NULL);
		ASSERT(MUTEX_HELD(&p->p_lock));
		/*
		 * Keep waiting while the chosen thread is neither stopped,
		 * virtually stopped nor suspended, and also for as long as
		 * SEXITLWPS is set on the process.
		 */
		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
		    (p->p_flag & SEXITLWPS)) {
			thread_unlock(t);
			/* prc_mutex is taken before the lock is dropped. */
			mutex_enter(&pcp->prc_mutex);
			prunlock(pnp);
			error = pr_wait(pcp, rqtp, timecheck);
			if (error)	/* -1 is timeout */
				return (error);
			if ((error = prlock(pnp, ZNO)) != 0)
				return (error);
			ASSERT(p == pcp->prc_proc);
			/* Re-pick the thread after re-taking the lock. */
			t = prchoose(p);	/* returns locked t */
			ASSERT(t != NULL);
		}
		thread_unlock(t);
	}

	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
	    t != NULL && t->t_state != TS_ZOMB);

	return (0);
}
1164
1165int
1166pr_setrun(prnode_t *pnp, ulong_t flags)
1167{
1168	prcommon_t *pcp = pnp->pr_common;
1169	proc_t *p = pcp->prc_proc;
1170	kthread_t *t;
1171	klwp_t *lwp;
1172
1173	/*
1174	 * Cannot set an lwp running if it is not stopped.
1175	 * Also, no lwp other than the /proc agent lwp can
1176	 * be set running so long as the /proc agent lwp exists.
1177	 */
1178	t = pr_thread(pnp);	/* returns locked thread */
1179	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1180	    !(t->t_proc_flag & TP_PRSTOP)) ||
1181	    (p->p_agenttp != NULL &&
1182	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1183		thread_unlock(t);
1184		return (EBUSY);
1185	}
1186	thread_unlock(t);
1187	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1188		return (EINVAL);
1189	lwp = ttolwp(t);
1190	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1191		/*
1192		 * Discard current siginfo_t, if any.
1193		 */
1194		lwp->lwp_cursig = 0;
1195		lwp->lwp_extsig = 0;
1196		if (lwp->lwp_curinfo) {
1197			siginfofree(lwp->lwp_curinfo);
1198			lwp->lwp_curinfo = NULL;
1199		}
1200	}
1201	if (flags & PRCFAULT)
1202		lwp->lwp_curflt = 0;
1203	/*
1204	 * We can't hold p->p_lock when we touch the lwp's registers.
1205	 * It may be swapped out and we will get a page fault.
1206	 */
1207	if (flags & PRSTEP) {
1208		mutex_exit(&p->p_lock);
1209		prstep(lwp, 0);
1210		mutex_enter(&p->p_lock);
1211	}
1212	if (flags & PRSTOP) {
1213		t->t_proc_flag |= TP_PRSTOP;
1214		t->t_sig_check = 1;	/* do ISSIG */
1215	}
1216	if (flags & PRSABORT)
1217		lwp->lwp_sysabort = 1;
1218	thread_lock(t);
1219	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1220		/*
1221		 * Here, we are dealing with a single lwp.
1222		 */
1223		if (ISTOPPED(t)) {
1224			t->t_schedflag |= TS_PSTART;
1225			t->t_dtrace_stop = 0;
1226			setrun_locked(t);
1227		} else if (flags & PRSABORT) {
1228			t->t_proc_flag &=
1229			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1230			setrun_locked(t);
1231		} else if (!(flags & PRSTOP)) {
1232			t->t_proc_flag &=
1233			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1234		}
1235		thread_unlock(t);
1236	} else {
1237		/*
1238		 * Here, we are dealing with the whole process.
1239		 */
1240		if (ISTOPPED(t)) {
1241			/*
1242			 * The representative lwp is stopped on an event
1243			 * of interest.  We demote it to PR_REQUESTED and
1244			 * choose another representative lwp.  If the new
1245			 * representative lwp is not stopped on an event of
1246			 * interest (other than PR_REQUESTED), we set the
1247			 * whole process running, else we leave the process
1248			 * stopped showing the next event of interest.
1249			 */
1250			kthread_t *tx = NULL;
1251
1252			if (!(flags & PRSABORT) &&
1253			    t->t_whystop == PR_SYSENTRY &&
1254			    t->t_whatstop == SYS_lwp_exit)
1255				tx = t;		/* remember the exiting lwp */
1256			t->t_whystop = PR_REQUESTED;
1257			t->t_whatstop = 0;
1258			thread_unlock(t);
1259			t = prchoose(p);	/* returns locked t */
1260			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1261			if (VSTOPPED(t) ||
1262			    t->t_whystop == PR_REQUESTED) {
1263				thread_unlock(t);
1264				allsetrun(p);
1265			} else {
1266				thread_unlock(t);
1267				/*
1268				 * As a special case, if the old representative
1269				 * lwp was stopped on entry to _lwp_exit()
1270				 * (and we are not aborting the system call),
1271				 * we set the old representative lwp running.
1272				 * We do this so that the next process stop
1273				 * will find the exiting lwp gone.
1274				 */
1275				if (tx != NULL) {
1276					thread_lock(tx);
1277					tx->t_schedflag |= TS_PSTART;
1278					t->t_dtrace_stop = 0;
1279					setrun_locked(tx);
1280					thread_unlock(tx);
1281				}
1282			}
1283		} else {
1284			/*
1285			 * No event of interest; set all of the lwps running.
1286			 */
1287			if (flags & PRSABORT) {
1288				t->t_proc_flag &=
1289				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1290				setrun_locked(t);
1291			}
1292			thread_unlock(t);
1293			allsetrun(p);
1294		}
1295	}
1296	return (0);
1297}
1298
1299/*
1300 * Wait until process/lwp stops or until timer expires.
1301 * Return EINTR for an interruption, -1 for timeout, else 0.
1302 */
1303int
1304pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1305	timestruc_t *ts,	/* absolute time of timeout, if any */
1306	int timecheck)
1307{
1308	int rval;
1309
1310	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1311	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1312	mutex_exit(&pcp->prc_mutex);
1313	switch (rval) {
1314	case 0:
1315		return (EINTR);
1316	case -1:
1317		return (-1);
1318	default:
1319		return (0);
1320	}
1321}
1322
1323/*
1324 * Make all threads in the process runnable.
1325 */
1326void
1327allsetrun(proc_t *p)
1328{
1329	kthread_t *t;
1330
1331	ASSERT(MUTEX_HELD(&p->p_lock));
1332
1333	if ((t = p->p_tlist) != NULL) {
1334		do {
1335			thread_lock(t);
1336			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1337			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1338			if (ISTOPPED(t)) {
1339				t->t_schedflag |= TS_PSTART;
1340				t->t_dtrace_stop = 0;
1341				setrun_locked(t);
1342			}
1343			thread_unlock(t);
1344		} while ((t = t->t_forw) != p->p_tlist);
1345	}
1346}
1347
1348/*
1349 * Wait for the process to die.
1350 * We do this after sending SIGKILL because we know it will
1351 * die soon and we want subsequent operations to return ENOENT.
1352 */
1353void
1354pr_wait_die(prnode_t *pnp)
1355{
1356	proc_t *p;
1357
1358	mutex_enter(&pidlock);
1359	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1360		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1361			break;
1362	}
1363	mutex_exit(&pidlock);
1364}
1365
1366static void
1367pr_settrace(proc_t *p, sigset_t *sp)
1368{
1369	prdelset(sp, SIGKILL);
1370	prassignset(&p->p_sigmask, sp);
1371	if (!sigisempty(&p->p_sigmask))
1372		p->p_proc_flag |= P_PR_TRACE;
1373	else if (prisempty(&p->p_fltmask)) {
1374		user_t *up = PTOU(p);
1375		if (up->u_systrap == 0)
1376			p->p_proc_flag &= ~P_PR_TRACE;
1377	}
1378}
1379
1380int
1381pr_setsig(prnode_t *pnp, siginfo_t *sip)
1382{
1383	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1384	int sig = sip->si_signo;
1385	prcommon_t *pcp = pnp->pr_common;
1386	proc_t *p = pcp->prc_proc;
1387	kthread_t *t;
1388	klwp_t *lwp;
1389	int error = 0;
1390
1391	t = pr_thread(pnp);	/* returns locked thread */
1392	thread_unlock(t);
1393	lwp = ttolwp(t);
1394	if (sig < 0 || sig >= nsig)
1395		/* Zero allowed here */
1396		error = EINVAL;
1397	else if (lwp->lwp_cursig == SIGKILL)
1398		/* "can't happen", but just in case */
1399		error = EBUSY;
1400	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1401		lwp->lwp_extsig = 0;
1402		/*
1403		 * Discard current siginfo_t, if any.
1404		 */
1405		if (lwp->lwp_curinfo) {
1406			siginfofree(lwp->lwp_curinfo);
1407			lwp->lwp_curinfo = NULL;
1408		}
1409	} else {
1410		kthread_t *tx;
1411		sigqueue_t *sqp;
1412
1413		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
1414		mutex_exit(&p->p_lock);
1415		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1416		mutex_enter(&p->p_lock);
1417
1418		if (lwp->lwp_curinfo == NULL)
1419			lwp->lwp_curinfo = sqp;
1420		else
1421			kmem_free(sqp, sizeof (sigqueue_t));
1422		/*
1423		 * Copy contents of info to current siginfo_t.
1424		 */
1425		bcopy(sip, &lwp->lwp_curinfo->sq_info,
1426		    sizeof (lwp->lwp_curinfo->sq_info));
1427		/*
1428		 * Prevent contents published by si_zoneid-unaware /proc
1429		 * consumers from being incorrectly filtered.  Because
1430		 * an uninitialized si_zoneid is the same as
1431		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
1432		 * process in a non-global zone with a siginfo which
1433		 * appears to come from the global zone.
1434		 */
1435		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1436			lwp->lwp_curinfo->sq_info.si_zoneid =
1437			    p->p_zone->zone_id;
1438		/*
1439		 * Side-effects for SIGKILL and jobcontrol signals.
1440		 */
1441		if (sig == SIGKILL) {
1442			p->p_flag |= SKILLED;
1443			p->p_flag &= ~SEXTKILLED;
1444		} else if (sig == SIGCONT) {
1445			p->p_flag |= SSCONT;
1446			sigdelq(p, NULL, SIGSTOP);
1447			sigdelq(p, NULL, SIGTSTP);
1448			sigdelq(p, NULL, SIGTTOU);
1449			sigdelq(p, NULL, SIGTTIN);
1450			sigdiffset(&p->p_sig, &stopdefault);
1451			sigdiffset(&p->p_extsig, &stopdefault);
1452			if ((tx = p->p_tlist) != NULL) {
1453				do {
1454					sigdelq(p, tx, SIGSTOP);
1455					sigdelq(p, tx, SIGTSTP);
1456					sigdelq(p, tx, SIGTTOU);
1457					sigdelq(p, tx, SIGTTIN);
1458					sigdiffset(&tx->t_sig, &stopdefault);
1459					sigdiffset(&tx->t_extsig, &stopdefault);
1460				} while ((tx = tx->t_forw) != p->p_tlist);
1461			}
1462		} else if (sigismember(&stopdefault, sig)) {
1463			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1464			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1465				p->p_flag &= ~SSCONT;
1466			sigdelq(p, NULL, SIGCONT);
1467			sigdelset(&p->p_sig, SIGCONT);
1468			sigdelset(&p->p_extsig, SIGCONT);
1469			if ((tx = p->p_tlist) != NULL) {
1470				do {
1471					sigdelq(p, tx, SIGCONT);
1472					sigdelset(&tx->t_sig, SIGCONT);
1473					sigdelset(&tx->t_extsig, SIGCONT);
1474				} while ((tx = tx->t_forw) != p->p_tlist);
1475			}
1476		}
1477		thread_lock(t);
1478		if (ISWAKEABLE(t) || ISWAITING(t)) {
1479			/* Set signaled sleeping/waiting lwp running */
1480			setrun_locked(t);
1481		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1482			/* If SIGKILL, set stopped lwp running */
1483			p->p_stopsig = 0;
1484			t->t_schedflag |= TS_XSTART | TS_PSTART;
1485			t->t_dtrace_stop = 0;
1486			setrun_locked(t);
1487		}
1488		t->t_sig_check = 1;	/* so ISSIG will be done */
1489		thread_unlock(t);
1490		/*
1491		 * More jobcontrol side-effects.
1492		 */
1493		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1494			p->p_stopsig = 0;
1495			do {
1496				thread_lock(tx);
1497				if (tx->t_state == TS_STOPPED &&
1498				    tx->t_whystop == PR_JOBCONTROL) {
1499					tx->t_schedflag |= TS_XSTART;
1500					setrun_locked(tx);
1501				}
1502				thread_unlock(tx);
1503			} while ((tx = tx->t_forw) != p->p_tlist);
1504		}
1505	}
1506	return (error);
1507}
1508
1509int
1510pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1511{
1512	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1513	prcommon_t *pcp = pnp->pr_common;
1514	proc_t *p = pcp->prc_proc;
1515	k_siginfo_t info;
1516
1517	if (sig <= 0 || sig >= nsig)
1518		return (EINVAL);
1519
1520	bzero(&info, sizeof (info));
1521	info.si_signo = sig;
1522	info.si_code = SI_USER;
1523	info.si_pid = curproc->p_pid;
1524	info.si_ctid = PRCTID(curproc);
1525	info.si_zoneid = getzoneid();
1526	info.si_uid = crgetruid(cr);
1527	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1528	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1529
1530	return (0);
1531}
1532
1533int
1534pr_unkill(prnode_t *pnp, int sig)
1535{
1536	int nsig = PROC_IS_BRANDED(curproc)? BROP(curproc)->b_nsig : NSIG;
1537	prcommon_t *pcp = pnp->pr_common;
1538	proc_t *p = pcp->prc_proc;
1539	sigqueue_t *infop = NULL;
1540
1541	if (sig <= 0 || sig >= nsig || sig == SIGKILL)
1542		return (EINVAL);
1543
1544	if (pcp->prc_flags & PRC_LWP)
1545		sigdeq(p, pcp->prc_thread, sig, &infop);
1546	else
1547		sigdeq(p, NULL, sig, &infop);
1548
1549	if (infop)
1550		siginfofree(infop);
1551
1552	return (0);
1553}
1554
1555int
1556pr_nice(proc_t *p, int nice, cred_t *cr)
1557{
1558	kthread_t *t;
1559	int err;
1560	int error = 0;
1561
1562	t = p->p_tlist;
1563	do {
1564		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1565		err = CL_DONICE(t, cr, nice, (int *)NULL);
1566		schedctl_set_cidpri(t);
1567		if (error == 0)
1568			error = err;
1569	} while ((t = t->t_forw) != p->p_tlist);
1570
1571	return (error);
1572}
1573
1574void
1575pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1576{
1577	user_t *up = PTOU(p);
1578
1579	if (entry) {
1580		prassignset(&up->u_entrymask, sysset);
1581	} else {
1582		prassignset(&up->u_exitmask, sysset);
1583	}
1584	if (!prisempty(&up->u_entrymask) ||
1585	    !prisempty(&up->u_exitmask)) {
1586		up->u_systrap = 1;
1587		p->p_proc_flag |= P_PR_TRACE;
1588		set_proc_sys(p);	/* set pre and post-sys flags */
1589	} else {
1590		up->u_systrap = 0;
1591		if (sigisempty(&p->p_sigmask) &&
1592		    prisempty(&p->p_fltmask))
1593			p->p_proc_flag &= ~P_PR_TRACE;
1594	}
1595}
1596
1597#define	ALLFLAGS	\
1598	(PR_FORK|PR_RLC|PR_KLC|PR_ASYNC|PR_BPTADJ|PR_MSACCT|PR_MSFORK|PR_PTRACE)
1599
1600int
1601pr_set(proc_t *p, long flags)
1602{
1603	if ((p->p_flag & SSYS) || p->p_as == &kas)
1604		return (EBUSY);
1605
1606	if (flags & ~ALLFLAGS)
1607		return (EINVAL);
1608
1609	if (flags & PR_FORK)
1610		p->p_proc_flag |= P_PR_FORK;
1611	if (flags & PR_RLC)
1612		p->p_proc_flag |= P_PR_RUNLCL;
1613	if (flags & PR_KLC)
1614		p->p_proc_flag |= P_PR_KILLCL;
1615	if (flags & PR_ASYNC)
1616		p->p_proc_flag |= P_PR_ASYNC;
1617	if (flags & PR_BPTADJ)
1618		p->p_proc_flag |= P_PR_BPTADJ;
1619	if (flags & PR_MSACCT)
1620		if ((p->p_flag & SMSACCT) == 0)
1621			estimate_msacct(p->p_tlist, gethrtime());
1622	if (flags & PR_MSFORK)
1623		p->p_flag |= SMSFORK;
1624	if (flags & PR_PTRACE) {
1625		p->p_proc_flag |= P_PR_PTRACE;
1626		/* ptraced process must die if parent dead */
1627		if (p->p_ppid == 1)
1628			sigtoproc(p, NULL, SIGKILL);
1629	}
1630
1631	return (0);
1632}
1633
1634int
1635pr_unset(proc_t *p, long flags)
1636{
1637	if ((p->p_flag & SSYS) || p->p_as == &kas)
1638		return (EBUSY);
1639
1640	if (flags & ~ALLFLAGS)
1641		return (EINVAL);
1642
1643	if (flags & PR_FORK)
1644		p->p_proc_flag &= ~P_PR_FORK;
1645	if (flags & PR_RLC)
1646		p->p_proc_flag &= ~P_PR_RUNLCL;
1647	if (flags & PR_KLC)
1648		p->p_proc_flag &= ~P_PR_KILLCL;
1649	if (flags & PR_ASYNC)
1650		p->p_proc_flag &= ~P_PR_ASYNC;
1651	if (flags & PR_BPTADJ)
1652		p->p_proc_flag &= ~P_PR_BPTADJ;
1653	if (flags & PR_MSACCT)
1654		disable_msacct(p);
1655	if (flags & PR_MSFORK)
1656		p->p_flag &= ~SMSFORK;
1657	if (flags & PR_PTRACE)
1658		p->p_proc_flag &= ~P_PR_PTRACE;
1659
1660	return (0);
1661}
1662
1663static int
1664pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1665{
1666	proc_t *p = pnp->pr_common->prc_proc;
1667	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1668
1669	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1670		thread_unlock(t);
1671		return (EBUSY);
1672	}
1673	if (!prhasfp()) {
1674		thread_unlock(t);
1675		return (EINVAL);	/* No FP support */
1676	}
1677
1678	/* drop p_lock while touching the lwp's stack */
1679	thread_unlock(t);
1680	mutex_exit(&p->p_lock);
1681	prsetprfpregs(ttolwp(t), prfpregset);
1682	mutex_enter(&p->p_lock);
1683
1684	return (0);
1685}
1686
#ifdef	_SYSCALL32_IMPL
/*
 * 32-bit counterpart of pr_setfpregs(): set the floating-point registers
 * of the lwp referred to by pnp from a prfpregset32_t.
 *
 * Returns EBUSY if the lwp is not stopped (ISTOPPED, VSTOPPED or
 * DSTOPPED), EINVAL if prhasfp() reports no FP support, else 0.
 * p->p_lock is dropped and retaken around the register update.
 */
static int
pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
{
	proc_t *p = pnp->pr_common->prc_proc;
	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
	int error = 0;

	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t))
		error = EBUSY;
	else if (!prhasfp())
		error = EINVAL;		/* No FP support */

	thread_unlock(t);
	if (error != 0)
		return (error);

	/* drop p_lock while touching the lwp's stack */
	mutex_exit(&p->p_lock);
	prsetprfpregs32(ttolwp(t), prfpregset);
	mutex_enter(&p->p_lock);

	return (0);
}
#endif	/* _SYSCALL32_IMPL */
1712
1713#if defined(__sparc)
1714/* ARGSUSED */
1715static int
1716pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1717{
1718	proc_t *p = pnp->pr_common->prc_proc;
1719	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1720
1721	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1722		thread_unlock(t);
1723		return (EBUSY);
1724	}
1725	thread_unlock(t);
1726
1727	if (!prhasx(p))
1728		return (EINVAL);	/* No extra register support */
1729
1730	/* drop p_lock while touching the lwp's stack */
1731	mutex_exit(&p->p_lock);
1732	prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1733	mutex_enter(&p->p_lock);
1734
1735	return (0);
1736}
1737
1738static int
1739pr_setasrs(prnode_t *pnp, asrset_t asrset)
1740{
1741	proc_t *p = pnp->pr_common->prc_proc;
1742	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1743
1744	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1745		thread_unlock(t);
1746		return (EBUSY);
1747	}
1748	thread_unlock(t);
1749
1750	/* drop p_lock while touching the lwp's stack */
1751	mutex_exit(&p->p_lock);
1752	prsetasregs(ttolwp(t), asrset);
1753	mutex_enter(&p->p_lock);
1754
1755	return (0);
1756}
1757#endif
1758
1759static int
1760pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1761{
1762	proc_t *p = pnp->pr_common->prc_proc;
1763	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1764
1765	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1766		thread_unlock(t);
1767		return (EBUSY);
1768	}
1769
1770	/* drop p_lock while touching the lwp's stack */
1771	thread_unlock(t);
1772	mutex_exit(&p->p_lock);
1773	prsvaddr(ttolwp(t), vaddr);
1774	mutex_enter(&p->p_lock);
1775
1776	return (0);
1777}
1778
1779void
1780pr_sethold(prnode_t *pnp, sigset_t *sp)
1781{
1782	proc_t *p = pnp->pr_common->prc_proc;
1783	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1784
1785	schedctl_finish_sigblock(t);
1786	sigutok(sp, &t->t_hold);
1787	if (ISWAKEABLE(t) &&
1788	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1789		setrun_locked(t);
1790	t->t_sig_check = 1;	/* so thread will see new holdmask */
1791	thread_unlock(t);
1792}
1793
1794void
1795pr_setfault(proc_t *p, fltset_t *fltp)
1796{
1797	prassignset(&p->p_fltmask, fltp);
1798	if (!prisempty(&p->p_fltmask))
1799		p->p_proc_flag |= P_PR_TRACE;
1800	else if (sigisempty(&p->p_sigmask)) {
1801		user_t *up = PTOU(p);
1802		if (up->u_systrap == 0)
1803			p->p_proc_flag &= ~P_PR_TRACE;
1804	}
1805}
1806
1807static int
1808pr_clearsig(prnode_t *pnp)
1809{
1810	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1811	klwp_t *lwp = ttolwp(t);
1812
1813	thread_unlock(t);
1814	if (lwp->lwp_cursig == SIGKILL)
1815		return (EBUSY);
1816
1817	/*
1818	 * Discard current siginfo_t, if any.
1819	 */
1820	lwp->lwp_cursig = 0;
1821	lwp->lwp_extsig = 0;
1822	if (lwp->lwp_curinfo) {
1823		siginfofree(lwp->lwp_curinfo);
1824		lwp->lwp_curinfo = NULL;
1825	}
1826
1827	return (0);
1828}
1829
1830static int
1831pr_clearflt(prnode_t *pnp)
1832{
1833	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1834
1835	thread_unlock(t);
1836	ttolwp(t)->lwp_curflt = 0;
1837
1838	return (0);
1839}
1840
1841static int
1842pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
1843{
1844	proc_t *p = pnp->pr_common->prc_proc;
1845	struct as *as = p->p_as;
1846	uintptr_t vaddr = pwp->pr_vaddr;
1847	size_t size = pwp->pr_size;
1848	int wflags = pwp->pr_wflags;
1849	ulong_t newpage = 0;
1850	struct watched_area *pwa;
1851	int error;
1852
1853	*unlocked = 0;
1854
1855	/*
1856	 * Can't apply to a system process.
1857	 */
1858	if ((p->p_flag & SSYS) || p->p_as == &kas)
1859		return (EBUSY);
1860
1861	/*
1862	 * Verify that the address range does not wrap
1863	 * and that only the proper flags were specified.
1864	 */
1865	if ((wflags & ~WA_TRAPAFTER) == 0)
1866		size = 0;
1867	if (vaddr + size < vaddr ||
1868	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
1869	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
1870		return (EINVAL);
1871
1872	/*
1873	 * Don't let the address range go above as->a_userlimit.
1874	 * There is no error here, just a limitation.
1875	 */
1876	if (vaddr >= (uintptr_t)as->a_userlimit)
1877		return (0);
1878	if (vaddr + size > (uintptr_t)as->a_userlimit)
1879		size = (uintptr_t)as->a_userlimit - vaddr;
1880
1881	/*
1882	 * Compute maximum number of pages this will add.
1883	 */
1884	if ((wflags & ~WA_TRAPAFTER) != 0) {
1885		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
1886		newpage = btopr(pagespan);
1887		if (newpage > 2 * prnwatch)
1888			return (E2BIG);
1889	}
1890
1891	/*
1892	 * Force the process to be fully stopped.
1893	 */
1894	if (p == curproc) {
1895		prunlock(pnp);
1896		while (holdwatch() != 0)
1897			continue;
1898		if ((error = prlock(pnp, ZNO)) != 0) {
1899			continuelwps(p);
1900			*unlocked = 1;
1901			return (error);
1902		}
1903	} else {
1904		pauselwps(p);
1905		while (pr_allstopped(p, 0) > 0) {
1906			/*
1907			 * This cv/mutex pair is persistent even
1908			 * if the process disappears after we
1909			 * unmark it and drop p->p_lock.
1910			 */
1911			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
1912			kmutex_t *mp = &p->p_lock;
1913
1914			prunmark(p);
1915			(void) cv_wait(cv, mp);
1916			mutex_exit(mp);
1917			if ((error = prlock(pnp, ZNO)) != 0) {
1918				/*
1919				 * Unpause the process if it exists.
1920				 */
1921				p = pr_p_lock(pnp);
1922				mutex_exit(&pr_pidlock);
1923				if (p != NULL) {
1924					unpauselwps(p);
1925					prunlock(pnp);
1926				}
1927				*unlocked = 1;
1928				return (error);
1929			}
1930		}
1931	}
1932
1933	/*
1934	 * Drop p->p_lock in order to perform the rest of this.
1935	 * The process is still locked with the P_PR_LOCK flag.
1936	 */
1937	mutex_exit(&p->p_lock);
1938
1939	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
1940	pwa->wa_vaddr = (caddr_t)vaddr;
1941	pwa->wa_eaddr = (caddr_t)vaddr + size;
1942	pwa->wa_flags = (ulong_t)wflags;
1943
1944	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
1945	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);
1946
1947	if (p == curproc) {
1948		setallwatch();
1949		mutex_enter(&p->p_lock);
1950		continuelwps(p);
1951	} else {
1952		mutex_enter(&p->p_lock);
1953		unpauselwps(p);
1954	}
1955
1956	return (error);
1957}
1958
1959/* jobcontrol stopped, but with a /proc directed stop in effect */
1960#define	JDSTOPPED(t)	\
1961	((t)->t_state == TS_STOPPED && \
1962	(t)->t_whystop == PR_JOBCONTROL && \
1963	((t)->t_proc_flag & TP_PRSTOP))
1964
1965/*
1966 * pr_agent() creates the agent lwp. If the process is exiting while
1967 * we are creating an agent lwp, then exitlwps() waits until the
1968 * agent has been created using prbarrier().
1969 */
1970static int
1971pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
1972{
1973	proc_t *p = pnp->pr_common->prc_proc;
1974	prcommon_t *pcp;
1975	kthread_t *t;
1976	kthread_t *ct;
1977	klwp_t *clwp;
1978	k_sigset_t smask;
1979	int cid;
1980	void *bufp = NULL;
1981	int error;
1982
1983	*unlocked = 0;
1984
1985	/*
1986	 * Cannot create the /proc agent lwp if :-
1987	 * - the process is not fully stopped or directed to stop.
1988	 * - there is an agent lwp already.
1989	 * - the process has been killed.
1990	 * - the process is exiting.
1991	 * - it's a vfork(2) parent.
1992	 */
1993	t = prchoose(p);	/* returns locked thread */
1994	ASSERT(t != NULL);
1995
1996	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
1997	    p->p_agenttp != NULL ||
1998	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
1999		thread_unlock(t);
2000		return (EBUSY);
2001	}
2002
2003	thread_unlock(t);
2004	mutex_exit(&p->p_lock);
2005
2006	sigfillset(&smask);
2007	sigdiffset(&smask, &cantmask);
2008	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
2009	    t->t_pri, &smask, NOCLASS, 0);
2010	if (clwp == NULL) {
2011		mutex_enter(&p->p_lock);
2012		return (ENOMEM);
2013	}
2014	prsetprregs(clwp, prgregset, 1);
2015
2016	/*
2017	 * Because abandoning the agent inside the target process leads to
2018	 * a state that is essentially undebuggable, we record the psinfo of
2019	 * the process creating the agent and hang that off of the lwp.
2020	 */
2021	clwp->lwp_spymaster = kmem_zalloc(sizeof (psinfo_t), KM_SLEEP);
2022	mutex_enter(&curproc->p_lock);
2023	prgetpsinfo(curproc, clwp->lwp_spymaster);
2024	mutex_exit(&curproc->p_lock);
2025
2026	/*
2027	 * We overload pr_time in the spymaster to denote the time at which the
2028	 * agent was created.
2029	 */
2030	gethrestime(&clwp->lwp_spymaster->pr_time);
2031
2032retry:
2033	cid = t->t_cid;
2034	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
2035	mutex_enter(&p->p_lock);
2036	if (cid != t->t_cid) {
2037		/*
2038		 * Someone just changed this thread's scheduling class,
2039		 * so try pre-allocating the buffer again.  Hopefully we
2040		 * don't hit this often.
2041		 */
2042		mutex_exit(&p->p_lock);
2043		CL_FREE(cid, bufp);
2044		goto retry;
2045	}
2046
2047	clwp->lwp_ap = clwp->lwp_arg;
2048	clwp->lwp_eosys = NORMALRETURN;
2049	ct = lwptot(clwp);
2050	ct->t_clfuncs = t->t_clfuncs;
2051	CL_FORK(t, ct, bufp);
2052	ct->t_cid = t->t_cid;
2053	ct->t_proc_flag |= TP_PRSTOP;
2054	/*
2055	 * Setting t_sysnum to zero causes post_syscall()
2056	 * to bypass all syscall checks and go directly to
2057	 *	if (issig()) psig();
2058	 * so that the agent lwp will stop in issig_forreal()
2059	 * showing PR_REQUESTED.
2060	 */
2061	ct->t_sysnum = 0;
2062	ct->t_post_sys = 1;
2063	ct->t_sig_check = 1;
2064	p->p_agenttp = ct;
2065	ct->t_proc_flag &= ~TP_HOLDLWP;
2066
2067	pcp = pnp->pr_pcommon;
2068	mutex_enter(&pcp->prc_mutex);
2069
2070	lwp_create_done(ct);
2071
2072	/*
2073	 * Don't return until the agent is stopped on PR_REQUESTED.
2074	 */
2075
2076	for (;;) {
2077		prunlock(pnp);
2078		*unlocked = 1;
2079
2080		/*
2081		 * Wait for the agent to stop and notify us.
2082		 * If we've been interrupted, return that information.
2083		 */
2084		error = pr_wait(pcp, NULL, 0);
2085		if (error == EINTR) {
2086			error = 0;
2087			break;
2088		}
2089
2090		/*
2091		 * Confirm that the agent LWP has stopped.
2092		 */
2093
2094		if ((error = prlock(pnp, ZNO)) != 0)
2095			break;
2096		*unlocked = 0;
2097
2098		/*
2099		 * Since we dropped the lock on the process, the agent
2100		 * may have disappeared or changed. Grab the current
2101		 * agent and check fail if it has disappeared.
2102		 */
2103		if ((ct = p->p_agenttp) == NULL) {
2104			error = ENOENT;
2105			break;
2106		}
2107
2108		mutex_enter(&pcp->prc_mutex);
2109		thread_lock(ct);
2110
2111		if (ISTOPPED(ct)) {
2112			thread_unlock(ct);
2113			mutex_exit(&pcp->prc_mutex);
2114			break;
2115		}
2116
2117		thread_unlock(ct);
2118	}
2119
2120	return (error ? error : -1);
2121}
2122
2123static int
2124pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2125{
2126	caddr_t base = (caddr_t)pio->pio_base;
2127	size_t cnt = pio->pio_len;
2128	uintptr_t offset = (uintptr_t)pio->pio_offset;
2129	struct uio auio;
2130	struct iovec aiov;
2131	int error = 0;
2132
2133	if ((p->p_flag & SSYS) || p->p_as == &kas)
2134		error = EIO;
2135	else if ((base + cnt) < base || (offset + cnt) < offset)
2136		error = EINVAL;
2137	else if (cnt != 0) {
2138		aiov.iov_base = base;
2139		aiov.iov_len = cnt;
2140
2141		auio.uio_loffset = offset;
2142		auio.uio_iov = &aiov;
2143		auio.uio_iovcnt = 1;
2144		auio.uio_resid = cnt;
2145		auio.uio_segflg = UIO_USERSPACE;
2146		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2147		auio.uio_fmode = FREAD|FWRITE;
2148		auio.uio_extflg = UIO_COPY_DEFAULT;
2149
2150		mutex_exit(&p->p_lock);
2151		error = prusrio(p, rw, &auio, 0);
2152		mutex_enter(&p->p_lock);
2153
2154		/*
2155		 * We have no way to return the i/o count,
2156		 * like read() or write() would do, so we
2157		 * return an error if the i/o was truncated.
2158		 */
2159		if (auio.uio_resid != 0 && error == 0)
2160			error = EIO;
2161	}
2162
2163	return (error);
2164}
2165
2166static int
2167pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2168{
2169	kthread_t *t;
2170	cred_t *oldcred;
2171	cred_t *newcred;
2172	uid_t oldruid;
2173	int error;
2174	zone_t *zone = crgetzone(cr);
2175
2176	if (!VALID_UID(prcred->pr_euid, zone) ||
2177	    !VALID_UID(prcred->pr_ruid, zone) ||
2178	    !VALID_UID(prcred->pr_suid, zone) ||
2179	    !VALID_GID(prcred->pr_egid, zone) ||
2180	    !VALID_GID(prcred->pr_rgid, zone) ||
2181	    !VALID_GID(prcred->pr_sgid, zone))
2182		return (EINVAL);
2183
2184	if (dogrps) {
2185		int ngrp = prcred->pr_ngroups;
2186		int i;
2187
2188		if (ngrp < 0 || ngrp > ngroups_max)
2189			return (EINVAL);
2190
2191		for (i = 0; i < ngrp; i++) {
2192			if (!VALID_GID(prcred->pr_groups[i], zone))
2193				return (EINVAL);
2194		}
2195	}
2196
2197	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2198
2199	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2200		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2201
2202	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2203	    prcred->pr_suid != prcred->pr_ruid)
2204		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2205
2206	if (error)
2207		return (error);
2208
2209	mutex_exit(&p->p_lock);
2210
2211	/* hold old cred so it doesn't disappear while we dup it */
2212	mutex_enter(&p->p_crlock);
2213	crhold(oldcred = p->p_cred);
2214	mutex_exit(&p->p_crlock);
2215	newcred = crdup(oldcred);
2216	oldruid = crgetruid(oldcred);
2217	crfree(oldcred);
2218
2219	/* Error checking done above */
2220	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2221	    prcred->pr_suid);
2222	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2223	    prcred->pr_sgid);
2224
2225	if (dogrps) {
2226		(void) crsetgroups(newcred, prcred->pr_ngroups,
2227		    prcred->pr_groups);
2228
2229	}
2230
2231	mutex_enter(&p->p_crlock);
2232	oldcred = p->p_cred;
2233	p->p_cred = newcred;
2234	mutex_exit(&p->p_crlock);
2235	crfree(oldcred);
2236
2237	/*
2238	 * Keep count of processes per uid consistent.
2239	 */
2240	if (oldruid != prcred->pr_ruid) {
2241		zoneid_t zoneid = crgetzoneid(newcred);
2242
2243		mutex_enter(&pidlock);
2244		upcount_dec(oldruid, zoneid);
2245		upcount_inc(prcred->pr_ruid, zoneid);
2246		mutex_exit(&pidlock);
2247	}
2248
2249	/*
2250	 * Broadcast the cred change to the threads.
2251	 */
2252	mutex_enter(&p->p_lock);
2253	t = p->p_tlist;
2254	do {
2255		t->t_pre_sys = 1; /* so syscall will get new cred */
2256	} while ((t = t->t_forw) != p->p_tlist);
2257
2258	return (0);
2259}
2260
2261/*
2262 * Change process credentials to specified zone.  Used to temporarily
2263 * set a process to run in the global zone; only transitions between
2264 * the process's actual zone and the global zone are allowed.
2265 */
2266static int
2267pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2268{
2269	kthread_t *t;
2270	cred_t *oldcred;
2271	cred_t *newcred;
2272	zone_t *zptr;
2273	zoneid_t oldzoneid;
2274
2275	if (secpolicy_zone_config(cr) != 0)
2276		return (EPERM);
2277	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2278		return (EINVAL);
2279	if ((zptr = zone_find_by_id(zoneid)) == NULL)
2280		return (EINVAL);
2281	mutex_exit(&p->p_lock);
2282	mutex_enter(&p->p_crlock);
2283	oldcred = p->p_cred;
2284	crhold(oldcred);
2285	mutex_exit(&p->p_crlock);
2286	newcred = crdup(oldcred);
2287	oldzoneid = crgetzoneid(oldcred);
2288	crfree(oldcred);
2289
2290	crsetzone(newcred, zptr);
2291	zone_rele(zptr);
2292
2293	mutex_enter(&p->p_crlock);
2294	oldcred = p->p_cred;
2295	p->p_cred = newcred;
2296	mutex_exit(&p->p_crlock);
2297	crfree(oldcred);
2298
2299	/*
2300	 * The target process is changing zones (according to its cred), so
2301	 * update the per-zone upcounts, which are based on process creds.
2302	 */
2303	if (oldzoneid != zoneid) {
2304		uid_t ruid = crgetruid(newcred);
2305
2306		mutex_enter(&pidlock);
2307		upcount_dec(ruid, oldzoneid);
2308		upcount_inc(ruid, zoneid);
2309		mutex_exit(&pidlock);
2310	}
2311	/*
2312	 * Broadcast the cred change to the threads.
2313	 */
2314	mutex_enter(&p->p_lock);
2315	t = p->p_tlist;
2316	do {
2317		t->t_pre_sys = 1;	/* so syscall will get new cred */
2318	} while ((t = t->t_forw) != p->p_tlist);
2319
2320	return (0);
2321}
2322
2323static int
2324pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2325{
2326	kthread_t *t;
2327	int err;
2328
2329	ASSERT(MUTEX_HELD(&p->p_lock));
2330
2331	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2332		/*
2333		 * Broadcast the cred change to the threads.
2334		 */
2335		t = p->p_tlist;
2336		do {
2337			t->t_pre_sys = 1; /* so syscall will get new cred */
2338		} while ((t = t->t_forw) != p->p_tlist);
2339	}
2340
2341	return (err);
2342}
2343
2344/*
2345 * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2346 * terminate or perform an exec(2).
2347 *
2348 * Returns 0 if the process is fully stopped except for the current thread (if
2349 * we are operating on our own process), 1 otherwise.
2350 *
2351 * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2352 * See holdwatch() for details.
2353 */
2354int
2355pr_allstopped(proc_t *p, int watchstop)
2356{
2357	kthread_t *t;
2358	int rv = 0;
2359
2360	ASSERT(MUTEX_HELD(&p->p_lock));
2361
2362	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2363		return (-1);
2364
2365	if ((t = p->p_tlist) != NULL) {
2366		do {
2367			if (t == curthread || VSTOPPED(t) ||
2368			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2369				continue;
2370			thread_lock(t);
2371			switch (t->t_state) {
2372			case TS_ZOMB:
2373			case TS_STOPPED:
2374				break;
2375			case TS_SLEEP:
2376				if (!(t->t_flag & T_WAKEABLE) ||
2377				    t->t_wchan0 == NULL)
2378					rv = 1;
2379				break;
2380			default:
2381				rv = 1;
2382				break;
2383			}
2384			thread_unlock(t);
2385		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2386	}
2387
2388	return (rv);
2389}
2390
2391/*
2392 * Cause all lwps in the process to pause (for watchpoint operations).
2393 */
2394static void
2395pauselwps(proc_t *p)
2396{
2397	kthread_t *t;
2398
2399	ASSERT(MUTEX_HELD(&p->p_lock));
2400	ASSERT(p != curproc);
2401
2402	if ((t = p->p_tlist) != NULL) {
2403		do {
2404			thread_lock(t);
2405			t->t_proc_flag |= TP_PAUSE;
2406			aston(t);
2407			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2408			    ISWAITING(t)) {
2409				setrun_locked(t);
2410			}
2411			prpokethread(t);
2412			thread_unlock(t);
2413		} while ((t = t->t_forw) != p->p_tlist);
2414	}
2415}
2416
2417/*
2418 * undo the effects of pauselwps()
2419 */
2420static void
2421unpauselwps(proc_t *p)
2422{
2423	kthread_t *t;
2424
2425	ASSERT(MUTEX_HELD(&p->p_lock));
2426	ASSERT(p != curproc);
2427
2428	if ((t = p->p_tlist) != NULL) {
2429		do {
2430			thread_lock(t);
2431			t->t_proc_flag &= ~TP_PAUSE;
2432			if (t->t_state == TS_STOPPED) {
2433				t->t_schedflag |= TS_UNPAUSE;
2434				t->t_dtrace_stop = 0;
2435				setrun_locked(t);
2436			}
2437			thread_unlock(t);
2438		} while ((t = t->t_forw) != p->p_tlist);
2439	}
2440}
2441
2442/*
2443 * Cancel all watched areas.  Called from prclose().
2444 */
2445proc_t *
2446pr_cancel_watch(prnode_t *pnp)
2447{
2448	proc_t *p = pnp->pr_pcommon->prc_proc;
2449	struct as *as;
2450	kthread_t *t;
2451
2452	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2453
2454	if (!pr_watch_active(p))
2455		return (p);
2456
2457	/*
2458	 * Pause the process before dealing with the watchpoints.
2459	 */
2460	if (p == curproc) {
2461		prunlock(pnp);
2462		while (holdwatch() != 0)
2463			continue;
2464		p = pr_p_lock(pnp);
2465		mutex_exit(&pr_pidlock);
2466		ASSERT(p == curproc);
2467	} else {
2468		pauselwps(p);
2469		while (p != NULL && pr_allstopped(p, 0) > 0) {
2470			/*
2471			 * This cv/mutex pair is persistent even
2472			 * if the process disappears after we
2473			 * unmark it and drop p->p_lock.
2474			 */
2475			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2476			kmutex_t *mp = &p->p_lock;
2477
2478			prunmark(p);
2479			(void) cv_wait(cv, mp);
2480			mutex_exit(mp);
2481			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2482			mutex_exit(&pr_pidlock);
2483		}
2484	}
2485
2486	if (p == NULL)		/* the process disappeared */
2487		return (NULL);
2488
2489	ASSERT(p == pnp->pr_pcommon->prc_proc);
2490	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2491
2492	if (pr_watch_active(p)) {
2493		pr_free_watchpoints(p);
2494		if ((t = p->p_tlist) != NULL) {
2495			do {
2496				watch_disable(t);
2497
2498			} while ((t = t->t_forw) != p->p_tlist);
2499		}
2500	}
2501
2502	if ((as = p->p_as) != NULL) {
2503		avl_tree_t *tree;
2504		struct watched_page *pwp;
2505
2506		/*
2507		 * If this is the parent of a vfork, the watched page
2508		 * list has been moved temporarily to p->p_wpage.
2509		 */
2510		if (avl_numnodes(&p->p_wpage) != 0)
2511			tree = &p->p_wpage;
2512		else
2513			tree = &as->a_wpage;
2514
2515		mutex_exit(&p->p_lock);
2516		AS_LOCK_ENTER(as, RW_WRITER);
2517
2518		for (pwp = avl_first(tree); pwp != NULL;
2519		    pwp = AVL_NEXT(tree, pwp)) {
2520			pwp->wp_read = 0;
2521			pwp->wp_write = 0;
2522			pwp->wp_exec = 0;
2523			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2524				pwp->wp_flags |= WP_SETPROT;
2525				pwp->wp_prot = pwp->wp_oprot;
2526				pwp->wp_list = p->p_wprot;
2527				p->p_wprot = pwp;
2528			}
2529		}
2530
2531		AS_LOCK_EXIT(as);
2532		mutex_enter(&p->p_lock);
2533	}
2534
2535	/*
2536	 * Unpause the process now.
2537	 */
2538	if (p == curproc)
2539		continuelwps(p);
2540	else
2541		unpauselwps(p);
2542
2543	return (p);
2544}
2545