1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*
28 * Copyright 2012 DEY Storage Systems, Inc.  All rights reserved.
29 * Copyright 2018 Joyent, Inc.
30 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
31 */
32
33#include <sys/types.h>
34#include <sys/param.h>
35#include <sys/thread.h>
36#include <sys/sysmacros.h>
37#include <sys/signal.h>
38#include <sys/cred.h>
39#include <sys/priv.h>
40#include <sys/user.h>
41#include <sys/file.h>
42#include <sys/errno.h>
43#include <sys/vnode.h>
44#include <sys/mode.h>
45#include <sys/vfs.h>
46#include <sys/mman.h>
47#include <sys/kmem.h>
48#include <sys/proc.h>
49#include <sys/pathname.h>
50#include <sys/cmn_err.h>
51#include <sys/systm.h>
52#include <sys/elf.h>
53#include <sys/vmsystm.h>
54#include <sys/debug.h>
55#include <sys/procfs.h>
56#include <sys/regset.h>
57#include <sys/auxv.h>
58#include <sys/exec.h>
59#include <sys/prsystm.h>
60#include <sys/utsname.h>
61#include <sys/zone.h>
62#include <vm/as.h>
63#include <vm/rm.h>
64#include <sys/modctl.h>
65#include <sys/systeminfo.h>
66#include <sys/machelf.h>
67#include <sys/sunddi.h>
68#include "elf_impl.h"
69#if defined(__i386) || defined(__i386_COMPAT)
70#include <sys/sysi86.h>
71#endif
72
73void
74setup_note_header(Phdr *v, proc_t *p)
75{
76	int nlwp = p->p_lwpcnt;
77	int nzomb = p->p_zombcnt;
78	int nfd;
79	size_t size;
80	prcred_t *pcrp;
81	uf_info_t *fip;
82	uf_entry_t *ufp;
83	int fd;
84
85	fip = P_FINFO(p);
86	nfd = 0;
87	mutex_enter(&fip->fi_lock);
88	for (fd = 0; fd < fip->fi_nfiles; fd++) {
89		UF_ENTER(ufp, fip, fd);
90		if ((ufp->uf_file != NULL) && (ufp->uf_file->f_count > 0))
91			nfd++;
92		UF_EXIT(ufp);
93	}
94	mutex_exit(&fip->fi_lock);
95
96	v[0].p_type = PT_NOTE;
97	v[0].p_flags = PF_R;
98	v[0].p_filesz = (sizeof (Note) * (10 + 3 * nlwp + nzomb + nfd))
99	    + roundup(sizeof (psinfo_t), sizeof (Word))
100	    + roundup(sizeof (pstatus_t), sizeof (Word))
101	    + roundup(prgetprivsize(), sizeof (Word))
102	    + roundup(priv_get_implinfo_size(), sizeof (Word))
103	    + roundup(strlen(platform) + 1, sizeof (Word))
104	    + roundup(strlen(p->p_zone->zone_name) + 1, sizeof (Word))
105	    + roundup(__KERN_NAUXV_IMPL * sizeof (aux_entry_t), sizeof (Word))
106	    + roundup(sizeof (utsname), sizeof (Word))
107	    + roundup(sizeof (core_content_t), sizeof (Word))
108	    + roundup(sizeof (prsecflags_t), sizeof (Word))
109	    + (nlwp + nzomb) * roundup(sizeof (lwpsinfo_t), sizeof (Word))
110	    + nlwp * roundup(sizeof (lwpstatus_t), sizeof (Word))
111	    + nlwp * roundup(sizeof (prlwpname_t), sizeof (Word))
112	    + nfd * roundup(sizeof (prfdinfo_core_t), sizeof (Word));
113
114	if (curproc->p_agenttp != NULL) {
115		v[0].p_filesz += sizeof (Note) +
116		    roundup(sizeof (psinfo_t), sizeof (Word));
117	}
118
119	size = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
120	pcrp = kmem_alloc(size, KM_SLEEP);
121	prgetcred(p, pcrp);
122	if (pcrp->pr_ngroups != 0) {
123		v[0].p_filesz += sizeof (Note) + roundup(sizeof (prcred_t) +
124		    sizeof (gid_t) * (pcrp->pr_ngroups - 1), sizeof (Word));
125	} else {
126		v[0].p_filesz += sizeof (Note) +
127		    roundup(sizeof (prcred_t), sizeof (Word));
128	}
129	kmem_free(pcrp, size);
130
131
132#if defined(__i386) || defined(__i386_COMPAT)
133	mutex_enter(&p->p_ldtlock);
134	size = prnldt(p) * sizeof (struct ssd);
135	mutex_exit(&p->p_ldtlock);
136	if (size != 0)
137		v[0].p_filesz += sizeof (Note) + roundup(size, sizeof (Word));
138#endif	/* __i386 || __i386_COMPAT */
139
140	if ((size = prhasx(p)? prgetprxregsize(p) : 0) != 0)
141		v[0].p_filesz += nlwp * sizeof (Note)
142		    + nlwp * roundup(size, sizeof (Word));
143
144#if defined(__sparc)
145	/*
146	 * Figure out the number and sizes of register windows.
147	 */
148	{
149		kthread_t *t = p->p_tlist;
150		do {
151			if ((size = prnwindows(ttolwp(t))) != 0) {
152				size = sizeof (gwindows_t) -
153				    (SPARC_MAXREGWINDOW - size) *
154				    sizeof (struct rwindow);
155				v[0].p_filesz += sizeof (Note) +
156				    roundup(size, sizeof (Word));
157			}
158		} while ((t = t->t_forw) != p->p_tlist);
159	}
160	/*
161	 * Space for the Ancillary State Registers.
162	 */
163	if (p->p_model == DATAMODEL_LP64)
164		v[0].p_filesz += nlwp * sizeof (Note)
165		    + nlwp * roundup(sizeof (asrset_t), sizeof (Word));
166#endif /* __sparc */
167}
168
169int
170write_elfnotes(proc_t *p, int sig, vnode_t *vp, offset_t offset,
171    rlim64_t rlimit, cred_t *credp, core_content_t content)
172{
173	union {
174		psinfo_t	psinfo;
175		pstatus_t	pstatus;
176		lwpsinfo_t	lwpsinfo;
177		lwpstatus_t	lwpstatus;
178#if defined(__sparc)
179		gwindows_t	gwindows;
180		asrset_t	asrset;
181#endif /* __sparc */
182		char		xregs[1];
183		aux_entry_t	auxv[__KERN_NAUXV_IMPL];
184		prcred_t	pcred;
185		prpriv_t	ppriv;
186		priv_impl_info_t prinfo;
187		struct utsname	uts;
188		prsecflags_t	psecflags;
189	} *bigwad;
190
191	size_t xregsize = prhasx(p)? prgetprxregsize(p) : 0;
192	size_t crsize = sizeof (prcred_t) + sizeof (gid_t) * (ngroups_max - 1);
193	size_t psize = prgetprivsize();
194	size_t bigsize = MAX(psize, MAX(sizeof (*bigwad),
195	    MAX(xregsize, crsize)));
196
197	priv_impl_info_t *prii;
198
199	lwpdir_t *ldp;
200	lwpent_t *lep;
201	kthread_t *t;
202	klwp_t *lwp;
203	user_t *up;
204	int i;
205	int nlwp;
206	int nzomb;
207	int error;
208	uchar_t oldsig;
209	uf_info_t *fip;
210	int fd;
211	vnode_t *vroot;
212
213#if defined(__i386) || defined(__i386_COMPAT)
214	struct ssd *ssd;
215	size_t ssdsize;
216#endif	/* __i386 || __i386_COMPAT */
217
218	bigsize = MAX(bigsize, priv_get_implinfo_size());
219
220	bigwad = kmem_alloc(bigsize, KM_SLEEP);
221
222	/*
223	 * The order of the elfnote entries should be same here
224	 * and in the gcore(1) command.  Synchronization is
225	 * needed between the kernel and gcore(1).
226	 */
227
228	/*
229	 * Get the psinfo, and set the wait status to indicate that a core was
230	 * dumped.  We have to forge this since p->p_wcode is not set yet.
231	 */
232	mutex_enter(&p->p_lock);
233	prgetpsinfo(p, &bigwad->psinfo);
234	mutex_exit(&p->p_lock);
235	bigwad->psinfo.pr_wstat = wstat(CLD_DUMPED, sig);
236
237	error = elfnote(vp, &offset, NT_PSINFO, sizeof (bigwad->psinfo),
238	    (caddr_t)&bigwad->psinfo, rlimit, credp);
239	if (error)
240		goto done;
241
242	/*
243	 * Modify t_whystop and lwp_cursig so it appears that the current LWP
244	 * is stopped after faulting on the signal that caused the core dump.
245	 * As a result, prgetstatus() will record that signal, the saved
246	 * lwp_siginfo, and its signal handler in the core file status.  We
247	 * restore lwp_cursig in case a subsequent signal was received while
248	 * dumping core.
249	 */
250	mutex_enter(&p->p_lock);
251	lwp = ttolwp(curthread);
252
253	oldsig = lwp->lwp_cursig;
254	lwp->lwp_cursig = (uchar_t)sig;
255	curthread->t_whystop = PR_FAULTED;
256
257	prgetstatus(p, &bigwad->pstatus, p->p_zone);
258	bigwad->pstatus.pr_lwp.pr_why = 0;
259
260	curthread->t_whystop = 0;
261	lwp->lwp_cursig = oldsig;
262	mutex_exit(&p->p_lock);
263
264	error = elfnote(vp, &offset, NT_PSTATUS, sizeof (bigwad->pstatus),
265	    (caddr_t)&bigwad->pstatus, rlimit, credp);
266	if (error)
267		goto done;
268
269	error = elfnote(vp, &offset, NT_PLATFORM, strlen(platform) + 1,
270	    platform, rlimit, credp);
271	if (error)
272		goto done;
273
274	up = PTOU(p);
275	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
276		bigwad->auxv[i].a_type = up->u_auxv[i].a_type;
277		bigwad->auxv[i].a_un.a_val = up->u_auxv[i].a_un.a_val;
278	}
279	error = elfnote(vp, &offset, NT_AUXV, sizeof (bigwad->auxv),
280	    (caddr_t)bigwad->auxv, rlimit, credp);
281	if (error)
282		goto done;
283
284	bcopy(&utsname, &bigwad->uts, sizeof (struct utsname));
285	if (!INGLOBALZONE(p)) {
286		bcopy(p->p_zone->zone_nodename, &bigwad->uts.nodename,
287		    _SYS_NMLN);
288	}
289	error = elfnote(vp, &offset, NT_UTSNAME, sizeof (struct utsname),
290	    (caddr_t)&bigwad->uts, rlimit, credp);
291	if (error)
292		goto done;
293
294	prgetsecflags(p, &bigwad->psecflags);
295	error = elfnote(vp, &offset, NT_SECFLAGS, sizeof (prsecflags_t),
296	    (caddr_t)&bigwad->psecflags, rlimit, credp);
297	if (error)
298		goto done;
299
300	prgetcred(p, &bigwad->pcred);
301
302	if (bigwad->pcred.pr_ngroups != 0) {
303		crsize = sizeof (prcred_t) +
304		    sizeof (gid_t) * (bigwad->pcred.pr_ngroups - 1);
305	} else
306		crsize = sizeof (prcred_t);
307
308	error = elfnote(vp, &offset, NT_PRCRED, crsize,
309	    (caddr_t)&bigwad->pcred, rlimit, credp);
310	if (error)
311		goto done;
312
313	error = elfnote(vp, &offset, NT_CONTENT, sizeof (core_content_t),
314	    (caddr_t)&content, rlimit, credp);
315	if (error)
316		goto done;
317
318	prgetpriv(p, &bigwad->ppriv);
319
320	error = elfnote(vp, &offset, NT_PRPRIV, psize,
321	    (caddr_t)&bigwad->ppriv, rlimit, credp);
322	if (error)
323		goto done;
324
325	prii = priv_hold_implinfo();
326	error = elfnote(vp, &offset, NT_PRPRIVINFO, priv_get_implinfo_size(),
327	    (caddr_t)prii, rlimit, credp);
328	priv_release_implinfo();
329	if (error)
330		goto done;
331
332	/* zone can't go away as long as process exists */
333	error = elfnote(vp, &offset, NT_ZONENAME,
334	    strlen(p->p_zone->zone_name) + 1, p->p_zone->zone_name,
335	    rlimit, credp);
336	if (error)
337		goto done;
338
339
340	/* open file table */
341	vroot = PTOU(p)->u_rdir;
342	if (vroot == NULL)
343		vroot = rootdir;
344
345	VN_HOLD(vroot);
346
347	fip = P_FINFO(p);
348
349	for (fd = 0; fd < fip->fi_nfiles; fd++) {
350		uf_entry_t *ufp;
351		vnode_t *fvp;
352		struct file *fp;
353		vattr_t vattr;
354		prfdinfo_core_t fdinfo;
355
356		bzero(&fdinfo, sizeof (fdinfo));
357
358		mutex_enter(&fip->fi_lock);
359		UF_ENTER(ufp, fip, fd);
360		if (((fp = ufp->uf_file) == NULL) || (fp->f_count < 1)) {
361			UF_EXIT(ufp);
362			mutex_exit(&fip->fi_lock);
363			continue;
364		}
365
366		fdinfo.pr_fd = fd;
367		fdinfo.pr_fdflags = ufp->uf_flag;
368		fdinfo.pr_fileflags = fp->f_flag2;
369		fdinfo.pr_fileflags <<= 16;
370		fdinfo.pr_fileflags |= fp->f_flag;
371		if ((fdinfo.pr_fileflags & (FSEARCH | FEXEC)) == 0)
372			fdinfo.pr_fileflags += FOPEN;
373		fdinfo.pr_offset = fp->f_offset;
374
375
376		fvp = fp->f_vnode;
377		VN_HOLD(fvp);
378		UF_EXIT(ufp);
379		mutex_exit(&fip->fi_lock);
380
381		/*
382		 * There are some vnodes that have no corresponding
383		 * path.  Its reasonable for this to fail, in which
384		 * case the path will remain an empty string.
385		 */
386		(void) vnodetopath(vroot, fvp, fdinfo.pr_path,
387		    sizeof (fdinfo.pr_path), credp);
388
389		if (VOP_GETATTR(fvp, &vattr, 0, credp, NULL) != 0) {
390			/*
391			 * Try to write at least a subset of information
392			 */
393			fdinfo.pr_major = 0;
394			fdinfo.pr_minor = 0;
395			fdinfo.pr_ino = 0;
396			fdinfo.pr_mode = 0;
397			fdinfo.pr_uid = (uid_t)-1;
398			fdinfo.pr_gid = (gid_t)-1;
399			fdinfo.pr_rmajor = 0;
400			fdinfo.pr_rminor = 0;
401			fdinfo.pr_size = -1;
402
403			error = elfnote(vp, &offset, NT_FDINFO,
404			    sizeof (fdinfo), &fdinfo, rlimit, credp);
405			VN_RELE(fvp);
406			if (error) {
407				VN_RELE(vroot);
408				goto done;
409			}
410			continue;
411		}
412
413		if (fvp->v_type == VSOCK)
414			fdinfo.pr_fileflags |= sock_getfasync(fvp);
415
416		VN_RELE(fvp);
417
418		/*
419		 * This logic mirrors fstat(), which we cannot use
420		 * directly, as it calls copyout().
421		 */
422		fdinfo.pr_major = getmajor(vattr.va_fsid);
423		fdinfo.pr_minor = getminor(vattr.va_fsid);
424		fdinfo.pr_ino = (ino64_t)vattr.va_nodeid;
425		fdinfo.pr_mode = VTTOIF(vattr.va_type) | vattr.va_mode;
426		fdinfo.pr_uid = vattr.va_uid;
427		fdinfo.pr_gid = vattr.va_gid;
428		fdinfo.pr_rmajor = getmajor(vattr.va_rdev);
429		fdinfo.pr_rminor = getminor(vattr.va_rdev);
430		fdinfo.pr_size = (off64_t)vattr.va_size;
431
432		error = elfnote(vp, &offset, NT_FDINFO,
433		    sizeof (fdinfo), &fdinfo, rlimit, credp);
434		if (error) {
435			VN_RELE(vroot);
436			goto done;
437		}
438	}
439
440	VN_RELE(vroot);
441
442#if defined(__i386) || defined(__i386_COMPAT)
443	mutex_enter(&p->p_ldtlock);
444	ssdsize = prnldt(p) * sizeof (struct ssd);
445	if (ssdsize != 0) {
446		ssd = kmem_alloc(ssdsize, KM_SLEEP);
447		prgetldt(p, ssd);
448		error = elfnote(vp, &offset, NT_LDT, ssdsize,
449		    (caddr_t)ssd, rlimit, credp);
450		kmem_free(ssd, ssdsize);
451	}
452	mutex_exit(&p->p_ldtlock);
453	if (error)
454		goto done;
455#endif	/* __i386 || defined(__i386_COMPAT) */
456
457	nlwp = p->p_lwpcnt;
458	nzomb = p->p_zombcnt;
459	/* for each entry in the lwp directory ... */
460	for (ldp = p->p_lwpdir; nlwp + nzomb != 0; ldp++) {
461		prlwpname_t name = { 0, };
462
463		if ((lep = ldp->ld_entry) == NULL)	/* empty slot */
464			continue;
465
466		if ((t = lep->le_thread) != NULL) {	/* active lwp */
467			ASSERT(nlwp != 0);
468			nlwp--;
469			lwp = ttolwp(t);
470			mutex_enter(&p->p_lock);
471			prgetlwpsinfo(t, &bigwad->lwpsinfo);
472			if (t->t_name != NULL) {
473				(void) strlcpy(name.pr_lwpname, t->t_name,
474				    sizeof (name.pr_lwpname));
475			}
476			mutex_exit(&p->p_lock);
477		} else {				/* zombie lwp */
478			ASSERT(nzomb != 0);
479			nzomb--;
480			bzero(&bigwad->lwpsinfo, sizeof (bigwad->lwpsinfo));
481			bigwad->lwpsinfo.pr_lwpid = lep->le_lwpid;
482			bigwad->lwpsinfo.pr_state = SZOMB;
483			bigwad->lwpsinfo.pr_sname = 'Z';
484			bigwad->lwpsinfo.pr_start.tv_sec = lep->le_start;
485		}
486
487		name.pr_lwpid = bigwad->lwpsinfo.pr_lwpid;
488
489		error = elfnote(vp, &offset, NT_LWPSINFO,
490		    sizeof (bigwad->lwpsinfo), (caddr_t)&bigwad->lwpsinfo,
491		    rlimit, credp);
492		if (error)
493			goto done;
494
495		if (t == NULL)		/* nothing more to do for a zombie */
496			continue;
497
498		mutex_enter(&p->p_lock);
499		if (t == curthread) {
500			/*
501			 * Modify t_whystop and lwp_cursig so it appears that
502			 * the current LWP is stopped after faulting on the
503			 * signal that caused the core dump.  As a result,
504			 * prgetlwpstatus() will record that signal, the saved
505			 * lwp_siginfo, and its signal handler in the core file
506			 * status.  We restore lwp_cursig in case a subsequent
507			 * signal was received while dumping core.
508			 */
509			oldsig = lwp->lwp_cursig;
510			lwp->lwp_cursig = (uchar_t)sig;
511			t->t_whystop = PR_FAULTED;
512
513			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
514			bigwad->lwpstatus.pr_why = 0;
515
516			t->t_whystop = 0;
517			lwp->lwp_cursig = oldsig;
518		} else {
519			prgetlwpstatus(t, &bigwad->lwpstatus, p->p_zone);
520		}
521		mutex_exit(&p->p_lock);
522		error = elfnote(vp, &offset, NT_LWPSTATUS,
523		    sizeof (bigwad->lwpstatus), (caddr_t)&bigwad->lwpstatus,
524		    rlimit, credp);
525		if (error)
526			goto done;
527
528		if ((error = elfnote(vp, &offset, NT_LWPNAME, sizeof (name),
529		    (caddr_t)&name, rlimit, credp)) != 0)
530			goto done;
531
532
533#if defined(__sparc)
534		/*
535		 * Unspilled SPARC register windows.
536		 */
537		{
538			size_t size = prnwindows(lwp);
539
540			if (size != 0) {
541				size = sizeof (gwindows_t) -
542				    (SPARC_MAXREGWINDOW - size) *
543				    sizeof (struct rwindow);
544				prgetwindows(lwp, &bigwad->gwindows);
545				error = elfnote(vp, &offset, NT_GWINDOWS,
546				    size, (caddr_t)&bigwad->gwindows,
547				    rlimit, credp);
548				if (error)
549					goto done;
550			}
551		}
552		/*
553		 * Ancillary State Registers.
554		 */
555		if (p->p_model == DATAMODEL_LP64) {
556			prgetasregs(lwp, bigwad->asrset);
557			error = elfnote(vp, &offset, NT_ASRS,
558			    sizeof (asrset_t), (caddr_t)bigwad->asrset,
559			    rlimit, credp);
560			if (error)
561				goto done;
562		}
563#endif /* __sparc */
564
565		if (xregsize) {
566			prgetprxregs(lwp, bigwad->xregs);
567			error = elfnote(vp, &offset, NT_PRXREG,
568			    xregsize, bigwad->xregs, rlimit, credp);
569			if (error)
570				goto done;
571		}
572
573		if (t->t_lwp->lwp_spymaster != NULL) {
574			void *psaddr = t->t_lwp->lwp_spymaster;
575#ifdef _ELF32_COMPAT
576			/*
577			 * On a 64-bit kernel with 32-bit ELF compatibility,
578			 * this file is compiled into two different objects:
579			 * one is compiled normally, and the other is compiled
580			 * with _ELF32_COMPAT set -- and therefore with a
581			 * psinfo_t defined to be a psinfo32_t.  However, the
582			 * psinfo_t denoting our spymaster is always of the
583			 * native type; if we are in the _ELF32_COMPAT case,
584			 * we need to explicitly convert it.
585			 */
586			if (p->p_model == DATAMODEL_ILP32) {
587				psinfo_kto32(psaddr, &bigwad->psinfo);
588				psaddr = &bigwad->psinfo;
589			}
590#endif
591
592			error = elfnote(vp, &offset, NT_SPYMASTER,
593			    sizeof (psinfo_t), psaddr, rlimit, credp);
594			if (error)
595				goto done;
596		}
597	}
598	ASSERT(nlwp == 0);
599
600done:
601	kmem_free(bigwad, bigsize);
602	return (error);
603}
604