1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/errno.h>
28 #include <sys/exec.h>
29 #include <sys/file.h>
30 #include <sys/kmem.h>
31 #include <sys/modctl.h>
32 #include <sys/model.h>
33 #include <sys/proc.h>
34 #include <sys/syscall.h>
35 #include <sys/systm.h>
36 #include <sys/thread.h>
37 #include <sys/cmn_err.h>
38 #include <sys/archsystm.h>
39 #include <sys/pathname.h>
40 #include <sys/sunddi.h>
41 
42 #include <sys/machbrand.h>
43 #include <sys/brand.h>
44 #include "s10_brand.h"
45 
46 char *s10_emulation_table = NULL;
47 
48 void	s10_init_brand_data(zone_t *);
49 void	s10_free_brand_data(zone_t *);
50 void	s10_setbrand(proc_t *);
51 int	s10_getattr(zone_t *, int, void *, size_t *);
52 int	s10_setattr(zone_t *, int, void *, size_t);
53 int	s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
54 		uintptr_t, uintptr_t, uintptr_t);
55 void	s10_copy_procdata(proc_t *, proc_t *);
56 void	s10_proc_exit(struct proc *, klwp_t *);
57 void	s10_exec();
58 int	s10_initlwp(klwp_t *);
59 void	s10_forklwp(klwp_t *, klwp_t *);
60 void	s10_freelwp(klwp_t *);
61 void	s10_lwpexit(klwp_t *);
62 int	s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
63 	long *, int, caddr_t, cred_t *, int);
64 
65 /* s10 brand */
66 struct brand_ops s10_brops = {
67 	s10_init_brand_data,
68 	s10_free_brand_data,
69 	s10_brandsys,
70 	s10_setbrand,
71 	s10_getattr,
72 	s10_setattr,
73 	s10_copy_procdata,
74 	s10_proc_exit,
75 	s10_exec,
76 	lwp_setrval,
77 	s10_initlwp,
78 	s10_forklwp,
79 	s10_freelwp,
80 	s10_lwpexit,
81 	s10_elfexec
82 };
83 
84 #ifdef	sparc
85 
86 struct brand_mach_ops s10_mops = {
87 	s10_brand_syscall_callback,
88 	s10_brand_syscall32_callback
89 };
90 
91 #else	/* sparc */
92 
93 #ifdef	__amd64
94 
95 struct brand_mach_ops s10_mops = {
96 	s10_brand_sysenter_callback,
97 	NULL,
98 	s10_brand_int91_callback,
99 	s10_brand_syscall_callback,
100 	s10_brand_syscall32_callback,
101 	NULL
102 };
103 
104 #else	/* ! __amd64 */
105 
106 struct brand_mach_ops s10_mops = {
107 	s10_brand_sysenter_callback,
108 	NULL,
109 	NULL,
110 	s10_brand_syscall_callback,
111 	NULL,
112 	NULL
113 };
114 #endif	/* __amd64 */
115 
116 #endif	/* _sparc */
117 
118 struct brand	s10_brand = {
119 	BRAND_VER_1,
120 	"solaris10",
121 	&s10_brops,
122 	&s10_mops
123 };
124 
125 static struct modlbrand modlbrand = {
126 	&mod_brandops,		/* type of module */
127 	"Solaris 10 Brand",	/* description of module */
128 	&s10_brand		/* driver ops */
129 };
130 
131 static struct modlinkage modlinkage = {
132 	MODREV_1, (void *)&modlbrand, NULL
133 };
134 
135 void
136 s10_setbrand(proc_t *p)
137 {
138 	ASSERT(p->p_brand == &s10_brand);
139 	ASSERT(p->p_brand_data == NULL);
140 
141 	/*
142 	 * We should only be called from exec(), when we know the process
143 	 * is single-threaded.
144 	 */
145 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
146 
147 	p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP);
148 	(void) s10_initlwp(p->p_tlist->t_lwp);
149 }
150 
151 /*ARGSUSED*/
152 int
153 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
154 {
155 	ASSERT(zone->zone_brand == &s10_brand);
156 	if (attr == S10_EMUL_BITMAP) {
157 		if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t))
158 			return (EINVAL);
159 		if (copyout(((s10_zone_data_t *)zone->zone_brand_data)->
160 		    emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0)
161 			return (EFAULT);
162 		return (0);
163 	}
164 
165 	return (EINVAL);
166 }
167 
168 int
169 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
170 {
171 	ASSERT(zone->zone_brand == &s10_brand);
172 	if (attr == S10_EMUL_BITMAP) {
173 		if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t))
174 			return (EINVAL);
175 		if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)->
176 		    emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0)
177 			return (EFAULT);
178 		return (0);
179 	}
180 
181 	return (EINVAL);
182 }
183 
184 #ifdef	__amd64
185 /*
186  * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's
187  * libc expects %fs to be nonzero.  This causes some committed
188  * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several
189  * libraries, including libdoor.  This function sets the specified LWP's %fs
190  * register to the legacy S10 selector value (LWPFS_SEL).
191  *
192  * The best solution to the aforementioned problem is backporting CRs
193  * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes
194  * would accept zero for %fs.  Backporting the CRs is a requirement for running
195  * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is
196  * nonzero.  Such behavior breaks 64-bit processes because Xen has to fetch the
197  * FS segments' base addresses from the LWPs' GDTs, which are only capable of
198  * 32-bit addressing.
199  */
200 /*ARGSUSED*/
201 static void
202 s10_amd64_correct_fsreg(klwp_t *l)
203 {
204 	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
205 		kpreempt_disable();
206 		l->lwp_pcb.pcb_fs = LWPFS_SEL;
207 		l->lwp_pcb.pcb_rupdate = 1;
208 		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
209 		kpreempt_enable();
210 	}
211 }
212 #endif	/* __amd64 */
213 
214 int
215 s10_native()
216 {
217 	struct user	*up = PTOU(curproc);
218 	char		*args_new, *comm_new, *p;
219 	int		len;
220 
221 	len = sizeof (S10_NATIVE_LINKER32 " ") - 1;
222 
223 	/*
224 	 * Make sure that the process' interpreter is the native dynamic linker.
225 	 * Convention dictates that native processes executing within solaris10-
226 	 * branded zones are interpreted by the native dynamic linker (the
227 	 * process and its arguments are specified as arguments to the dynamic
228 	 * linker).  If this convention is violated (i.e.,
229 	 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be
230 	 * native), then do nothing and silently indicate success.
231 	 */
232 	if (strcmp(up->u_comm, S10_LINKER_NAME) != 0)
233 		return (0);
234 	if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0)
235 		len += 3;		/* to account for "/64" in the path */
236 	else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0)
237 		return (0);
238 
239 	args_new = strdup(&up->u_psargs[len]);
240 	if ((p = strchr(args_new, ' ')) != NULL)
241 		*p = '\0';
242 	if ((comm_new = strrchr(args_new, '/')) != NULL)
243 		comm_new = strdup(comm_new + 1);
244 	else
245 		comm_new = strdup(args_new);
246 	if (p != NULL)
247 		*p = ' ';
248 
249 	if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) {
250 		mutex_enter(&curproc->p_lock);
251 		(void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1);
252 		(void) strlcpy(up->u_psargs, args_new, PSARGSZ);
253 		mutex_exit(&curproc->p_lock);
254 	}
255 
256 	strfree(args_new);
257 	strfree(comm_new);
258 	return (0);
259 }
260 
261 /*
262  * Get the address of the user-space system call handler from the user
263  * process and attach it to the proc structure.
264  */
265 /*ARGSUSED*/
266 int
267 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
268     uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
269 {
270 	s10_proc_data_t	*spd;
271 	s10_brand_reg_t	reg;
272 	proc_t		*p = curproc;
273 	int		err;
274 
275 	*rval = 0;
276 
277 	/*
278 	 * B_EXEC_BRAND is redundant
279 	 * since the kernel assumes a native process doing an exec
280 	 * in a branded zone is going to run a branded processes.
281 	 * hence we don't support this operation.
282 	 */
283 	if (cmd == B_EXEC_BRAND)
284 		return (ENOSYS);
285 
286 	if (cmd == B_S10_NATIVE)
287 		return (s10_native());
288 
289 	/* For all other operations this must be a branded process. */
290 	if (p->p_brand == &native_brand)
291 		return (ENOSYS);
292 
293 	ASSERT(p->p_brand == &s10_brand);
294 	ASSERT(p->p_brand_data != NULL);
295 
296 	spd = (s10_proc_data_t *)p->p_brand_data;
297 
298 	switch (cmd) {
299 	case B_EXEC_NATIVE:
300 		err = exec_common(
301 		    (char *)arg1, (const char **)arg2, (const char **)arg3,
302 		    EBA_NATIVE);
303 		return (err);
304 
305 	case B_REGISTER:
306 		if (p->p_model == DATAMODEL_NATIVE) {
307 			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
308 				return (EFAULT);
309 #if defined(_LP64)
310 		} else {
311 			s10_brand_reg32_t reg32;
312 
313 			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
314 				return (EFAULT);
315 			reg.sbr_version = reg32.sbr_version;
316 			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
317 #endif /* _LP64 */
318 		}
319 
320 		if (reg.sbr_version != S10_VERSION)
321 			return (ENOTSUP);
322 		spd->spd_handler = reg.sbr_handler;
323 		return (0);
324 
325 	case B_ELFDATA:
326 		if (p->p_model == DATAMODEL_NATIVE) {
327 			if (copyout(&spd->spd_elf_data, (void *)arg1,
328 			    sizeof (s10_elf_data_t)) != 0)
329 				return (EFAULT);
330 #if defined(_LP64)
331 		} else {
332 			s10_elf_data32_t sed32;
333 
334 			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
335 			sed32.sed_phent = spd->spd_elf_data.sed_phent;
336 			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
337 			sed32.sed_entry = spd->spd_elf_data.sed_entry;
338 			sed32.sed_base = spd->spd_elf_data.sed_base;
339 			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
340 			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
341 			if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0)
342 				return (EFAULT);
343 #endif /* _LP64 */
344 		}
345 		return (0);
346 
347 	case B_S10_PIDINFO:
348 		/*
349 		 * The s10 brand needs to be able to get the pid of the
350 		 * current process and the pid of the zone's init, and it
351 		 * needs to do this on every process startup.  Early in
352 		 * brand startup, we can't call getpid() because calls to
353 		 * getpid() represent a magical signal to some old-skool
354 		 * debuggers.  By merging all of this into one call, we
355 		 * make this quite a bit cheaper and easier to handle in
356 		 * the brand module.
357 		 */
358 		if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0)
359 			return (EFAULT);
360 		if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2,
361 		    sizeof (pid_t)) != 0)
362 			return (EFAULT);
363 		return (0);
364 
365 	case B_S10_TRUSS_POINT:
366 		/*
367 		 * This subcommand exists so that we can see truss output
368 		 * from interposed system calls that return without first
369 		 * calling any other system call, meaning they would be
370 		 * invisible to truss(1).
371 		 *
372 		 * If the second argument is set non-zero, set errno to that
373 		 * value as well.
374 		 *
375 		 * Arguments are:
376 		 *
377 		 *    arg1: syscall number
378 		 *    arg2: errno
379 		 */
380 		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
381 
382 	case B_S10_ISFDXATTRDIR: {
383 		/*
384 		 * This subcommand enables the userland brand emulation library
385 		 * to determine whether a file descriptor refers to an extended
386 		 * file attributes directory.  There is no standard syscall or
387 		 * libc function that can make such a determination.
388 		 */
389 		file_t *dir_filep;
390 
391 		dir_filep = getf((int)arg1);
392 		if (dir_filep == NULL)
393 			return (EBADF);
394 		ASSERT(dir_filep->f_vnode != NULL);
395 		*rval = IS_XATTRDIR(dir_filep->f_vnode);
396 		releasef((int)arg1);
397 		return (0);
398 	}
399 
400 #ifdef	__amd64
401 	case B_S10_FSREGCORRECTION:
402 		/*
403 		 * This subcommand exists so that the SYS_lwp_private and
404 		 * SYS_lwp_create syscalls can manually set the current thread's
405 		 * %fs register to the legacy S10 selector value for 64-bit x86
406 		 * processes.
407 		 */
408 		s10_amd64_correct_fsreg(ttolwp(curthread));
409 		return (0);
410 #endif	/* __amd64 */
411 	}
412 
413 	return (EINVAL);
414 }
415 
416 /*
417  * Copy the per-process brand data from a parent proc to a child.
418  */
419 void
420 s10_copy_procdata(proc_t *child, proc_t *parent)
421 {
422 	s10_proc_data_t	*spd;
423 
424 	ASSERT(parent->p_brand == &s10_brand);
425 	ASSERT(child->p_brand == &s10_brand);
426 	ASSERT(parent->p_brand_data != NULL);
427 	ASSERT(child->p_brand_data == NULL);
428 
429 	/* Just duplicate all the proc data of the parent for the child */
430 	spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP);
431 	bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t));
432 	child->p_brand_data = spd;
433 }
434 
435 /*ARGSUSED*/
436 void
437 s10_proc_exit(struct proc *p, klwp_t *l)
438 {
439 	ASSERT(p->p_brand == &s10_brand);
440 	ASSERT(p->p_brand_data != NULL);
441 
442 	/*
443 	 * We should only be called from proc_exit(), when we know that
444 	 * process is single-threaded.
445 	 */
446 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
447 
448 	/* upon exit, free our lwp brand data */
449 	(void) s10_freelwp(ttolwp(curthread));
450 
451 	/* upon exit, free our proc brand data */
452 	kmem_free(p->p_brand_data, sizeof (s10_proc_data_t));
453 	p->p_brand_data = NULL;
454 }
455 
456 void
457 s10_exec()
458 {
459 	s10_proc_data_t	*spd = curproc->p_brand_data;
460 
461 	ASSERT(curproc->p_brand == &s10_brand);
462 	ASSERT(curproc->p_brand_data != NULL);
463 	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
464 
465 	/*
466 	 * We should only be called from exec(), when we know the process
467 	 * is single-threaded.
468 	 */
469 	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
470 
471 	/* Upon exec, reset our lwp brand data. */
472 	(void) s10_freelwp(ttolwp(curthread));
473 	(void) s10_initlwp(ttolwp(curthread));
474 
475 	/*
476 	 * Upon exec, reset all the proc brand data, except for the elf
477 	 * data associated with the executable we are exec'ing.
478 	 */
479 	spd->spd_handler = NULL;
480 }
481 
482 /*ARGSUSED*/
483 int
484 s10_initlwp(klwp_t *l)
485 {
486 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
487 	ASSERT(l->lwp_procp->p_brand_data != NULL);
488 	ASSERT(l->lwp_brand == NULL);
489 	l->lwp_brand = (void *)-1;
490 	return (0);
491 }
492 
493 /*ARGSUSED*/
494 void
495 s10_forklwp(klwp_t *p, klwp_t *c)
496 {
497 	ASSERT(p->lwp_procp->p_brand == &s10_brand);
498 	ASSERT(c->lwp_procp->p_brand == &s10_brand);
499 
500 	ASSERT(p->lwp_procp->p_brand_data != NULL);
501 	ASSERT(c->lwp_procp->p_brand_data != NULL);
502 
503 	/* Both LWPs have already had been initialized via s10_initlwp() */
504 	ASSERT(p->lwp_brand != NULL);
505 	ASSERT(c->lwp_brand != NULL);
506 
507 #ifdef	__amd64
508 	/*
509 	 * Only correct the child's %fs register if the parent's %fs register
510 	 * is LWPFS_SEL.  If the parent's %fs register is zero, then the Solaris
511 	 * 10 environment that we're emulating uses a version of libc that
512 	 * works when %fs is zero (i.e., it contains backports of CRs 6467491
513 	 * and 6501650).
514 	 */
515 	if (p->lwp_pcb.pcb_fs == LWPFS_SEL)
516 		s10_amd64_correct_fsreg(c);
517 #endif	/* __amd64 */
518 }
519 
520 /*ARGSUSED*/
521 void
522 s10_freelwp(klwp_t *l)
523 {
524 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
525 	ASSERT(l->lwp_procp->p_brand_data != NULL);
526 	ASSERT(l->lwp_brand != NULL);
527 	l->lwp_brand = NULL;
528 }
529 
530 /*ARGSUSED*/
531 void
532 s10_lwpexit(klwp_t *l)
533 {
534 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
535 	ASSERT(l->lwp_procp->p_brand_data != NULL);
536 	ASSERT(l->lwp_brand != NULL);
537 
538 	/*
539 	 * We should never be called for the last thread in a process.
540 	 * (That case is handled by s10_proc_exit().)  There for this lwp
541 	 * must be exiting from a multi-threaded process.
542 	 */
543 	ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw);
544 
545 	l->lwp_brand = NULL;
546 }
547 
548 void
549 s10_free_brand_data(zone_t *zone)
550 {
551 	kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t));
552 }
553 
554 void
555 s10_init_brand_data(zone_t *zone)
556 {
557 	ASSERT(zone->zone_brand == &s10_brand);
558 	ASSERT(zone->zone_brand_data == NULL);
559 	zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP);
560 }
561 
562 #if defined(_LP64)
563 static void
564 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
565 {
566 	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
567 	dst->e_type =		src->e_type;
568 	dst->e_machine =	src->e_machine;
569 	dst->e_version =	src->e_version;
570 	dst->e_entry =		src->e_entry;
571 	dst->e_phoff =		src->e_phoff;
572 	dst->e_shoff =		src->e_shoff;
573 	dst->e_flags =		src->e_flags;
574 	dst->e_ehsize =		src->e_ehsize;
575 	dst->e_phentsize =	src->e_phentsize;
576 	dst->e_phnum =		src->e_phnum;
577 	dst->e_shentsize =	src->e_shentsize;
578 	dst->e_shnum =		src->e_shnum;
579 	dst->e_shstrndx =	src->e_shstrndx;
580 }
581 #endif /* _LP64 */
582 
583 int
584 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
585 	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
586 	int brand_action)
587 {
588 	vnode_t		*nvp;
589 	Ehdr		ehdr;
590 	Addr		uphdr_vaddr;
591 	intptr_t	voffset;
592 	int		interp;
593 	int		i, err;
594 	struct execenv	env;
595 	struct user	*up = PTOU(curproc);
596 	s10_proc_data_t	*spd;
597 	s10_elf_data_t	sed, *sedp;
598 	char		*linker;
599 	uintptr_t	lddata; /* lddata of executable's linker */
600 
601 	ASSERT(curproc->p_brand == &s10_brand);
602 	ASSERT(curproc->p_brand_data != NULL);
603 
604 	spd = (s10_proc_data_t *)curproc->p_brand_data;
605 	sedp = &spd->spd_elf_data;
606 
607 	args->brandname = S10_BRANDNAME;
608 
609 	/*
610 	 * We will exec the brand library and then map in the target
611 	 * application and (optionally) the brand's default linker.
612 	 */
613 	if (args->to_model == DATAMODEL_NATIVE) {
614 		args->emulator = S10_LIB;
615 		linker = S10_LINKER;
616 #if defined(_LP64)
617 	} else {
618 		args->emulator = S10_LIB32;
619 		linker = S10_LINKER32;
620 #endif /* _LP64 */
621 	}
622 
623 	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP,
624 	    &nvp)) != 0) {
625 		uprintf("%s: not found.", args->emulator);
626 		return (err);
627 	}
628 
629 	if (args->to_model == DATAMODEL_NATIVE) {
630 		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
631 		    setid, exec_file, cred, brand_action);
632 #if defined(_LP64)
633 	} else {
634 		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
635 		    setid, exec_file, cred, brand_action);
636 #endif /* _LP64 */
637 	}
638 	VN_RELE(nvp);
639 	if (err != 0)
640 		return (err);
641 
642 	/*
643 	 * The u_auxv vectors are set up by elfexec to point to the brand
644 	 * emulation library and linker.  Save these so they can be copied to
645 	 * the specific brand aux vectors.
646 	 */
647 	bzero(&sed, sizeof (sed));
648 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
649 		switch (up->u_auxv[i].a_type) {
650 		case AT_SUN_LDDATA:
651 			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
652 			break;
653 		case AT_BASE:
654 			sed.sed_base = up->u_auxv[i].a_un.a_val;
655 			break;
656 		case AT_ENTRY:
657 			sed.sed_entry = up->u_auxv[i].a_un.a_val;
658 			break;
659 		case AT_PHDR:
660 			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
661 			break;
662 		case AT_PHENT:
663 			sed.sed_phent = up->u_auxv[i].a_un.a_val;
664 			break;
665 		case AT_PHNUM:
666 			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
667 			break;
668 		default:
669 			break;
670 		}
671 	}
672 	/* Make sure the emulator has an entry point */
673 	ASSERT(sed.sed_entry != NULL);
674 	ASSERT(sed.sed_phdr != NULL);
675 
676 	bzero(&env, sizeof (env));
677 	if (args->to_model == DATAMODEL_NATIVE) {
678 		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
679 		    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
680 		    &env.ex_brksize, NULL);
681 #if defined(_LP64)
682 	} else {
683 		Elf32_Ehdr ehdr32;
684 		Elf32_Addr uphdr_vaddr32;
685 		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
686 		    &voffset, exec_file, &interp, &env.ex_bssbase,
687 		    &env.ex_brkbase, &env.ex_brksize, NULL);
688 		Ehdr32to64(&ehdr32, &ehdr);
689 		if (uphdr_vaddr32 == (Elf32_Addr)-1)
690 			uphdr_vaddr = (Addr)-1;
691 		else
692 			uphdr_vaddr = uphdr_vaddr32;
693 #endif /* _LP64 */
694 	}
695 	if (err != 0)
696 		return (err);
697 
698 	/*
699 	 * Save off the important properties of the executable. The brand
700 	 * library will ask us for this data later, when it is initializing
701 	 * and getting ready to transfer control to the brand application.
702 	 */
703 	if (uphdr_vaddr == (Addr)-1)
704 		sedp->sed_phdr = voffset + ehdr.e_phoff;
705 	else
706 		sedp->sed_phdr = voffset + uphdr_vaddr;
707 	sedp->sed_entry = voffset + ehdr.e_entry;
708 	sedp->sed_phent = ehdr.e_phentsize;
709 	sedp->sed_phnum = ehdr.e_phnum;
710 
711 	if (interp) {
712 		if (ehdr.e_type == ET_DYN) {
713 			/*
714 			 * This is a shared object executable, so we need to
715 			 * pick a reasonable place to put the heap. Just don't
716 			 * use the first page.
717 			 */
718 			env.ex_brkbase = (caddr_t)PAGESIZE;
719 			env.ex_bssbase = (caddr_t)PAGESIZE;
720 		}
721 
722 		/*
723 		 * If the program needs an interpreter (most do), map it in and
724 		 * store relevant information about it in the aux vector, where
725 		 * the brand library can find it.
726 		 */
727 		if ((err = lookupname(linker, UIO_SYSSPACE,
728 		    FOLLOW, NULLVPP, &nvp)) != 0) {
729 			uprintf("%s: not found.", S10_LINKER);
730 			return (err);
731 		}
732 		if (args->to_model == DATAMODEL_NATIVE) {
733 			err = mapexec_brand(nvp, args, &ehdr,
734 			    &uphdr_vaddr, &voffset, exec_file, &interp,
735 			    NULL, NULL, NULL, &lddata);
736 #if defined(_LP64)
737 		} else {
738 			Elf32_Ehdr ehdr32;
739 			Elf32_Addr uphdr_vaddr32;
740 			err = mapexec32_brand(nvp, args, &ehdr32,
741 			    &uphdr_vaddr32, &voffset, exec_file, &interp,
742 			    NULL, NULL, NULL, &lddata);
743 			Ehdr32to64(&ehdr32, &ehdr);
744 			if (uphdr_vaddr32 == (Elf32_Addr)-1)
745 				uphdr_vaddr = (Addr)-1;
746 			else
747 				uphdr_vaddr = uphdr_vaddr32;
748 #endif /* _LP64 */
749 		}
750 		VN_RELE(nvp);
751 		if (err != 0)
752 			return (err);
753 
754 		/*
755 		 * Now that we know the base address of the brand's linker,
756 		 * place it in the aux vector.
757 		 */
758 		sedp->sed_base = voffset;
759 		sedp->sed_ldentry = voffset + ehdr.e_entry;
760 		sedp->sed_lddata = voffset + lddata;
761 	} else {
762 		/*
763 		 * This program has no interpreter. The brand library will
764 		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
765 		 * so in this case, put the entry point of the main executable
766 		 * there.
767 		 */
768 		if (ehdr.e_type == ET_EXEC) {
769 			/*
770 			 * An executable with no interpreter, this must be a
771 			 * statically linked executable, which means we loaded
772 			 * it at the address specified in the elf header, in
773 			 * which case the e_entry field of the elf header is an
774 			 * absolute address.
775 			 */
776 			sedp->sed_ldentry = ehdr.e_entry;
777 			sedp->sed_entry = ehdr.e_entry;
778 			sedp->sed_lddata = NULL;
779 			sedp->sed_base = NULL;
780 		} else {
781 			/*
782 			 * A shared object with no interpreter, we use the
783 			 * calculated address from above.
784 			 */
785 			sedp->sed_ldentry = sedp->sed_entry;
786 			sedp->sed_entry = NULL;
787 			sedp->sed_phdr = NULL;
788 			sedp->sed_phent = NULL;
789 			sedp->sed_phnum = NULL;
790 			sedp->sed_lddata = NULL;
791 			sedp->sed_base = voffset;
792 
793 			if (ehdr.e_type == ET_DYN) {
794 				/*
795 				 * Delay setting the brkbase until the first
796 				 * call to brk(); see elfexec() for details.
797 				 */
798 				env.ex_bssbase = (caddr_t)0;
799 				env.ex_brkbase = (caddr_t)0;
800 				env.ex_brksize = 0;
801 			}
802 		}
803 	}
804 
805 	env.ex_magic = elfmagic;
806 	env.ex_vp = vp;
807 	setexecenv(&env);
808 
809 	/*
810 	 * It's time to manipulate the process aux vectors.  First
811 	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
812 	 * the AF_SUN_NOPLM flag.
813 	 */
814 	if (args->to_model == DATAMODEL_NATIVE) {
815 		auxv_t		auxflags_auxv;
816 
817 		if (copyin(args->auxp_auxflags, &auxflags_auxv,
818 		    sizeof (auxflags_auxv)) != 0)
819 			return (EFAULT);
820 
821 		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
822 		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
823 		if (copyout(&auxflags_auxv, args->auxp_auxflags,
824 		    sizeof (auxflags_auxv)) != 0)
825 			return (EFAULT);
826 #if defined(_LP64)
827 	} else {
828 		auxv32_t	auxflags_auxv32;
829 
830 		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
831 		    sizeof (auxflags_auxv32)) != 0)
832 			return (EFAULT);
833 
834 		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
835 		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
836 		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
837 		    sizeof (auxflags_auxv32)) != 0)
838 			return (EFAULT);
839 #endif /* _LP64 */
840 	}
841 
842 	/* Second, copy out the brand specific aux vectors. */
843 	if (args->to_model == DATAMODEL_NATIVE) {
844 		auxv_t s10_auxv[] = {
845 		    { AT_SUN_BRAND_AUX1, 0 },
846 		    { AT_SUN_BRAND_AUX2, 0 },
847 		    { AT_SUN_BRAND_AUX3, 0 }
848 		};
849 
850 		ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA);
851 		s10_auxv[0].a_un.a_val = sed.sed_lddata;
852 
853 		if (copyout(&s10_auxv, args->auxp_brand,
854 		    sizeof (s10_auxv)) != 0)
855 			return (EFAULT);
856 #if defined(_LP64)
857 	} else {
858 		auxv32_t s10_auxv32[] = {
859 		    { AT_SUN_BRAND_AUX1, 0 },
860 		    { AT_SUN_BRAND_AUX2, 0 },
861 		    { AT_SUN_BRAND_AUX3, 0 }
862 		};
863 
864 		ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA);
865 		s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
866 		if (copyout(&s10_auxv32, args->auxp_brand,
867 		    sizeof (s10_auxv32)) != 0)
868 			return (EFAULT);
869 #endif /* _LP64 */
870 	}
871 
872 	/*
873 	 * Third, the the /proc aux vectors set up by elfexec() point to brand
874 	 * emulation library and it's linker.  Copy these to the /proc brand
875 	 * specific aux vector, and update the regular /proc aux vectors to
876 	 * point to the executable (and it's linker).  This will enable
877 	 * debuggers to access the executable via the usual /proc or elf notes
878 	 * aux vectors.
879 	 *
880 	 * The brand emulation library's linker will get it's aux vectors off
881 	 * the stack, and then update the stack with the executable's aux
882 	 * vectors before jumping to the executable's linker.
883 	 *
884 	 * Debugging the brand emulation library must be done from
885 	 * the global zone, where the librtld_db module knows how to fetch the
886 	 * brand specific aux vectors to access the brand emulation libraries
887 	 * linker.
888 	 */
889 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
890 		ulong_t val;
891 
892 		switch (up->u_auxv[i].a_type) {
893 		case AT_SUN_BRAND_S10_LDDATA:
894 			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
895 			continue;
896 		case AT_BASE:
897 			val = sedp->sed_base;
898 			break;
899 		case AT_ENTRY:
900 			val = sedp->sed_entry;
901 			break;
902 		case AT_PHDR:
903 			val = sedp->sed_phdr;
904 			break;
905 		case AT_PHENT:
906 			val = sedp->sed_phent;
907 			break;
908 		case AT_PHNUM:
909 			val = sedp->sed_phnum;
910 			break;
911 		case AT_SUN_LDDATA:
912 			val = sedp->sed_lddata;
913 			break;
914 		default:
915 			continue;
916 		}
917 
918 		up->u_auxv[i].a_un.a_val = val;
919 		if (val == NULL) {
920 			/* Hide the entry for static binaries */
921 			up->u_auxv[i].a_type = AT_IGNORE;
922 		}
923 	}
924 
925 	/*
926 	 * The last thing we do here is clear spd->spd_handler.  This is
927 	 * important because if we're already a branded process and if this
928 	 * exec succeeds, there is a window between when the exec() first
929 	 * returns to the userland of the new process and when our brand
930 	 * library get's initialized, during which we don't want system
931 	 * calls to be re-directed to our brand library since it hasn't
932 	 * been initialized yet.
933 	 */
934 	spd->spd_handler = NULL;
935 
936 	return (0);
937 }
938 
939 
940 int
941 _init(void)
942 {
943 	int err;
944 
945 	/*
946 	 * Set up the table indicating which system calls we want to
947 	 * interpose on.  We should probably build this automatically from
948 	 * a list of system calls that is shared with the user-space
949 	 * library.
950 	 */
951 	s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
952 	s10_emulation_table[S10_SYS_forkall] = 1;		/*   2 */
953 	s10_emulation_table[S10_SYS_open] = 1;			/*   5 */
954 	s10_emulation_table[S10_SYS_wait] = 1;			/*   7 */
955 	s10_emulation_table[S10_SYS_creat] = 1;			/*   8 */
956 	s10_emulation_table[S10_SYS_unlink] = 1;		/*  10 */
957 	s10_emulation_table[S10_SYS_exec] = 1;			/*  11 */
958 	s10_emulation_table[S10_SYS_chown] = 1;			/*  16 */
959 	s10_emulation_table[S10_SYS_stat] = 1;			/*  18 */
960 	s10_emulation_table[S10_SYS_umount] = 1;		/*  22 */
961 	s10_emulation_table[S10_SYS_fstat] = 1;			/*  28 */
962 	s10_emulation_table[S10_SYS_utime] = 1;			/*  30 */
963 	s10_emulation_table[S10_SYS_access] = 1;		/*  33 */
964 	s10_emulation_table[S10_SYS_dup] = 1;			/*  41 */
965 	s10_emulation_table[SYS_ioctl] = 1;			/*  54 */
966 	s10_emulation_table[SYS_execve] = 1;			/*  59 */
967 	s10_emulation_table[SYS_acctctl] = 1;			/*  71 */
968 	s10_emulation_table[S10_SYS_issetugid] = 1;		/*  75 */
969 	s10_emulation_table[S10_SYS_fsat] = 1;			/*  76 */
970 	s10_emulation_table[S10_SYS_rmdir] = 1;			/*  79 */
971 	s10_emulation_table[SYS_getdents] = 1;			/*  81 */
972 	s10_emulation_table[S10_SYS_poll] = 1;			/*  87 */
973 	s10_emulation_table[S10_SYS_lstat] = 1;			/*  88 */
974 	s10_emulation_table[S10_SYS_fchown] = 1;		/*  94 */
975 #if defined(__x86)
976 	s10_emulation_table[S10_SYS_xstat] = 1;			/* 123 */
977 	s10_emulation_table[S10_SYS_lxstat] = 1;		/* 124 */
978 	s10_emulation_table[S10_SYS_fxstat] = 1;		/* 125 */
979 	s10_emulation_table[S10_SYS_xmknod] = 1;		/* 126 */
980 #endif
981 	s10_emulation_table[S10_SYS_lchown] = 1;		/* 130 */
982 	s10_emulation_table[S10_SYS_rename] = 1;		/* 134 */
983 	s10_emulation_table[SYS_uname] = 1;			/* 135 */
984 	s10_emulation_table[SYS_systeminfo] = 1;		/* 139 */
985 	s10_emulation_table[S10_SYS_fork1] = 1;			/* 143 */
986 	s10_emulation_table[S10_SYS_lwp_sema_wait] = 1;		/* 147 */
987 	s10_emulation_table[S10_SYS_utimes] = 1;		/* 154 */
988 #if defined(__amd64)
989 	s10_emulation_table[SYS_lwp_create] = 1;		/* 159 */
990 	s10_emulation_table[SYS_lwp_private] = 1;		/* 166 */
991 #endif	/* __amd64 */
992 	s10_emulation_table[S10_SYS_lwp_mutex_lock] = 1;	/* 169 */
993 	s10_emulation_table[SYS_pwrite] = 1;			/* 174 */
994 	s10_emulation_table[SYS_auditsys] = 1;			/* 186 */
995 	s10_emulation_table[SYS_sigqueue] = 1;			/* 190 */
996 	s10_emulation_table[SYS_lwp_mutex_timedlock] = 1;	/* 210 */
997 	s10_emulation_table[SYS_getdents64] = 1;		/* 213 */
998 	s10_emulation_table[S10_SYS_stat64] = 1;		/* 215 */
999 	s10_emulation_table[S10_SYS_lstat64] = 1;		/* 216 */
1000 	s10_emulation_table[S10_SYS_fstat64] = 1;		/* 217 */
1001 	s10_emulation_table[SYS_pwrite64] = 1;			/* 223 */
1002 	s10_emulation_table[S10_SYS_creat64] = 1;		/* 224 */
1003 	s10_emulation_table[S10_SYS_open64] = 1;		/* 225 */
1004 	s10_emulation_table[SYS_zone] = 1;			/* 227 */
1005 	s10_emulation_table[SYS_lwp_mutex_trylock] = 1;		/* 251 */
1006 
1007 	err = mod_install(&modlinkage);
1008 	if (err) {
1009 		cmn_err(CE_WARN, "Couldn't install brand module");
1010 		kmem_free(s10_emulation_table, NSYSCALL);
1011 	}
1012 
1013 	return (err);
1014 }
1015 
1016 int
1017 _info(struct modinfo *modinfop)
1018 {
1019 	return (mod_info(&modlinkage, modinfop));
1020 }
1021 
1022 int
1023 _fini(void)
1024 {
1025 	int err;
1026 
1027 	/*
1028 	 * If there are any zones using this brand, we can't allow it to be
1029 	 * unloaded.
1030 	 */
1031 	if (brand_zone_count(&s10_brand))
1032 		return (EBUSY);
1033 
1034 	kmem_free(s10_emulation_table, NSYSCALL);
1035 	s10_emulation_table = NULL;
1036 
1037 	err = mod_remove(&modlinkage);
1038 	if (err)
1039 		cmn_err(CE_WARN, "Couldn't unload s10 brand module");
1040 
1041 	return (err);
1042 }
1043