1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/errno.h>
28 #include <sys/exec.h>
29 #include <sys/file.h>
30 #include <sys/kmem.h>
31 #include <sys/modctl.h>
32 #include <sys/model.h>
33 #include <sys/proc.h>
34 #include <sys/syscall.h>
35 #include <sys/systm.h>
36 #include <sys/thread.h>
37 #include <sys/cmn_err.h>
38 #include <sys/archsystm.h>
39 #include <sys/pathname.h>
40 #include <sys/sunddi.h>
41 
42 #include <sys/machbrand.h>
43 #include <sys/brand.h>
44 #include "s10_brand.h"
45 
/*
 * Per-system-call interposition table, indexed by system call number.
 * _init() allocates NSYSCALL zeroed bytes for it and stores 1 in the slot of
 * every system call the brand wants to interpose on; _fini() releases it.
 */
char *s10_emulation_table = NULL;

/*
 * Forward declarations for the brand operations implemented in this file.
 * They are needed because the s10_brops table below refers to them before
 * their definitions appear.
 */
void	s10_init_brand_data(zone_t *);
void	s10_free_brand_data(zone_t *);
void	s10_setbrand(proc_t *);
int	s10_getattr(zone_t *, int, void *, size_t *);
int	s10_setattr(zone_t *, int, void *, size_t);
int	s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t,
		uintptr_t, uintptr_t, uintptr_t);
void	s10_copy_procdata(proc_t *, proc_t *);
void	s10_proc_exit(struct proc *, klwp_t *);
void	s10_exec();
int	s10_initlwp(klwp_t *);
void	s10_forklwp(klwp_t *, klwp_t *);
void	s10_freelwp(klwp_t *);
void	s10_lwpexit(klwp_t *);
int	s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int,
	long *, int, caddr_t, cred_t *, int);
64 
/*
 * Brand operations vector for the s10 brand.
 *
 * The initializer is positional, so the order of the entries must match the
 * member order of struct brand_ops (declared outside this file -- presumably
 * in <sys/brand.h>; confirm before reordering).  Every entry except
 * lwp_setrval and S10_NSIG is a function defined in this file.
 */
struct brand_ops s10_brops = {
	s10_init_brand_data,
	s10_free_brand_data,
	s10_brandsys,
	s10_setbrand,
	s10_getattr,
	s10_setattr,
	s10_copy_procdata,
	s10_proc_exit,
	s10_exec,
	lwp_setrval,
	s10_initlwp,
	s10_forklwp,
	s10_freelwp,
	s10_lwpexit,
	s10_elfexec,
	S10_NSIG
};
84 
/*
 * Machine-dependent brand callbacks.  Exactly one definition of s10_mops is
 * compiled, selected by architecture: sparc, amd64, or (otherwise) 32-bit
 * x86.  The initializers are positional and must follow the member order of
 * struct brand_mach_ops for the selected platform (declared outside this
 * file -- presumably in <sys/machbrand.h>; confirm before editing).
 * NOTE(review): a NULL entry appears to mean that no callback is installed
 * for that slot -- confirm against the consumers of brand_mach_ops.
 */
#ifdef	sparc

struct brand_mach_ops s10_mops = {
	s10_brand_syscall_callback,
	s10_brand_syscall32_callback
};

#else	/* sparc */

#ifdef	__amd64

struct brand_mach_ops s10_mops = {
	s10_brand_sysenter_callback,
	NULL,
	s10_brand_int91_callback,
	s10_brand_syscall_callback,
	s10_brand_syscall32_callback,
	NULL
};

#else	/* ! __amd64 */

struct brand_mach_ops s10_mops = {
	s10_brand_sysenter_callback,
	NULL,
	NULL,
	s10_brand_syscall_callback,
	NULL,
	NULL
};
#endif	/* __amd64 */

#endif	/* sparc */
118 
/*
 * The brand descriptor itself: interface version, brand name, and pointers
 * to the generic and machine-dependent operation vectors defined above.
 * Functions throughout this file compare p_brand / zone_brand against the
 * address of this structure to verify they are operating on an s10 object.
 */
struct brand	s10_brand = {
	BRAND_VER_1,
	"solaris10",
	&s10_brops,
	&s10_mops
};

/* Module linkage element describing this module as a brand module. */
static struct modlbrand modlbrand = {
	&mod_brandops,		/* type of module */
	"Solaris 10 Brand",	/* description of module */
	&s10_brand		/* the brand provided by this module */
};

/* Linkage passed to mod_install(), mod_info() and mod_remove(). */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlbrand, NULL
};
135 
136 void
137 s10_setbrand(proc_t *p)
138 {
139 	ASSERT(p->p_brand == &s10_brand);
140 	ASSERT(p->p_brand_data == NULL);
141 
142 	/*
143 	 * We should only be called from exec(), when we know the process
144 	 * is single-threaded.
145 	 */
146 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
147 
148 	p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP);
149 	(void) s10_initlwp(p->p_tlist->t_lwp);
150 }
151 
152 /*ARGSUSED*/
153 int
154 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize)
155 {
156 	ASSERT(zone->zone_brand == &s10_brand);
157 	if (attr == S10_EMUL_BITMAP) {
158 		if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t))
159 			return (EINVAL);
160 		if (copyout(((s10_zone_data_t *)zone->zone_brand_data)->
161 		    emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0)
162 			return (EFAULT);
163 		return (0);
164 	}
165 
166 	return (EINVAL);
167 }
168 
169 int
170 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize)
171 {
172 	ASSERT(zone->zone_brand == &s10_brand);
173 	if (attr == S10_EMUL_BITMAP) {
174 		if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t))
175 			return (EINVAL);
176 		if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)->
177 		    emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0)
178 			return (EFAULT);
179 		return (0);
180 	}
181 
182 	return (EINVAL);
183 }
184 
185 #ifdef	__amd64
186 /*
187  * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's
188  * libc expects %fs to be nonzero.  This causes some committed
189  * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several
190  * libraries, including libdoor.  This function sets the specified LWP's %fs
191  * register to the legacy S10 selector value (LWPFS_SEL).
192  *
193  * The best solution to the aforementioned problem is backporting CRs
194  * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes
195  * would accept zero for %fs.  Backporting the CRs is a requirement for running
196  * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is
197  * nonzero.  Such behavior breaks 64-bit processes because Xen has to fetch the
198  * FS segments' base addresses from the LWPs' GDTs, which are only capable of
199  * 32-bit addressing.
200  */
201 /*ARGSUSED*/
202 static void
203 s10_amd64_correct_fsreg(klwp_t *l)
204 {
205 	if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) {
206 		kpreempt_disable();
207 		l->lwp_pcb.pcb_fs = LWPFS_SEL;
208 		l->lwp_pcb.pcb_rupdate = 1;
209 		lwptot(l)->t_post_sys = 1;	/* Guarantee update_sregs() */
210 		kpreempt_enable();
211 	}
212 }
213 #endif	/* __amd64 */
214 
215 int
216 s10_native()
217 {
218 	struct user	*up = PTOU(curproc);
219 	char		*args_new, *comm_new, *p;
220 	int		len;
221 
222 	len = sizeof (S10_NATIVE_LINKER32 " ") - 1;
223 
224 	/*
225 	 * Make sure that the process' interpreter is the native dynamic linker.
226 	 * Convention dictates that native processes executing within solaris10-
227 	 * branded zones are interpreted by the native dynamic linker (the
228 	 * process and its arguments are specified as arguments to the dynamic
229 	 * linker).  If this convention is violated (i.e.,
230 	 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be
231 	 * native), then do nothing and silently indicate success.
232 	 */
233 	if (strcmp(up->u_comm, S10_LINKER_NAME) != 0)
234 		return (0);
235 	if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0)
236 		len += 3;		/* to account for "/64" in the path */
237 	else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0)
238 		return (0);
239 
240 	args_new = strdup(&up->u_psargs[len]);
241 	if ((p = strchr(args_new, ' ')) != NULL)
242 		*p = '\0';
243 	if ((comm_new = strrchr(args_new, '/')) != NULL)
244 		comm_new = strdup(comm_new + 1);
245 	else
246 		comm_new = strdup(args_new);
247 	if (p != NULL)
248 		*p = ' ';
249 
250 	if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) {
251 		mutex_enter(&curproc->p_lock);
252 		(void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1);
253 		(void) strlcpy(up->u_psargs, args_new, PSARGSZ);
254 		mutex_exit(&curproc->p_lock);
255 	}
256 
257 	strfree(args_new);
258 	strfree(comm_new);
259 	return (0);
260 }
261 
262 /*
263  * Get the address of the user-space system call handler from the user
264  * process and attach it to the proc structure.
265  */
266 /*ARGSUSED*/
267 int
268 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2,
269     uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6)
270 {
271 	s10_proc_data_t	*spd;
272 	s10_brand_reg_t	reg;
273 	proc_t		*p = curproc;
274 	int		err;
275 
276 	*rval = 0;
277 
278 	/*
279 	 * B_EXEC_BRAND is redundant
280 	 * since the kernel assumes a native process doing an exec
281 	 * in a branded zone is going to run a branded processes.
282 	 * hence we don't support this operation.
283 	 */
284 	if (cmd == B_EXEC_BRAND)
285 		return (ENOSYS);
286 
287 	if (cmd == B_S10_NATIVE)
288 		return (s10_native());
289 
290 	/* For all other operations this must be a branded process. */
291 	if (p->p_brand == &native_brand)
292 		return (ENOSYS);
293 
294 	ASSERT(p->p_brand == &s10_brand);
295 	ASSERT(p->p_brand_data != NULL);
296 
297 	spd = (s10_proc_data_t *)p->p_brand_data;
298 
299 	switch (cmd) {
300 	case B_EXEC_NATIVE:
301 		err = exec_common(
302 		    (char *)arg1, (const char **)arg2, (const char **)arg3,
303 		    EBA_NATIVE);
304 		return (err);
305 
306 	case B_REGISTER:
307 		if (p->p_model == DATAMODEL_NATIVE) {
308 			if (copyin((void *)arg1, &reg, sizeof (reg)) != 0)
309 				return (EFAULT);
310 #if defined(_LP64)
311 		} else {
312 			s10_brand_reg32_t reg32;
313 
314 			if (copyin((void *)arg1, &reg32, sizeof (reg32)) != 0)
315 				return (EFAULT);
316 			reg.sbr_version = reg32.sbr_version;
317 			reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler;
318 #endif /* _LP64 */
319 		}
320 
321 		if (reg.sbr_version != S10_VERSION)
322 			return (ENOTSUP);
323 		spd->spd_handler = reg.sbr_handler;
324 		return (0);
325 
326 	case B_ELFDATA:
327 		if (p->p_model == DATAMODEL_NATIVE) {
328 			if (copyout(&spd->spd_elf_data, (void *)arg1,
329 			    sizeof (s10_elf_data_t)) != 0)
330 				return (EFAULT);
331 #if defined(_LP64)
332 		} else {
333 			s10_elf_data32_t sed32;
334 
335 			sed32.sed_phdr = spd->spd_elf_data.sed_phdr;
336 			sed32.sed_phent = spd->spd_elf_data.sed_phent;
337 			sed32.sed_phnum = spd->spd_elf_data.sed_phnum;
338 			sed32.sed_entry = spd->spd_elf_data.sed_entry;
339 			sed32.sed_base = spd->spd_elf_data.sed_base;
340 			sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry;
341 			sed32.sed_lddata = spd->spd_elf_data.sed_lddata;
342 			if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0)
343 				return (EFAULT);
344 #endif /* _LP64 */
345 		}
346 		return (0);
347 
348 	case B_S10_PIDINFO:
349 		/*
350 		 * The s10 brand needs to be able to get the pid of the
351 		 * current process and the pid of the zone's init, and it
352 		 * needs to do this on every process startup.  Early in
353 		 * brand startup, we can't call getpid() because calls to
354 		 * getpid() represent a magical signal to some old-skool
355 		 * debuggers.  By merging all of this into one call, we
356 		 * make this quite a bit cheaper and easier to handle in
357 		 * the brand module.
358 		 */
359 		if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0)
360 			return (EFAULT);
361 		if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2,
362 		    sizeof (pid_t)) != 0)
363 			return (EFAULT);
364 		return (0);
365 
366 	case B_S10_TRUSS_POINT:
367 		/*
368 		 * This subcommand exists so that we can see truss output
369 		 * from interposed system calls that return without first
370 		 * calling any other system call, meaning they would be
371 		 * invisible to truss(1).
372 		 *
373 		 * If the second argument is set non-zero, set errno to that
374 		 * value as well.
375 		 *
376 		 * Arguments are:
377 		 *
378 		 *    arg1: syscall number
379 		 *    arg2: errno
380 		 */
381 		return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2));
382 
383 	case B_S10_ISFDXATTRDIR: {
384 		/*
385 		 * This subcommand enables the userland brand emulation library
386 		 * to determine whether a file descriptor refers to an extended
387 		 * file attributes directory.  There is no standard syscall or
388 		 * libc function that can make such a determination.
389 		 */
390 		file_t *dir_filep;
391 
392 		dir_filep = getf((int)arg1);
393 		if (dir_filep == NULL)
394 			return (EBADF);
395 		ASSERT(dir_filep->f_vnode != NULL);
396 		*rval = IS_XATTRDIR(dir_filep->f_vnode);
397 		releasef((int)arg1);
398 		return (0);
399 	}
400 
401 #ifdef	__amd64
402 	case B_S10_FSREGCORRECTION:
403 		/*
404 		 * This subcommand exists so that the SYS_lwp_private and
405 		 * SYS_lwp_create syscalls can manually set the current thread's
406 		 * %fs register to the legacy S10 selector value for 64-bit x86
407 		 * processes.
408 		 */
409 		s10_amd64_correct_fsreg(ttolwp(curthread));
410 		return (0);
411 #endif	/* __amd64 */
412 	}
413 
414 	return (EINVAL);
415 }
416 
417 /*
418  * Copy the per-process brand data from a parent proc to a child.
419  */
420 void
421 s10_copy_procdata(proc_t *child, proc_t *parent)
422 {
423 	s10_proc_data_t	*spd;
424 
425 	ASSERT(parent->p_brand == &s10_brand);
426 	ASSERT(child->p_brand == &s10_brand);
427 	ASSERT(parent->p_brand_data != NULL);
428 	ASSERT(child->p_brand_data == NULL);
429 
430 	/* Just duplicate all the proc data of the parent for the child */
431 	spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP);
432 	bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t));
433 	child->p_brand_data = spd;
434 }
435 
436 /*ARGSUSED*/
437 void
438 s10_proc_exit(struct proc *p, klwp_t *l)
439 {
440 	ASSERT(p->p_brand == &s10_brand);
441 	ASSERT(p->p_brand_data != NULL);
442 
443 	/*
444 	 * We should only be called from proc_exit(), when we know that
445 	 * process is single-threaded.
446 	 */
447 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
448 
449 	/* upon exit, free our lwp brand data */
450 	(void) s10_freelwp(ttolwp(curthread));
451 
452 	/* upon exit, free our proc brand data */
453 	kmem_free(p->p_brand_data, sizeof (s10_proc_data_t));
454 	p->p_brand_data = NULL;
455 }
456 
457 void
458 s10_exec()
459 {
460 	s10_proc_data_t	*spd = curproc->p_brand_data;
461 
462 	ASSERT(curproc->p_brand == &s10_brand);
463 	ASSERT(curproc->p_brand_data != NULL);
464 	ASSERT(ttolwp(curthread)->lwp_brand != NULL);
465 
466 	/*
467 	 * We should only be called from exec(), when we know the process
468 	 * is single-threaded.
469 	 */
470 	ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw);
471 
472 	/* Upon exec, reset our lwp brand data. */
473 	(void) s10_freelwp(ttolwp(curthread));
474 	(void) s10_initlwp(ttolwp(curthread));
475 
476 	/*
477 	 * Upon exec, reset all the proc brand data, except for the elf
478 	 * data associated with the executable we are exec'ing.
479 	 */
480 	spd->spd_handler = NULL;
481 }
482 
483 /*ARGSUSED*/
484 int
485 s10_initlwp(klwp_t *l)
486 {
487 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
488 	ASSERT(l->lwp_procp->p_brand_data != NULL);
489 	ASSERT(l->lwp_brand == NULL);
490 	l->lwp_brand = (void *)-1;
491 	return (0);
492 }
493 
494 /*ARGSUSED*/
495 void
496 s10_forklwp(klwp_t *p, klwp_t *c)
497 {
498 	ASSERT(p->lwp_procp->p_brand == &s10_brand);
499 	ASSERT(c->lwp_procp->p_brand == &s10_brand);
500 
501 	ASSERT(p->lwp_procp->p_brand_data != NULL);
502 	ASSERT(c->lwp_procp->p_brand_data != NULL);
503 
504 	/* Both LWPs have already had been initialized via s10_initlwp() */
505 	ASSERT(p->lwp_brand != NULL);
506 	ASSERT(c->lwp_brand != NULL);
507 
508 #ifdef	__amd64
509 	/*
510 	 * Only correct the child's %fs register if the parent's %fs register
511 	 * is LWPFS_SEL.  If the parent's %fs register is zero, then the Solaris
512 	 * 10 environment that we're emulating uses a version of libc that
513 	 * works when %fs is zero (i.e., it contains backports of CRs 6467491
514 	 * and 6501650).
515 	 */
516 	if (p->lwp_pcb.pcb_fs == LWPFS_SEL)
517 		s10_amd64_correct_fsreg(c);
518 #endif	/* __amd64 */
519 }
520 
521 /*ARGSUSED*/
522 void
523 s10_freelwp(klwp_t *l)
524 {
525 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
526 	ASSERT(l->lwp_procp->p_brand_data != NULL);
527 	ASSERT(l->lwp_brand != NULL);
528 	l->lwp_brand = NULL;
529 }
530 
531 /*ARGSUSED*/
532 void
533 s10_lwpexit(klwp_t *l)
534 {
535 	ASSERT(l->lwp_procp->p_brand == &s10_brand);
536 	ASSERT(l->lwp_procp->p_brand_data != NULL);
537 	ASSERT(l->lwp_brand != NULL);
538 
539 	/*
540 	 * We should never be called for the last thread in a process.
541 	 * (That case is handled by s10_proc_exit().)  There for this lwp
542 	 * must be exiting from a multi-threaded process.
543 	 */
544 	ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw);
545 
546 	l->lwp_brand = NULL;
547 }
548 
549 void
550 s10_free_brand_data(zone_t *zone)
551 {
552 	kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t));
553 }
554 
555 void
556 s10_init_brand_data(zone_t *zone)
557 {
558 	ASSERT(zone->zone_brand == &s10_brand);
559 	ASSERT(zone->zone_brand_data == NULL);
560 	zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP);
561 }
562 
563 #if defined(_LP64)
564 static void
565 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst)
566 {
567 	bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident));
568 	dst->e_type =		src->e_type;
569 	dst->e_machine =	src->e_machine;
570 	dst->e_version =	src->e_version;
571 	dst->e_entry =		src->e_entry;
572 	dst->e_phoff =		src->e_phoff;
573 	dst->e_shoff =		src->e_shoff;
574 	dst->e_flags =		src->e_flags;
575 	dst->e_ehsize =		src->e_ehsize;
576 	dst->e_phentsize =	src->e_phentsize;
577 	dst->e_phnum =		src->e_phnum;
578 	dst->e_shentsize =	src->e_shentsize;
579 	dst->e_shnum =		src->e_shnum;
580 	dst->e_shstrndx =	src->e_shstrndx;
581 }
582 #endif /* _LP64 */
583 
584 int
585 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap,
586 	int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred,
587 	int brand_action)
588 {
589 	vnode_t		*nvp;
590 	Ehdr		ehdr;
591 	Addr		uphdr_vaddr;
592 	intptr_t	voffset;
593 	int		interp;
594 	int		i, err;
595 	struct execenv	env;
596 	struct user	*up = PTOU(curproc);
597 	s10_proc_data_t	*spd;
598 	s10_elf_data_t	sed, *sedp;
599 	char		*linker;
600 	uintptr_t	lddata; /* lddata of executable's linker */
601 
602 	ASSERT(curproc->p_brand == &s10_brand);
603 	ASSERT(curproc->p_brand_data != NULL);
604 
605 	spd = (s10_proc_data_t *)curproc->p_brand_data;
606 	sedp = &spd->spd_elf_data;
607 
608 	args->brandname = S10_BRANDNAME;
609 
610 	/*
611 	 * We will exec the brand library and then map in the target
612 	 * application and (optionally) the brand's default linker.
613 	 */
614 	if (args->to_model == DATAMODEL_NATIVE) {
615 		args->emulator = S10_LIB;
616 		linker = S10_LINKER;
617 #if defined(_LP64)
618 	} else {
619 		args->emulator = S10_LIB32;
620 		linker = S10_LINKER32;
621 #endif /* _LP64 */
622 	}
623 
624 	if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP,
625 	    &nvp)) != 0) {
626 		uprintf("%s: not found.", args->emulator);
627 		return (err);
628 	}
629 
630 	if (args->to_model == DATAMODEL_NATIVE) {
631 		err = elfexec(nvp, uap, args, idatap, level + 1, execsz,
632 		    setid, exec_file, cred, brand_action);
633 #if defined(_LP64)
634 	} else {
635 		err = elf32exec(nvp, uap, args, idatap, level + 1, execsz,
636 		    setid, exec_file, cred, brand_action);
637 #endif /* _LP64 */
638 	}
639 	VN_RELE(nvp);
640 	if (err != 0)
641 		return (err);
642 
643 	/*
644 	 * The u_auxv vectors are set up by elfexec to point to the brand
645 	 * emulation library and linker.  Save these so they can be copied to
646 	 * the specific brand aux vectors.
647 	 */
648 	bzero(&sed, sizeof (sed));
649 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
650 		switch (up->u_auxv[i].a_type) {
651 		case AT_SUN_LDDATA:
652 			sed.sed_lddata = up->u_auxv[i].a_un.a_val;
653 			break;
654 		case AT_BASE:
655 			sed.sed_base = up->u_auxv[i].a_un.a_val;
656 			break;
657 		case AT_ENTRY:
658 			sed.sed_entry = up->u_auxv[i].a_un.a_val;
659 			break;
660 		case AT_PHDR:
661 			sed.sed_phdr = up->u_auxv[i].a_un.a_val;
662 			break;
663 		case AT_PHENT:
664 			sed.sed_phent = up->u_auxv[i].a_un.a_val;
665 			break;
666 		case AT_PHNUM:
667 			sed.sed_phnum = up->u_auxv[i].a_un.a_val;
668 			break;
669 		default:
670 			break;
671 		}
672 	}
673 	/* Make sure the emulator has an entry point */
674 	ASSERT(sed.sed_entry != NULL);
675 	ASSERT(sed.sed_phdr != NULL);
676 
677 	bzero(&env, sizeof (env));
678 	if (args->to_model == DATAMODEL_NATIVE) {
679 		err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset,
680 		    exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase,
681 		    &env.ex_brksize, NULL);
682 #if defined(_LP64)
683 	} else {
684 		Elf32_Ehdr ehdr32;
685 		Elf32_Addr uphdr_vaddr32;
686 		err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32,
687 		    &voffset, exec_file, &interp, &env.ex_bssbase,
688 		    &env.ex_brkbase, &env.ex_brksize, NULL);
689 		Ehdr32to64(&ehdr32, &ehdr);
690 		if (uphdr_vaddr32 == (Elf32_Addr)-1)
691 			uphdr_vaddr = (Addr)-1;
692 		else
693 			uphdr_vaddr = uphdr_vaddr32;
694 #endif /* _LP64 */
695 	}
696 	if (err != 0)
697 		return (err);
698 
699 	/*
700 	 * Save off the important properties of the executable. The brand
701 	 * library will ask us for this data later, when it is initializing
702 	 * and getting ready to transfer control to the brand application.
703 	 */
704 	if (uphdr_vaddr == (Addr)-1)
705 		sedp->sed_phdr = voffset + ehdr.e_phoff;
706 	else
707 		sedp->sed_phdr = voffset + uphdr_vaddr;
708 	sedp->sed_entry = voffset + ehdr.e_entry;
709 	sedp->sed_phent = ehdr.e_phentsize;
710 	sedp->sed_phnum = ehdr.e_phnum;
711 
712 	if (interp) {
713 		if (ehdr.e_type == ET_DYN) {
714 			/*
715 			 * This is a shared object executable, so we need to
716 			 * pick a reasonable place to put the heap. Just don't
717 			 * use the first page.
718 			 */
719 			env.ex_brkbase = (caddr_t)PAGESIZE;
720 			env.ex_bssbase = (caddr_t)PAGESIZE;
721 		}
722 
723 		/*
724 		 * If the program needs an interpreter (most do), map it in and
725 		 * store relevant information about it in the aux vector, where
726 		 * the brand library can find it.
727 		 */
728 		if ((err = lookupname(linker, UIO_SYSSPACE,
729 		    FOLLOW, NULLVPP, &nvp)) != 0) {
730 			uprintf("%s: not found.", S10_LINKER);
731 			return (err);
732 		}
733 		if (args->to_model == DATAMODEL_NATIVE) {
734 			err = mapexec_brand(nvp, args, &ehdr,
735 			    &uphdr_vaddr, &voffset, exec_file, &interp,
736 			    NULL, NULL, NULL, &lddata);
737 #if defined(_LP64)
738 		} else {
739 			Elf32_Ehdr ehdr32;
740 			Elf32_Addr uphdr_vaddr32;
741 			err = mapexec32_brand(nvp, args, &ehdr32,
742 			    &uphdr_vaddr32, &voffset, exec_file, &interp,
743 			    NULL, NULL, NULL, &lddata);
744 			Ehdr32to64(&ehdr32, &ehdr);
745 			if (uphdr_vaddr32 == (Elf32_Addr)-1)
746 				uphdr_vaddr = (Addr)-1;
747 			else
748 				uphdr_vaddr = uphdr_vaddr32;
749 #endif /* _LP64 */
750 		}
751 		VN_RELE(nvp);
752 		if (err != 0)
753 			return (err);
754 
755 		/*
756 		 * Now that we know the base address of the brand's linker,
757 		 * place it in the aux vector.
758 		 */
759 		sedp->sed_base = voffset;
760 		sedp->sed_ldentry = voffset + ehdr.e_entry;
761 		sedp->sed_lddata = voffset + lddata;
762 	} else {
763 		/*
764 		 * This program has no interpreter. The brand library will
765 		 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector,
766 		 * so in this case, put the entry point of the main executable
767 		 * there.
768 		 */
769 		if (ehdr.e_type == ET_EXEC) {
770 			/*
771 			 * An executable with no interpreter, this must be a
772 			 * statically linked executable, which means we loaded
773 			 * it at the address specified in the elf header, in
774 			 * which case the e_entry field of the elf header is an
775 			 * absolute address.
776 			 */
777 			sedp->sed_ldentry = ehdr.e_entry;
778 			sedp->sed_entry = ehdr.e_entry;
779 			sedp->sed_lddata = NULL;
780 			sedp->sed_base = NULL;
781 		} else {
782 			/*
783 			 * A shared object with no interpreter, we use the
784 			 * calculated address from above.
785 			 */
786 			sedp->sed_ldentry = sedp->sed_entry;
787 			sedp->sed_entry = NULL;
788 			sedp->sed_phdr = NULL;
789 			sedp->sed_phent = NULL;
790 			sedp->sed_phnum = NULL;
791 			sedp->sed_lddata = NULL;
792 			sedp->sed_base = voffset;
793 
794 			if (ehdr.e_type == ET_DYN) {
795 				/*
796 				 * Delay setting the brkbase until the first
797 				 * call to brk(); see elfexec() for details.
798 				 */
799 				env.ex_bssbase = (caddr_t)0;
800 				env.ex_brkbase = (caddr_t)0;
801 				env.ex_brksize = 0;
802 			}
803 		}
804 	}
805 
806 	env.ex_magic = elfmagic;
807 	env.ex_vp = vp;
808 	setexecenv(&env);
809 
810 	/*
811 	 * It's time to manipulate the process aux vectors.  First
812 	 * we need to update the AT_SUN_AUXFLAGS aux vector to set
813 	 * the AF_SUN_NOPLM flag.
814 	 */
815 	if (args->to_model == DATAMODEL_NATIVE) {
816 		auxv_t		auxflags_auxv;
817 
818 		if (copyin(args->auxp_auxflags, &auxflags_auxv,
819 		    sizeof (auxflags_auxv)) != 0)
820 			return (EFAULT);
821 
822 		ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS);
823 		auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM;
824 		if (copyout(&auxflags_auxv, args->auxp_auxflags,
825 		    sizeof (auxflags_auxv)) != 0)
826 			return (EFAULT);
827 #if defined(_LP64)
828 	} else {
829 		auxv32_t	auxflags_auxv32;
830 
831 		if (copyin(args->auxp_auxflags, &auxflags_auxv32,
832 		    sizeof (auxflags_auxv32)) != 0)
833 			return (EFAULT);
834 
835 		ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS);
836 		auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM;
837 		if (copyout(&auxflags_auxv32, args->auxp_auxflags,
838 		    sizeof (auxflags_auxv32)) != 0)
839 			return (EFAULT);
840 #endif /* _LP64 */
841 	}
842 
843 	/* Second, copy out the brand specific aux vectors. */
844 	if (args->to_model == DATAMODEL_NATIVE) {
845 		auxv_t s10_auxv[] = {
846 		    { AT_SUN_BRAND_AUX1, 0 },
847 		    { AT_SUN_BRAND_AUX2, 0 },
848 		    { AT_SUN_BRAND_AUX3, 0 }
849 		};
850 
851 		ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA);
852 		s10_auxv[0].a_un.a_val = sed.sed_lddata;
853 
854 		if (copyout(&s10_auxv, args->auxp_brand,
855 		    sizeof (s10_auxv)) != 0)
856 			return (EFAULT);
857 #if defined(_LP64)
858 	} else {
859 		auxv32_t s10_auxv32[] = {
860 		    { AT_SUN_BRAND_AUX1, 0 },
861 		    { AT_SUN_BRAND_AUX2, 0 },
862 		    { AT_SUN_BRAND_AUX3, 0 }
863 		};
864 
865 		ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA);
866 		s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata;
867 		if (copyout(&s10_auxv32, args->auxp_brand,
868 		    sizeof (s10_auxv32)) != 0)
869 			return (EFAULT);
870 #endif /* _LP64 */
871 	}
872 
873 	/*
874 	 * Third, the the /proc aux vectors set up by elfexec() point to brand
875 	 * emulation library and it's linker.  Copy these to the /proc brand
876 	 * specific aux vector, and update the regular /proc aux vectors to
877 	 * point to the executable (and it's linker).  This will enable
878 	 * debuggers to access the executable via the usual /proc or elf notes
879 	 * aux vectors.
880 	 *
881 	 * The brand emulation library's linker will get it's aux vectors off
882 	 * the stack, and then update the stack with the executable's aux
883 	 * vectors before jumping to the executable's linker.
884 	 *
885 	 * Debugging the brand emulation library must be done from
886 	 * the global zone, where the librtld_db module knows how to fetch the
887 	 * brand specific aux vectors to access the brand emulation libraries
888 	 * linker.
889 	 */
890 	for (i = 0; i < __KERN_NAUXV_IMPL; i++) {
891 		ulong_t val;
892 
893 		switch (up->u_auxv[i].a_type) {
894 		case AT_SUN_BRAND_S10_LDDATA:
895 			up->u_auxv[i].a_un.a_val = sed.sed_lddata;
896 			continue;
897 		case AT_BASE:
898 			val = sedp->sed_base;
899 			break;
900 		case AT_ENTRY:
901 			val = sedp->sed_entry;
902 			break;
903 		case AT_PHDR:
904 			val = sedp->sed_phdr;
905 			break;
906 		case AT_PHENT:
907 			val = sedp->sed_phent;
908 			break;
909 		case AT_PHNUM:
910 			val = sedp->sed_phnum;
911 			break;
912 		case AT_SUN_LDDATA:
913 			val = sedp->sed_lddata;
914 			break;
915 		default:
916 			continue;
917 		}
918 
919 		up->u_auxv[i].a_un.a_val = val;
920 		if (val == NULL) {
921 			/* Hide the entry for static binaries */
922 			up->u_auxv[i].a_type = AT_IGNORE;
923 		}
924 	}
925 
926 	/*
927 	 * The last thing we do here is clear spd->spd_handler.  This is
928 	 * important because if we're already a branded process and if this
929 	 * exec succeeds, there is a window between when the exec() first
930 	 * returns to the userland of the new process and when our brand
931 	 * library get's initialized, during which we don't want system
932 	 * calls to be re-directed to our brand library since it hasn't
933 	 * been initialized yet.
934 	 */
935 	spd->spd_handler = NULL;
936 
937 	return (0);
938 }
939 
940 
941 int
942 _init(void)
943 {
944 	int err;
945 
946 	/*
947 	 * Set up the table indicating which system calls we want to
948 	 * interpose on.  We should probably build this automatically from
949 	 * a list of system calls that is shared with the user-space
950 	 * library.
951 	 */
952 	s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP);
953 	s10_emulation_table[S10_SYS_forkall] = 1;		/*   2 */
954 	s10_emulation_table[S10_SYS_open] = 1;			/*   5 */
955 	s10_emulation_table[S10_SYS_wait] = 1;			/*   7 */
956 	s10_emulation_table[S10_SYS_creat] = 1;			/*   8 */
957 	s10_emulation_table[S10_SYS_unlink] = 1;		/*  10 */
958 	s10_emulation_table[S10_SYS_exec] = 1;			/*  11 */
959 	s10_emulation_table[S10_SYS_chown] = 1;			/*  16 */
960 	s10_emulation_table[S10_SYS_stat] = 1;			/*  18 */
961 	s10_emulation_table[S10_SYS_umount] = 1;		/*  22 */
962 	s10_emulation_table[S10_SYS_fstat] = 1;			/*  28 */
963 	s10_emulation_table[S10_SYS_utime] = 1;			/*  30 */
964 	s10_emulation_table[S10_SYS_access] = 1;		/*  33 */
965 	s10_emulation_table[SYS_kill] = 1;			/*  37 */
966 	s10_emulation_table[S10_SYS_dup] = 1;			/*  41 */
967 	s10_emulation_table[SYS_ioctl] = 1;			/*  54 */
968 	s10_emulation_table[SYS_execve] = 1;			/*  59 */
969 	s10_emulation_table[SYS_acctctl] = 1;			/*  71 */
970 	s10_emulation_table[S10_SYS_issetugid] = 1;		/*  75 */
971 	s10_emulation_table[S10_SYS_fsat] = 1;			/*  76 */
972 	s10_emulation_table[S10_SYS_rmdir] = 1;			/*  79 */
973 	s10_emulation_table[SYS_getdents] = 1;			/*  81 */
974 	s10_emulation_table[S10_SYS_poll] = 1;			/*  87 */
975 	s10_emulation_table[S10_SYS_lstat] = 1;			/*  88 */
976 	s10_emulation_table[S10_SYS_fchown] = 1;		/*  94 */
977 	s10_emulation_table[SYS_sigprocmask] = 1;		/*  95 */
978 	s10_emulation_table[SYS_sigsuspend] = 1;		/*  96 */
979 	s10_emulation_table[SYS_sigaction] = 1;			/*  98 */
980 	s10_emulation_table[SYS_sigpending] = 1;		/*  99 */
981 	s10_emulation_table[SYS_context] = 1;			/* 100 */
982 	s10_emulation_table[SYS_waitid] = 1;			/* 107 */
983 	s10_emulation_table[SYS_sigsendsys] = 1;		/* 108 */
984 #if defined(__x86)
985 	s10_emulation_table[S10_SYS_xstat] = 1;			/* 123 */
986 	s10_emulation_table[S10_SYS_lxstat] = 1;		/* 124 */
987 	s10_emulation_table[S10_SYS_fxstat] = 1;		/* 125 */
988 	s10_emulation_table[S10_SYS_xmknod] = 1;		/* 126 */
989 #endif
990 	s10_emulation_table[S10_SYS_lchown] = 1;		/* 130 */
991 	s10_emulation_table[S10_SYS_rename] = 1;		/* 134 */
992 	s10_emulation_table[SYS_uname] = 1;			/* 135 */
993 	s10_emulation_table[SYS_sysconfig] = 1;			/* 137 */
994 	s10_emulation_table[SYS_systeminfo] = 1;		/* 139 */
995 	s10_emulation_table[S10_SYS_fork1] = 1;			/* 143 */
996 	s10_emulation_table[SYS_sigtimedwait] = 1;		/* 144 */
997 	s10_emulation_table[S10_SYS_lwp_sema_wait] = 1;		/* 147 */
998 	s10_emulation_table[S10_SYS_utimes] = 1;		/* 154 */
999 	s10_emulation_table[SYS_lwp_create] = 1;		/* 159 */
1000 	s10_emulation_table[SYS_lwp_kill] = 1;			/* 163 */
1001 	s10_emulation_table[SYS_lwp_sigmask] = 1;		/* 165 */
1002 #if defined(__amd64)
1003 	s10_emulation_table[SYS_lwp_private] = 1;		/* 166 */
1004 #endif	/* __amd64 */
1005 	s10_emulation_table[S10_SYS_lwp_mutex_lock] = 1;	/* 169 */
1006 	s10_emulation_table[SYS_pwrite] = 1;			/* 174 */
1007 	s10_emulation_table[SYS_auditsys] = 1;			/* 186 */
1008 	s10_emulation_table[SYS_sigqueue] = 1;			/* 190 */
1009 	s10_emulation_table[SYS_signotify] = 1;			/* 205 */
1010 	s10_emulation_table[SYS_lwp_mutex_timedlock] = 1;	/* 210 */
1011 	s10_emulation_table[SYS_getdents64] = 1;		/* 213 */
1012 	s10_emulation_table[S10_SYS_stat64] = 1;		/* 215 */
1013 	s10_emulation_table[S10_SYS_lstat64] = 1;		/* 216 */
1014 	s10_emulation_table[S10_SYS_fstat64] = 1;		/* 217 */
1015 	s10_emulation_table[SYS_pwrite64] = 1;			/* 223 */
1016 	s10_emulation_table[S10_SYS_creat64] = 1;		/* 224 */
1017 	s10_emulation_table[S10_SYS_open64] = 1;		/* 225 */
1018 	s10_emulation_table[SYS_zone] = 1;			/* 227 */
1019 	s10_emulation_table[SYS_lwp_mutex_trylock] = 1;		/* 251 */
1020 
1021 	err = mod_install(&modlinkage);
1022 	if (err) {
1023 		cmn_err(CE_WARN, "Couldn't install brand module");
1024 		kmem_free(s10_emulation_table, NSYSCALL);
1025 	}
1026 
1027 	return (err);
1028 }
1029 
1030 int
1031 _info(struct modinfo *modinfop)
1032 {
1033 	return (mod_info(&modlinkage, modinfop));
1034 }
1035 
1036 int
1037 _fini(void)
1038 {
1039 	int err;
1040 
1041 	/*
1042 	 * If there are any zones using this brand, we can't allow it to be
1043 	 * unloaded.
1044 	 */
1045 	if (brand_zone_count(&s10_brand))
1046 		return (EBUSY);
1047 
1048 	kmem_free(s10_emulation_table, NSYSCALL);
1049 	s10_emulation_table = NULL;
1050 
1051 	err = mod_remove(&modlinkage);
1052 	if (err)
1053 		cmn_err(CE_WARN, "Couldn't unload s10 brand module");
1054 
1055 	return (err);
1056 }
1057