1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/errno.h> 28 #include <sys/exec.h> 29 #include <sys/file.h> 30 #include <sys/kmem.h> 31 #include <sys/modctl.h> 32 #include <sys/model.h> 33 #include <sys/proc.h> 34 #include <sys/syscall.h> 35 #include <sys/systm.h> 36 #include <sys/thread.h> 37 #include <sys/cmn_err.h> 38 #include <sys/archsystm.h> 39 #include <sys/pathname.h> 40 #include <sys/sunddi.h> 41 42 #include <sys/machbrand.h> 43 #include <sys/brand.h> 44 #include "s10_brand.h" 45 46 char *s10_emulation_table = NULL; 47 48 void s10_init_brand_data(zone_t *); 49 void s10_free_brand_data(zone_t *); 50 void s10_setbrand(proc_t *); 51 int s10_getattr(zone_t *, int, void *, size_t *); 52 int s10_setattr(zone_t *, int, void *, size_t); 53 int s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, 54 uintptr_t, uintptr_t, uintptr_t); 55 void s10_copy_procdata(proc_t *, proc_t *); 56 void s10_proc_exit(struct proc *, klwp_t *); 57 void s10_exec(); 58 int s10_initlwp(klwp_t *); 59 void s10_forklwp(klwp_t *, klwp_t *); 60 void s10_freelwp(klwp_t *); 61 void s10_lwpexit(klwp_t *); 62 int s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, 63 long *, int, caddr_t, cred_t *, int); 64 65 /* s10 brand */ 66 struct brand_ops s10_brops = { 67 s10_init_brand_data, 68 s10_free_brand_data, 69 s10_brandsys, 70 s10_setbrand, 71 s10_getattr, 72 s10_setattr, 73 s10_copy_procdata, 74 s10_proc_exit, 75 s10_exec, 76 lwp_setrval, 77 s10_initlwp, 78 s10_forklwp, 79 s10_freelwp, 80 s10_lwpexit, 81 s10_elfexec 82 }; 83 84 #ifdef sparc 85 86 struct brand_mach_ops s10_mops = { 87 s10_brand_syscall_callback, 88 s10_brand_syscall32_callback 89 }; 90 91 #else /* sparc */ 92 93 #ifdef __amd64 94 95 struct brand_mach_ops s10_mops = { 96 s10_brand_sysenter_callback, 97 NULL, 98 s10_brand_int91_callback, 99 s10_brand_syscall_callback, 100 s10_brand_syscall32_callback, 101 NULL 102 }; 103 104 #else /* ! __amd64 */ 105 106 struct brand_mach_ops s10_mops = { 107 s10_brand_sysenter_callback, 108 NULL, 109 NULL, 110 s10_brand_syscall_callback, 111 NULL, 112 NULL 113 }; 114 #endif /* __amd64 */ 115 116 #endif /* _sparc */ 117 118 struct brand s10_brand = { 119 BRAND_VER_1, 120 "solaris10", 121 &s10_brops, 122 &s10_mops 123 }; 124 125 static struct modlbrand modlbrand = { 126 &mod_brandops, /* type of module */ 127 "Solaris 10 Brand", /* description of module */ 128 &s10_brand /* driver ops */ 129 }; 130 131 static struct modlinkage modlinkage = { 132 MODREV_1, (void *)&modlbrand, NULL 133 }; 134 135 void 136 s10_setbrand(proc_t *p) 137 { 138 ASSERT(p->p_brand == &s10_brand); 139 ASSERT(p->p_brand_data == NULL); 140 141 /* 142 * We should only be called from exec(), when we know the process 143 * is single-threaded. 144 */ 145 ASSERT(p->p_tlist == p->p_tlist->t_forw); 146 147 p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP); 148 (void) s10_initlwp(p->p_tlist->t_lwp); 149 } 150 151 /*ARGSUSED*/ 152 int 153 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) 154 { 155 ASSERT(zone->zone_brand == &s10_brand); 156 if (attr == S10_EMUL_BITMAP) { 157 if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t)) 158 return (EINVAL); 159 if (copyout(((s10_zone_data_t *)zone->zone_brand_data)-> 160 emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0) 161 return (EFAULT); 162 return (0); 163 } 164 165 return (EINVAL); 166 } 167 168 int 169 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) 170 { 171 ASSERT(zone->zone_brand == &s10_brand); 172 if (attr == S10_EMUL_BITMAP) { 173 if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t)) 174 return (EINVAL); 175 if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)-> 176 emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0) 177 return (EFAULT); 178 return (0); 179 } 180 181 return (EINVAL); 182 } 183 184 #ifdef __amd64 185 /* 186 * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's 187 * libc expects %fs to be nonzero. This causes some committed 188 * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several 189 * libraries, including libdoor. This function sets the specified LWP's %fs 190 * register to the legacy S10 selector value (LWPFS_SEL). 191 * 192 * The best solution to the aforementioned problem is backporting CRs 193 * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes 194 * would accept zero for %fs. Backporting the CRs is a requirement for running 195 * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is 196 * nonzero. Such behavior breaks 64-bit processes because Xen has to fetch the 197 * FS segments' base addresses from the LWPs' GDTs, which are only capable of 198 * 32-bit addressing. 199 */ 200 /*ARGSUSED*/ 201 static void 202 s10_amd64_correct_fsreg(klwp_t *l) 203 { 204 if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) { 205 kpreempt_disable(); 206 l->lwp_pcb.pcb_fs = LWPFS_SEL; 207 l->lwp_pcb.pcb_rupdate = 1; 208 lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */ 209 kpreempt_enable(); 210 } 211 } 212 #endif /* __amd64 */ 213 214 int 215 s10_native() 216 { 217 struct user *up = PTOU(curproc); 218 char *args_new, *comm_new, *p; 219 int len; 220 221 len = sizeof (S10_NATIVE_LINKER32 " ") - 1; 222 223 /* 224 * Make sure that the process' interpreter is the native dynamic linker. 225 * Convention dictates that native processes executing within solaris10- 226 * branded zones are interpreted by the native dynamic linker (the 227 * process and its arguments are specified as arguments to the dynamic 228 * linker). If this convention is violated (i.e., 229 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be 230 * native), then do nothing and silently indicate success. 231 */ 232 if (strcmp(up->u_comm, S10_LINKER_NAME) != 0) 233 return (0); 234 if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0) 235 len += 3; /* to account for "/64" in the path */ 236 else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0) 237 return (0); 238 239 args_new = strdup(&up->u_psargs[len]); 240 if ((p = strchr(args_new, ' ')) != NULL) 241 *p = '\0'; 242 if ((comm_new = strrchr(args_new, '/')) != NULL) 243 comm_new = strdup(comm_new + 1); 244 else 245 comm_new = strdup(args_new); 246 if (p != NULL) 247 *p = ' '; 248 249 if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) { 250 mutex_enter(&curproc->p_lock); 251 (void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1); 252 (void) strlcpy(up->u_psargs, args_new, PSARGSZ); 253 mutex_exit(&curproc->p_lock); 254 } 255 256 strfree(args_new); 257 strfree(comm_new); 258 return (0); 259 } 260 261 /* 262 * Get the address of the user-space system call handler from the user 263 * process and attach it to the proc structure. 264 */ 265 /*ARGSUSED*/ 266 int 267 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, 268 uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) 269 { 270 s10_proc_data_t *spd; 271 s10_brand_reg_t reg; 272 proc_t *p = curproc; 273 int err; 274 275 *rval = 0; 276 277 /* 278 * B_EXEC_BRAND is redundant 279 * since the kernel assumes a native process doing an exec 280 * in a branded zone is going to run a branded processes. 281 * hence we don't support this operation. 282 */ 283 if (cmd == B_EXEC_BRAND) 284 return (ENOSYS); 285 286 if (cmd == B_S10_NATIVE) 287 return (s10_native()); 288 289 /* For all other operations this must be a branded process. */ 290 if (p->p_brand == &native_brand) 291 return (ENOSYS); 292 293 ASSERT(p->p_brand == &s10_brand); 294 ASSERT(p->p_brand_data != NULL); 295 296 spd = (s10_proc_data_t *)p->p_brand_data; 297 298 switch (cmd) { 299 case B_EXEC_NATIVE: 300 err = exec_common( 301 (char *)arg1, (const char **)arg2, (const char **)arg3, 302 EBA_NATIVE); 303 return (err); 304 305 case B_REGISTER: 306 if (p->p_model == DATAMODEL_NATIVE) { 307 if (copyin((void *)arg1, ®, sizeof (reg)) != 0) 308 return (EFAULT); 309 #if defined(_LP64) 310 } else { 311 s10_brand_reg32_t reg32; 312 313 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) 314 return (EFAULT); 315 reg.sbr_version = reg32.sbr_version; 316 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler; 317 #endif /* _LP64 */ 318 } 319 320 if (reg.sbr_version != S10_VERSION) 321 return (ENOTSUP); 322 spd->spd_handler = reg.sbr_handler; 323 return (0); 324 325 case B_ELFDATA: 326 if (p->p_model == DATAMODEL_NATIVE) { 327 if (copyout(&spd->spd_elf_data, (void *)arg1, 328 sizeof (s10_elf_data_t)) != 0) 329 return (EFAULT); 330 #if defined(_LP64) 331 } else { 332 s10_elf_data32_t sed32; 333 334 sed32.sed_phdr = spd->spd_elf_data.sed_phdr; 335 sed32.sed_phent = spd->spd_elf_data.sed_phent; 336 sed32.sed_phnum = spd->spd_elf_data.sed_phnum; 337 sed32.sed_entry = spd->spd_elf_data.sed_entry; 338 sed32.sed_base = spd->spd_elf_data.sed_base; 339 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry; 340 sed32.sed_lddata = spd->spd_elf_data.sed_lddata; 341 if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0) 342 return (EFAULT); 343 #endif /* _LP64 */ 344 } 345 return (0); 346 347 case B_S10_PIDINFO: 348 /* 349 * The s10 brand needs to be able to get the pid of the 350 * current process and the pid of the zone's init, and it 351 * needs to do this on every process startup. Early in 352 * brand startup, we can't call getpid() because calls to 353 * getpid() represent a magical signal to some old-skool 354 * debuggers. By merging all of this into one call, we 355 * make this quite a bit cheaper and easier to handle in 356 * the brand module. 357 */ 358 if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0) 359 return (EFAULT); 360 if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2, 361 sizeof (pid_t)) != 0) 362 return (EFAULT); 363 return (0); 364 365 case B_S10_TRUSS_POINT: 366 /* 367 * This subcommand exists so that we can see truss output 368 * from interposed system calls that return without first 369 * calling any other system call, meaning they would be 370 * invisible to truss(1). 371 * 372 * If the second argument is set non-zero, set errno to that 373 * value as well. 374 * 375 * Arguments are: 376 * 377 * arg1: syscall number 378 * arg2: errno 379 */ 380 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2)); 381 382 case B_S10_ISFDXATTRDIR: { 383 /* 384 * This subcommand enables the userland brand emulation library 385 * to determine whether a file descriptor refers to an extended 386 * file attributes directory. There is no standard syscall or 387 * libc function that can make such a determination. 388 */ 389 file_t *dir_filep; 390 391 dir_filep = getf((int)arg1); 392 if (dir_filep == NULL) 393 return (EBADF); 394 ASSERT(dir_filep->f_vnode != NULL); 395 *rval = IS_XATTRDIR(dir_filep->f_vnode); 396 releasef((int)arg1); 397 return (0); 398 } 399 400 #ifdef __amd64 401 case B_S10_FSREGCORRECTION: 402 /* 403 * This subcommand exists so that the SYS_lwp_private and 404 * SYS_lwp_create syscalls can manually set the current thread's 405 * %fs register to the legacy S10 selector value for 64-bit x86 406 * processes. 407 */ 408 s10_amd64_correct_fsreg(ttolwp(curthread)); 409 return (0); 410 #endif /* __amd64 */ 411 } 412 413 return (EINVAL); 414 } 415 416 /* 417 * Copy the per-process brand data from a parent proc to a child. 418 */ 419 void 420 s10_copy_procdata(proc_t *child, proc_t *parent) 421 { 422 s10_proc_data_t *spd; 423 424 ASSERT(parent->p_brand == &s10_brand); 425 ASSERT(child->p_brand == &s10_brand); 426 ASSERT(parent->p_brand_data != NULL); 427 ASSERT(child->p_brand_data == NULL); 428 429 /* Just duplicate all the proc data of the parent for the child */ 430 spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP); 431 bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t)); 432 child->p_brand_data = spd; 433 } 434 435 /*ARGSUSED*/ 436 void 437 s10_proc_exit(struct proc *p, klwp_t *l) 438 { 439 ASSERT(p->p_brand == &s10_brand); 440 ASSERT(p->p_brand_data != NULL); 441 442 /* 443 * We should only be called from proc_exit(), when we know that 444 * process is single-threaded. 445 */ 446 ASSERT(p->p_tlist == p->p_tlist->t_forw); 447 448 /* upon exit, free our lwp brand data */ 449 (void) s10_freelwp(ttolwp(curthread)); 450 451 /* upon exit, free our proc brand data */ 452 kmem_free(p->p_brand_data, sizeof (s10_proc_data_t)); 453 p->p_brand_data = NULL; 454 } 455 456 void 457 s10_exec() 458 { 459 s10_proc_data_t *spd = curproc->p_brand_data; 460 461 ASSERT(curproc->p_brand == &s10_brand); 462 ASSERT(curproc->p_brand_data != NULL); 463 ASSERT(ttolwp(curthread)->lwp_brand != NULL); 464 465 /* 466 * We should only be called from exec(), when we know the process 467 * is single-threaded. 468 */ 469 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw); 470 471 /* Upon exec, reset our lwp brand data. */ 472 (void) s10_freelwp(ttolwp(curthread)); 473 (void) s10_initlwp(ttolwp(curthread)); 474 475 /* 476 * Upon exec, reset all the proc brand data, except for the elf 477 * data associated with the executable we are exec'ing. 478 */ 479 spd->spd_handler = NULL; 480 } 481 482 /*ARGSUSED*/ 483 int 484 s10_initlwp(klwp_t *l) 485 { 486 ASSERT(l->lwp_procp->p_brand == &s10_brand); 487 ASSERT(l->lwp_procp->p_brand_data != NULL); 488 ASSERT(l->lwp_brand == NULL); 489 l->lwp_brand = (void *)-1; 490 return (0); 491 } 492 493 /*ARGSUSED*/ 494 void 495 s10_forklwp(klwp_t *p, klwp_t *c) 496 { 497 ASSERT(p->lwp_procp->p_brand == &s10_brand); 498 ASSERT(c->lwp_procp->p_brand == &s10_brand); 499 500 ASSERT(p->lwp_procp->p_brand_data != NULL); 501 ASSERT(c->lwp_procp->p_brand_data != NULL); 502 503 /* Both LWPs have already had been initialized via s10_initlwp() */ 504 ASSERT(p->lwp_brand != NULL); 505 ASSERT(c->lwp_brand != NULL); 506 507 #ifdef __amd64 508 /* 509 * Only correct the child's %fs register if the parent's %fs register 510 * is LWPFS_SEL. If the parent's %fs register is zero, then the Solaris 511 * 10 environment that we're emulating uses a version of libc that 512 * works when %fs is zero (i.e., it contains backports of CRs 6467491 513 * and 6501650). 514 */ 515 if (p->lwp_pcb.pcb_fs == LWPFS_SEL) 516 s10_amd64_correct_fsreg(c); 517 #endif /* __amd64 */ 518 } 519 520 /*ARGSUSED*/ 521 void 522 s10_freelwp(klwp_t *l) 523 { 524 ASSERT(l->lwp_procp->p_brand == &s10_brand); 525 ASSERT(l->lwp_procp->p_brand_data != NULL); 526 ASSERT(l->lwp_brand != NULL); 527 l->lwp_brand = NULL; 528 } 529 530 /*ARGSUSED*/ 531 void 532 s10_lwpexit(klwp_t *l) 533 { 534 ASSERT(l->lwp_procp->p_brand == &s10_brand); 535 ASSERT(l->lwp_procp->p_brand_data != NULL); 536 ASSERT(l->lwp_brand != NULL); 537 538 /* 539 * We should never be called for the last thread in a process. 540 * (That case is handled by s10_proc_exit().) There for this lwp 541 * must be exiting from a multi-threaded process. 542 */ 543 ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw); 544 545 l->lwp_brand = NULL; 546 } 547 548 void 549 s10_free_brand_data(zone_t *zone) 550 { 551 kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t)); 552 } 553 554 void 555 s10_init_brand_data(zone_t *zone) 556 { 557 ASSERT(zone->zone_brand == &s10_brand); 558 ASSERT(zone->zone_brand_data == NULL); 559 zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP); 560 } 561 562 #if defined(_LP64) 563 static void 564 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst) 565 { 566 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident)); 567 dst->e_type = src->e_type; 568 dst->e_machine = src->e_machine; 569 dst->e_version = src->e_version; 570 dst->e_entry = src->e_entry; 571 dst->e_phoff = src->e_phoff; 572 dst->e_shoff = src->e_shoff; 573 dst->e_flags = src->e_flags; 574 dst->e_ehsize = src->e_ehsize; 575 dst->e_phentsize = src->e_phentsize; 576 dst->e_phnum = src->e_phnum; 577 dst->e_shentsize = src->e_shentsize; 578 dst->e_shnum = src->e_shnum; 579 dst->e_shstrndx = src->e_shstrndx; 580 } 581 #endif /* _LP64 */ 582 583 int 584 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, 585 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, 586 int brand_action) 587 { 588 vnode_t *nvp; 589 Ehdr ehdr; 590 Addr uphdr_vaddr; 591 intptr_t voffset; 592 int interp; 593 int i, err; 594 struct execenv env; 595 struct user *up = PTOU(curproc); 596 s10_proc_data_t *spd; 597 s10_elf_data_t sed, *sedp; 598 char *linker; 599 uintptr_t lddata; /* lddata of executable's linker */ 600 601 ASSERT(curproc->p_brand == &s10_brand); 602 ASSERT(curproc->p_brand_data != NULL); 603 604 spd = (s10_proc_data_t *)curproc->p_brand_data; 605 sedp = &spd->spd_elf_data; 606 607 args->brandname = S10_BRANDNAME; 608 609 /* 610 * We will exec the brand library and then map in the target 611 * application and (optionally) the brand's default linker. 612 */ 613 if (args->to_model == DATAMODEL_NATIVE) { 614 args->emulator = S10_LIB; 615 linker = S10_LINKER; 616 #if defined(_LP64) 617 } else { 618 args->emulator = S10_LIB32; 619 linker = S10_LINKER32; 620 #endif /* _LP64 */ 621 } 622 623 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP, 624 &nvp)) != 0) { 625 uprintf("%s: not found.", args->emulator); 626 return (err); 627 } 628 629 if (args->to_model == DATAMODEL_NATIVE) { 630 err = elfexec(nvp, uap, args, idatap, level + 1, execsz, 631 setid, exec_file, cred, brand_action); 632 #if defined(_LP64) 633 } else { 634 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz, 635 setid, exec_file, cred, brand_action); 636 #endif /* _LP64 */ 637 } 638 VN_RELE(nvp); 639 if (err != 0) 640 return (err); 641 642 /* 643 * The u_auxv vectors are set up by elfexec to point to the brand 644 * emulation library and linker. Save these so they can be copied to 645 * the specific brand aux vectors. 646 */ 647 bzero(&sed, sizeof (sed)); 648 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 649 switch (up->u_auxv[i].a_type) { 650 case AT_SUN_LDDATA: 651 sed.sed_lddata = up->u_auxv[i].a_un.a_val; 652 break; 653 case AT_BASE: 654 sed.sed_base = up->u_auxv[i].a_un.a_val; 655 break; 656 case AT_ENTRY: 657 sed.sed_entry = up->u_auxv[i].a_un.a_val; 658 break; 659 case AT_PHDR: 660 sed.sed_phdr = up->u_auxv[i].a_un.a_val; 661 break; 662 case AT_PHENT: 663 sed.sed_phent = up->u_auxv[i].a_un.a_val; 664 break; 665 case AT_PHNUM: 666 sed.sed_phnum = up->u_auxv[i].a_un.a_val; 667 break; 668 default: 669 break; 670 } 671 } 672 /* Make sure the emulator has an entry point */ 673 ASSERT(sed.sed_entry != NULL); 674 ASSERT(sed.sed_phdr != NULL); 675 676 bzero(&env, sizeof (env)); 677 if (args->to_model == DATAMODEL_NATIVE) { 678 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset, 679 exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase, 680 &env.ex_brksize, NULL); 681 #if defined(_LP64) 682 } else { 683 Elf32_Ehdr ehdr32; 684 Elf32_Addr uphdr_vaddr32; 685 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32, 686 &voffset, exec_file, &interp, &env.ex_bssbase, 687 &env.ex_brkbase, &env.ex_brksize, NULL); 688 Ehdr32to64(&ehdr32, &ehdr); 689 if (uphdr_vaddr32 == (Elf32_Addr)-1) 690 uphdr_vaddr = (Addr)-1; 691 else 692 uphdr_vaddr = uphdr_vaddr32; 693 #endif /* _LP64 */ 694 } 695 if (err != 0) 696 return (err); 697 698 /* 699 * Save off the important properties of the executable. The brand 700 * library will ask us for this data later, when it is initializing 701 * and getting ready to transfer control to the brand application. 702 */ 703 if (uphdr_vaddr == (Addr)-1) 704 sedp->sed_phdr = voffset + ehdr.e_phoff; 705 else 706 sedp->sed_phdr = voffset + uphdr_vaddr; 707 sedp->sed_entry = voffset + ehdr.e_entry; 708 sedp->sed_phent = ehdr.e_phentsize; 709 sedp->sed_phnum = ehdr.e_phnum; 710 711 if (interp) { 712 if (ehdr.e_type == ET_DYN) { 713 /* 714 * This is a shared object executable, so we need to 715 * pick a reasonable place to put the heap. Just don't 716 * use the first page. 717 */ 718 env.ex_brkbase = (caddr_t)PAGESIZE; 719 env.ex_bssbase = (caddr_t)PAGESIZE; 720 } 721 722 /* 723 * If the program needs an interpreter (most do), map it in and 724 * store relevant information about it in the aux vector, where 725 * the brand library can find it. 726 */ 727 if ((err = lookupname(linker, UIO_SYSSPACE, 728 FOLLOW, NULLVPP, &nvp)) != 0) { 729 uprintf("%s: not found.", S10_LINKER); 730 return (err); 731 } 732 if (args->to_model == DATAMODEL_NATIVE) { 733 err = mapexec_brand(nvp, args, &ehdr, 734 &uphdr_vaddr, &voffset, exec_file, &interp, 735 NULL, NULL, NULL, &lddata); 736 #if defined(_LP64) 737 } else { 738 Elf32_Ehdr ehdr32; 739 Elf32_Addr uphdr_vaddr32; 740 err = mapexec32_brand(nvp, args, &ehdr32, 741 &uphdr_vaddr32, &voffset, exec_file, &interp, 742 NULL, NULL, NULL, &lddata); 743 Ehdr32to64(&ehdr32, &ehdr); 744 if (uphdr_vaddr32 == (Elf32_Addr)-1) 745 uphdr_vaddr = (Addr)-1; 746 else 747 uphdr_vaddr = uphdr_vaddr32; 748 #endif /* _LP64 */ 749 } 750 VN_RELE(nvp); 751 if (err != 0) 752 return (err); 753 754 /* 755 * Now that we know the base address of the brand's linker, 756 * place it in the aux vector. 757 */ 758 sedp->sed_base = voffset; 759 sedp->sed_ldentry = voffset + ehdr.e_entry; 760 sedp->sed_lddata = voffset + lddata; 761 } else { 762 /* 763 * This program has no interpreter. The brand library will 764 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector, 765 * so in this case, put the entry point of the main executable 766 * there. 767 */ 768 if (ehdr.e_type == ET_EXEC) { 769 /* 770 * An executable with no interpreter, this must be a 771 * statically linked executable, which means we loaded 772 * it at the address specified in the elf header, in 773 * which case the e_entry field of the elf header is an 774 * absolute address. 775 */ 776 sedp->sed_ldentry = ehdr.e_entry; 777 sedp->sed_entry = ehdr.e_entry; 778 sedp->sed_lddata = NULL; 779 sedp->sed_base = NULL; 780 } else { 781 /* 782 * A shared object with no interpreter, we use the 783 * calculated address from above. 784 */ 785 sedp->sed_ldentry = sedp->sed_entry; 786 sedp->sed_entry = NULL; 787 sedp->sed_phdr = NULL; 788 sedp->sed_phent = NULL; 789 sedp->sed_phnum = NULL; 790 sedp->sed_lddata = NULL; 791 sedp->sed_base = voffset; 792 793 if (ehdr.e_type == ET_DYN) { 794 /* 795 * Delay setting the brkbase until the first 796 * call to brk(); see elfexec() for details. 797 */ 798 env.ex_bssbase = (caddr_t)0; 799 env.ex_brkbase = (caddr_t)0; 800 env.ex_brksize = 0; 801 } 802 } 803 } 804 805 env.ex_magic = elfmagic; 806 env.ex_vp = vp; 807 setexecenv(&env); 808 809 /* 810 * It's time to manipulate the process aux vectors. First 811 * we need to update the AT_SUN_AUXFLAGS aux vector to set 812 * the AF_SUN_NOPLM flag. 813 */ 814 if (args->to_model == DATAMODEL_NATIVE) { 815 auxv_t auxflags_auxv; 816 817 if (copyin(args->auxp_auxflags, &auxflags_auxv, 818 sizeof (auxflags_auxv)) != 0) 819 return (EFAULT); 820 821 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS); 822 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM; 823 if (copyout(&auxflags_auxv, args->auxp_auxflags, 824 sizeof (auxflags_auxv)) != 0) 825 return (EFAULT); 826 #if defined(_LP64) 827 } else { 828 auxv32_t auxflags_auxv32; 829 830 if (copyin(args->auxp_auxflags, &auxflags_auxv32, 831 sizeof (auxflags_auxv32)) != 0) 832 return (EFAULT); 833 834 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS); 835 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM; 836 if (copyout(&auxflags_auxv32, args->auxp_auxflags, 837 sizeof (auxflags_auxv32)) != 0) 838 return (EFAULT); 839 #endif /* _LP64 */ 840 } 841 842 /* Second, copy out the brand specific aux vectors. */ 843 if (args->to_model == DATAMODEL_NATIVE) { 844 auxv_t s10_auxv[] = { 845 { AT_SUN_BRAND_AUX1, 0 }, 846 { AT_SUN_BRAND_AUX2, 0 }, 847 { AT_SUN_BRAND_AUX3, 0 } 848 }; 849 850 ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA); 851 s10_auxv[0].a_un.a_val = sed.sed_lddata; 852 853 if (copyout(&s10_auxv, args->auxp_brand, 854 sizeof (s10_auxv)) != 0) 855 return (EFAULT); 856 #if defined(_LP64) 857 } else { 858 auxv32_t s10_auxv32[] = { 859 { AT_SUN_BRAND_AUX1, 0 }, 860 { AT_SUN_BRAND_AUX2, 0 }, 861 { AT_SUN_BRAND_AUX3, 0 } 862 }; 863 864 ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA); 865 s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata; 866 if (copyout(&s10_auxv32, args->auxp_brand, 867 sizeof (s10_auxv32)) != 0) 868 return (EFAULT); 869 #endif /* _LP64 */ 870 } 871 872 /* 873 * Third, the the /proc aux vectors set up by elfexec() point to brand 874 * emulation library and it's linker. Copy these to the /proc brand 875 * specific aux vector, and update the regular /proc aux vectors to 876 * point to the executable (and it's linker). This will enable 877 * debuggers to access the executable via the usual /proc or elf notes 878 * aux vectors. 879 * 880 * The brand emulation library's linker will get it's aux vectors off 881 * the stack, and then update the stack with the executable's aux 882 * vectors before jumping to the executable's linker. 883 * 884 * Debugging the brand emulation library must be done from 885 * the global zone, where the librtld_db module knows how to fetch the 886 * brand specific aux vectors to access the brand emulation libraries 887 * linker. 888 */ 889 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 890 ulong_t val; 891 892 switch (up->u_auxv[i].a_type) { 893 case AT_SUN_BRAND_S10_LDDATA: 894 up->u_auxv[i].a_un.a_val = sed.sed_lddata; 895 continue; 896 case AT_BASE: 897 val = sedp->sed_base; 898 break; 899 case AT_ENTRY: 900 val = sedp->sed_entry; 901 break; 902 case AT_PHDR: 903 val = sedp->sed_phdr; 904 break; 905 case AT_PHENT: 906 val = sedp->sed_phent; 907 break; 908 case AT_PHNUM: 909 val = sedp->sed_phnum; 910 break; 911 case AT_SUN_LDDATA: 912 val = sedp->sed_lddata; 913 break; 914 default: 915 continue; 916 } 917 918 up->u_auxv[i].a_un.a_val = val; 919 if (val == NULL) { 920 /* Hide the entry for static binaries */ 921 up->u_auxv[i].a_type = AT_IGNORE; 922 } 923 } 924 925 /* 926 * The last thing we do here is clear spd->spd_handler. This is 927 * important because if we're already a branded process and if this 928 * exec succeeds, there is a window between when the exec() first 929 * returns to the userland of the new process and when our brand 930 * library get's initialized, during which we don't want system 931 * calls to be re-directed to our brand library since it hasn't 932 * been initialized yet. 933 */ 934 spd->spd_handler = NULL; 935 936 return (0); 937 } 938 939 940 int 941 _init(void) 942 { 943 int err; 944 945 /* 946 * Set up the table indicating which system calls we want to 947 * interpose on. We should probably build this automatically from 948 * a list of system calls that is shared with the user-space 949 * library. 950 */ 951 s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP); 952 s10_emulation_table[S10_SYS_forkall] = 1; /* 2 */ 953 s10_emulation_table[S10_SYS_open] = 1; /* 5 */ 954 s10_emulation_table[S10_SYS_wait] = 1; /* 7 */ 955 s10_emulation_table[S10_SYS_creat] = 1; /* 8 */ 956 s10_emulation_table[S10_SYS_unlink] = 1; /* 10 */ 957 s10_emulation_table[S10_SYS_exec] = 1; /* 11 */ 958 s10_emulation_table[S10_SYS_chown] = 1; /* 16 */ 959 s10_emulation_table[S10_SYS_stat] = 1; /* 18 */ 960 s10_emulation_table[S10_SYS_umount] = 1; /* 22 */ 961 s10_emulation_table[S10_SYS_fstat] = 1; /* 28 */ 962 s10_emulation_table[S10_SYS_utime] = 1; /* 30 */ 963 s10_emulation_table[S10_SYS_access] = 1; /* 33 */ 964 s10_emulation_table[S10_SYS_dup] = 1; /* 41 */ 965 s10_emulation_table[SYS_ioctl] = 1; /* 54 */ 966 s10_emulation_table[SYS_execve] = 1; /* 59 */ 967 s10_emulation_table[SYS_acctctl] = 1; /* 71 */ 968 s10_emulation_table[S10_SYS_issetugid] = 1; /* 75 */ 969 s10_emulation_table[S10_SYS_fsat] = 1; /* 76 */ 970 s10_emulation_table[S10_SYS_rmdir] = 1; /* 79 */ 971 s10_emulation_table[SYS_getdents] = 1; /* 81 */ 972 s10_emulation_table[S10_SYS_poll] = 1; /* 87 */ 973 s10_emulation_table[S10_SYS_lstat] = 1; /* 88 */ 974 s10_emulation_table[S10_SYS_fchown] = 1; /* 94 */ 975 #if defined(__x86) 976 s10_emulation_table[S10_SYS_xstat] = 1; /* 123 */ 977 s10_emulation_table[S10_SYS_lxstat] = 1; /* 124 */ 978 s10_emulation_table[S10_SYS_fxstat] = 1; /* 125 */ 979 s10_emulation_table[S10_SYS_xmknod] = 1; /* 126 */ 980 #endif 981 s10_emulation_table[S10_SYS_lchown] = 1; /* 130 */ 982 s10_emulation_table[S10_SYS_rename] = 1; /* 134 */ 983 s10_emulation_table[SYS_uname] = 1; /* 135 */ 984 s10_emulation_table[SYS_systeminfo] = 1; /* 139 */ 985 s10_emulation_table[S10_SYS_fork1] = 1; /* 143 */ 986 s10_emulation_table[S10_SYS_lwp_sema_wait] = 1; /* 147 */ 987 s10_emulation_table[S10_SYS_utimes] = 1; /* 154 */ 988 #if defined(__amd64) 989 s10_emulation_table[SYS_lwp_create] = 1; /* 159 */ 990 s10_emulation_table[SYS_lwp_private] = 1; /* 166 */ 991 #endif /* __amd64 */ 992 s10_emulation_table[S10_SYS_lwp_mutex_lock] = 1; /* 169 */ 993 s10_emulation_table[SYS_pwrite] = 1; /* 174 */ 994 s10_emulation_table[SYS_auditsys] = 1; /* 186 */ 995 s10_emulation_table[SYS_sigqueue] = 1; /* 190 */ 996 s10_emulation_table[SYS_lwp_mutex_timedlock] = 1; /* 210 */ 997 s10_emulation_table[SYS_getdents64] = 1; /* 213 */ 998 s10_emulation_table[S10_SYS_stat64] = 1; /* 215 */ 999 s10_emulation_table[S10_SYS_lstat64] = 1; /* 216 */ 1000 s10_emulation_table[S10_SYS_fstat64] = 1; /* 217 */ 1001 s10_emulation_table[SYS_pwrite64] = 1; /* 223 */ 1002 s10_emulation_table[S10_SYS_creat64] = 1; /* 224 */ 1003 s10_emulation_table[S10_SYS_open64] = 1; /* 225 */ 1004 s10_emulation_table[SYS_zone] = 1; /* 227 */ 1005 s10_emulation_table[SYS_lwp_mutex_trylock] = 1; /* 251 */ 1006 1007 err = mod_install(&modlinkage); 1008 if (err) { 1009 cmn_err(CE_WARN, "Couldn't install brand module"); 1010 kmem_free(s10_emulation_table, NSYSCALL); 1011 } 1012 1013 return (err); 1014 } 1015 1016 int 1017 _info(struct modinfo *modinfop) 1018 { 1019 return (mod_info(&modlinkage, modinfop)); 1020 } 1021 1022 int 1023 _fini(void) 1024 { 1025 int err; 1026 1027 /* 1028 * If there are any zones using this brand, we can't allow it to be 1029 * unloaded. 1030 */ 1031 if (brand_zone_count(&s10_brand)) 1032 return (EBUSY); 1033 1034 kmem_free(s10_emulation_table, NSYSCALL); 1035 s10_emulation_table = NULL; 1036 1037 err = mod_remove(&modlinkage); 1038 if (err) 1039 cmn_err(CE_WARN, "Couldn't unload s10 brand module"); 1040 1041 return (err); 1042 } 1043