1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/errno.h> 28 #include <sys/exec.h> 29 #include <sys/file.h> 30 #include <sys/kmem.h> 31 #include <sys/modctl.h> 32 #include <sys/model.h> 33 #include <sys/proc.h> 34 #include <sys/syscall.h> 35 #include <sys/systm.h> 36 #include <sys/thread.h> 37 #include <sys/cmn_err.h> 38 #include <sys/archsystm.h> 39 #include <sys/pathname.h> 40 #include <sys/sunddi.h> 41 42 #include <sys/machbrand.h> 43 #include <sys/brand.h> 44 #include "s10_brand.h" 45 46 char *s10_emulation_table = NULL; 47 48 void s10_init_brand_data(zone_t *); 49 void s10_free_brand_data(zone_t *); 50 void s10_setbrand(proc_t *); 51 int s10_getattr(zone_t *, int, void *, size_t *); 52 int s10_setattr(zone_t *, int, void *, size_t); 53 int s10_brandsys(int, int64_t *, uintptr_t, uintptr_t, uintptr_t, 54 uintptr_t, uintptr_t, uintptr_t); 55 void s10_copy_procdata(proc_t *, proc_t *); 56 void s10_proc_exit(struct proc *, klwp_t *); 57 void s10_exec(); 58 int s10_initlwp(klwp_t *); 59 void s10_forklwp(klwp_t *, klwp_t *); 60 void s10_freelwp(klwp_t *); 61 void s10_lwpexit(klwp_t *); 62 int s10_elfexec(vnode_t *, execa_t *, uarg_t *, intpdata_t *, int, 63 long *, int, caddr_t, cred_t *, int); 64 65 /* s10 brand */ 66 struct brand_ops s10_brops = { 67 s10_init_brand_data, 68 s10_free_brand_data, 69 s10_brandsys, 70 s10_setbrand, 71 s10_getattr, 72 s10_setattr, 73 s10_copy_procdata, 74 s10_proc_exit, 75 s10_exec, 76 lwp_setrval, 77 s10_initlwp, 78 s10_forklwp, 79 s10_freelwp, 80 s10_lwpexit, 81 s10_elfexec, 82 S10_NSIG 83 }; 84 85 #ifdef sparc 86 87 struct brand_mach_ops s10_mops = { 88 s10_brand_syscall_callback, 89 s10_brand_syscall32_callback 90 }; 91 92 #else /* sparc */ 93 94 #ifdef __amd64 95 96 struct brand_mach_ops s10_mops = { 97 s10_brand_sysenter_callback, 98 NULL, 99 s10_brand_int91_callback, 100 s10_brand_syscall_callback, 101 s10_brand_syscall32_callback, 102 NULL 103 }; 104 105 #else /* ! __amd64 */ 106 107 struct brand_mach_ops s10_mops = { 108 s10_brand_sysenter_callback, 109 NULL, 110 NULL, 111 s10_brand_syscall_callback, 112 NULL, 113 NULL 114 }; 115 #endif /* __amd64 */ 116 117 #endif /* _sparc */ 118 119 struct brand s10_brand = { 120 BRAND_VER_1, 121 "solaris10", 122 &s10_brops, 123 &s10_mops 124 }; 125 126 static struct modlbrand modlbrand = { 127 &mod_brandops, /* type of module */ 128 "Solaris 10 Brand", /* description of module */ 129 &s10_brand /* driver ops */ 130 }; 131 132 static struct modlinkage modlinkage = { 133 MODREV_1, (void *)&modlbrand, NULL 134 }; 135 136 void 137 s10_setbrand(proc_t *p) 138 { 139 ASSERT(p->p_brand == &s10_brand); 140 ASSERT(p->p_brand_data == NULL); 141 142 /* 143 * We should only be called from exec(), when we know the process 144 * is single-threaded. 145 */ 146 ASSERT(p->p_tlist == p->p_tlist->t_forw); 147 148 p->p_brand_data = kmem_zalloc(sizeof (s10_proc_data_t), KM_SLEEP); 149 (void) s10_initlwp(p->p_tlist->t_lwp); 150 } 151 152 /*ARGSUSED*/ 153 int 154 s10_getattr(zone_t *zone, int attr, void *buf, size_t *bufsize) 155 { 156 ASSERT(zone->zone_brand == &s10_brand); 157 if (attr == S10_EMUL_BITMAP) { 158 if (buf == NULL || *bufsize != sizeof (s10_emul_bitmap_t)) 159 return (EINVAL); 160 if (copyout(((s10_zone_data_t *)zone->zone_brand_data)-> 161 emul_bitmap, buf, sizeof (s10_emul_bitmap_t)) != 0) 162 return (EFAULT); 163 return (0); 164 } 165 166 return (EINVAL); 167 } 168 169 int 170 s10_setattr(zone_t *zone, int attr, void *buf, size_t bufsize) 171 { 172 ASSERT(zone->zone_brand == &s10_brand); 173 if (attr == S10_EMUL_BITMAP) { 174 if (buf == NULL || bufsize != sizeof (s10_emul_bitmap_t)) 175 return (EINVAL); 176 if (copyin(buf, ((s10_zone_data_t *)zone->zone_brand_data)-> 177 emul_bitmap, sizeof (s10_emul_bitmap_t)) != 0) 178 return (EFAULT); 179 return (0); 180 } 181 182 return (EINVAL); 183 } 184 185 #ifdef __amd64 186 /* 187 * The Nevada kernel clears %fs for threads in 64-bit x86 processes but S10's 188 * libc expects %fs to be nonzero. This causes some committed 189 * libc/libthread interfaces (e.g., thr_main()) to fail, which impacts several 190 * libraries, including libdoor. This function sets the specified LWP's %fs 191 * register to the legacy S10 selector value (LWPFS_SEL). 192 * 193 * The best solution to the aforementioned problem is backporting CRs 194 * 6467491 to Solaris 10 so that 64-bit x86 Solaris 10 processes 195 * would accept zero for %fs. Backporting the CRs is a requirement for running 196 * S10 Containers in PV domUs because 64-bit Xen clears %fsbase when %fs is 197 * nonzero. Such behavior breaks 64-bit processes because Xen has to fetch the 198 * FS segments' base addresses from the LWPs' GDTs, which are only capable of 199 * 32-bit addressing. 200 */ 201 /*ARGSUSED*/ 202 static void 203 s10_amd64_correct_fsreg(klwp_t *l) 204 { 205 if (lwp_getdatamodel(l) == DATAMODEL_NATIVE) { 206 kpreempt_disable(); 207 l->lwp_pcb.pcb_fs = LWPFS_SEL; 208 l->lwp_pcb.pcb_rupdate = 1; 209 lwptot(l)->t_post_sys = 1; /* Guarantee update_sregs() */ 210 kpreempt_enable(); 211 } 212 } 213 #endif /* __amd64 */ 214 215 int 216 s10_native() 217 { 218 struct user *up = PTOU(curproc); 219 char *args_new, *comm_new, *p; 220 int len; 221 222 len = sizeof (S10_NATIVE_LINKER32 " ") - 1; 223 224 /* 225 * Make sure that the process' interpreter is the native dynamic linker. 226 * Convention dictates that native processes executing within solaris10- 227 * branded zones are interpreted by the native dynamic linker (the 228 * process and its arguments are specified as arguments to the dynamic 229 * linker). If this convention is violated (i.e., 230 * brandsys(B_S10_NATIVE, ...) is invoked by a process that shouldn't be 231 * native), then do nothing and silently indicate success. 232 */ 233 if (strcmp(up->u_comm, S10_LINKER_NAME) != 0) 234 return (0); 235 if (strncmp(up->u_psargs, S10_NATIVE_LINKER64 " /", len + 4) == 0) 236 len += 3; /* to account for "/64" in the path */ 237 else if (strncmp(up->u_psargs, S10_NATIVE_LINKER32 " /", len + 1) != 0) 238 return (0); 239 240 args_new = strdup(&up->u_psargs[len]); 241 if ((p = strchr(args_new, ' ')) != NULL) 242 *p = '\0'; 243 if ((comm_new = strrchr(args_new, '/')) != NULL) 244 comm_new = strdup(comm_new + 1); 245 else 246 comm_new = strdup(args_new); 247 if (p != NULL) 248 *p = ' '; 249 250 if ((strlen(args_new) != 0) && (strlen(comm_new) != 0)) { 251 mutex_enter(&curproc->p_lock); 252 (void) strlcpy(up->u_comm, comm_new, MAXCOMLEN+1); 253 (void) strlcpy(up->u_psargs, args_new, PSARGSZ); 254 mutex_exit(&curproc->p_lock); 255 } 256 257 strfree(args_new); 258 strfree(comm_new); 259 return (0); 260 } 261 262 /* 263 * Get the address of the user-space system call handler from the user 264 * process and attach it to the proc structure. 265 */ 266 /*ARGSUSED*/ 267 int 268 s10_brandsys(int cmd, int64_t *rval, uintptr_t arg1, uintptr_t arg2, 269 uintptr_t arg3, uintptr_t arg4, uintptr_t arg5, uintptr_t arg6) 270 { 271 s10_proc_data_t *spd; 272 s10_brand_reg_t reg; 273 proc_t *p = curproc; 274 int err; 275 276 *rval = 0; 277 278 /* 279 * B_EXEC_BRAND is redundant 280 * since the kernel assumes a native process doing an exec 281 * in a branded zone is going to run a branded processes. 282 * hence we don't support this operation. 283 */ 284 if (cmd == B_EXEC_BRAND) 285 return (ENOSYS); 286 287 if (cmd == B_S10_NATIVE) 288 return (s10_native()); 289 290 /* For all other operations this must be a branded process. */ 291 if (p->p_brand == &native_brand) 292 return (ENOSYS); 293 294 ASSERT(p->p_brand == &s10_brand); 295 ASSERT(p->p_brand_data != NULL); 296 297 spd = (s10_proc_data_t *)p->p_brand_data; 298 299 switch (cmd) { 300 case B_EXEC_NATIVE: 301 err = exec_common( 302 (char *)arg1, (const char **)arg2, (const char **)arg3, 303 EBA_NATIVE); 304 return (err); 305 306 case B_REGISTER: 307 if (p->p_model == DATAMODEL_NATIVE) { 308 if (copyin((void *)arg1, ®, sizeof (reg)) != 0) 309 return (EFAULT); 310 #if defined(_LP64) 311 } else { 312 s10_brand_reg32_t reg32; 313 314 if (copyin((void *)arg1, ®32, sizeof (reg32)) != 0) 315 return (EFAULT); 316 reg.sbr_version = reg32.sbr_version; 317 reg.sbr_handler = (caddr_t)(uintptr_t)reg32.sbr_handler; 318 #endif /* _LP64 */ 319 } 320 321 if (reg.sbr_version != S10_VERSION) 322 return (ENOTSUP); 323 spd->spd_handler = reg.sbr_handler; 324 return (0); 325 326 case B_ELFDATA: 327 if (p->p_model == DATAMODEL_NATIVE) { 328 if (copyout(&spd->spd_elf_data, (void *)arg1, 329 sizeof (s10_elf_data_t)) != 0) 330 return (EFAULT); 331 #if defined(_LP64) 332 } else { 333 s10_elf_data32_t sed32; 334 335 sed32.sed_phdr = spd->spd_elf_data.sed_phdr; 336 sed32.sed_phent = spd->spd_elf_data.sed_phent; 337 sed32.sed_phnum = spd->spd_elf_data.sed_phnum; 338 sed32.sed_entry = spd->spd_elf_data.sed_entry; 339 sed32.sed_base = spd->spd_elf_data.sed_base; 340 sed32.sed_ldentry = spd->spd_elf_data.sed_ldentry; 341 sed32.sed_lddata = spd->spd_elf_data.sed_lddata; 342 if (copyout(&sed32, (void *)arg1, sizeof (sed32)) != 0) 343 return (EFAULT); 344 #endif /* _LP64 */ 345 } 346 return (0); 347 348 case B_S10_PIDINFO: 349 /* 350 * The s10 brand needs to be able to get the pid of the 351 * current process and the pid of the zone's init, and it 352 * needs to do this on every process startup. Early in 353 * brand startup, we can't call getpid() because calls to 354 * getpid() represent a magical signal to some old-skool 355 * debuggers. By merging all of this into one call, we 356 * make this quite a bit cheaper and easier to handle in 357 * the brand module. 358 */ 359 if (copyout(&p->p_pid, (void *)arg1, sizeof (pid_t)) != 0) 360 return (EFAULT); 361 if (copyout(&p->p_zone->zone_proc_initpid, (void *)arg2, 362 sizeof (pid_t)) != 0) 363 return (EFAULT); 364 return (0); 365 366 case B_S10_TRUSS_POINT: 367 /* 368 * This subcommand exists so that we can see truss output 369 * from interposed system calls that return without first 370 * calling any other system call, meaning they would be 371 * invisible to truss(1). 372 * 373 * If the second argument is set non-zero, set errno to that 374 * value as well. 375 * 376 * Arguments are: 377 * 378 * arg1: syscall number 379 * arg2: errno 380 */ 381 return ((arg2 == 0) ? 0 : set_errno((uint_t)arg2)); 382 383 case B_S10_ISFDXATTRDIR: { 384 /* 385 * This subcommand enables the userland brand emulation library 386 * to determine whether a file descriptor refers to an extended 387 * file attributes directory. There is no standard syscall or 388 * libc function that can make such a determination. 389 */ 390 file_t *dir_filep; 391 392 dir_filep = getf((int)arg1); 393 if (dir_filep == NULL) 394 return (EBADF); 395 ASSERT(dir_filep->f_vnode != NULL); 396 *rval = IS_XATTRDIR(dir_filep->f_vnode); 397 releasef((int)arg1); 398 return (0); 399 } 400 401 #ifdef __amd64 402 case B_S10_FSREGCORRECTION: 403 /* 404 * This subcommand exists so that the SYS_lwp_private and 405 * SYS_lwp_create syscalls can manually set the current thread's 406 * %fs register to the legacy S10 selector value for 64-bit x86 407 * processes. 408 */ 409 s10_amd64_correct_fsreg(ttolwp(curthread)); 410 return (0); 411 #endif /* __amd64 */ 412 } 413 414 return (EINVAL); 415 } 416 417 /* 418 * Copy the per-process brand data from a parent proc to a child. 419 */ 420 void 421 s10_copy_procdata(proc_t *child, proc_t *parent) 422 { 423 s10_proc_data_t *spd; 424 425 ASSERT(parent->p_brand == &s10_brand); 426 ASSERT(child->p_brand == &s10_brand); 427 ASSERT(parent->p_brand_data != NULL); 428 ASSERT(child->p_brand_data == NULL); 429 430 /* Just duplicate all the proc data of the parent for the child */ 431 spd = kmem_alloc(sizeof (s10_proc_data_t), KM_SLEEP); 432 bcopy(parent->p_brand_data, spd, sizeof (s10_proc_data_t)); 433 child->p_brand_data = spd; 434 } 435 436 /*ARGSUSED*/ 437 void 438 s10_proc_exit(struct proc *p, klwp_t *l) 439 { 440 ASSERT(p->p_brand == &s10_brand); 441 ASSERT(p->p_brand_data != NULL); 442 443 /* 444 * We should only be called from proc_exit(), when we know that 445 * process is single-threaded. 446 */ 447 ASSERT(p->p_tlist == p->p_tlist->t_forw); 448 449 /* upon exit, free our lwp brand data */ 450 (void) s10_freelwp(ttolwp(curthread)); 451 452 /* upon exit, free our proc brand data */ 453 kmem_free(p->p_brand_data, sizeof (s10_proc_data_t)); 454 p->p_brand_data = NULL; 455 } 456 457 void 458 s10_exec() 459 { 460 s10_proc_data_t *spd = curproc->p_brand_data; 461 462 ASSERT(curproc->p_brand == &s10_brand); 463 ASSERT(curproc->p_brand_data != NULL); 464 ASSERT(ttolwp(curthread)->lwp_brand != NULL); 465 466 /* 467 * We should only be called from exec(), when we know the process 468 * is single-threaded. 469 */ 470 ASSERT(curproc->p_tlist == curproc->p_tlist->t_forw); 471 472 /* Upon exec, reset our lwp brand data. */ 473 (void) s10_freelwp(ttolwp(curthread)); 474 (void) s10_initlwp(ttolwp(curthread)); 475 476 /* 477 * Upon exec, reset all the proc brand data, except for the elf 478 * data associated with the executable we are exec'ing. 479 */ 480 spd->spd_handler = NULL; 481 } 482 483 /*ARGSUSED*/ 484 int 485 s10_initlwp(klwp_t *l) 486 { 487 ASSERT(l->lwp_procp->p_brand == &s10_brand); 488 ASSERT(l->lwp_procp->p_brand_data != NULL); 489 ASSERT(l->lwp_brand == NULL); 490 l->lwp_brand = (void *)-1; 491 return (0); 492 } 493 494 /*ARGSUSED*/ 495 void 496 s10_forklwp(klwp_t *p, klwp_t *c) 497 { 498 ASSERT(p->lwp_procp->p_brand == &s10_brand); 499 ASSERT(c->lwp_procp->p_brand == &s10_brand); 500 501 ASSERT(p->lwp_procp->p_brand_data != NULL); 502 ASSERT(c->lwp_procp->p_brand_data != NULL); 503 504 /* Both LWPs have already had been initialized via s10_initlwp() */ 505 ASSERT(p->lwp_brand != NULL); 506 ASSERT(c->lwp_brand != NULL); 507 508 #ifdef __amd64 509 /* 510 * Only correct the child's %fs register if the parent's %fs register 511 * is LWPFS_SEL. If the parent's %fs register is zero, then the Solaris 512 * 10 environment that we're emulating uses a version of libc that 513 * works when %fs is zero (i.e., it contains backports of CRs 6467491 514 * and 6501650). 515 */ 516 if (p->lwp_pcb.pcb_fs == LWPFS_SEL) 517 s10_amd64_correct_fsreg(c); 518 #endif /* __amd64 */ 519 } 520 521 /*ARGSUSED*/ 522 void 523 s10_freelwp(klwp_t *l) 524 { 525 ASSERT(l->lwp_procp->p_brand == &s10_brand); 526 ASSERT(l->lwp_procp->p_brand_data != NULL); 527 ASSERT(l->lwp_brand != NULL); 528 l->lwp_brand = NULL; 529 } 530 531 /*ARGSUSED*/ 532 void 533 s10_lwpexit(klwp_t *l) 534 { 535 ASSERT(l->lwp_procp->p_brand == &s10_brand); 536 ASSERT(l->lwp_procp->p_brand_data != NULL); 537 ASSERT(l->lwp_brand != NULL); 538 539 /* 540 * We should never be called for the last thread in a process. 541 * (That case is handled by s10_proc_exit().) There for this lwp 542 * must be exiting from a multi-threaded process. 543 */ 544 ASSERT(l->lwp_procp->p_tlist != l->lwp_procp->p_tlist->t_forw); 545 546 l->lwp_brand = NULL; 547 } 548 549 void 550 s10_free_brand_data(zone_t *zone) 551 { 552 kmem_free(zone->zone_brand_data, sizeof (s10_zone_data_t)); 553 } 554 555 void 556 s10_init_brand_data(zone_t *zone) 557 { 558 ASSERT(zone->zone_brand == &s10_brand); 559 ASSERT(zone->zone_brand_data == NULL); 560 zone->zone_brand_data = kmem_zalloc(sizeof (s10_zone_data_t), KM_SLEEP); 561 } 562 563 #if defined(_LP64) 564 static void 565 Ehdr32to64(Elf32_Ehdr *src, Ehdr *dst) 566 { 567 bcopy(src->e_ident, dst->e_ident, sizeof (src->e_ident)); 568 dst->e_type = src->e_type; 569 dst->e_machine = src->e_machine; 570 dst->e_version = src->e_version; 571 dst->e_entry = src->e_entry; 572 dst->e_phoff = src->e_phoff; 573 dst->e_shoff = src->e_shoff; 574 dst->e_flags = src->e_flags; 575 dst->e_ehsize = src->e_ehsize; 576 dst->e_phentsize = src->e_phentsize; 577 dst->e_phnum = src->e_phnum; 578 dst->e_shentsize = src->e_shentsize; 579 dst->e_shnum = src->e_shnum; 580 dst->e_shstrndx = src->e_shstrndx; 581 } 582 #endif /* _LP64 */ 583 584 int 585 s10_elfexec(vnode_t *vp, execa_t *uap, uarg_t *args, intpdata_t *idatap, 586 int level, long *execsz, int setid, caddr_t exec_file, cred_t *cred, 587 int brand_action) 588 { 589 vnode_t *nvp; 590 Ehdr ehdr; 591 Addr uphdr_vaddr; 592 intptr_t voffset; 593 int interp; 594 int i, err; 595 struct execenv env; 596 struct user *up = PTOU(curproc); 597 s10_proc_data_t *spd; 598 s10_elf_data_t sed, *sedp; 599 char *linker; 600 uintptr_t lddata; /* lddata of executable's linker */ 601 602 ASSERT(curproc->p_brand == &s10_brand); 603 ASSERT(curproc->p_brand_data != NULL); 604 605 spd = (s10_proc_data_t *)curproc->p_brand_data; 606 sedp = &spd->spd_elf_data; 607 608 args->brandname = S10_BRANDNAME; 609 610 /* 611 * We will exec the brand library and then map in the target 612 * application and (optionally) the brand's default linker. 613 */ 614 if (args->to_model == DATAMODEL_NATIVE) { 615 args->emulator = S10_LIB; 616 linker = S10_LINKER; 617 #if defined(_LP64) 618 } else { 619 args->emulator = S10_LIB32; 620 linker = S10_LINKER32; 621 #endif /* _LP64 */ 622 } 623 624 if ((err = lookupname(args->emulator, UIO_SYSSPACE, FOLLOW, NULLVPP, 625 &nvp)) != 0) { 626 uprintf("%s: not found.", args->emulator); 627 return (err); 628 } 629 630 if (args->to_model == DATAMODEL_NATIVE) { 631 err = elfexec(nvp, uap, args, idatap, level + 1, execsz, 632 setid, exec_file, cred, brand_action); 633 #if defined(_LP64) 634 } else { 635 err = elf32exec(nvp, uap, args, idatap, level + 1, execsz, 636 setid, exec_file, cred, brand_action); 637 #endif /* _LP64 */ 638 } 639 VN_RELE(nvp); 640 if (err != 0) 641 return (err); 642 643 /* 644 * The u_auxv vectors are set up by elfexec to point to the brand 645 * emulation library and linker. Save these so they can be copied to 646 * the specific brand aux vectors. 647 */ 648 bzero(&sed, sizeof (sed)); 649 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 650 switch (up->u_auxv[i].a_type) { 651 case AT_SUN_LDDATA: 652 sed.sed_lddata = up->u_auxv[i].a_un.a_val; 653 break; 654 case AT_BASE: 655 sed.sed_base = up->u_auxv[i].a_un.a_val; 656 break; 657 case AT_ENTRY: 658 sed.sed_entry = up->u_auxv[i].a_un.a_val; 659 break; 660 case AT_PHDR: 661 sed.sed_phdr = up->u_auxv[i].a_un.a_val; 662 break; 663 case AT_PHENT: 664 sed.sed_phent = up->u_auxv[i].a_un.a_val; 665 break; 666 case AT_PHNUM: 667 sed.sed_phnum = up->u_auxv[i].a_un.a_val; 668 break; 669 default: 670 break; 671 } 672 } 673 /* Make sure the emulator has an entry point */ 674 ASSERT(sed.sed_entry != NULL); 675 ASSERT(sed.sed_phdr != NULL); 676 677 bzero(&env, sizeof (env)); 678 if (args->to_model == DATAMODEL_NATIVE) { 679 err = mapexec_brand(vp, args, &ehdr, &uphdr_vaddr, &voffset, 680 exec_file, &interp, &env.ex_bssbase, &env.ex_brkbase, 681 &env.ex_brksize, NULL); 682 #if defined(_LP64) 683 } else { 684 Elf32_Ehdr ehdr32; 685 Elf32_Addr uphdr_vaddr32; 686 err = mapexec32_brand(vp, args, &ehdr32, &uphdr_vaddr32, 687 &voffset, exec_file, &interp, &env.ex_bssbase, 688 &env.ex_brkbase, &env.ex_brksize, NULL); 689 Ehdr32to64(&ehdr32, &ehdr); 690 if (uphdr_vaddr32 == (Elf32_Addr)-1) 691 uphdr_vaddr = (Addr)-1; 692 else 693 uphdr_vaddr = uphdr_vaddr32; 694 #endif /* _LP64 */ 695 } 696 if (err != 0) 697 return (err); 698 699 /* 700 * Save off the important properties of the executable. The brand 701 * library will ask us for this data later, when it is initializing 702 * and getting ready to transfer control to the brand application. 703 */ 704 if (uphdr_vaddr == (Addr)-1) 705 sedp->sed_phdr = voffset + ehdr.e_phoff; 706 else 707 sedp->sed_phdr = voffset + uphdr_vaddr; 708 sedp->sed_entry = voffset + ehdr.e_entry; 709 sedp->sed_phent = ehdr.e_phentsize; 710 sedp->sed_phnum = ehdr.e_phnum; 711 712 if (interp) { 713 if (ehdr.e_type == ET_DYN) { 714 /* 715 * This is a shared object executable, so we need to 716 * pick a reasonable place to put the heap. Just don't 717 * use the first page. 718 */ 719 env.ex_brkbase = (caddr_t)PAGESIZE; 720 env.ex_bssbase = (caddr_t)PAGESIZE; 721 } 722 723 /* 724 * If the program needs an interpreter (most do), map it in and 725 * store relevant information about it in the aux vector, where 726 * the brand library can find it. 727 */ 728 if ((err = lookupname(linker, UIO_SYSSPACE, 729 FOLLOW, NULLVPP, &nvp)) != 0) { 730 uprintf("%s: not found.", S10_LINKER); 731 return (err); 732 } 733 if (args->to_model == DATAMODEL_NATIVE) { 734 err = mapexec_brand(nvp, args, &ehdr, 735 &uphdr_vaddr, &voffset, exec_file, &interp, 736 NULL, NULL, NULL, &lddata); 737 #if defined(_LP64) 738 } else { 739 Elf32_Ehdr ehdr32; 740 Elf32_Addr uphdr_vaddr32; 741 err = mapexec32_brand(nvp, args, &ehdr32, 742 &uphdr_vaddr32, &voffset, exec_file, &interp, 743 NULL, NULL, NULL, &lddata); 744 Ehdr32to64(&ehdr32, &ehdr); 745 if (uphdr_vaddr32 == (Elf32_Addr)-1) 746 uphdr_vaddr = (Addr)-1; 747 else 748 uphdr_vaddr = uphdr_vaddr32; 749 #endif /* _LP64 */ 750 } 751 VN_RELE(nvp); 752 if (err != 0) 753 return (err); 754 755 /* 756 * Now that we know the base address of the brand's linker, 757 * place it in the aux vector. 758 */ 759 sedp->sed_base = voffset; 760 sedp->sed_ldentry = voffset + ehdr.e_entry; 761 sedp->sed_lddata = voffset + lddata; 762 } else { 763 /* 764 * This program has no interpreter. The brand library will 765 * jump to the address in the AT_SUN_BRAND_LDENTRY aux vector, 766 * so in this case, put the entry point of the main executable 767 * there. 768 */ 769 if (ehdr.e_type == ET_EXEC) { 770 /* 771 * An executable with no interpreter, this must be a 772 * statically linked executable, which means we loaded 773 * it at the address specified in the elf header, in 774 * which case the e_entry field of the elf header is an 775 * absolute address. 776 */ 777 sedp->sed_ldentry = ehdr.e_entry; 778 sedp->sed_entry = ehdr.e_entry; 779 sedp->sed_lddata = NULL; 780 sedp->sed_base = NULL; 781 } else { 782 /* 783 * A shared object with no interpreter, we use the 784 * calculated address from above. 785 */ 786 sedp->sed_ldentry = sedp->sed_entry; 787 sedp->sed_entry = NULL; 788 sedp->sed_phdr = NULL; 789 sedp->sed_phent = NULL; 790 sedp->sed_phnum = NULL; 791 sedp->sed_lddata = NULL; 792 sedp->sed_base = voffset; 793 794 if (ehdr.e_type == ET_DYN) { 795 /* 796 * Delay setting the brkbase until the first 797 * call to brk(); see elfexec() for details. 798 */ 799 env.ex_bssbase = (caddr_t)0; 800 env.ex_brkbase = (caddr_t)0; 801 env.ex_brksize = 0; 802 } 803 } 804 } 805 806 env.ex_magic = elfmagic; 807 env.ex_vp = vp; 808 setexecenv(&env); 809 810 /* 811 * It's time to manipulate the process aux vectors. First 812 * we need to update the AT_SUN_AUXFLAGS aux vector to set 813 * the AF_SUN_NOPLM flag. 814 */ 815 if (args->to_model == DATAMODEL_NATIVE) { 816 auxv_t auxflags_auxv; 817 818 if (copyin(args->auxp_auxflags, &auxflags_auxv, 819 sizeof (auxflags_auxv)) != 0) 820 return (EFAULT); 821 822 ASSERT(auxflags_auxv.a_type == AT_SUN_AUXFLAGS); 823 auxflags_auxv.a_un.a_val |= AF_SUN_NOPLM; 824 if (copyout(&auxflags_auxv, args->auxp_auxflags, 825 sizeof (auxflags_auxv)) != 0) 826 return (EFAULT); 827 #if defined(_LP64) 828 } else { 829 auxv32_t auxflags_auxv32; 830 831 if (copyin(args->auxp_auxflags, &auxflags_auxv32, 832 sizeof (auxflags_auxv32)) != 0) 833 return (EFAULT); 834 835 ASSERT(auxflags_auxv32.a_type == AT_SUN_AUXFLAGS); 836 auxflags_auxv32.a_un.a_val |= AF_SUN_NOPLM; 837 if (copyout(&auxflags_auxv32, args->auxp_auxflags, 838 sizeof (auxflags_auxv32)) != 0) 839 return (EFAULT); 840 #endif /* _LP64 */ 841 } 842 843 /* Second, copy out the brand specific aux vectors. */ 844 if (args->to_model == DATAMODEL_NATIVE) { 845 auxv_t s10_auxv[] = { 846 { AT_SUN_BRAND_AUX1, 0 }, 847 { AT_SUN_BRAND_AUX2, 0 }, 848 { AT_SUN_BRAND_AUX3, 0 } 849 }; 850 851 ASSERT(s10_auxv[0].a_type == AT_SUN_BRAND_S10_LDDATA); 852 s10_auxv[0].a_un.a_val = sed.sed_lddata; 853 854 if (copyout(&s10_auxv, args->auxp_brand, 855 sizeof (s10_auxv)) != 0) 856 return (EFAULT); 857 #if defined(_LP64) 858 } else { 859 auxv32_t s10_auxv32[] = { 860 { AT_SUN_BRAND_AUX1, 0 }, 861 { AT_SUN_BRAND_AUX2, 0 }, 862 { AT_SUN_BRAND_AUX3, 0 } 863 }; 864 865 ASSERT(s10_auxv32[0].a_type == AT_SUN_BRAND_S10_LDDATA); 866 s10_auxv32[0].a_un.a_val = (uint32_t)sed.sed_lddata; 867 if (copyout(&s10_auxv32, args->auxp_brand, 868 sizeof (s10_auxv32)) != 0) 869 return (EFAULT); 870 #endif /* _LP64 */ 871 } 872 873 /* 874 * Third, the the /proc aux vectors set up by elfexec() point to brand 875 * emulation library and it's linker. Copy these to the /proc brand 876 * specific aux vector, and update the regular /proc aux vectors to 877 * point to the executable (and it's linker). This will enable 878 * debuggers to access the executable via the usual /proc or elf notes 879 * aux vectors. 880 * 881 * The brand emulation library's linker will get it's aux vectors off 882 * the stack, and then update the stack with the executable's aux 883 * vectors before jumping to the executable's linker. 884 * 885 * Debugging the brand emulation library must be done from 886 * the global zone, where the librtld_db module knows how to fetch the 887 * brand specific aux vectors to access the brand emulation libraries 888 * linker. 889 */ 890 for (i = 0; i < __KERN_NAUXV_IMPL; i++) { 891 ulong_t val; 892 893 switch (up->u_auxv[i].a_type) { 894 case AT_SUN_BRAND_S10_LDDATA: 895 up->u_auxv[i].a_un.a_val = sed.sed_lddata; 896 continue; 897 case AT_BASE: 898 val = sedp->sed_base; 899 break; 900 case AT_ENTRY: 901 val = sedp->sed_entry; 902 break; 903 case AT_PHDR: 904 val = sedp->sed_phdr; 905 break; 906 case AT_PHENT: 907 val = sedp->sed_phent; 908 break; 909 case AT_PHNUM: 910 val = sedp->sed_phnum; 911 break; 912 case AT_SUN_LDDATA: 913 val = sedp->sed_lddata; 914 break; 915 default: 916 continue; 917 } 918 919 up->u_auxv[i].a_un.a_val = val; 920 if (val == NULL) { 921 /* Hide the entry for static binaries */ 922 up->u_auxv[i].a_type = AT_IGNORE; 923 } 924 } 925 926 /* 927 * The last thing we do here is clear spd->spd_handler. This is 928 * important because if we're already a branded process and if this 929 * exec succeeds, there is a window between when the exec() first 930 * returns to the userland of the new process and when our brand 931 * library get's initialized, during which we don't want system 932 * calls to be re-directed to our brand library since it hasn't 933 * been initialized yet. 934 */ 935 spd->spd_handler = NULL; 936 937 return (0); 938 } 939 940 941 int 942 _init(void) 943 { 944 int err; 945 946 /* 947 * Set up the table indicating which system calls we want to 948 * interpose on. We should probably build this automatically from 949 * a list of system calls that is shared with the user-space 950 * library. 951 */ 952 s10_emulation_table = kmem_zalloc(NSYSCALL, KM_SLEEP); 953 s10_emulation_table[S10_SYS_forkall] = 1; /* 2 */ 954 s10_emulation_table[S10_SYS_open] = 1; /* 5 */ 955 s10_emulation_table[S10_SYS_wait] = 1; /* 7 */ 956 s10_emulation_table[S10_SYS_creat] = 1; /* 8 */ 957 s10_emulation_table[S10_SYS_unlink] = 1; /* 10 */ 958 s10_emulation_table[S10_SYS_exec] = 1; /* 11 */ 959 s10_emulation_table[S10_SYS_chown] = 1; /* 16 */ 960 s10_emulation_table[S10_SYS_stat] = 1; /* 18 */ 961 s10_emulation_table[S10_SYS_umount] = 1; /* 22 */ 962 s10_emulation_table[S10_SYS_fstat] = 1; /* 28 */ 963 s10_emulation_table[S10_SYS_utime] = 1; /* 30 */ 964 s10_emulation_table[S10_SYS_access] = 1; /* 33 */ 965 s10_emulation_table[SYS_kill] = 1; /* 37 */ 966 s10_emulation_table[S10_SYS_dup] = 1; /* 41 */ 967 s10_emulation_table[SYS_ioctl] = 1; /* 54 */ 968 s10_emulation_table[SYS_execve] = 1; /* 59 */ 969 s10_emulation_table[SYS_acctctl] = 1; /* 71 */ 970 s10_emulation_table[S10_SYS_issetugid] = 1; /* 75 */ 971 s10_emulation_table[S10_SYS_fsat] = 1; /* 76 */ 972 s10_emulation_table[S10_SYS_rmdir] = 1; /* 79 */ 973 s10_emulation_table[SYS_getdents] = 1; /* 81 */ 974 s10_emulation_table[S10_SYS_poll] = 1; /* 87 */ 975 s10_emulation_table[S10_SYS_lstat] = 1; /* 88 */ 976 s10_emulation_table[S10_SYS_fchown] = 1; /* 94 */ 977 s10_emulation_table[SYS_sigprocmask] = 1; /* 95 */ 978 s10_emulation_table[SYS_sigsuspend] = 1; /* 96 */ 979 s10_emulation_table[SYS_sigaction] = 1; /* 98 */ 980 s10_emulation_table[SYS_sigpending] = 1; /* 99 */ 981 s10_emulation_table[SYS_context] = 1; /* 100 */ 982 s10_emulation_table[SYS_waitid] = 1; /* 107 */ 983 s10_emulation_table[SYS_sigsendsys] = 1; /* 108 */ 984 #if defined(__x86) 985 s10_emulation_table[S10_SYS_xstat] = 1; /* 123 */ 986 s10_emulation_table[S10_SYS_lxstat] = 1; /* 124 */ 987 s10_emulation_table[S10_SYS_fxstat] = 1; /* 125 */ 988 s10_emulation_table[S10_SYS_xmknod] = 1; /* 126 */ 989 #endif 990 s10_emulation_table[S10_SYS_lchown] = 1; /* 130 */ 991 s10_emulation_table[S10_SYS_rename] = 1; /* 134 */ 992 s10_emulation_table[SYS_uname] = 1; /* 135 */ 993 s10_emulation_table[SYS_sysconfig] = 1; /* 137 */ 994 s10_emulation_table[SYS_systeminfo] = 1; /* 139 */ 995 s10_emulation_table[S10_SYS_fork1] = 1; /* 143 */ 996 s10_emulation_table[SYS_sigtimedwait] = 1; /* 144 */ 997 s10_emulation_table[S10_SYS_lwp_sema_wait] = 1; /* 147 */ 998 s10_emulation_table[S10_SYS_utimes] = 1; /* 154 */ 999 s10_emulation_table[SYS_lwp_create] = 1; /* 159 */ 1000 s10_emulation_table[SYS_lwp_kill] = 1; /* 163 */ 1001 s10_emulation_table[SYS_lwp_sigmask] = 1; /* 165 */ 1002 #if defined(__amd64) 1003 s10_emulation_table[SYS_lwp_private] = 1; /* 166 */ 1004 #endif /* __amd64 */ 1005 s10_emulation_table[S10_SYS_lwp_mutex_lock] = 1; /* 169 */ 1006 s10_emulation_table[SYS_pwrite] = 1; /* 174 */ 1007 s10_emulation_table[SYS_auditsys] = 1; /* 186 */ 1008 s10_emulation_table[SYS_sigqueue] = 1; /* 190 */ 1009 s10_emulation_table[SYS_signotify] = 1; /* 205 */ 1010 s10_emulation_table[SYS_lwp_mutex_timedlock] = 1; /* 210 */ 1011 s10_emulation_table[SYS_getdents64] = 1; /* 213 */ 1012 s10_emulation_table[S10_SYS_stat64] = 1; /* 215 */ 1013 s10_emulation_table[S10_SYS_lstat64] = 1; /* 216 */ 1014 s10_emulation_table[S10_SYS_fstat64] = 1; /* 217 */ 1015 s10_emulation_table[SYS_pwrite64] = 1; /* 223 */ 1016 s10_emulation_table[S10_SYS_creat64] = 1; /* 224 */ 1017 s10_emulation_table[S10_SYS_open64] = 1; /* 225 */ 1018 s10_emulation_table[SYS_zone] = 1; /* 227 */ 1019 s10_emulation_table[SYS_lwp_mutex_trylock] = 1; /* 251 */ 1020 1021 err = mod_install(&modlinkage); 1022 if (err) { 1023 cmn_err(CE_WARN, "Couldn't install brand module"); 1024 kmem_free(s10_emulation_table, NSYSCALL); 1025 } 1026 1027 return (err); 1028 } 1029 1030 int 1031 _info(struct modinfo *modinfop) 1032 { 1033 return (mod_info(&modlinkage, modinfop)); 1034 } 1035 1036 int 1037 _fini(void) 1038 { 1039 int err; 1040 1041 /* 1042 * If there are any zones using this brand, we can't allow it to be 1043 * unloaded. 1044 */ 1045 if (brand_zone_count(&s10_brand)) 1046 return (EBUSY); 1047 1048 kmem_free(s10_emulation_table, NSYSCALL); 1049 s10_emulation_table = NULL; 1050 1051 err = mod_remove(&modlinkage); 1052 if (err) 1053 cmn_err(CE_WARN, "Couldn't unload s10 brand module"); 1054 1055 return (err); 1056 } 1057