19acbbeafSnn /* 29acbbeafSnn * CDDL HEADER START 39acbbeafSnn * 49acbbeafSnn * The contents of this file are subject to the terms of the 59acbbeafSnn * Common Development and Distribution License (the "License"). 69acbbeafSnn * You may not use this file except in compliance with the License. 79acbbeafSnn * 89acbbeafSnn * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 99acbbeafSnn * or http://www.opensolaris.org/os/licensing. 109acbbeafSnn * See the License for the specific language governing permissions 119acbbeafSnn * and limitations under the License. 129acbbeafSnn * 139acbbeafSnn * When distributing Covered Code, include this CDDL HEADER in each 149acbbeafSnn * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 159acbbeafSnn * If applicable, add the following below this CDDL HEADER, with the 169acbbeafSnn * fields enclosed by brackets "[]" replaced with your own identifying 179acbbeafSnn * information: Portions Copyright [yyyy] [name of copyright owner] 189acbbeafSnn * 199acbbeafSnn * CDDL HEADER END 209acbbeafSnn */ 219acbbeafSnn /* 22*fd9e7635Sedp * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 239acbbeafSnn * Use is subject to license terms. 249acbbeafSnn */ 259acbbeafSnn 269acbbeafSnn #pragma ident "%Z%%M% %I% %E% SMI" 279acbbeafSnn 289acbbeafSnn #include <sys/kmem.h> 299acbbeafSnn #include <sys/errno.h> 309acbbeafSnn #include <sys/systm.h> 319acbbeafSnn #include <sys/cmn_err.h> 329acbbeafSnn #include <sys/brand.h> 339acbbeafSnn #include <sys/machbrand.h> 349acbbeafSnn #include <sys/modctl.h> 359acbbeafSnn #include <sys/rwlock.h> 369acbbeafSnn #include <sys/zone.h> 379acbbeafSnn 389acbbeafSnn #define SUPPORTED_BRAND_VERSION BRAND_VER_1 399acbbeafSnn 409acbbeafSnn #if defined(__sparcv9) 41725deb8fSedp /* sparcv9 uses system wide brand interposition hooks */ 42725deb8fSedp static void brand_plat_interposition_enable(void); 43725deb8fSedp static void brand_plat_interposition_disable(void); 44725deb8fSedp 459acbbeafSnn struct brand_mach_ops native_mach_ops = { 469acbbeafSnn NULL, NULL 479acbbeafSnn }; 4859f2ff5cSedp #else /* !__sparcv9 */ 499acbbeafSnn struct brand_mach_ops native_mach_ops = { 509acbbeafSnn NULL, NULL, NULL, NULL, NULL, NULL 519acbbeafSnn }; 5259f2ff5cSedp #endif /* !__sparcv9 */ 539acbbeafSnn 549acbbeafSnn brand_t native_brand = { 559acbbeafSnn BRAND_VER_1, 569acbbeafSnn "native", 579acbbeafSnn NULL, 589acbbeafSnn &native_mach_ops 599acbbeafSnn }; 609acbbeafSnn 619acbbeafSnn /* 629acbbeafSnn * Used to maintain a list of all the brands currently loaded into the 639acbbeafSnn * kernel. 649acbbeafSnn */ 659acbbeafSnn struct brand_list { 669acbbeafSnn int bl_refcnt; 679acbbeafSnn struct brand_list *bl_next; 689acbbeafSnn brand_t *bl_brand; 699acbbeafSnn }; 709acbbeafSnn 719acbbeafSnn static struct brand_list *brand_list = NULL; 729acbbeafSnn 739acbbeafSnn /* 749acbbeafSnn * This lock protects the integrity of the brand list. 759acbbeafSnn */ 769acbbeafSnn static kmutex_t brand_list_lock; 779acbbeafSnn 789acbbeafSnn void 799acbbeafSnn brand_init() 809acbbeafSnn { 819acbbeafSnn mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL); 829acbbeafSnn p0.p_brand = &native_brand; 839acbbeafSnn } 849acbbeafSnn 859acbbeafSnn int 869acbbeafSnn brand_register(brand_t *brand) 879acbbeafSnn { 889acbbeafSnn struct brand_list *list, *scan; 899acbbeafSnn 909acbbeafSnn if (brand == NULL) 919acbbeafSnn return (EINVAL); 929acbbeafSnn 939acbbeafSnn if (is_system_labeled()) { 949acbbeafSnn cmn_err(CE_WARN, 959acbbeafSnn "Branded zones are not allowed on labeled systems."); 969acbbeafSnn return (EINVAL); 979acbbeafSnn } 989acbbeafSnn 999acbbeafSnn if (brand->b_version != SUPPORTED_BRAND_VERSION) { 1009acbbeafSnn if (brand->b_version < SUPPORTED_BRAND_VERSION) { 1019acbbeafSnn cmn_err(CE_WARN, 1029acbbeafSnn "brand '%s' was built to run on older versions " 1039acbbeafSnn "of Solaris.", 1049acbbeafSnn brand->b_name); 1059acbbeafSnn } else { 1069acbbeafSnn cmn_err(CE_WARN, 1079acbbeafSnn "brand '%s' was built to run on a newer version " 1089acbbeafSnn "of Solaris.", 1099acbbeafSnn brand->b_name); 1109acbbeafSnn } 1119acbbeafSnn return (EINVAL); 1129acbbeafSnn } 1139acbbeafSnn 1149acbbeafSnn /* Sanity checks */ 1159acbbeafSnn if (brand->b_name == NULL || brand->b_ops == NULL || 1169acbbeafSnn brand->b_ops->b_brandsys == NULL) { 1179acbbeafSnn cmn_err(CE_WARN, "Malformed brand"); 1189acbbeafSnn return (EINVAL); 1199acbbeafSnn } 1209acbbeafSnn 1219acbbeafSnn list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP); 1229acbbeafSnn 1239acbbeafSnn /* Add the brand to the list of loaded brands. */ 1249acbbeafSnn mutex_enter(&brand_list_lock); 1259acbbeafSnn 1269acbbeafSnn /* 1279acbbeafSnn * Check to be sure we haven't already registered this brand. 1289acbbeafSnn */ 1299acbbeafSnn for (scan = brand_list; scan != NULL; scan = scan->bl_next) { 1309acbbeafSnn if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) { 1319acbbeafSnn cmn_err(CE_WARN, 1329acbbeafSnn "Invalid attempt to load a second instance of " 1339acbbeafSnn "brand %s", brand->b_name); 1349acbbeafSnn mutex_exit(&brand_list_lock); 1359acbbeafSnn kmem_free(list, sizeof (struct brand_list)); 1369acbbeafSnn return (EINVAL); 1379acbbeafSnn } 1389acbbeafSnn } 1399acbbeafSnn 140725deb8fSedp #if defined(__sparcv9) 141725deb8fSedp /* sparcv9 uses system wide brand interposition hooks */ 142725deb8fSedp if (brand_list == NULL) 143725deb8fSedp brand_plat_interposition_enable(); 144725deb8fSedp #endif /* __sparcv9 */ 145725deb8fSedp 1469acbbeafSnn list->bl_brand = brand; 1479acbbeafSnn list->bl_refcnt = 0; 1489acbbeafSnn list->bl_next = brand_list; 1499acbbeafSnn brand_list = list; 150725deb8fSedp 1519acbbeafSnn mutex_exit(&brand_list_lock); 1529acbbeafSnn 1539acbbeafSnn return (0); 1549acbbeafSnn } 1559acbbeafSnn 1569acbbeafSnn /* 1579acbbeafSnn * The kernel module implementing this brand is being unloaded, so remove 1589acbbeafSnn * it from the list of active brands. 1599acbbeafSnn */ 1609acbbeafSnn int 1619acbbeafSnn brand_unregister(brand_t *brand) 1629acbbeafSnn { 1639acbbeafSnn struct brand_list *list, *prev; 1649acbbeafSnn 1659acbbeafSnn /* Sanity checks */ 1669acbbeafSnn if (brand == NULL || brand->b_name == NULL) { 1679acbbeafSnn cmn_err(CE_WARN, "Malformed brand"); 1689acbbeafSnn return (EINVAL); 1699acbbeafSnn } 1709acbbeafSnn 1719acbbeafSnn prev = NULL; 1729acbbeafSnn mutex_enter(&brand_list_lock); 1739acbbeafSnn 1749acbbeafSnn for (list = brand_list; list != NULL; list = list->bl_next) { 1759acbbeafSnn if (list->bl_brand == brand) 1769acbbeafSnn break; 1779acbbeafSnn prev = list; 1789acbbeafSnn } 1799acbbeafSnn 1809acbbeafSnn if (list == NULL) { 1819acbbeafSnn cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name); 1829acbbeafSnn mutex_exit(&brand_list_lock); 1839acbbeafSnn return (EINVAL); 1849acbbeafSnn } 1859acbbeafSnn 1869acbbeafSnn if (list->bl_refcnt > 0) { 1879acbbeafSnn cmn_err(CE_WARN, "Unregistering brand %s which is still in use", 1889acbbeafSnn brand->b_name); 1899acbbeafSnn mutex_exit(&brand_list_lock); 1909acbbeafSnn return (EBUSY); 1919acbbeafSnn } 1929acbbeafSnn 1939acbbeafSnn /* Remove brand from the list */ 1949acbbeafSnn if (prev != NULL) 1959acbbeafSnn prev->bl_next = list->bl_next; 1969acbbeafSnn else 1979acbbeafSnn brand_list = list->bl_next; 1989acbbeafSnn 199725deb8fSedp #if defined(__sparcv9) 200725deb8fSedp /* sparcv9 uses system wide brand interposition hooks */ 201725deb8fSedp if (brand_list == NULL) 202725deb8fSedp brand_plat_interposition_disable(); 203725deb8fSedp #endif /* __sparcv9 */ 204725deb8fSedp 2059acbbeafSnn mutex_exit(&brand_list_lock); 2069acbbeafSnn 2079acbbeafSnn kmem_free(list, sizeof (struct brand_list)); 2089acbbeafSnn 2099acbbeafSnn return (0); 2109acbbeafSnn } 2119acbbeafSnn 2129acbbeafSnn /* 2139acbbeafSnn * Record that a zone of this brand has been instantiated. If the kernel 2149acbbeafSnn * module implementing this brand's functionality is not present, this 2159acbbeafSnn * routine attempts to load the module as a side effect. 2169acbbeafSnn */ 2179acbbeafSnn brand_t * 2189acbbeafSnn brand_register_zone(struct brand_attr *attr) 2199acbbeafSnn { 2209acbbeafSnn struct brand_list *l = NULL; 2219acbbeafSnn ddi_modhandle_t hdl = NULL; 2229acbbeafSnn char *modname; 2239acbbeafSnn int err = 0; 2249acbbeafSnn 2259acbbeafSnn if (is_system_labeled()) { 2269acbbeafSnn cmn_err(CE_WARN, 2279acbbeafSnn "Branded zones are not allowed on labeled systems."); 2289acbbeafSnn return (NULL); 2299acbbeafSnn } 2309acbbeafSnn 2319acbbeafSnn /* 2329acbbeafSnn * We make at most two passes through this loop. The first time 2339acbbeafSnn * through, we're looking to see if this is a new user of an 2349acbbeafSnn * already loaded brand. If the brand hasn't been loaded, we 2359acbbeafSnn * call ddi_modopen() to force it to be loaded and then make a 2369acbbeafSnn * second pass through the list of brands. If we don't find the 2379acbbeafSnn * brand the second time through it means that the modname 2389acbbeafSnn * specified in the brand_attr structure doesn't provide the brand 2399acbbeafSnn * specified in the brandname field. This would suggest a bug in 2409acbbeafSnn * the brand's config.xml file. We close the module and return 2419acbbeafSnn * 'NULL' to the caller. 2429acbbeafSnn */ 2439acbbeafSnn for (;;) { 2449acbbeafSnn /* 2459acbbeafSnn * Search list of loaded brands 2469acbbeafSnn */ 2479acbbeafSnn mutex_enter(&brand_list_lock); 2489acbbeafSnn for (l = brand_list; l != NULL; l = l->bl_next) 2499acbbeafSnn if (strcmp(attr->ba_brandname, 2509acbbeafSnn l->bl_brand->b_name) == 0) 2519acbbeafSnn break; 2529acbbeafSnn if ((l != NULL) || (hdl != NULL)) 2539acbbeafSnn break; 2549acbbeafSnn mutex_exit(&brand_list_lock); 2559acbbeafSnn 2569acbbeafSnn /* 2579acbbeafSnn * We didn't find that the requested brand has been loaded 2589acbbeafSnn * yet, so we trigger the load of the appropriate kernel 2599acbbeafSnn * module and search the list again. 2609acbbeafSnn */ 2619acbbeafSnn modname = kmem_alloc(MAXPATHLEN, KM_SLEEP); 2629acbbeafSnn (void) strcpy(modname, "brand/"); 2639acbbeafSnn (void) strcat(modname, attr->ba_modname); 2649acbbeafSnn hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err); 2659acbbeafSnn kmem_free(modname, MAXPATHLEN); 2669acbbeafSnn 2679acbbeafSnn if (err != 0) 2689acbbeafSnn return (NULL); 2699acbbeafSnn } 2709acbbeafSnn 2719acbbeafSnn /* 2729acbbeafSnn * If we found the matching brand, bump its reference count. 2739acbbeafSnn */ 2749acbbeafSnn if (l != NULL) 2759acbbeafSnn l->bl_refcnt++; 2769acbbeafSnn 2779acbbeafSnn mutex_exit(&brand_list_lock); 2789acbbeafSnn 2799acbbeafSnn if (hdl != NULL) 2809acbbeafSnn (void) ddi_modclose(hdl); 2819acbbeafSnn 2829acbbeafSnn return ((l != NULL) ? l->bl_brand : NULL); 2839acbbeafSnn } 2849acbbeafSnn 2859acbbeafSnn /* 2869acbbeafSnn * Return the number of zones currently using this brand. 2879acbbeafSnn */ 2889acbbeafSnn int 2899acbbeafSnn brand_zone_count(struct brand *bp) 2909acbbeafSnn { 2919acbbeafSnn struct brand_list *l; 2929acbbeafSnn int cnt = 0; 2939acbbeafSnn 2949acbbeafSnn mutex_enter(&brand_list_lock); 2959acbbeafSnn for (l = brand_list; l != NULL; l = l->bl_next) 2969acbbeafSnn if (l->bl_brand == bp) { 2979acbbeafSnn cnt = l->bl_refcnt; 2989acbbeafSnn break; 2999acbbeafSnn } 3009acbbeafSnn mutex_exit(&brand_list_lock); 3019acbbeafSnn 3029acbbeafSnn return (cnt); 3039acbbeafSnn } 3049acbbeafSnn 3059acbbeafSnn void 3069acbbeafSnn brand_unregister_zone(struct brand *bp) 3079acbbeafSnn { 3089acbbeafSnn struct brand_list *list; 3099acbbeafSnn 3109acbbeafSnn mutex_enter(&brand_list_lock); 3119acbbeafSnn for (list = brand_list; list != NULL; list = list->bl_next) { 3129acbbeafSnn if (list->bl_brand == bp) { 3139acbbeafSnn ASSERT(list->bl_refcnt > 0); 3149acbbeafSnn list->bl_refcnt--; 3159acbbeafSnn break; 3169acbbeafSnn } 3179acbbeafSnn } 3189acbbeafSnn mutex_exit(&brand_list_lock); 3199acbbeafSnn } 3209acbbeafSnn 3219acbbeafSnn void 3229acbbeafSnn brand_setbrand(proc_t *p) 3239acbbeafSnn { 3249acbbeafSnn brand_t *bp = p->p_zone->zone_brand; 3259acbbeafSnn 3269acbbeafSnn ASSERT(bp != NULL); 3279acbbeafSnn ASSERT(p->p_brand == &native_brand); 3289acbbeafSnn 3299acbbeafSnn /* 3309acbbeafSnn * We should only be called from exec(), when we know the process 3319acbbeafSnn * is single-threaded. 3329acbbeafSnn */ 3339acbbeafSnn ASSERT(p->p_tlist == p->p_tlist->t_forw); 3349acbbeafSnn 3359acbbeafSnn p->p_brand = bp; 336*fd9e7635Sedp ASSERT(PROC_IS_BRANDED(p)); 337*fd9e7635Sedp BROP(p)->b_setbrand(p); 338*fd9e7635Sedp } 339*fd9e7635Sedp 340*fd9e7635Sedp void 341*fd9e7635Sedp brand_clearbrand(proc_t *p) 342*fd9e7635Sedp { 343*fd9e7635Sedp brand_t *bp = p->p_zone->zone_brand; 344*fd9e7635Sedp ASSERT(bp != NULL); 345*fd9e7635Sedp 346*fd9e7635Sedp /* 347*fd9e7635Sedp * We should only be called from exec_common() or proc_exit(), 348*fd9e7635Sedp * when we know the process is single-threaded. 349*fd9e7635Sedp */ 350*fd9e7635Sedp ASSERT(p->p_tlist == p->p_tlist->t_forw); 351*fd9e7635Sedp 352*fd9e7635Sedp ASSERT(PROC_IS_BRANDED(p)); 353*fd9e7635Sedp BROP(p)->b_proc_exit(p, p->p_tlist->t_lwp); 354*fd9e7635Sedp p->p_brand = &native_brand; 3559acbbeafSnn } 35659f2ff5cSedp 35759f2ff5cSedp #if defined(__sparcv9) 35859f2ff5cSedp /* 359725deb8fSedp * Currently, only sparc has system level brand syscall interposition. 36059f2ff5cSedp * On x86 we're able to enable syscall interposition on a per-cpu basis 36159f2ff5cSedp * when a branded thread is scheduled to run on a cpu. 36259f2ff5cSedp */ 36359f2ff5cSedp 36459f2ff5cSedp /* Local variables needed for dynamic syscall interposition support */ 36559f2ff5cSedp static uint32_t syscall_trap_patch_instr_orig; 36659f2ff5cSedp static uint32_t syscall_trap32_patch_instr_orig; 36759f2ff5cSedp 36859f2ff5cSedp /* Trap Table syscall entry hot patch points */ 36959f2ff5cSedp extern void syscall_trap_patch_point(void); 37059f2ff5cSedp extern void syscall_trap32_patch_point(void); 37159f2ff5cSedp 37259f2ff5cSedp /* Alternate syscall entry handlers used when branded zones are running */ 37359f2ff5cSedp extern void syscall_wrapper(void); 37459f2ff5cSedp extern void syscall_wrapper32(void); 37559f2ff5cSedp 37659f2ff5cSedp /* Macros used to facilitate sparcv9 instruction generation */ 37759f2ff5cSedp #define BA_A_INSTR 0x30800000 /* ba,a addr */ 37859f2ff5cSedp #define DISP22(from, to) \ 37959f2ff5cSedp ((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff) 38059f2ff5cSedp 38159f2ff5cSedp /*ARGSUSED*/ 382725deb8fSedp static void 383725deb8fSedp brand_plat_interposition_enable(void) 38459f2ff5cSedp { 385725deb8fSedp ASSERT(MUTEX_HELD(&brand_list_lock)); 38659f2ff5cSedp 38759f2ff5cSedp /* 38859f2ff5cSedp * Before we hot patch the kernel save the current instructions 389725deb8fSedp * so that we can restore them later. 39059f2ff5cSedp */ 39159f2ff5cSedp syscall_trap_patch_instr_orig = 39259f2ff5cSedp *(uint32_t *)syscall_trap_patch_point; 39359f2ff5cSedp syscall_trap32_patch_instr_orig = 39459f2ff5cSedp *(uint32_t *)syscall_trap32_patch_point; 39559f2ff5cSedp 39659f2ff5cSedp /* 39759f2ff5cSedp * Modify the trap table at the patch points. 39859f2ff5cSedp * 39959f2ff5cSedp * We basically replace the first instruction at the patch 40059f2ff5cSedp * point with a ba,a instruction that will transfer control 40159f2ff5cSedp * to syscall_wrapper or syscall_wrapper32 for 64-bit and 40259f2ff5cSedp * 32-bit syscalls respectively. It's important to note that 40359f2ff5cSedp * the annul bit is set in the branch so we don't execute 40459f2ff5cSedp * the instruction directly following the one we're patching 40559f2ff5cSedp * during the branch's delay slot. 40659f2ff5cSedp * 40759f2ff5cSedp * It also doesn't matter that we're not atomically updating both 40859f2ff5cSedp * the 64 and 32 bit syscall paths at the same time since there's 40959f2ff5cSedp * no actual branded processes running on the system yet. 41059f2ff5cSedp */ 41159f2ff5cSedp hot_patch_kernel_text((caddr_t)syscall_trap_patch_point, 41259f2ff5cSedp BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper), 41359f2ff5cSedp 4); 41459f2ff5cSedp hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point, 41559f2ff5cSedp BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32), 41659f2ff5cSedp 4); 41759f2ff5cSedp } 41859f2ff5cSedp 41959f2ff5cSedp /*ARGSUSED*/ 420725deb8fSedp static void 421725deb8fSedp brand_plat_interposition_disable(void) 42259f2ff5cSedp { 423725deb8fSedp ASSERT(MUTEX_HELD(&brand_list_lock)); 42459f2ff5cSedp 42559f2ff5cSedp /* 42659f2ff5cSedp * Restore the original instructions at the trap table syscall 42759f2ff5cSedp * patch points to disable the brand syscall interposition 42859f2ff5cSedp * mechanism. 42959f2ff5cSedp */ 43059f2ff5cSedp hot_patch_kernel_text((caddr_t)syscall_trap_patch_point, 43159f2ff5cSedp syscall_trap_patch_instr_orig, 4); 43259f2ff5cSedp hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point, 43359f2ff5cSedp syscall_trap32_patch_instr_orig, 4); 43459f2ff5cSedp } 43559f2ff5cSedp #endif /* __sparcv9 */ 436