xref: /illumos-gate/usr/src/uts/common/os/brand.c (revision fd9e7635)
19acbbeafSnn /*
29acbbeafSnn  * CDDL HEADER START
39acbbeafSnn  *
49acbbeafSnn  * The contents of this file are subject to the terms of the
59acbbeafSnn  * Common Development and Distribution License (the "License").
69acbbeafSnn  * You may not use this file except in compliance with the License.
79acbbeafSnn  *
89acbbeafSnn  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
99acbbeafSnn  * or http://www.opensolaris.org/os/licensing.
109acbbeafSnn  * See the License for the specific language governing permissions
119acbbeafSnn  * and limitations under the License.
129acbbeafSnn  *
139acbbeafSnn  * When distributing Covered Code, include this CDDL HEADER in each
149acbbeafSnn  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
159acbbeafSnn  * If applicable, add the following below this CDDL HEADER, with the
169acbbeafSnn  * fields enclosed by brackets "[]" replaced with your own identifying
179acbbeafSnn  * information: Portions Copyright [yyyy] [name of copyright owner]
189acbbeafSnn  *
199acbbeafSnn  * CDDL HEADER END
209acbbeafSnn  */
219acbbeafSnn /*
22*fd9e7635Sedp  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
239acbbeafSnn  * Use is subject to license terms.
249acbbeafSnn  */
259acbbeafSnn 
269acbbeafSnn #pragma ident	"%Z%%M%	%I%	%E% SMI"
279acbbeafSnn 
289acbbeafSnn #include <sys/kmem.h>
299acbbeafSnn #include <sys/errno.h>
309acbbeafSnn #include <sys/systm.h>
319acbbeafSnn #include <sys/cmn_err.h>
329acbbeafSnn #include <sys/brand.h>
339acbbeafSnn #include <sys/machbrand.h>
349acbbeafSnn #include <sys/modctl.h>
359acbbeafSnn #include <sys/rwlock.h>
369acbbeafSnn #include <sys/zone.h>
379acbbeafSnn 
389acbbeafSnn #define	SUPPORTED_BRAND_VERSION BRAND_VER_1
399acbbeafSnn 
409acbbeafSnn #if defined(__sparcv9)
41725deb8fSedp /* sparcv9 uses system wide brand interposition hooks */
42725deb8fSedp static void brand_plat_interposition_enable(void);
43725deb8fSedp static void brand_plat_interposition_disable(void);
44725deb8fSedp 
459acbbeafSnn struct brand_mach_ops native_mach_ops  = {
469acbbeafSnn 		NULL, NULL
479acbbeafSnn };
4859f2ff5cSedp #else /* !__sparcv9 */
499acbbeafSnn struct brand_mach_ops native_mach_ops  = {
509acbbeafSnn 		NULL, NULL, NULL, NULL, NULL, NULL
519acbbeafSnn };
5259f2ff5cSedp #endif /* !__sparcv9 */
539acbbeafSnn 
549acbbeafSnn brand_t native_brand = {
559acbbeafSnn 		BRAND_VER_1,
569acbbeafSnn 		"native",
579acbbeafSnn 		NULL,
589acbbeafSnn 		&native_mach_ops
599acbbeafSnn };
609acbbeafSnn 
619acbbeafSnn /*
629acbbeafSnn  * Used to maintain a list of all the brands currently loaded into the
639acbbeafSnn  * kernel.
649acbbeafSnn  */
659acbbeafSnn struct brand_list {
669acbbeafSnn 	int			bl_refcnt;
679acbbeafSnn 	struct brand_list	*bl_next;
689acbbeafSnn 	brand_t			*bl_brand;
699acbbeafSnn };
709acbbeafSnn 
719acbbeafSnn static struct brand_list *brand_list = NULL;
729acbbeafSnn 
739acbbeafSnn /*
749acbbeafSnn  * This lock protects the integrity of the brand list.
759acbbeafSnn  */
769acbbeafSnn static kmutex_t brand_list_lock;
779acbbeafSnn 
789acbbeafSnn void
799acbbeafSnn brand_init()
809acbbeafSnn {
819acbbeafSnn 	mutex_init(&brand_list_lock, NULL, MUTEX_DEFAULT, NULL);
829acbbeafSnn 	p0.p_brand = &native_brand;
839acbbeafSnn }
849acbbeafSnn 
859acbbeafSnn int
869acbbeafSnn brand_register(brand_t *brand)
879acbbeafSnn {
889acbbeafSnn 	struct brand_list *list, *scan;
899acbbeafSnn 
909acbbeafSnn 	if (brand == NULL)
919acbbeafSnn 		return (EINVAL);
929acbbeafSnn 
939acbbeafSnn 	if (is_system_labeled()) {
949acbbeafSnn 		cmn_err(CE_WARN,
959acbbeafSnn 		    "Branded zones are not allowed on labeled systems.");
969acbbeafSnn 		return (EINVAL);
979acbbeafSnn 	}
989acbbeafSnn 
999acbbeafSnn 	if (brand->b_version != SUPPORTED_BRAND_VERSION) {
1009acbbeafSnn 		if (brand->b_version < SUPPORTED_BRAND_VERSION) {
1019acbbeafSnn 			cmn_err(CE_WARN,
1029acbbeafSnn 			    "brand '%s' was built to run on older versions "
1039acbbeafSnn 			    "of Solaris.",
1049acbbeafSnn 			    brand->b_name);
1059acbbeafSnn 		} else {
1069acbbeafSnn 			cmn_err(CE_WARN,
1079acbbeafSnn 			    "brand '%s' was built to run on a newer version "
1089acbbeafSnn 			    "of Solaris.",
1099acbbeafSnn 			    brand->b_name);
1109acbbeafSnn 		}
1119acbbeafSnn 		return (EINVAL);
1129acbbeafSnn 	}
1139acbbeafSnn 
1149acbbeafSnn 	/* Sanity checks */
1159acbbeafSnn 	if (brand->b_name == NULL || brand->b_ops == NULL ||
1169acbbeafSnn 	    brand->b_ops->b_brandsys == NULL) {
1179acbbeafSnn 		cmn_err(CE_WARN, "Malformed brand");
1189acbbeafSnn 		return (EINVAL);
1199acbbeafSnn 	}
1209acbbeafSnn 
1219acbbeafSnn 	list = kmem_alloc(sizeof (struct brand_list), KM_SLEEP);
1229acbbeafSnn 
1239acbbeafSnn 	/* Add the brand to the list of loaded brands. */
1249acbbeafSnn 	mutex_enter(&brand_list_lock);
1259acbbeafSnn 
1269acbbeafSnn 	/*
1279acbbeafSnn 	 * Check to be sure we haven't already registered this brand.
1289acbbeafSnn 	 */
1299acbbeafSnn 	for (scan = brand_list; scan != NULL; scan = scan->bl_next) {
1309acbbeafSnn 		if (strcmp(brand->b_name, scan->bl_brand->b_name) == 0) {
1319acbbeafSnn 			cmn_err(CE_WARN,
1329acbbeafSnn 			    "Invalid attempt to load a second instance of "
1339acbbeafSnn 			    "brand %s", brand->b_name);
1349acbbeafSnn 			mutex_exit(&brand_list_lock);
1359acbbeafSnn 			kmem_free(list, sizeof (struct brand_list));
1369acbbeafSnn 			return (EINVAL);
1379acbbeafSnn 		}
1389acbbeafSnn 	}
1399acbbeafSnn 
140725deb8fSedp #if defined(__sparcv9)
141725deb8fSedp 	/* sparcv9 uses system wide brand interposition hooks */
142725deb8fSedp 	if (brand_list == NULL)
143725deb8fSedp 		brand_plat_interposition_enable();
144725deb8fSedp #endif /* __sparcv9 */
145725deb8fSedp 
1469acbbeafSnn 	list->bl_brand = brand;
1479acbbeafSnn 	list->bl_refcnt = 0;
1489acbbeafSnn 	list->bl_next = brand_list;
1499acbbeafSnn 	brand_list = list;
150725deb8fSedp 
1519acbbeafSnn 	mutex_exit(&brand_list_lock);
1529acbbeafSnn 
1539acbbeafSnn 	return (0);
1549acbbeafSnn }
1559acbbeafSnn 
1569acbbeafSnn /*
1579acbbeafSnn  * The kernel module implementing this brand is being unloaded, so remove
1589acbbeafSnn  * it from the list of active brands.
1599acbbeafSnn  */
1609acbbeafSnn int
1619acbbeafSnn brand_unregister(brand_t *brand)
1629acbbeafSnn {
1639acbbeafSnn 	struct brand_list *list, *prev;
1649acbbeafSnn 
1659acbbeafSnn 	/* Sanity checks */
1669acbbeafSnn 	if (brand == NULL || brand->b_name == NULL) {
1679acbbeafSnn 		cmn_err(CE_WARN, "Malformed brand");
1689acbbeafSnn 		return (EINVAL);
1699acbbeafSnn 	}
1709acbbeafSnn 
1719acbbeafSnn 	prev = NULL;
1729acbbeafSnn 	mutex_enter(&brand_list_lock);
1739acbbeafSnn 
1749acbbeafSnn 	for (list = brand_list; list != NULL; list = list->bl_next) {
1759acbbeafSnn 		if (list->bl_brand == brand)
1769acbbeafSnn 			break;
1779acbbeafSnn 		prev = list;
1789acbbeafSnn 	}
1799acbbeafSnn 
1809acbbeafSnn 	if (list == NULL) {
1819acbbeafSnn 		cmn_err(CE_WARN, "Brand %s wasn't registered", brand->b_name);
1829acbbeafSnn 		mutex_exit(&brand_list_lock);
1839acbbeafSnn 		return (EINVAL);
1849acbbeafSnn 	}
1859acbbeafSnn 
1869acbbeafSnn 	if (list->bl_refcnt > 0) {
1879acbbeafSnn 		cmn_err(CE_WARN, "Unregistering brand %s which is still in use",
1889acbbeafSnn 		    brand->b_name);
1899acbbeafSnn 		mutex_exit(&brand_list_lock);
1909acbbeafSnn 		return (EBUSY);
1919acbbeafSnn 	}
1929acbbeafSnn 
1939acbbeafSnn 	/* Remove brand from the list */
1949acbbeafSnn 	if (prev != NULL)
1959acbbeafSnn 		prev->bl_next = list->bl_next;
1969acbbeafSnn 	else
1979acbbeafSnn 		brand_list = list->bl_next;
1989acbbeafSnn 
199725deb8fSedp #if defined(__sparcv9)
200725deb8fSedp 	/* sparcv9 uses system wide brand interposition hooks */
201725deb8fSedp 	if (brand_list == NULL)
202725deb8fSedp 		brand_plat_interposition_disable();
203725deb8fSedp #endif /* __sparcv9 */
204725deb8fSedp 
2059acbbeafSnn 	mutex_exit(&brand_list_lock);
2069acbbeafSnn 
2079acbbeafSnn 	kmem_free(list, sizeof (struct brand_list));
2089acbbeafSnn 
2099acbbeafSnn 	return (0);
2109acbbeafSnn }
2119acbbeafSnn 
2129acbbeafSnn /*
2139acbbeafSnn  * Record that a zone of this brand has been instantiated.  If the kernel
2149acbbeafSnn  * module implementing this brand's functionality is not present, this
2159acbbeafSnn  * routine attempts to load the module as a side effect.
2169acbbeafSnn  */
2179acbbeafSnn brand_t *
2189acbbeafSnn brand_register_zone(struct brand_attr *attr)
2199acbbeafSnn {
2209acbbeafSnn 	struct brand_list *l = NULL;
2219acbbeafSnn 	ddi_modhandle_t	hdl = NULL;
2229acbbeafSnn 	char *modname;
2239acbbeafSnn 	int err = 0;
2249acbbeafSnn 
2259acbbeafSnn 	if (is_system_labeled()) {
2269acbbeafSnn 		cmn_err(CE_WARN,
2279acbbeafSnn 		    "Branded zones are not allowed on labeled systems.");
2289acbbeafSnn 		return (NULL);
2299acbbeafSnn 	}
2309acbbeafSnn 
2319acbbeafSnn 	/*
2329acbbeafSnn 	 * We make at most two passes through this loop.  The first time
2339acbbeafSnn 	 * through, we're looking to see if this is a new user of an
2349acbbeafSnn 	 * already loaded brand.  If the brand hasn't been loaded, we
2359acbbeafSnn 	 * call ddi_modopen() to force it to be loaded and then make a
2369acbbeafSnn 	 * second pass through the list of brands.  If we don't find the
2379acbbeafSnn 	 * brand the second time through it means that the modname
2389acbbeafSnn 	 * specified in the brand_attr structure doesn't provide the brand
2399acbbeafSnn 	 * specified in the brandname field.  This would suggest a bug in
2409acbbeafSnn 	 * the brand's config.xml file.  We close the module and return
2419acbbeafSnn 	 * 'NULL' to the caller.
2429acbbeafSnn 	 */
2439acbbeafSnn 	for (;;) {
2449acbbeafSnn 		/*
2459acbbeafSnn 		 * Search list of loaded brands
2469acbbeafSnn 		 */
2479acbbeafSnn 		mutex_enter(&brand_list_lock);
2489acbbeafSnn 		for (l = brand_list; l != NULL; l = l->bl_next)
2499acbbeafSnn 			if (strcmp(attr->ba_brandname,
2509acbbeafSnn 			    l->bl_brand->b_name) == 0)
2519acbbeafSnn 				break;
2529acbbeafSnn 		if ((l != NULL) || (hdl != NULL))
2539acbbeafSnn 			break;
2549acbbeafSnn 		mutex_exit(&brand_list_lock);
2559acbbeafSnn 
2569acbbeafSnn 		/*
2579acbbeafSnn 		 * We didn't find that the requested brand has been loaded
2589acbbeafSnn 		 * yet, so we trigger the load of the appropriate kernel
2599acbbeafSnn 		 * module and search the list again.
2609acbbeafSnn 		 */
2619acbbeafSnn 		modname = kmem_alloc(MAXPATHLEN, KM_SLEEP);
2629acbbeafSnn 		(void) strcpy(modname, "brand/");
2639acbbeafSnn 		(void) strcat(modname, attr->ba_modname);
2649acbbeafSnn 		hdl = ddi_modopen(modname, KRTLD_MODE_FIRST, &err);
2659acbbeafSnn 		kmem_free(modname, MAXPATHLEN);
2669acbbeafSnn 
2679acbbeafSnn 		if (err != 0)
2689acbbeafSnn 			return (NULL);
2699acbbeafSnn 	}
2709acbbeafSnn 
2719acbbeafSnn 	/*
2729acbbeafSnn 	 * If we found the matching brand, bump its reference count.
2739acbbeafSnn 	 */
2749acbbeafSnn 	if (l != NULL)
2759acbbeafSnn 		l->bl_refcnt++;
2769acbbeafSnn 
2779acbbeafSnn 	mutex_exit(&brand_list_lock);
2789acbbeafSnn 
2799acbbeafSnn 	if (hdl != NULL)
2809acbbeafSnn 		(void) ddi_modclose(hdl);
2819acbbeafSnn 
2829acbbeafSnn 	return ((l != NULL) ? l->bl_brand : NULL);
2839acbbeafSnn }
2849acbbeafSnn 
2859acbbeafSnn /*
2869acbbeafSnn  * Return the number of zones currently using this brand.
2879acbbeafSnn  */
2889acbbeafSnn int
2899acbbeafSnn brand_zone_count(struct brand *bp)
2909acbbeafSnn {
2919acbbeafSnn 	struct brand_list *l;
2929acbbeafSnn 	int cnt = 0;
2939acbbeafSnn 
2949acbbeafSnn 	mutex_enter(&brand_list_lock);
2959acbbeafSnn 	for (l = brand_list; l != NULL; l = l->bl_next)
2969acbbeafSnn 		if (l->bl_brand == bp) {
2979acbbeafSnn 			cnt = l->bl_refcnt;
2989acbbeafSnn 			break;
2999acbbeafSnn 		}
3009acbbeafSnn 	mutex_exit(&brand_list_lock);
3019acbbeafSnn 
3029acbbeafSnn 	return (cnt);
3039acbbeafSnn }
3049acbbeafSnn 
3059acbbeafSnn void
3069acbbeafSnn brand_unregister_zone(struct brand *bp)
3079acbbeafSnn {
3089acbbeafSnn 	struct brand_list *list;
3099acbbeafSnn 
3109acbbeafSnn 	mutex_enter(&brand_list_lock);
3119acbbeafSnn 	for (list = brand_list; list != NULL; list = list->bl_next) {
3129acbbeafSnn 		if (list->bl_brand == bp) {
3139acbbeafSnn 			ASSERT(list->bl_refcnt > 0);
3149acbbeafSnn 			list->bl_refcnt--;
3159acbbeafSnn 			break;
3169acbbeafSnn 		}
3179acbbeafSnn 	}
3189acbbeafSnn 	mutex_exit(&brand_list_lock);
3199acbbeafSnn }
3209acbbeafSnn 
3219acbbeafSnn void
3229acbbeafSnn brand_setbrand(proc_t *p)
3239acbbeafSnn {
3249acbbeafSnn 	brand_t *bp = p->p_zone->zone_brand;
3259acbbeafSnn 
3269acbbeafSnn 	ASSERT(bp != NULL);
3279acbbeafSnn 	ASSERT(p->p_brand == &native_brand);
3289acbbeafSnn 
3299acbbeafSnn 	/*
3309acbbeafSnn 	 * We should only be called from exec(), when we know the process
3319acbbeafSnn 	 * is single-threaded.
3329acbbeafSnn 	 */
3339acbbeafSnn 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
3349acbbeafSnn 
3359acbbeafSnn 	p->p_brand = bp;
336*fd9e7635Sedp 	ASSERT(PROC_IS_BRANDED(p));
337*fd9e7635Sedp 	BROP(p)->b_setbrand(p);
338*fd9e7635Sedp }
339*fd9e7635Sedp 
340*fd9e7635Sedp void
341*fd9e7635Sedp brand_clearbrand(proc_t *p)
342*fd9e7635Sedp {
343*fd9e7635Sedp 	brand_t *bp = p->p_zone->zone_brand;
344*fd9e7635Sedp 	ASSERT(bp != NULL);
345*fd9e7635Sedp 
346*fd9e7635Sedp 	/*
347*fd9e7635Sedp 	 * We should only be called from exec_common() or proc_exit(),
348*fd9e7635Sedp 	 * when we know the process is single-threaded.
349*fd9e7635Sedp 	 */
350*fd9e7635Sedp 	ASSERT(p->p_tlist == p->p_tlist->t_forw);
351*fd9e7635Sedp 
352*fd9e7635Sedp 	ASSERT(PROC_IS_BRANDED(p));
353*fd9e7635Sedp 	BROP(p)->b_proc_exit(p, p->p_tlist->t_lwp);
354*fd9e7635Sedp 	p->p_brand = &native_brand;
3559acbbeafSnn }
35659f2ff5cSedp 
35759f2ff5cSedp #if defined(__sparcv9)
/*
 * Currently, only sparc has system level brand syscall interposition.
 * On x86 we're able to enable syscall interposition on a per-cpu basis
 * when a branded thread is scheduled to run on a cpu.
 */

/* Trap Table syscall entry hot patch points */
extern void	syscall_trap_patch_point(void);
extern void	syscall_trap32_patch_point(void);

/* Alternate syscall entry handlers used when branded zones are running */
extern void	syscall_wrapper(void);
extern void	syscall_wrapper32(void);

/*
 * Instructions found at the two patch points before interposition was
 * enabled, kept so that they can be written back when it is disabled.
 */
static uint32_t	syscall_trap_patch_instr_orig;
static uint32_t	syscall_trap32_patch_instr_orig;

/*
 * Macros used to facilitate sparcv9 instruction generation.
 *
 * BA_A_INSTR is a "ba,a" instruction word with an empty displacement
 * field.  DISP22(from, to) is the byte distance from 'from' to 'to'
 * divided by four and cut down to the low 22 bits, ready to be OR-ed
 * into that field.
 */
#define	BA_A_INSTR	0x30800000	/* ba,a addr */
#define	DISP22(from, to) \
	((((uintptr_t)(to) - (uintptr_t)(from)) >> 2) & 0x3fffff)
38059f2ff5cSedp 
38159f2ff5cSedp /*ARGSUSED*/
382725deb8fSedp static void
383725deb8fSedp brand_plat_interposition_enable(void)
38459f2ff5cSedp {
385725deb8fSedp 	ASSERT(MUTEX_HELD(&brand_list_lock));
38659f2ff5cSedp 
38759f2ff5cSedp 	/*
38859f2ff5cSedp 	 * Before we hot patch the kernel save the current instructions
389725deb8fSedp 	 * so that we can restore them later.
39059f2ff5cSedp 	 */
39159f2ff5cSedp 	syscall_trap_patch_instr_orig =
39259f2ff5cSedp 	    *(uint32_t *)syscall_trap_patch_point;
39359f2ff5cSedp 	syscall_trap32_patch_instr_orig =
39459f2ff5cSedp 	    *(uint32_t *)syscall_trap32_patch_point;
39559f2ff5cSedp 
39659f2ff5cSedp 	/*
39759f2ff5cSedp 	 * Modify the trap table at the patch points.
39859f2ff5cSedp 	 *
39959f2ff5cSedp 	 * We basically replace the first instruction at the patch
40059f2ff5cSedp 	 * point with a ba,a instruction that will transfer control
40159f2ff5cSedp 	 * to syscall_wrapper or syscall_wrapper32 for 64-bit and
40259f2ff5cSedp 	 * 32-bit syscalls respectively.  It's important to note that
40359f2ff5cSedp 	 * the annul bit is set in the branch so we don't execute
40459f2ff5cSedp 	 * the instruction directly following the one we're patching
40559f2ff5cSedp 	 * during the branch's delay slot.
40659f2ff5cSedp 	 *
40759f2ff5cSedp 	 * It also doesn't matter that we're not atomically updating both
40859f2ff5cSedp 	 * the 64 and 32 bit syscall paths at the same time since there's
40959f2ff5cSedp 	 * no actual branded processes running on the system yet.
41059f2ff5cSedp 	 */
41159f2ff5cSedp 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
41259f2ff5cSedp 	    BA_A_INSTR | DISP22(syscall_trap_patch_point, syscall_wrapper),
41359f2ff5cSedp 	    4);
41459f2ff5cSedp 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
41559f2ff5cSedp 	    BA_A_INSTR | DISP22(syscall_trap32_patch_point, syscall_wrapper32),
41659f2ff5cSedp 	    4);
41759f2ff5cSedp }
41859f2ff5cSedp 
41959f2ff5cSedp /*ARGSUSED*/
420725deb8fSedp static void
421725deb8fSedp brand_plat_interposition_disable(void)
42259f2ff5cSedp {
423725deb8fSedp 	ASSERT(MUTEX_HELD(&brand_list_lock));
42459f2ff5cSedp 
42559f2ff5cSedp 	/*
42659f2ff5cSedp 	 * Restore the original instructions at the trap table syscall
42759f2ff5cSedp 	 * patch points to disable the brand syscall interposition
42859f2ff5cSedp 	 * mechanism.
42959f2ff5cSedp 	 */
43059f2ff5cSedp 	hot_patch_kernel_text((caddr_t)syscall_trap_patch_point,
43159f2ff5cSedp 	    syscall_trap_patch_instr_orig, 4);
43259f2ff5cSedp 	hot_patch_kernel_text((caddr_t)syscall_trap32_patch_point,
43359f2ff5cSedp 	    syscall_trap32_patch_instr_orig, 4);
43459f2ff5cSedp }
43559f2ff5cSedp #endif /* __sparcv9 */
436