xref: /illumos-gate/usr/src/uts/common/disp/cpupart.c (revision 3df2e8b2)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5e824d57fSjohnlev  * Common Development and Distribution License (the "License").
6e824d57fSjohnlev  * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate  *
87c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate  * and limitations under the License.
127c478bd9Sstevel@tonic-gate  *
137c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate  *
197c478bd9Sstevel@tonic-gate  * CDDL HEADER END
207c478bd9Sstevel@tonic-gate  */
217c478bd9Sstevel@tonic-gate /*
220542eecfSRafael Vanoni  * Copyright (c) 1996, 2010, Oracle and/or its affiliates. All rights reserved.
23455e370cSJohn Levon  *
24455e370cSJohn Levon  * Copyright 2018 Joyent, Inc.
252ee85738SJohn Gallagher  * Copyright (c) 2017 by Delphix. All rights reserved.
267c478bd9Sstevel@tonic-gate  */
277c478bd9Sstevel@tonic-gate 
287c478bd9Sstevel@tonic-gate #include <sys/types.h>
297c478bd9Sstevel@tonic-gate #include <sys/systm.h>
307c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
317c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
327c478bd9Sstevel@tonic-gate #include <sys/thread.h>
337c478bd9Sstevel@tonic-gate #include <sys/disp.h>
347c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
357c478bd9Sstevel@tonic-gate #include <sys/debug.h>
367c478bd9Sstevel@tonic-gate #include <sys/cpupart.h>
377c478bd9Sstevel@tonic-gate #include <sys/pset.h>
387c478bd9Sstevel@tonic-gate #include <sys/var.h>
397c478bd9Sstevel@tonic-gate #include <sys/cyclic.h>
407c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
41fb2f18f8Sesaxe #include <sys/pghw.h>
427c478bd9Sstevel@tonic-gate #include <sys/loadavg.h>
437c478bd9Sstevel@tonic-gate #include <sys/class.h>
447c478bd9Sstevel@tonic-gate #include <sys/fss.h>
457c478bd9Sstevel@tonic-gate #include <sys/pool.h>
467c478bd9Sstevel@tonic-gate #include <sys/pool_pset.h>
477c478bd9Sstevel@tonic-gate #include <sys/policy.h>
487c478bd9Sstevel@tonic-gate 
497c478bd9Sstevel@tonic-gate /*
507c478bd9Sstevel@tonic-gate  * Calling pool_lock() protects the pools configuration, which includes
517c478bd9Sstevel@tonic-gate  * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
527c478bd9Sstevel@tonic-gate  * partitions from being created or destroyed while the lock is held.
537c478bd9Sstevel@tonic-gate  * The lock ordering with respect to related locks is:
547c478bd9Sstevel@tonic-gate  *
557c478bd9Sstevel@tonic-gate  *    pool_lock() ---> cpu_lock  --->  pidlock  -->  p_lock
567c478bd9Sstevel@tonic-gate  *
577c478bd9Sstevel@tonic-gate  * Blocking memory allocations may be made while holding "pool_lock"
587c478bd9Sstevel@tonic-gate  * or cpu_lock.
597c478bd9Sstevel@tonic-gate  */
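/*
 * A minimal sketch (assuming the standard pool_lock()/pool_unlock() and
 * mutex_enter()/mutex_exit() interfaces) of a caller honoring the ordering
 * above when it needs both the pools configuration and the partition list:
 *
 *	pool_lock();
 *	mutex_enter(&cpu_lock);
 *	...examine or modify partitions...
 *	mutex_exit(&cpu_lock);
 *	pool_unlock();
 */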
607c478bd9Sstevel@tonic-gate 
617c478bd9Sstevel@tonic-gate /*
627c478bd9Sstevel@tonic-gate  * The cp_default partition is allocated statically, but its lgroup load average
637c478bd9Sstevel@tonic-gate  * (lpl) list is allocated dynamically after the kmem subsystem is initialized. This
647c478bd9Sstevel@tonic-gate  * saves some memory since the space allocated reflects the actual number of
657c478bd9Sstevel@tonic-gate  * lgroups supported by the platform. The lgrp facility provides a temporary
667c478bd9Sstevel@tonic-gate  * space to hold lpl information during system bootstrap.
677c478bd9Sstevel@tonic-gate  */
687c478bd9Sstevel@tonic-gate 
697c478bd9Sstevel@tonic-gate cpupart_t		*cp_list_head;
707c478bd9Sstevel@tonic-gate cpupart_t		cp_default;
717c478bd9Sstevel@tonic-gate static cpupartid_t	cp_id_next;
727c478bd9Sstevel@tonic-gate uint_t			cp_numparts;
737c478bd9Sstevel@tonic-gate uint_t			cp_numparts_nonempty;
747c478bd9Sstevel@tonic-gate 
757c478bd9Sstevel@tonic-gate /*
767c478bd9Sstevel@tonic-gate  * Need to limit total number of partitions to avoid slowing down the
777c478bd9Sstevel@tonic-gate  * clock code too much.  The clock code traverses the list of
787c478bd9Sstevel@tonic-gate  * partitions and needs to be able to execute in a reasonable amount
797c478bd9Sstevel@tonic-gate  * of time (less than 1/hz seconds).  The maximum is sized based on
807c478bd9Sstevel@tonic-gate  * max_ncpus so it shouldn't be a problem unless there are large
817c478bd9Sstevel@tonic-gate  * numbers of empty partitions.
827c478bd9Sstevel@tonic-gate  */
837c478bd9Sstevel@tonic-gate static uint_t		cp_max_numparts;
847c478bd9Sstevel@tonic-gate 
857c478bd9Sstevel@tonic-gate /*
867c478bd9Sstevel@tonic-gate  * Processor sets and CPU partitions are different but related concepts.
877c478bd9Sstevel@tonic-gate  * A processor set is a user-level abstraction allowing users to create
887c478bd9Sstevel@tonic-gate  * sets of CPUs and bind threads exclusively to those sets.  A CPU
897c478bd9Sstevel@tonic-gate  * partition is a kernel dispatcher object consisting of a set of CPUs
907c478bd9Sstevel@tonic-gate  * and a global dispatch queue.  The processor set abstraction is
917c478bd9Sstevel@tonic-gate  * implemented via a CPU partition, and currently there is a 1-1
927c478bd9Sstevel@tonic-gate  * mapping between processor sets and partitions (excluding the default
937c478bd9Sstevel@tonic-gate  * partition, which is not visible as a processor set).  Hence, the
947c478bd9Sstevel@tonic-gate  * numbering for processor sets and CPU partitions is identical.  This
957c478bd9Sstevel@tonic-gate  * may not always be true in the future, and these macros could become
967c478bd9Sstevel@tonic-gate  * less trivial if we support e.g. a processor set containing multiple
977c478bd9Sstevel@tonic-gate  * CPU partitions.
987c478bd9Sstevel@tonic-gate  */
997c478bd9Sstevel@tonic-gate #define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
1007c478bd9Sstevel@tonic-gate #define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
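/*
 * A short illustration of the mapping these macros implement: PSTOCP(PS_NONE)
 * yields CP_DEFAULT and CPTOPS(CP_DEFAULT) yields PS_NONE, while every other
 * ID maps to itself, e.g. PSTOCP(1) == 1 and CPTOPS(1) == 1.
 */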
1017c478bd9Sstevel@tonic-gate 
1020b70c467Sakolb static int cpupart_unbind_threads(cpupart_t *, boolean_t);
1030b70c467Sakolb 
1047c478bd9Sstevel@tonic-gate /*
1057c478bd9Sstevel@tonic-gate  * Find a CPU partition given a processor set ID.
1067c478bd9Sstevel@tonic-gate  */
1077c478bd9Sstevel@tonic-gate static cpupart_t *
1087c478bd9Sstevel@tonic-gate cpupart_find_all(psetid_t psid)
1097c478bd9Sstevel@tonic-gate {
1107c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1117c478bd9Sstevel@tonic-gate 	cpupartid_t cpid = PSTOCP(psid);
1127c478bd9Sstevel@tonic-gate 
1137c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1147c478bd9Sstevel@tonic-gate 
1157c478bd9Sstevel@tonic-gate 	/* default partition not visible as a processor set */
1167c478bd9Sstevel@tonic-gate 	if (psid == CP_DEFAULT)
1177c478bd9Sstevel@tonic-gate 		return (NULL);
1187c478bd9Sstevel@tonic-gate 
1197c478bd9Sstevel@tonic-gate 	if (psid == PS_MYID)
1207c478bd9Sstevel@tonic-gate 		return (curthread->t_cpupart);
1217c478bd9Sstevel@tonic-gate 
1227c478bd9Sstevel@tonic-gate 	cp = cp_list_head;
1237c478bd9Sstevel@tonic-gate 	do {
1247c478bd9Sstevel@tonic-gate 		if (cp->cp_id == cpid)
1257c478bd9Sstevel@tonic-gate 			return (cp);
1267c478bd9Sstevel@tonic-gate 		cp = cp->cp_next;
1277c478bd9Sstevel@tonic-gate 	} while (cp != cp_list_head);
1287c478bd9Sstevel@tonic-gate 	return (NULL);
1297c478bd9Sstevel@tonic-gate }
1307c478bd9Sstevel@tonic-gate 
1317c478bd9Sstevel@tonic-gate /*
1327c478bd9Sstevel@tonic-gate  * Find a CPU partition given a processor set ID if the processor set
1337c478bd9Sstevel@tonic-gate  * should be visible from the calling zone.
1347c478bd9Sstevel@tonic-gate  */
1357c478bd9Sstevel@tonic-gate cpupart_t *
1367c478bd9Sstevel@tonic-gate cpupart_find(psetid_t psid)
1377c478bd9Sstevel@tonic-gate {
1387c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1397c478bd9Sstevel@tonic-gate 
1407c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1417c478bd9Sstevel@tonic-gate 	cp = cpupart_find_all(psid);
1427c478bd9Sstevel@tonic-gate 	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
1437c478bd9Sstevel@tonic-gate 	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
1447c478bd9Sstevel@tonic-gate 			return (NULL);
1457c478bd9Sstevel@tonic-gate 	return (cp);
1467c478bd9Sstevel@tonic-gate }
1477c478bd9Sstevel@tonic-gate 
1487c478bd9Sstevel@tonic-gate static int
1497c478bd9Sstevel@tonic-gate cpupart_kstat_update(kstat_t *ksp, int rw)
1507c478bd9Sstevel@tonic-gate {
1517c478bd9Sstevel@tonic-gate 	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
1527c478bd9Sstevel@tonic-gate 	cpupart_kstat_t *cpksp = ksp->ks_data;
1537c478bd9Sstevel@tonic-gate 
1547c478bd9Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
1557c478bd9Sstevel@tonic-gate 		return (EACCES);
1567c478bd9Sstevel@tonic-gate 
1577c478bd9Sstevel@tonic-gate 	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
1587c478bd9Sstevel@tonic-gate 	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
1597c478bd9Sstevel@tonic-gate 	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
1607c478bd9Sstevel@tonic-gate 	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
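	/*
	 * The load averages below are presumably stored in cp_hp_avenrun as
	 * fixed point with 16 fractional bits; shifting right by (16 - FSHIFT)
	 * converts them to the FSHIFT-bit fixed point format exported here.
	 */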
1617c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
1627c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
1637c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
1647c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
1657c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
1667c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
1677c478bd9Sstevel@tonic-gate 	return (0);
1687c478bd9Sstevel@tonic-gate }
1697c478bd9Sstevel@tonic-gate 
1707c478bd9Sstevel@tonic-gate static void
1717c478bd9Sstevel@tonic-gate cpupart_kstat_create(cpupart_t *cp)
1727c478bd9Sstevel@tonic-gate {
1737c478bd9Sstevel@tonic-gate 	kstat_t *ksp;
1747c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
1757c478bd9Sstevel@tonic-gate 
1767c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1777c478bd9Sstevel@tonic-gate 
1787c478bd9Sstevel@tonic-gate 	/*
1797c478bd9Sstevel@tonic-gate 	 * We have a bit of a chicken-egg problem since this code will
1807c478bd9Sstevel@tonic-gate 	 * We have a bit of a chicken-and-egg problem since this code will
1817c478bd9Sstevel@tonic-gate 	 * pools framework gets initialized.  We circumvent the problem
1827c478bd9Sstevel@tonic-gate 	 * by special-casing cp_default.
1837c478bd9Sstevel@tonic-gate 	 */
1847c478bd9Sstevel@tonic-gate 	if (cp != &cp_default && pool_pset_enabled())
1857c478bd9Sstevel@tonic-gate 		zoneid = GLOBAL_ZONEID;
1867c478bd9Sstevel@tonic-gate 	else
1877c478bd9Sstevel@tonic-gate 		zoneid = ALL_ZONES;
1887c478bd9Sstevel@tonic-gate 	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
1897c478bd9Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED,
1907c478bd9Sstevel@tonic-gate 	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
1917c478bd9Sstevel@tonic-gate 	if (ksp != NULL) {
1927c478bd9Sstevel@tonic-gate 		cpupart_kstat_t *cpksp = ksp->ks_data;
1937c478bd9Sstevel@tonic-gate 
1947c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_updates, "updates",
1957c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1967c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_runnable, "runnable",
1977c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
1987c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_waiting, "waiting",
1997c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
2007c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
2017c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2027c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
2037c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2047c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
2057c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2067c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
2077c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
2087c478bd9Sstevel@tonic-gate 
2097c478bd9Sstevel@tonic-gate 		ksp->ks_update = cpupart_kstat_update;
2107c478bd9Sstevel@tonic-gate 		ksp->ks_private = cp;
2117c478bd9Sstevel@tonic-gate 
2127c478bd9Sstevel@tonic-gate 		kstat_install(ksp);
2137c478bd9Sstevel@tonic-gate 	}
2147c478bd9Sstevel@tonic-gate 	cp->cp_kstat = ksp;
2157c478bd9Sstevel@tonic-gate }
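/*
 * The kstats created above are named unix:<pset id>:pset, so (assuming the
 * usual kstat(1M) utility) a command such as "kstat -p unix:0:pset" should
 * display the default partition's counters and load averages.
 */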
2167c478bd9Sstevel@tonic-gate 
2176890d023SEric Saxe /*
2186890d023SEric Saxe  * Initialize the cpupart's lgrp partitions (lpls)
2196890d023SEric Saxe  */
2206890d023SEric Saxe static void
2216890d023SEric Saxe cpupart_lpl_initialize(cpupart_t *cp)
2226890d023SEric Saxe {
2236890d023SEric Saxe 	int i, sz;
2246890d023SEric Saxe 
2256890d023SEric Saxe 	sz = cp->cp_nlgrploads = lgrp_plat_max_lgrps();
2266890d023SEric Saxe 	cp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * sz, KM_SLEEP);
2276890d023SEric Saxe 
2286890d023SEric Saxe 	for (i = 0; i < sz; i++) {
2296890d023SEric Saxe 		/*
2306890d023SEric Saxe 		 * The last entry of the lpl's resource set is always NULL
2316890d023SEric Saxe 		 * by design (to facilitate iteration)...hence the "oversizing"
2326890d023SEric Saxe 		 * by 1.
2336890d023SEric Saxe 		 */
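		/*
		 * For example, with sz == 3 each lpl gets a four-entry rset
		 * and the last slot always remains NULL as the terminator.
		 */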
2346890d023SEric Saxe 		cp->cp_lgrploads[i].lpl_rset_sz = sz + 1;
2356890d023SEric Saxe 		cp->cp_lgrploads[i].lpl_rset =
2366890d023SEric Saxe 		    kmem_zalloc(sizeof (struct lgrp_ld *) * (sz + 1), KM_SLEEP);
2376890d023SEric Saxe 		cp->cp_lgrploads[i].lpl_id2rset =
2386890d023SEric Saxe 		    kmem_zalloc(sizeof (int) * (sz + 1), KM_SLEEP);
2396890d023SEric Saxe 		cp->cp_lgrploads[i].lpl_lgrpid = i;
2406890d023SEric Saxe 	}
2416890d023SEric Saxe }
2426890d023SEric Saxe 
2436890d023SEric Saxe /*
2446890d023SEric Saxe  * Tear down the cpupart's lgrp partitions
2456890d023SEric Saxe  */
2466890d023SEric Saxe static void
2476890d023SEric Saxe cpupart_lpl_teardown(cpupart_t *cp)
2486890d023SEric Saxe {
2496890d023SEric Saxe 	int i, sz;
2506890d023SEric Saxe 	lpl_t *lpl;
2516890d023SEric Saxe 
2526890d023SEric Saxe 	for (i = 0; i < cp->cp_nlgrploads; i++) {
2536890d023SEric Saxe 		lpl = &cp->cp_lgrploads[i];
2546890d023SEric Saxe 
2556890d023SEric Saxe 		sz = lpl->lpl_rset_sz;
2566890d023SEric Saxe 		kmem_free(lpl->lpl_rset, sizeof (struct lgrp_ld *) * sz);
2576890d023SEric Saxe 		kmem_free(lpl->lpl_id2rset, sizeof (int) * sz);
2586890d023SEric Saxe 		lpl->lpl_rset = NULL;
2596890d023SEric Saxe 		lpl->lpl_id2rset = NULL;
2606890d023SEric Saxe 	}
2616890d023SEric Saxe 	kmem_free(cp->cp_lgrploads, sizeof (lpl_t) * cp->cp_nlgrploads);
2626890d023SEric Saxe 	cp->cp_lgrploads = NULL;
2636890d023SEric Saxe }
2646890d023SEric Saxe 
2657c478bd9Sstevel@tonic-gate /*
2667c478bd9Sstevel@tonic-gate  * Initialize the default partition and kpreempt disp queue.
2677c478bd9Sstevel@tonic-gate  */
2687c478bd9Sstevel@tonic-gate void
2697c478bd9Sstevel@tonic-gate cpupart_initialize_default(void)
2707c478bd9Sstevel@tonic-gate {
2717c478bd9Sstevel@tonic-gate 	lgrp_id_t i;
2727c478bd9Sstevel@tonic-gate 
2737c478bd9Sstevel@tonic-gate 	cp_list_head = &cp_default;
2747c478bd9Sstevel@tonic-gate 	cp_default.cp_next = &cp_default;
2757c478bd9Sstevel@tonic-gate 	cp_default.cp_prev = &cp_default;
2767c478bd9Sstevel@tonic-gate 	cp_default.cp_id = CP_DEFAULT;
2777c478bd9Sstevel@tonic-gate 	cp_default.cp_kp_queue.disp_maxrunpri = -1;
2787c478bd9Sstevel@tonic-gate 	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
2797c478bd9Sstevel@tonic-gate 	cp_default.cp_kp_queue.disp_cpu = NULL;
2807c478bd9Sstevel@tonic-gate 	cp_default.cp_gen = 0;
2817c478bd9Sstevel@tonic-gate 	cp_default.cp_loadavg.lg_cur = 0;
2827c478bd9Sstevel@tonic-gate 	cp_default.cp_loadavg.lg_len = 0;
2837c478bd9Sstevel@tonic-gate 	cp_default.cp_loadavg.lg_total = 0;
2847c478bd9Sstevel@tonic-gate 	for (i = 0; i < S_LOADAVG_SZ; i++) {
2857c478bd9Sstevel@tonic-gate 		cp_default.cp_loadavg.lg_loads[i] = 0;
2867c478bd9Sstevel@tonic-gate 	}
2877c478bd9Sstevel@tonic-gate 	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
2887c478bd9Sstevel@tonic-gate 	cp_id_next = CP_DEFAULT + 1;
2897c478bd9Sstevel@tonic-gate 	cpupart_kstat_create(&cp_default);
2907c478bd9Sstevel@tonic-gate 	cp_numparts = 1;
2917c478bd9Sstevel@tonic-gate 	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
2927c478bd9Sstevel@tonic-gate 		cp_max_numparts = max_ncpus * 2 + 1;
2937c478bd9Sstevel@tonic-gate 	/*
2947c478bd9Sstevel@tonic-gate 	 * Allocate space for cp_default list of lgrploads
2957c478bd9Sstevel@tonic-gate 	 */
2966890d023SEric Saxe 	cpupart_lpl_initialize(&cp_default);
2977c478bd9Sstevel@tonic-gate 
2987c478bd9Sstevel@tonic-gate 	/*
2997c478bd9Sstevel@tonic-gate 	 * The initial lpl topology is created in a special lpl list
3007c478bd9Sstevel@tonic-gate 	 * lpl_bootstrap. It should be copied to cp_default.
3017c478bd9Sstevel@tonic-gate 	 * NOTE: lpl_topo_bootstrap() also updates CPU0 cpu_lpl pointer to point
3027c478bd9Sstevel@tonic-gate 	 *	 to the correct lpl in the cp_default.cp_lgrploads list.
3037c478bd9Sstevel@tonic-gate 	 */
3047c478bd9Sstevel@tonic-gate 	lpl_topo_bootstrap(cp_default.cp_lgrploads,
3057c478bd9Sstevel@tonic-gate 	    cp_default.cp_nlgrploads);
3067c478bd9Sstevel@tonic-gate 
3076890d023SEric Saxe 
3087c478bd9Sstevel@tonic-gate 	cp_default.cp_attr = PSET_NOESCAPE;
3097c478bd9Sstevel@tonic-gate 	cp_numparts_nonempty = 1;
3107c478bd9Sstevel@tonic-gate 	/*
3117c478bd9Sstevel@tonic-gate 	 * Set t0's home
3127c478bd9Sstevel@tonic-gate 	 */
3137c478bd9Sstevel@tonic-gate 	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];
314fb2f18f8Sesaxe 
315fb2f18f8Sesaxe 	bitset_init(&cp_default.cp_cmt_pgs);
3160542eecfSRafael Vanoni 	bitset_init_fanout(&cp_default.cp_haltset, cp_haltset_fanout);
3170542eecfSRafael Vanoni 
3186890d023SEric Saxe 	bitset_resize(&cp_default.cp_haltset, max_ncpus);
3197c478bd9Sstevel@tonic-gate }
3207c478bd9Sstevel@tonic-gate 
3217c478bd9Sstevel@tonic-gate 
3227c478bd9Sstevel@tonic-gate static int
3237c478bd9Sstevel@tonic-gate cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
3247c478bd9Sstevel@tonic-gate {
3257c478bd9Sstevel@tonic-gate 	cpupart_t *oldpp;
3267c478bd9Sstevel@tonic-gate 	cpu_t	*ncp, *newlist;
3277c478bd9Sstevel@tonic-gate 	kthread_t *t;
3287c478bd9Sstevel@tonic-gate 	int	move_threads = 1;
3297c478bd9Sstevel@tonic-gate 	lgrp_id_t lgrpid;
330455e370cSJohn Levon 	proc_t	*p;
3317c478bd9Sstevel@tonic-gate 	int lgrp_diff_lpl;
3327c478bd9Sstevel@tonic-gate 	lpl_t	*cpu_lpl;
3337c478bd9Sstevel@tonic-gate 	int	ret;
3340b70c467Sakolb 	boolean_t unbind_all_threads = (forced != 0);
3357c478bd9Sstevel@tonic-gate 
3367c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
3377c478bd9Sstevel@tonic-gate 	ASSERT(newpp != NULL);
3387c478bd9Sstevel@tonic-gate 
3397c478bd9Sstevel@tonic-gate 	oldpp = cp->cpu_part;
3407c478bd9Sstevel@tonic-gate 	ASSERT(oldpp != NULL);
3417c478bd9Sstevel@tonic-gate 	ASSERT(oldpp->cp_ncpus > 0);
3427c478bd9Sstevel@tonic-gate 
3437c478bd9Sstevel@tonic-gate 	if (newpp == oldpp) {
3447c478bd9Sstevel@tonic-gate 		/*
3457c478bd9Sstevel@tonic-gate 		 * Don't need to do anything.
3467c478bd9Sstevel@tonic-gate 		 */
3477c478bd9Sstevel@tonic-gate 		return (0);
3487c478bd9Sstevel@tonic-gate 	}
3497c478bd9Sstevel@tonic-gate 
3507c478bd9Sstevel@tonic-gate 	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);
3517c478bd9Sstevel@tonic-gate 
3527c478bd9Sstevel@tonic-gate 	if (!disp_bound_partition(cp, 0)) {
3537c478bd9Sstevel@tonic-gate 		/*
3547c478bd9Sstevel@tonic-gate 		 * Don't need to move threads if there are no threads in
3557c478bd9Sstevel@tonic-gate 		 * the partition.  Note that threads can't enter the
3567c478bd9Sstevel@tonic-gate 		 * partition while we're holding cpu_lock.
3577c478bd9Sstevel@tonic-gate 		 */
3587c478bd9Sstevel@tonic-gate 		move_threads = 0;
3597c478bd9Sstevel@tonic-gate 	} else if (oldpp->cp_ncpus == 1) {
3600b70c467Sakolb 		/*
3610b70c467Sakolb 		 * The last CPU is being removed from a partition that has threads
3620b70c467Sakolb 		 * running in it. Some of these threads may be bound to this
3630b70c467Sakolb 		 * CPU.
3640b70c467Sakolb 		 *
3650b70c467Sakolb 		 * Attempt to unbind threads from the CPU and from the processor
3660b70c467Sakolb 		 * set. Note that no threads should be bound to this CPU since
3670b70c467Sakolb 		 * cpupart_move_threads will refuse to move bound threads to
3680b70c467Sakolb 		 * other CPUs.
3690b70c467Sakolb 		 */
3700b70c467Sakolb 		(void) cpu_unbind(oldpp->cp_cpulist->cpu_id, B_FALSE);
3710b70c467Sakolb 		(void) cpupart_unbind_threads(oldpp, B_FALSE);
3720b70c467Sakolb 
3730b70c467Sakolb 		if (!disp_bound_partition(cp, 0)) {
3740b70c467Sakolb 			/*
3750b70c467Sakolb 			 * No bound threads in this partition any more
3760b70c467Sakolb 			 */
3770b70c467Sakolb 			move_threads = 0;
3780b70c467Sakolb 		} else {
3790b70c467Sakolb 			/*
3800b70c467Sakolb 			 * There are still threads bound to the partition
3810b70c467Sakolb 			 */
3820b70c467Sakolb 			cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
3830b70c467Sakolb 			return (EBUSY);
3840b70c467Sakolb 		}
3857c478bd9Sstevel@tonic-gate 	}
3867c478bd9Sstevel@tonic-gate 
3870b70c467Sakolb 	/*
3880b70c467Sakolb 	 * If the forced flag is set, unbind any threads from this CPU.
3890b70c467Sakolb 	 * Otherwise unbind soft-bound threads only.
3900b70c467Sakolb 	 */
3910b70c467Sakolb 	if ((ret = cpu_unbind(cp->cpu_id, unbind_all_threads)) != 0) {
3927c478bd9Sstevel@tonic-gate 		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
3937c478bd9Sstevel@tonic-gate 		return (ret);
3947c478bd9Sstevel@tonic-gate 	}
3957c478bd9Sstevel@tonic-gate 
3967c478bd9Sstevel@tonic-gate 	/*
3977c478bd9Sstevel@tonic-gate 	 * Stop further threads from weak-binding to this cpu.
3987c478bd9Sstevel@tonic-gate 	 */
3997c478bd9Sstevel@tonic-gate 	cpu_inmotion = cp;
4007c478bd9Sstevel@tonic-gate 	membar_enter();
4017c478bd9Sstevel@tonic-gate 
402fb2f18f8Sesaxe 	/*
403fb2f18f8Sesaxe 	 * Notify the Processor Groups subsystem that the CPU
404fb2f18f8Sesaxe 	 * will be moving cpu partitions. This is done before
405fb2f18f8Sesaxe 	 * CPUs are paused to provide an opportunity for any
406fb2f18f8Sesaxe 	 * needed memory allocations.
407fb2f18f8Sesaxe 	 */
408fb2f18f8Sesaxe 	pg_cpupart_out(cp, oldpp);
409fb2f18f8Sesaxe 	pg_cpupart_in(cp, newpp);
410fb2f18f8Sesaxe 
4117c478bd9Sstevel@tonic-gate again:
4127c478bd9Sstevel@tonic-gate 	if (move_threads) {
4137c478bd9Sstevel@tonic-gate 		int loop_count;
4147c478bd9Sstevel@tonic-gate 		/*
4157c478bd9Sstevel@tonic-gate 		 * Check for threads strong or weak bound to this CPU.
4167c478bd9Sstevel@tonic-gate 		 */
4177c478bd9Sstevel@tonic-gate 		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
4187c478bd9Sstevel@tonic-gate 			if (loop_count >= 5) {
4197c478bd9Sstevel@tonic-gate 				cpu_state_change_notify(cp->cpu_id,
4207c478bd9Sstevel@tonic-gate 				    CPU_CPUPART_IN);
421fb2f18f8Sesaxe 				pg_cpupart_out(cp, newpp);
422fb2f18f8Sesaxe 				pg_cpupart_in(cp, oldpp);
4237c478bd9Sstevel@tonic-gate 				cpu_inmotion = NULL;
4247c478bd9Sstevel@tonic-gate 				return (EBUSY);	/* some threads still bound */
4257c478bd9Sstevel@tonic-gate 			}
4267c478bd9Sstevel@tonic-gate 			delay(1);
4277c478bd9Sstevel@tonic-gate 		}
4287c478bd9Sstevel@tonic-gate 	}
4297c478bd9Sstevel@tonic-gate 
4307c478bd9Sstevel@tonic-gate 	/*
4317c478bd9Sstevel@tonic-gate 	 * Before we actually start changing data structures, notify
4327c478bd9Sstevel@tonic-gate 	 * the cyclic subsystem that we want to move this CPU out of its
4337c478bd9Sstevel@tonic-gate 	 * partition.
4347c478bd9Sstevel@tonic-gate 	 */
4357c478bd9Sstevel@tonic-gate 	if (!cyclic_move_out(cp)) {
4367c478bd9Sstevel@tonic-gate 		/*
4377c478bd9Sstevel@tonic-gate 		 * This CPU must be the last CPU in a processor set with
4387c478bd9Sstevel@tonic-gate 		 * a bound cyclic.
4397c478bd9Sstevel@tonic-gate 		 */
4407c478bd9Sstevel@tonic-gate 		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
441fb2f18f8Sesaxe 		pg_cpupart_out(cp, newpp);
442fb2f18f8Sesaxe 		pg_cpupart_in(cp, oldpp);
4437c478bd9Sstevel@tonic-gate 		cpu_inmotion = NULL;
4447c478bd9Sstevel@tonic-gate 		return (EBUSY);
4457c478bd9Sstevel@tonic-gate 	}
4467c478bd9Sstevel@tonic-gate 
4470ed5c46eSJosef 'Jeff' Sipek 	pause_cpus(cp, NULL);
4487c478bd9Sstevel@tonic-gate 
4497c478bd9Sstevel@tonic-gate 	if (move_threads) {
4507c478bd9Sstevel@tonic-gate 		/*
4517c478bd9Sstevel@tonic-gate 		 * The thread on cpu before the pause thread may have read
4527c478bd9Sstevel@tonic-gate 		 * cpu_inmotion before we raised the barrier above.  Check
4537c478bd9Sstevel@tonic-gate 		 * again.
4547c478bd9Sstevel@tonic-gate 		 */
4557c478bd9Sstevel@tonic-gate 		if (disp_bound_threads(cp, 1)) {
4567c478bd9Sstevel@tonic-gate 			start_cpus();
4577c478bd9Sstevel@tonic-gate 			goto again;
4587c478bd9Sstevel@tonic-gate 		}
4597c478bd9Sstevel@tonic-gate 
4607c478bd9Sstevel@tonic-gate 	}
4617c478bd9Sstevel@tonic-gate 
4627c478bd9Sstevel@tonic-gate 	/*
463fb2f18f8Sesaxe 	 * Now that CPUs are paused, let the PG subsystem perform
464fb2f18f8Sesaxe 	 * any necessary data structure updates.
4657c478bd9Sstevel@tonic-gate 	 */
466fb2f18f8Sesaxe 	pg_cpupart_move(cp, oldpp, newpp);
4677c478bd9Sstevel@tonic-gate 
4687c478bd9Sstevel@tonic-gate 	/* save this cpu's lgroup -- it'll be the same in the new partition */
4697c478bd9Sstevel@tonic-gate 	lgrpid = cp->cpu_lpl->lpl_lgrpid;
4707c478bd9Sstevel@tonic-gate 
4717c478bd9Sstevel@tonic-gate 	cpu_lpl = cp->cpu_lpl;
4727c478bd9Sstevel@tonic-gate 	/*
4737c478bd9Sstevel@tonic-gate 	 * let the lgroup framework know cp has left the partition
4747c478bd9Sstevel@tonic-gate 	 */
4757c478bd9Sstevel@tonic-gate 	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);
4767c478bd9Sstevel@tonic-gate 
4777c478bd9Sstevel@tonic-gate 	/* move out of old partition */
4787c478bd9Sstevel@tonic-gate 	oldpp->cp_ncpus--;
4797c478bd9Sstevel@tonic-gate 	if (oldpp->cp_ncpus > 0) {
4807c478bd9Sstevel@tonic-gate 
4817c478bd9Sstevel@tonic-gate 		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
4827c478bd9Sstevel@tonic-gate 		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
4837c478bd9Sstevel@tonic-gate 		if (oldpp->cp_cpulist == cp) {
4847c478bd9Sstevel@tonic-gate 			oldpp->cp_cpulist = ncp;
4857c478bd9Sstevel@tonic-gate 		}
4867c478bd9Sstevel@tonic-gate 	} else {
4877c478bd9Sstevel@tonic-gate 		ncp = oldpp->cp_cpulist = NULL;
4887c478bd9Sstevel@tonic-gate 		cp_numparts_nonempty--;
4897c478bd9Sstevel@tonic-gate 		ASSERT(cp_numparts_nonempty != 0);
4907c478bd9Sstevel@tonic-gate 	}
4917c478bd9Sstevel@tonic-gate 	oldpp->cp_gen++;
4927c478bd9Sstevel@tonic-gate 
4937c478bd9Sstevel@tonic-gate 	/* move into new partition */
4947c478bd9Sstevel@tonic-gate 	newlist = newpp->cp_cpulist;
4957c478bd9Sstevel@tonic-gate 	if (newlist == NULL) {
4967c478bd9Sstevel@tonic-gate 		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
4977c478bd9Sstevel@tonic-gate 		cp_numparts_nonempty++;
4987c478bd9Sstevel@tonic-gate 		ASSERT(cp_numparts_nonempty != 0);
4997c478bd9Sstevel@tonic-gate 	} else {
5007c478bd9Sstevel@tonic-gate 		cp->cpu_next_part = newlist;
5017c478bd9Sstevel@tonic-gate 		cp->cpu_prev_part = newlist->cpu_prev_part;
5027c478bd9Sstevel@tonic-gate 		newlist->cpu_prev_part->cpu_next_part = cp;
5037c478bd9Sstevel@tonic-gate 		newlist->cpu_prev_part = cp;
5047c478bd9Sstevel@tonic-gate 	}
5057c478bd9Sstevel@tonic-gate 	cp->cpu_part = newpp;
5067c478bd9Sstevel@tonic-gate 	newpp->cp_ncpus++;
5077c478bd9Sstevel@tonic-gate 	newpp->cp_gen++;
5087c478bd9Sstevel@tonic-gate 
5096890d023SEric Saxe 	ASSERT(bitset_is_null(&newpp->cp_haltset));
5106890d023SEric Saxe 	ASSERT(bitset_is_null(&oldpp->cp_haltset));
5117c478bd9Sstevel@tonic-gate 
5127c478bd9Sstevel@tonic-gate 	/*
5137c478bd9Sstevel@tonic-gate 	 * let the lgroup framework know cp has entered the partition
5147c478bd9Sstevel@tonic-gate 	 */
5157c478bd9Sstevel@tonic-gate 	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);
5167c478bd9Sstevel@tonic-gate 
5177c478bd9Sstevel@tonic-gate 	/*
5187c478bd9Sstevel@tonic-gate 	 * If necessary, move threads off processor.
5197c478bd9Sstevel@tonic-gate 	 */
5207c478bd9Sstevel@tonic-gate 	if (move_threads) {
5217c478bd9Sstevel@tonic-gate 		ASSERT(ncp != NULL);
5227c478bd9Sstevel@tonic-gate 
5237c478bd9Sstevel@tonic-gate 		/*
5247c478bd9Sstevel@tonic-gate 		 * Walk through the active process list looking for
5257c478bd9Sstevel@tonic-gate 		 * threads that need a new home lgroup, or whose last
5267c478bd9Sstevel@tonic-gate 		 * CPU is the same CPU that is being moved out of the
5277c478bd9Sstevel@tonic-gate 		 * partition.
5287c478bd9Sstevel@tonic-gate 		 */
5297c478bd9Sstevel@tonic-gate 
5307c478bd9Sstevel@tonic-gate 		for (p = practive; p != NULL; p = p->p_next) {
5317c478bd9Sstevel@tonic-gate 
5327c478bd9Sstevel@tonic-gate 			t = p->p_tlist;
5337c478bd9Sstevel@tonic-gate 
5347c478bd9Sstevel@tonic-gate 			if (t == NULL)
5357c478bd9Sstevel@tonic-gate 				continue;
5367c478bd9Sstevel@tonic-gate 
5377c478bd9Sstevel@tonic-gate 			lgrp_diff_lpl = 0;
5387c478bd9Sstevel@tonic-gate 
5397c478bd9Sstevel@tonic-gate 			do {
5407c478bd9Sstevel@tonic-gate 
5417c478bd9Sstevel@tonic-gate 				ASSERT(t->t_lpl != NULL);
5427c478bd9Sstevel@tonic-gate 
5437c478bd9Sstevel@tonic-gate 				/*
5447c478bd9Sstevel@tonic-gate 				 * Update the count of how many threads are
5457c478bd9Sstevel@tonic-gate 				 * in this CPU's lgroup but have a different lpl
5467c478bd9Sstevel@tonic-gate 				 */
5477c478bd9Sstevel@tonic-gate 
5487c478bd9Sstevel@tonic-gate 				if (t->t_lpl != cpu_lpl &&
5497c478bd9Sstevel@tonic-gate 				    t->t_lpl->lpl_lgrpid == lgrpid)
5507c478bd9Sstevel@tonic-gate 					lgrp_diff_lpl++;
5517c478bd9Sstevel@tonic-gate 				/*
5527c478bd9Sstevel@tonic-gate 				 * If the lgroup that t is assigned to no
5537c478bd9Sstevel@tonic-gate 				 * longer has any CPUs in t's partition,
5547c478bd9Sstevel@tonic-gate 				 * we'll have to choose a new lgroup for t.
5557c478bd9Sstevel@tonic-gate 				 */
5567c478bd9Sstevel@tonic-gate 
5577c478bd9Sstevel@tonic-gate 				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
5587c478bd9Sstevel@tonic-gate 				    t->t_cpupart)) {
5597c478bd9Sstevel@tonic-gate 					lgrp_move_thread(t,
5607c478bd9Sstevel@tonic-gate 					    lgrp_choose(t, t->t_cpupart), 0);
5617c478bd9Sstevel@tonic-gate 				}
5627c478bd9Sstevel@tonic-gate 
5637c478bd9Sstevel@tonic-gate 				/*
5647c478bd9Sstevel@tonic-gate 				 * make sure lpl points to our own partition
5657c478bd9Sstevel@tonic-gate 				 */
5667c478bd9Sstevel@tonic-gate 				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
5677c478bd9Sstevel@tonic-gate 				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
5686890d023SEric Saxe 				    t->t_cpupart->cp_nlgrploads));
5697c478bd9Sstevel@tonic-gate 
5707c478bd9Sstevel@tonic-gate 				ASSERT(t->t_lpl->lpl_ncpu > 0);
5717c478bd9Sstevel@tonic-gate 
5727c478bd9Sstevel@tonic-gate 				/* Update CPU last ran on if it was this CPU */
5737c478bd9Sstevel@tonic-gate 				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
5747c478bd9Sstevel@tonic-gate 				    t->t_bound_cpu != cp) {
575455e370cSJohn Levon 					t->t_cpu = disp_lowpri_cpu(ncp, t,
576455e370cSJohn Levon 					    t->t_pri);
5777c478bd9Sstevel@tonic-gate 				}
5787c478bd9Sstevel@tonic-gate 				t = t->t_forw;
5797c478bd9Sstevel@tonic-gate 			} while (t != p->p_tlist);
5807c478bd9Sstevel@tonic-gate 
5817c478bd9Sstevel@tonic-gate 			/*
5827c478bd9Sstevel@tonic-gate 			 * Didn't find any threads in the same lgroup as this
5837c478bd9Sstevel@tonic-gate 			 * CPU with a different lpl, so remove the lgroup from
5847c478bd9Sstevel@tonic-gate 			 * the process lgroup bitmask.
5857c478bd9Sstevel@tonic-gate 			 */
5867c478bd9Sstevel@tonic-gate 
5877c478bd9Sstevel@tonic-gate 			if (lgrp_diff_lpl == 0)
5887c478bd9Sstevel@tonic-gate 				klgrpset_del(p->p_lgrpset, lgrpid);
5897c478bd9Sstevel@tonic-gate 		}
5907c478bd9Sstevel@tonic-gate 
5917c478bd9Sstevel@tonic-gate 		/*
5927c478bd9Sstevel@tonic-gate 		 * Walk the thread list looking for threads that need to be
5937c478bd9Sstevel@tonic-gate 		 * rehomed, since there are some threads that are not in
5947c478bd9Sstevel@tonic-gate 		 * their process's p_tlist.
5957c478bd9Sstevel@tonic-gate 		 */
5967c478bd9Sstevel@tonic-gate 
5977c478bd9Sstevel@tonic-gate 		t = curthread;
5987c478bd9Sstevel@tonic-gate 
5997c478bd9Sstevel@tonic-gate 		do {
6007c478bd9Sstevel@tonic-gate 			ASSERT(t != NULL && t->t_lpl != NULL);
6017c478bd9Sstevel@tonic-gate 
6027c478bd9Sstevel@tonic-gate 			/*
6037c478bd9Sstevel@tonic-gate 			 * If the lgroup that t is assigned to no
6047c478bd9Sstevel@tonic-gate 			 * longer has any CPUs in t's partition,
6057c478bd9Sstevel@tonic-gate 			 * we'll have to choose a new lgroup for t.
6067c478bd9Sstevel@tonic-gate 			 * Also, choose best lgroup for home when
6077c478bd9Sstevel@tonic-gate 			 * thread has specified lgroup affinities,
6087c478bd9Sstevel@tonic-gate 			 * since there may be an lgroup with more
6097c478bd9Sstevel@tonic-gate 			 * affinity available after moving CPUs
6107c478bd9Sstevel@tonic-gate 			 * around.
6117c478bd9Sstevel@tonic-gate 			 */
6127c478bd9Sstevel@tonic-gate 			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
6137c478bd9Sstevel@tonic-gate 			    t->t_cpupart) || t->t_lgrp_affinity) {
6147c478bd9Sstevel@tonic-gate 				lgrp_move_thread(t,
6157c478bd9Sstevel@tonic-gate 				    lgrp_choose(t, t->t_cpupart), 1);
6167c478bd9Sstevel@tonic-gate 			}
6177c478bd9Sstevel@tonic-gate 
6187c478bd9Sstevel@tonic-gate 			/* make sure lpl points to our own partition */
6197c478bd9Sstevel@tonic-gate 			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
6207c478bd9Sstevel@tonic-gate 			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
6216890d023SEric Saxe 			    t->t_cpupart->cp_nlgrploads));
6227c478bd9Sstevel@tonic-gate 
6237c478bd9Sstevel@tonic-gate 			ASSERT(t->t_lpl->lpl_ncpu > 0);
6247c478bd9Sstevel@tonic-gate 
6257c478bd9Sstevel@tonic-gate 			/* Update CPU last ran on if it was this CPU */
6267c478bd9Sstevel@tonic-gate 			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
6277c478bd9Sstevel@tonic-gate 			    t->t_bound_cpu != cp) {
628455e370cSJohn Levon 				t->t_cpu = disp_lowpri_cpu(ncp, t,
629455e370cSJohn Levon 				    t->t_pri);
6307c478bd9Sstevel@tonic-gate 			}
6317c478bd9Sstevel@tonic-gate 
6327c478bd9Sstevel@tonic-gate 			t = t->t_next;
6337c478bd9Sstevel@tonic-gate 		} while (t != curthread);
6347c478bd9Sstevel@tonic-gate 
6357c478bd9Sstevel@tonic-gate 		/*
6367c478bd9Sstevel@tonic-gate 		 * Clear off the CPU's run queue, and the kp queue if the
6377c478bd9Sstevel@tonic-gate 		 * partition is now empty.
6387c478bd9Sstevel@tonic-gate 		 */
6397c478bd9Sstevel@tonic-gate 		disp_cpu_inactive(cp);
6407c478bd9Sstevel@tonic-gate 
6417c478bd9Sstevel@tonic-gate 		/*
6427c478bd9Sstevel@tonic-gate 		 * Make cp switch to a thread from the new partition.
6437c478bd9Sstevel@tonic-gate 		 */
6447c478bd9Sstevel@tonic-gate 		cp->cpu_runrun = 1;
6457c478bd9Sstevel@tonic-gate 		cp->cpu_kprunrun = 1;
6467c478bd9Sstevel@tonic-gate 	}
6477c478bd9Sstevel@tonic-gate 
6487c478bd9Sstevel@tonic-gate 	cpu_inmotion = NULL;
6497c478bd9Sstevel@tonic-gate 	start_cpus();
6507c478bd9Sstevel@tonic-gate 
6517c478bd9Sstevel@tonic-gate 	/*
6527c478bd9Sstevel@tonic-gate 	 * Let anyone interested know that cpu has been added to the set.
6537c478bd9Sstevel@tonic-gate 	 */
6547c478bd9Sstevel@tonic-gate 	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate 	/*
6577c478bd9Sstevel@tonic-gate 	 * Now let the cyclic subsystem know that it can reshuffle cyclics
6587c478bd9Sstevel@tonic-gate 	 * bound to the new processor set.
6597c478bd9Sstevel@tonic-gate 	 */
6607c478bd9Sstevel@tonic-gate 	cyclic_move_in(cp);
6617c478bd9Sstevel@tonic-gate 
6627c478bd9Sstevel@tonic-gate 	return (0);
6637c478bd9Sstevel@tonic-gate }
6647c478bd9Sstevel@tonic-gate 
6657c478bd9Sstevel@tonic-gate /*
6667c478bd9Sstevel@tonic-gate  * Check if thread can be moved to a new cpu partition.  Called by
6677c478bd9Sstevel@tonic-gate  * cpupart_move_thread() and pset_bind_start().
6687c478bd9Sstevel@tonic-gate  */
6697c478bd9Sstevel@tonic-gate int
6707c478bd9Sstevel@tonic-gate cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
6717c478bd9Sstevel@tonic-gate {
6727c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
6737c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
6747c478bd9Sstevel@tonic-gate 	ASSERT(cp != NULL);
6757c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
6767c478bd9Sstevel@tonic-gate 
6777c478bd9Sstevel@tonic-gate 	/*
6787c478bd9Sstevel@tonic-gate 	 * CPU-bound threads can't be moved.
6797c478bd9Sstevel@tonic-gate 	 */
6807c478bd9Sstevel@tonic-gate 	if (!ignore) {
6817c478bd9Sstevel@tonic-gate 		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
6827c478bd9Sstevel@tonic-gate 		    tp->t_weakbound_cpu;
6837c478bd9Sstevel@tonic-gate 		if (boundcpu != NULL && boundcpu->cpu_part != cp)
6847c478bd9Sstevel@tonic-gate 			return (EBUSY);
6857c478bd9Sstevel@tonic-gate 	}
68635a5a358SJonathan Adams 
68735a5a358SJonathan Adams 	if (tp->t_cid == sysdccid) {
68835a5a358SJonathan Adams 		return (EINVAL);	/* For now, sysdc threads can't move */
68935a5a358SJonathan Adams 	}
69035a5a358SJonathan Adams 
6917c478bd9Sstevel@tonic-gate 	return (0);
6927c478bd9Sstevel@tonic-gate }
6937c478bd9Sstevel@tonic-gate 
6947c478bd9Sstevel@tonic-gate /*
6957c478bd9Sstevel@tonic-gate  * Move thread to new partition.  If ignore is non-zero, then CPU
6967c478bd9Sstevel@tonic-gate  * bindings should be ignored (this is used when destroying a
6977c478bd9Sstevel@tonic-gate  * partition).
6987c478bd9Sstevel@tonic-gate  */
6997c478bd9Sstevel@tonic-gate static int
7007c478bd9Sstevel@tonic-gate cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
7017c478bd9Sstevel@tonic-gate     void *projbuf, void *zonebuf)
7027c478bd9Sstevel@tonic-gate {
7037c478bd9Sstevel@tonic-gate 	cpupart_t *oldpp = tp->t_cpupart;
7047c478bd9Sstevel@tonic-gate 	int ret;
7057c478bd9Sstevel@tonic-gate 
7067c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
7077c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
7087c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
7097c478bd9Sstevel@tonic-gate 	ASSERT(newpp != NULL);
7107c478bd9Sstevel@tonic-gate 
7117c478bd9Sstevel@tonic-gate 	if (newpp->cp_cpulist == NULL)
7127c478bd9Sstevel@tonic-gate 		return (EINVAL);
7137c478bd9Sstevel@tonic-gate 
7147c478bd9Sstevel@tonic-gate 	/*
7157c478bd9Sstevel@tonic-gate 	 * Check for errors first.
7167c478bd9Sstevel@tonic-gate 	 */
7177c478bd9Sstevel@tonic-gate 	thread_lock(tp);
7187c478bd9Sstevel@tonic-gate 	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
7197c478bd9Sstevel@tonic-gate 		thread_unlock(tp);
7207c478bd9Sstevel@tonic-gate 		return (ret);
7217c478bd9Sstevel@tonic-gate 	}
7227c478bd9Sstevel@tonic-gate 
7237c478bd9Sstevel@tonic-gate 	/* move the thread */
7247c478bd9Sstevel@tonic-gate 	if (oldpp != newpp) {
7257c478bd9Sstevel@tonic-gate 		/*
7267c478bd9Sstevel@tonic-gate 		 * Make the thread switch to the new partition.
7277c478bd9Sstevel@tonic-gate 		 */
7287c478bd9Sstevel@tonic-gate 		tp->t_cpupart = newpp;
7297c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_lpl != NULL);
7307c478bd9Sstevel@tonic-gate 		/*
7317c478bd9Sstevel@tonic-gate 		 * Leave the thread on the same lgroup if possible; otherwise
7327c478bd9Sstevel@tonic-gate 		 * choose a new lgroup for it.  In either case, update its
7337c478bd9Sstevel@tonic-gate 		 * t_lpl.
7347c478bd9Sstevel@tonic-gate 		 */
7357c478bd9Sstevel@tonic-gate 		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
7367c478bd9Sstevel@tonic-gate 		    tp->t_lgrp_affinity == NULL) {
7377c478bd9Sstevel@tonic-gate 			/*
7387c478bd9Sstevel@tonic-gate 			 * The thread's lgroup has CPUs in the thread's new
7397c478bd9Sstevel@tonic-gate 			 * partition, so the thread can stay assigned to the
7407c478bd9Sstevel@tonic-gate 			 * same lgroup.  Update its t_lpl to point to the
7417c478bd9Sstevel@tonic-gate 			 * lpl_t for its lgroup in its new partition.
7427c478bd9Sstevel@tonic-gate 			 */
7437c478bd9Sstevel@tonic-gate 			lgrp_move_thread(tp, &tp->t_cpupart->\
7447c478bd9Sstevel@tonic-gate 			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
7457c478bd9Sstevel@tonic-gate 		} else {
7467c478bd9Sstevel@tonic-gate 			/*
7477c478bd9Sstevel@tonic-gate 			 * The thread's lgroup has no cpus in its new
7487c478bd9Sstevel@tonic-gate 			 * partition or it has specified lgroup affinities,
7497c478bd9Sstevel@tonic-gate 			 * so choose the best lgroup for the thread and
7507c478bd9Sstevel@tonic-gate 			 * assign it to that lgroup.
7517c478bd9Sstevel@tonic-gate 			 */
7527c478bd9Sstevel@tonic-gate 			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
7537c478bd9Sstevel@tonic-gate 			    1);
7547c478bd9Sstevel@tonic-gate 		}
7557c478bd9Sstevel@tonic-gate 		/*
7567c478bd9Sstevel@tonic-gate 		 * make sure lpl points to our own partition
7577c478bd9Sstevel@tonic-gate 		 */
7587c478bd9Sstevel@tonic-gate 		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
7597c478bd9Sstevel@tonic-gate 		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
7606890d023SEric Saxe 		    tp->t_cpupart->cp_nlgrploads));
7617c478bd9Sstevel@tonic-gate 
7627c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_lpl->lpl_ncpu > 0);
7637c478bd9Sstevel@tonic-gate 
7647c478bd9Sstevel@tonic-gate 		if (tp->t_state == TS_ONPROC) {
7657c478bd9Sstevel@tonic-gate 			cpu_surrender(tp);
7667c478bd9Sstevel@tonic-gate 		} else if (tp->t_state == TS_RUN) {
7677c478bd9Sstevel@tonic-gate 			(void) dispdeq(tp);
7687c478bd9Sstevel@tonic-gate 			setbackdq(tp);
7697c478bd9Sstevel@tonic-gate 		}
7707c478bd9Sstevel@tonic-gate 	}
7717c478bd9Sstevel@tonic-gate 
7727c478bd9Sstevel@tonic-gate 	/*
7737c478bd9Sstevel@tonic-gate 	 * Our binding has changed; set TP_CHANGEBIND.
7747c478bd9Sstevel@tonic-gate 	 */
7757c478bd9Sstevel@tonic-gate 	tp->t_proc_flag |= TP_CHANGEBIND;
7767c478bd9Sstevel@tonic-gate 	aston(tp);
7777c478bd9Sstevel@tonic-gate 
7787c478bd9Sstevel@tonic-gate 	thread_unlock(tp);
7797c478bd9Sstevel@tonic-gate 	fss_changepset(tp, newpp, projbuf, zonebuf);
7807c478bd9Sstevel@tonic-gate 
7817c478bd9Sstevel@tonic-gate 	return (0);		/* success */
7827c478bd9Sstevel@tonic-gate }
7837c478bd9Sstevel@tonic-gate 
7847c478bd9Sstevel@tonic-gate 
7857c478bd9Sstevel@tonic-gate /*
7867c478bd9Sstevel@tonic-gate  * This function binds a thread to a partition.  Must be called with the
7877c478bd9Sstevel@tonic-gate  * p_lock of the containing process held (to keep the thread from going
7887c478bd9Sstevel@tonic-gate  * away), and thus also with cpu_lock held (since cpu_lock must be
7897c478bd9Sstevel@tonic-gate  * acquired before p_lock).  If ignore is non-zero, then CPU bindings
7907c478bd9Sstevel@tonic-gate  * should be ignored (this is used when destroying a partition).
7917c478bd9Sstevel@tonic-gate  */
7927c478bd9Sstevel@tonic-gate int
7937c478bd9Sstevel@tonic-gate cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
7947c478bd9Sstevel@tonic-gate     void *zonebuf)
7957c478bd9Sstevel@tonic-gate {
7967c478bd9Sstevel@tonic-gate 	cpupart_t	*newpp;
7977c478bd9Sstevel@tonic-gate 
7987c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
7997c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
8007c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
8017c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
8027c478bd9Sstevel@tonic-gate 
8037c478bd9Sstevel@tonic-gate 	if (psid == PS_NONE)
8047c478bd9Sstevel@tonic-gate 		newpp = &cp_default;
8057c478bd9Sstevel@tonic-gate 	else {
8067c478bd9Sstevel@tonic-gate 		newpp = cpupart_find(psid);
8077c478bd9Sstevel@tonic-gate 		if (newpp == NULL) {
8087c478bd9Sstevel@tonic-gate 			return (EINVAL);
8097c478bd9Sstevel@tonic-gate 		}
8107c478bd9Sstevel@tonic-gate 	}
8117c478bd9Sstevel@tonic-gate 	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
8127c478bd9Sstevel@tonic-gate }
8137c478bd9Sstevel@tonic-gate 
8147c478bd9Sstevel@tonic-gate 
8157c478bd9Sstevel@tonic-gate /*
8167c478bd9Sstevel@tonic-gate  * Create a new partition.  On MP systems, this also allocates a
8177c478bd9Sstevel@tonic-gate  * kpreempt disp queue for that partition.
8187c478bd9Sstevel@tonic-gate  */
8197c478bd9Sstevel@tonic-gate int
8207c478bd9Sstevel@tonic-gate cpupart_create(psetid_t *psid)
8217c478bd9Sstevel@tonic-gate {
8227c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
8237c478bd9Sstevel@tonic-gate 
8247c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
8257c478bd9Sstevel@tonic-gate 
8267c478bd9Sstevel@tonic-gate 	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
8277c478bd9Sstevel@tonic-gate 
8287c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
8297c478bd9Sstevel@tonic-gate 	if (cp_numparts == cp_max_numparts) {
8307c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
8317c478bd9Sstevel@tonic-gate 		kmem_free(pp, sizeof (cpupart_t));
8327c478bd9Sstevel@tonic-gate 		return (ENOMEM);
8337c478bd9Sstevel@tonic-gate 	}
8347c478bd9Sstevel@tonic-gate 	cp_numparts++;
8357c478bd9Sstevel@tonic-gate 	/* find the next free partition ID */
8367c478bd9Sstevel@tonic-gate 	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
8377c478bd9Sstevel@tonic-gate 		cp_id_next++;
8387c478bd9Sstevel@tonic-gate 	pp->cp_id = cp_id_next++;
8397c478bd9Sstevel@tonic-gate 	pp->cp_ncpus = 0;
8407c478bd9Sstevel@tonic-gate 	pp->cp_cpulist = NULL;
8417c478bd9Sstevel@tonic-gate 	pp->cp_attr = 0;
8427c478bd9Sstevel@tonic-gate 	klgrpset_clear(pp->cp_lgrpset);
8437c478bd9Sstevel@tonic-gate 	pp->cp_kp_queue.disp_maxrunpri = -1;
8447c478bd9Sstevel@tonic-gate 	pp->cp_kp_queue.disp_max_unbound_pri = -1;
8457c478bd9Sstevel@tonic-gate 	pp->cp_kp_queue.disp_cpu = NULL;
8467c478bd9Sstevel@tonic-gate 	pp->cp_gen = 0;
8477c478bd9Sstevel@tonic-gate 	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
8487c478bd9Sstevel@tonic-gate 	*psid = CPTOPS(pp->cp_id);
8497c478bd9Sstevel@tonic-gate 	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
8507c478bd9Sstevel@tonic-gate 	cpupart_kstat_create(pp);
8516890d023SEric Saxe 	cpupart_lpl_initialize(pp);
8526890d023SEric Saxe 
853fb2f18f8Sesaxe 	bitset_init(&pp->cp_cmt_pgs);
8547c478bd9Sstevel@tonic-gate 
8556890d023SEric Saxe 	/*
8560542eecfSRafael Vanoni 	 * Initialize and size the partition's bitset of halted CPUs.
8576890d023SEric Saxe 	 */
8580542eecfSRafael Vanoni 	bitset_init_fanout(&pp->cp_haltset, cp_haltset_fanout);
8596890d023SEric Saxe 	bitset_resize(&pp->cp_haltset, max_ncpus);
8606890d023SEric Saxe 
8617c478bd9Sstevel@tonic-gate 	/*
8627c478bd9Sstevel@tonic-gate 	 * Pause all CPUs while changing the partition list, to make sure
8637c478bd9Sstevel@tonic-gate 	 * the clock thread (which traverses the list without holding
8647c478bd9Sstevel@tonic-gate 	 * cpu_lock) isn't running.
8657c478bd9Sstevel@tonic-gate 	 */
8660ed5c46eSJosef 'Jeff' Sipek 	pause_cpus(NULL, NULL);
8677c478bd9Sstevel@tonic-gate 	pp->cp_next = cp_list_head;
8687c478bd9Sstevel@tonic-gate 	pp->cp_prev = cp_list_head->cp_prev;
8697c478bd9Sstevel@tonic-gate 	cp_list_head->cp_prev->cp_next = pp;
8707c478bd9Sstevel@tonic-gate 	cp_list_head->cp_prev = pp;
8717c478bd9Sstevel@tonic-gate 	start_cpus();
8727c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
8737c478bd9Sstevel@tonic-gate 
8747c478bd9Sstevel@tonic-gate 	return (0);
8757c478bd9Sstevel@tonic-gate }
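/*
 * A usage note, assuming the usual callers: processor set creation requests,
 * e.g. via pset_create(2), reach cpupart_create() with pool_lock() already
 * held, which is why the function asserts that lock rather than acquiring it.
 */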
8767c478bd9Sstevel@tonic-gate 
8777c478bd9Sstevel@tonic-gate /*
8780b70c467Sakolb  * Move threads from the specified partition to cp_default.  If 'unbind_all'
8790b70c467Sakolb  * is set, move all threads; otherwise move only soft-bound threads.
8807c478bd9Sstevel@tonic-gate  */
8810b70c467Sakolb static int
8820b70c467Sakolb cpupart_unbind_threads(cpupart_t *pp, boolean_t unbind_all)
8837c478bd9Sstevel@tonic-gate {
884455e370cSJohn Levon 	void	*projbuf, *zonebuf;
8857c478bd9Sstevel@tonic-gate 	kthread_t *t;
8867c478bd9Sstevel@tonic-gate 	proc_t	*p;
8870b70c467Sakolb 	int	err = 0;
888*3df2e8b2SRobert Mustacchi 	psetid_t psid;
8897c478bd9Sstevel@tonic-gate 
8907c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
8910b70c467Sakolb 	ASSERT(MUTEX_HELD(&cpu_lock));
8927c478bd9Sstevel@tonic-gate 
8937c478bd9Sstevel@tonic-gate 	if (pp == NULL || pp == &cp_default) {
8947c478bd9Sstevel@tonic-gate 		return (EINVAL);
8957c478bd9Sstevel@tonic-gate 	}
896*3df2e8b2SRobert Mustacchi 	psid = pp->cp_id;
8977c478bd9Sstevel@tonic-gate 
8987c478bd9Sstevel@tonic-gate 	/*
8997c478bd9Sstevel@tonic-gate 	 * Pre-allocate enough buffers for FSS for all active projects and
9007c478bd9Sstevel@tonic-gate 	 * for all active zones on the system.  Unused buffers will be
9017c478bd9Sstevel@tonic-gate 	 * freed later by fss_freebuf().
9027c478bd9Sstevel@tonic-gate 	 */
9037c478bd9Sstevel@tonic-gate 	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
9047c478bd9Sstevel@tonic-gate 	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
9057c478bd9Sstevel@tonic-gate 
9067c478bd9Sstevel@tonic-gate 	mutex_enter(&pidlock);
9077c478bd9Sstevel@tonic-gate 	t = curthread;
9087c478bd9Sstevel@tonic-gate 	do {
9097c478bd9Sstevel@tonic-gate 		if (t->t_bind_pset == psid) {
9107c478bd9Sstevel@tonic-gate again:			p = ttoproc(t);
9117c478bd9Sstevel@tonic-gate 			mutex_enter(&p->p_lock);
9127c478bd9Sstevel@tonic-gate 			if (ttoproc(t) != p) {
9137c478bd9Sstevel@tonic-gate 				/*
9147c478bd9Sstevel@tonic-gate 				 * lwp_exit has changed this thread's process
9157c478bd9Sstevel@tonic-gate 				 * pointer before we grabbed its p_lock.
9167c478bd9Sstevel@tonic-gate 				 */
9177c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
9187c478bd9Sstevel@tonic-gate 				goto again;
9197c478bd9Sstevel@tonic-gate 			}
9200b70c467Sakolb 
9210b70c467Sakolb 			/*
9220b70c467Sakolb 			 * We can only unbind threads that have a revocable
9230b70c467Sakolb 			 * binding, unless forced unbinding was requested.
9240b70c467Sakolb 			 */
9250b70c467Sakolb 			if (unbind_all || TB_PSET_IS_SOFT(t)) {
9260b70c467Sakolb 				err = cpupart_bind_thread(t, PS_NONE, 1,
9270b70c467Sakolb 				    projbuf, zonebuf);
9280b70c467Sakolb 				if (err) {
9290b70c467Sakolb 					mutex_exit(&p->p_lock);
9300b70c467Sakolb 					mutex_exit(&pidlock);
9310b70c467Sakolb 					fss_freebuf(projbuf, FSS_ALLOC_PROJ);
9320b70c467Sakolb 					fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
9330b70c467Sakolb 					return (err);
9340b70c467Sakolb 				}
9350b70c467Sakolb 				t->t_bind_pset = PS_NONE;
9367c478bd9Sstevel@tonic-gate 			}
9377c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
9387c478bd9Sstevel@tonic-gate 		}
9397c478bd9Sstevel@tonic-gate 		t = t->t_next;
9407c478bd9Sstevel@tonic-gate 	} while (t != curthread);
9417c478bd9Sstevel@tonic-gate 
9427c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
9437c478bd9Sstevel@tonic-gate 	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
9447c478bd9Sstevel@tonic-gate 	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
9450b70c467Sakolb 	return (err);
9460b70c467Sakolb }
9470b70c467Sakolb 
9480b70c467Sakolb /*
9490b70c467Sakolb  * Destroy a partition.
9500b70c467Sakolb  */
9510b70c467Sakolb int
9520b70c467Sakolb cpupart_destroy(psetid_t psid)
9530b70c467Sakolb {
9540b70c467Sakolb 	cpu_t	*cp, *first_cp;
9550b70c467Sakolb 	cpupart_t *pp, *newpp;
9560b70c467Sakolb 	int	err = 0;
9570b70c467Sakolb 
9580b70c467Sakolb 	ASSERT(pool_lock_held());
9590b70c467Sakolb 	mutex_enter(&cpu_lock);
9600b70c467Sakolb 
9610b70c467Sakolb 	pp = cpupart_find(psid);
9620b70c467Sakolb 	if (pp == NULL || pp == &cp_default) {
9630b70c467Sakolb 		mutex_exit(&cpu_lock);
9640b70c467Sakolb 		return (EINVAL);
9650b70c467Sakolb 	}
9660b70c467Sakolb 
9670b70c467Sakolb 	/*
9680b70c467Sakolb 	 * Unbind all the threads currently bound to the partition.
9690b70c467Sakolb 	 */
9700b70c467Sakolb 	err = cpupart_unbind_threads(pp, B_TRUE);
9710b70c467Sakolb 	if (err) {
9720b70c467Sakolb 		mutex_exit(&cpu_lock);
9730b70c467Sakolb 		return (err);
9740b70c467Sakolb 	}
9757c478bd9Sstevel@tonic-gate 
9767c478bd9Sstevel@tonic-gate 	newpp = &cp_default;
9777c478bd9Sstevel@tonic-gate 	while ((cp = pp->cp_cpulist) != NULL) {
978*3df2e8b2SRobert Mustacchi 		if ((err = cpupart_move_cpu(cp, newpp, 0)) != 0) {
9797c478bd9Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
9807c478bd9Sstevel@tonic-gate 			return (err);
9817c478bd9Sstevel@tonic-gate 		}
9827c478bd9Sstevel@tonic-gate 	}
9837c478bd9Sstevel@tonic-gate 
984fb2f18f8Sesaxe 	ASSERT(bitset_is_null(&pp->cp_cmt_pgs));
9856890d023SEric Saxe 	ASSERT(bitset_is_null(&pp->cp_haltset));
9867c478bd9Sstevel@tonic-gate 
987fb2f18f8Sesaxe 	/*
9886890d023SEric Saxe 	 * Tear down the partition's group of active CMT PGs and its
9896890d023SEric Saxe 	 * halted CPU bitset now that all the CPUs have left.
990fb2f18f8Sesaxe 	 */
991fb2f18f8Sesaxe 	bitset_fini(&pp->cp_cmt_pgs);
9926890d023SEric Saxe 	bitset_fini(&pp->cp_haltset);
993fb2f18f8Sesaxe 
9947c478bd9Sstevel@tonic-gate 	/*
9957c478bd9Sstevel@tonic-gate 	 * Reset the pointers in any offline processors so they won't
9967c478bd9Sstevel@tonic-gate 	 * try to rejoin the destroyed partition when they're turned
9977c478bd9Sstevel@tonic-gate 	 * online.
9987c478bd9Sstevel@tonic-gate 	 */
9997c478bd9Sstevel@tonic-gate 	first_cp = cp = CPU;
10007c478bd9Sstevel@tonic-gate 	do {
10017c478bd9Sstevel@tonic-gate 		if (cp->cpu_part == pp) {
10027c478bd9Sstevel@tonic-gate 			ASSERT(cp->cpu_flags & CPU_OFFLINE);
10037c478bd9Sstevel@tonic-gate 			cp->cpu_part = newpp;
10047c478bd9Sstevel@tonic-gate 		}
10057c478bd9Sstevel@tonic-gate 		cp = cp->cpu_next;
10067c478bd9Sstevel@tonic-gate 	} while (cp != first_cp);
10077c478bd9Sstevel@tonic-gate 
10087c478bd9Sstevel@tonic-gate 	/*
10097c478bd9Sstevel@tonic-gate 	 * Pause all CPUs while changing the partition list, to make sure
10107c478bd9Sstevel@tonic-gate 	 * the clock thread (which traverses the list without holding
10117c478bd9Sstevel@tonic-gate 	 * cpu_lock) isn't running.
10127c478bd9Sstevel@tonic-gate 	 */
10130ed5c46eSJosef 'Jeff' Sipek 	pause_cpus(NULL, NULL);
10147c478bd9Sstevel@tonic-gate 	pp->cp_prev->cp_next = pp->cp_next;
10157c478bd9Sstevel@tonic-gate 	pp->cp_next->cp_prev = pp->cp_prev;
10167c478bd9Sstevel@tonic-gate 	if (cp_list_head == pp)
10177c478bd9Sstevel@tonic-gate 		cp_list_head = pp->cp_next;
10187c478bd9Sstevel@tonic-gate 	start_cpus();
10197c478bd9Sstevel@tonic-gate 
10207c478bd9Sstevel@tonic-gate 	if (cp_id_next > pp->cp_id)
10217c478bd9Sstevel@tonic-gate 		cp_id_next = pp->cp_id;
10227c478bd9Sstevel@tonic-gate 
10237c478bd9Sstevel@tonic-gate 	if (pp->cp_kstat)
10247c478bd9Sstevel@tonic-gate 		kstat_delete(pp->cp_kstat);
10257c478bd9Sstevel@tonic-gate 
10267c478bd9Sstevel@tonic-gate 	cp_numparts--;
10277c478bd9Sstevel@tonic-gate 
10287c478bd9Sstevel@tonic-gate 	disp_kp_free(&pp->cp_kp_queue);
10296890d023SEric Saxe 
10306890d023SEric Saxe 	cpupart_lpl_teardown(pp);
10316890d023SEric Saxe 
10327c478bd9Sstevel@tonic-gate 	kmem_free(pp, sizeof (cpupart_t));
10337c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
10347c478bd9Sstevel@tonic-gate 
10357c478bd9Sstevel@tonic-gate 	return (err);
10367c478bd9Sstevel@tonic-gate }
10377c478bd9Sstevel@tonic-gate 
10387c478bd9Sstevel@tonic-gate 
10397c478bd9Sstevel@tonic-gate /*
10407c478bd9Sstevel@tonic-gate  * Return the ID of the partition to which the specified processor belongs.
10417c478bd9Sstevel@tonic-gate  */
10427c478bd9Sstevel@tonic-gate psetid_t
10437c478bd9Sstevel@tonic-gate cpupart_query_cpu(cpu_t *cp)
10447c478bd9Sstevel@tonic-gate {
10457c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
10467c478bd9Sstevel@tonic-gate 
10477c478bd9Sstevel@tonic-gate 	return (CPTOPS(cp->cpu_part->cp_id));
10487c478bd9Sstevel@tonic-gate }
10497c478bd9Sstevel@tonic-gate 
10507c478bd9Sstevel@tonic-gate 
10517c478bd9Sstevel@tonic-gate /*
10527c478bd9Sstevel@tonic-gate  * Attach a processor to an existing partition.
10537c478bd9Sstevel@tonic-gate  */
10547c478bd9Sstevel@tonic-gate int
10557c478bd9Sstevel@tonic-gate cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
10567c478bd9Sstevel@tonic-gate {
10577c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
10587c478bd9Sstevel@tonic-gate 	int		err;
10597c478bd9Sstevel@tonic-gate 
10607c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
10617c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
10627c478bd9Sstevel@tonic-gate 
10637c478bd9Sstevel@tonic-gate 	pp = cpupart_find(psid);
10647c478bd9Sstevel@tonic-gate 	if (pp == NULL)
10657c478bd9Sstevel@tonic-gate 		return (EINVAL);
10667c478bd9Sstevel@tonic-gate 	if (cp->cpu_flags & CPU_OFFLINE)
10677c478bd9Sstevel@tonic-gate 		return (EINVAL);
10687c478bd9Sstevel@tonic-gate 
10697c478bd9Sstevel@tonic-gate 	err = cpupart_move_cpu(cp, pp, forced);
10707c478bd9Sstevel@tonic-gate 	return (err);
10717c478bd9Sstevel@tonic-gate }
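/*
 * Illustrative sketch (hypothetical helper, not an illumos interface):
 * a caller of cpupart_attach_cpu().  The ASSERTs above require the pool
 * lock and cpu_lock; pool_lock()/pool_unlock() are assumed here to be the
 * interfaces that satisfy pool_lock_held().  A zero "forced" value is
 * passed through unchanged to cpupart_move_cpu().
 */
static int
example_attach(psetid_t psid, cpu_t *cp)
{
	int err;

	pool_lock();
	mutex_enter(&cpu_lock);
	err = cpupart_attach_cpu(psid, cp, 0);
	mutex_exit(&cpu_lock);
	pool_unlock();

	return (err);
}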
10727c478bd9Sstevel@tonic-gate 
10737c478bd9Sstevel@tonic-gate /*
10747c478bd9Sstevel@tonic-gate  * Get a list of cpus belonging to the partition.  If numcpus is NULL,
10757c478bd9Sstevel@tonic-gate  * this just checks for a valid partition.  If numcpus is non-NULL but
10767c478bd9Sstevel@tonic-gate  * cpulist is NULL, the current number of cpus is stored in *numcpus.
10777c478bd9Sstevel@tonic-gate  * If both are non-NULL, the current number of cpus is stored in *numcpus,
10787c478bd9Sstevel@tonic-gate  * and a list of those cpus up to the size originally in *numcpus is
10797c478bd9Sstevel@tonic-gate  * stored in cpulist[].  Also, store the processor set id in *psid.
10807c478bd9Sstevel@tonic-gate  * This is useful in case the processor set id passed in was PS_MYID.
10817c478bd9Sstevel@tonic-gate  */
10827c478bd9Sstevel@tonic-gate int
10837c478bd9Sstevel@tonic-gate cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
10847c478bd9Sstevel@tonic-gate {
10857c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
10867c478bd9Sstevel@tonic-gate 	uint_t		ncpus;
10877c478bd9Sstevel@tonic-gate 	cpu_t		*c;
10887c478bd9Sstevel@tonic-gate 	int		i;
10897c478bd9Sstevel@tonic-gate 
10907c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
10917c478bd9Sstevel@tonic-gate 	pp = cpupart_find(*psid);
10927c478bd9Sstevel@tonic-gate 	if (pp == NULL) {
10937c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
10947c478bd9Sstevel@tonic-gate 		return (EINVAL);
10957c478bd9Sstevel@tonic-gate 	}
10967c478bd9Sstevel@tonic-gate 	*psid = CPTOPS(pp->cp_id);
10977c478bd9Sstevel@tonic-gate 	ncpus = pp->cp_ncpus;
10987c478bd9Sstevel@tonic-gate 	if (numcpus) {
10997c478bd9Sstevel@tonic-gate 		if (ncpus > *numcpus) {
11007c478bd9Sstevel@tonic-gate 			/*
11017c478bd9Sstevel@tonic-gate 			 * Only copy as many cpus as were passed in, but
11027c478bd9Sstevel@tonic-gate 			 * pass back the real number.
11037c478bd9Sstevel@tonic-gate 			 */
11047c478bd9Sstevel@tonic-gate 			uint_t t = ncpus;
11057c478bd9Sstevel@tonic-gate 			ncpus = *numcpus;
11067c478bd9Sstevel@tonic-gate 			*numcpus = t;
11077c478bd9Sstevel@tonic-gate 		} else
11087c478bd9Sstevel@tonic-gate 			*numcpus = ncpus;
11097c478bd9Sstevel@tonic-gate 
11107c478bd9Sstevel@tonic-gate 		if (cpulist) {
11117c478bd9Sstevel@tonic-gate 			c = pp->cp_cpulist;
11127c478bd9Sstevel@tonic-gate 			for (i = 0; i < ncpus; i++) {
11137c478bd9Sstevel@tonic-gate 				ASSERT(c != NULL);
11147c478bd9Sstevel@tonic-gate 				cpulist[i] = c->cpu_id;
11157c478bd9Sstevel@tonic-gate 				c = c->cpu_next_part;
11167c478bd9Sstevel@tonic-gate 			}
11177c478bd9Sstevel@tonic-gate 		}
11187c478bd9Sstevel@tonic-gate 	}
11197c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
11207c478bd9Sstevel@tonic-gate 	return (0);
11217c478bd9Sstevel@tonic-gate }
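/*
 * Illustrative sketch (hypothetical helper, not an illumos interface):
 * the two-pass calling convention described in the comment above.  The
 * set can change between the calls, so the second call both truncates to
 * the allocated size and reports the current count back in ncpus.
 */
static int
example_snapshot_cpus(psetid_t psid)
{
	uint_t		ncpus = 0, nalloc;
	processorid_t	*ids;
	int		err;

	/* First pass: how many CPUs does the set currently contain? */
	if ((err = cpupart_get_cpus(&psid, NULL, &ncpus)) != 0)
		return (err);
	if (ncpus == 0)
		return (0);

	nalloc = ncpus;
	ids = kmem_alloc(nalloc * sizeof (processorid_t), KM_SLEEP);

	/* Second pass: fill at most nalloc entries of ids[]. */
	err = cpupart_get_cpus(&psid, ids, &ncpus);

	/* ... a real caller would consume MIN(ncpus, nalloc) entries ... */

	kmem_free(ids, nalloc * sizeof (processorid_t));
	return (err);
}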
11227c478bd9Sstevel@tonic-gate 
11237c478bd9Sstevel@tonic-gate /*
11247c478bd9Sstevel@tonic-gate  * Reallocate kpreempt queues for each CPU partition.  Called from
11257c478bd9Sstevel@tonic-gate  * disp_setup when a new scheduling class is loaded that increases the
11267c478bd9Sstevel@tonic-gate  * number of priorities in the system.
11277c478bd9Sstevel@tonic-gate  */
11287c478bd9Sstevel@tonic-gate void
11297c478bd9Sstevel@tonic-gate cpupart_kpqalloc(pri_t npri)
11307c478bd9Sstevel@tonic-gate {
11317c478bd9Sstevel@tonic-gate 	cpupart_t *cpp;
11327c478bd9Sstevel@tonic-gate 
11337c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
11347c478bd9Sstevel@tonic-gate 	cpp = cp_list_head;
11357c478bd9Sstevel@tonic-gate 	do {
11367c478bd9Sstevel@tonic-gate 		disp_kp_alloc(&cpp->cp_kp_queue, npri);
11377c478bd9Sstevel@tonic-gate 		cpp = cpp->cp_next;
11387c478bd9Sstevel@tonic-gate 	} while (cpp != cp_list_head);
11397c478bd9Sstevel@tonic-gate }
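/*
 * Illustrative sketch (hypothetical helper, not an illumos interface):
 * the do/while idiom used above to visit every partition exactly once.
 * cp_list_head anchors a circular, doubly-linked list, and cpu_lock keeps
 * it stable for the duration of the walk.
 */
static void
example_walk_parts(void (*visit)(cpupart_t *))
{
	cpupart_t *cpp;

	ASSERT(MUTEX_HELD(&cpu_lock));

	cpp = cp_list_head;
	do {
		visit(cpp);		/* called once per partition */
		cpp = cpp->cp_next;
	} while (cpp != cp_list_head);
}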
11407c478bd9Sstevel@tonic-gate 
11417c478bd9Sstevel@tonic-gate int
11427c478bd9Sstevel@tonic-gate cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
11437c478bd9Sstevel@tonic-gate {
11447c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
11457c478bd9Sstevel@tonic-gate 	int i;
11467c478bd9Sstevel@tonic-gate 
11477c478bd9Sstevel@tonic-gate 	ASSERT(nelem >= 0);
11487c478bd9Sstevel@tonic-gate 	ASSERT(nelem <= LOADAVG_NSTATS);
11497c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
11507c478bd9Sstevel@tonic-gate 
11517c478bd9Sstevel@tonic-gate 	cp = cpupart_find(psid);
11527c478bd9Sstevel@tonic-gate 	if (cp == NULL)
11537c478bd9Sstevel@tonic-gate 		return (EINVAL);
11547c478bd9Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
11557c478bd9Sstevel@tonic-gate 		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);
11567c478bd9Sstevel@tonic-gate 
11577c478bd9Sstevel@tonic-gate 	return (0);
11587c478bd9Sstevel@tonic-gate }
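/*
 * Illustrative sketch (hypothetical helper, not an illumos interface):
 * interpreting the values returned above.  The shift by (16 - FSHIFT)
 * leaves each entry scaled by 2^FSHIFT, so dividing by FSCALE (assumed
 * here to be 1 << FSHIFT, as in <sys/param.h>) recovers the familiar
 * load-average value; the kernel avoids floating point, so the sketch
 * prints it as integer and hundredths.
 */
static void
example_print_loadavg(psetid_t psid)
{
	int avg[LOADAVG_NSTATS];
	int err, i;

	mutex_enter(&cpu_lock);		/* cpupart_get_loadavg() asserts this */
	err = cpupart_get_loadavg(psid, avg, LOADAVG_NSTATS);
	mutex_exit(&cpu_lock);

	if (err != 0)
		return;

	for (i = 0; i < LOADAVG_NSTATS; i++)
		cmn_err(CE_CONT, "avg[%d] = %d.%02d\n", i, avg[i] >> FSHIFT,
		    ((avg[i] & (FSCALE - 1)) * 100) >> FSHIFT);
}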
11597c478bd9Sstevel@tonic-gate 
11607c478bd9Sstevel@tonic-gate 
11617c478bd9Sstevel@tonic-gate uint_t
11627c478bd9Sstevel@tonic-gate cpupart_list(psetid_t *list, uint_t nelem, int flag)
11637c478bd9Sstevel@tonic-gate {
11647c478bd9Sstevel@tonic-gate 	uint_t numpart = 0;
11657c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
11667c478bd9Sstevel@tonic-gate 
11677c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
11687c478bd9Sstevel@tonic-gate 	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);
11697c478bd9Sstevel@tonic-gate 
11707c478bd9Sstevel@tonic-gate 	if (list != NULL) {
11717c478bd9Sstevel@tonic-gate 		cp = cp_list_head;
11727c478bd9Sstevel@tonic-gate 		do {
11737c478bd9Sstevel@tonic-gate 			if (((flag == CP_ALL) && (cp != &cp_default)) ||
11747c478bd9Sstevel@tonic-gate 			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
11757c478bd9Sstevel@tonic-gate 				if (numpart == nelem)
11767c478bd9Sstevel@tonic-gate 					break;
11777c478bd9Sstevel@tonic-gate 				list[numpart++] = CPTOPS(cp->cp_id);
11787c478bd9Sstevel@tonic-gate 			}
11797c478bd9Sstevel@tonic-gate 			cp = cp->cp_next;
11807c478bd9Sstevel@tonic-gate 		} while (cp != cp_list_head);
11817c478bd9Sstevel@tonic-gate 	}
11827c478bd9Sstevel@tonic-gate 
11837c478bd9Sstevel@tonic-gate 	ASSERT(numpart < cp_numparts);
11847c478bd9Sstevel@tonic-gate 
11857c478bd9Sstevel@tonic-gate 	if (flag == CP_ALL)
11867c478bd9Sstevel@tonic-gate 		numpart = cp_numparts - 1; /* leave out default partition */
11877c478bd9Sstevel@tonic-gate 	else if (flag == CP_NONEMPTY)
11887c478bd9Sstevel@tonic-gate 		numpart = cp_numparts_nonempty;
11897c478bd9Sstevel@tonic-gate 
11907c478bd9Sstevel@tonic-gate 	return (numpart);
11917c478bd9Sstevel@tonic-gate }
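/*
 * Illustrative sketch (hypothetical helper, not an illumos interface):
 * listing the non-empty processor sets.  cpupart_list() returns the total
 * count even if the array is too small (or NULL), so a caller can size an
 * array from a first call and fill it with a second.
 */
static uint_t
example_list_nonempty(psetid_t *ids, uint_t nids)
{
	uint_t npart;

	mutex_enter(&cpu_lock);		/* cpupart_list() asserts this */
	npart = cpupart_list(ids, nids, CP_NONEMPTY);
	mutex_exit(&cpu_lock);

	return (npart);
}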
11927c478bd9Sstevel@tonic-gate 
11937c478bd9Sstevel@tonic-gate int
11947c478bd9Sstevel@tonic-gate cpupart_setattr(psetid_t psid, uint_t attr)
11957c478bd9Sstevel@tonic-gate {
11967c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
11977c478bd9Sstevel@tonic-gate 
11987c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
11997c478bd9Sstevel@tonic-gate 
12007c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
12017c478bd9Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
12027c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
12037c478bd9Sstevel@tonic-gate 		return (EINVAL);
12047c478bd9Sstevel@tonic-gate 	}
12057c478bd9Sstevel@tonic-gate 	/*
12067c478bd9Sstevel@tonic-gate 	 * The PSET_NOESCAPE attribute is always set for the default partition.
12077c478bd9Sstevel@tonic-gate 	 */
12087c478bd9Sstevel@tonic-gate 	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
12097c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
12107c478bd9Sstevel@tonic-gate 		return (EINVAL);
12117c478bd9Sstevel@tonic-gate 	}
12127c478bd9Sstevel@tonic-gate 	cp->cp_attr = attr;
12137c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
12147c478bd9Sstevel@tonic-gate 	return (0);
12157c478bd9Sstevel@tonic-gate }
12167c478bd9Sstevel@tonic-gate 
12177c478bd9Sstevel@tonic-gate int
12187c478bd9Sstevel@tonic-gate cpupart_getattr(psetid_t psid, uint_t *attrp)
12197c478bd9Sstevel@tonic-gate {
12207c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
12217c478bd9Sstevel@tonic-gate 
12227c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
12237c478bd9Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
12247c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
12257c478bd9Sstevel@tonic-gate 		return (EINVAL);
12267c478bd9Sstevel@tonic-gate 	}
12277c478bd9Sstevel@tonic-gate 	*attrp = cp->cp_attr;
12287c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
12297c478bd9Sstevel@tonic-gate 	return (0);
12307c478bd9Sstevel@tonic-gate }
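/*
 * Illustrative sketch (hypothetical helper, not an illumos interface):
 * reading a set's attributes and turning on PSET_NOESCAPE.  As enforced
 * in cpupart_setattr() above, clearing PSET_NOESCAPE on the default
 * partition is rejected with EINVAL.  pool_lock()/pool_unlock() are
 * assumed here to be the interfaces that satisfy pool_lock_held().
 */
static int
example_set_noescape(psetid_t psid)
{
	uint_t attr;
	int err;

	if ((err = cpupart_getattr(psid, &attr)) != 0)
		return (err);

	pool_lock();			/* cpupart_setattr() asserts this */
	err = cpupart_setattr(psid, attr | PSET_NOESCAPE);
	pool_unlock();

	return (err);
}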
1231