xref: /illumos-gate/usr/src/uts/common/disp/cpupart.c (revision 7c478bd9)
1*7c478bd9Sstevel@tonic-gate /*
2*7c478bd9Sstevel@tonic-gate  * CDDL HEADER START
3*7c478bd9Sstevel@tonic-gate  *
4*7c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
5*7c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
6*7c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
7*7c478bd9Sstevel@tonic-gate  * with the License.
8*7c478bd9Sstevel@tonic-gate  *
9*7c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10*7c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
11*7c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
12*7c478bd9Sstevel@tonic-gate  * and limitations under the License.
13*7c478bd9Sstevel@tonic-gate  *
14*7c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
15*7c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16*7c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
17*7c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
18*7c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
19*7c478bd9Sstevel@tonic-gate  *
20*7c478bd9Sstevel@tonic-gate  * CDDL HEADER END
21*7c478bd9Sstevel@tonic-gate  */
22*7c478bd9Sstevel@tonic-gate /*
23*7c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24*7c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
25*7c478bd9Sstevel@tonic-gate  */
26*7c478bd9Sstevel@tonic-gate 
27*7c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"
28*7c478bd9Sstevel@tonic-gate 
29*7c478bd9Sstevel@tonic-gate #include <sys/types.h>
30*7c478bd9Sstevel@tonic-gate #include <sys/systm.h>
31*7c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
32*7c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
33*7c478bd9Sstevel@tonic-gate #include <sys/thread.h>
34*7c478bd9Sstevel@tonic-gate #include <sys/disp.h>
35*7c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
36*7c478bd9Sstevel@tonic-gate #include <sys/debug.h>
37*7c478bd9Sstevel@tonic-gate #include <sys/cpupart.h>
38*7c478bd9Sstevel@tonic-gate #include <sys/pset.h>
39*7c478bd9Sstevel@tonic-gate #include <sys/var.h>
40*7c478bd9Sstevel@tonic-gate #include <sys/cyclic.h>
41*7c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
42*7c478bd9Sstevel@tonic-gate #include <sys/chip.h>
43*7c478bd9Sstevel@tonic-gate #include <sys/loadavg.h>
44*7c478bd9Sstevel@tonic-gate #include <sys/class.h>
45*7c478bd9Sstevel@tonic-gate #include <sys/fss.h>
46*7c478bd9Sstevel@tonic-gate #include <sys/pool.h>
47*7c478bd9Sstevel@tonic-gate #include <sys/pool_pset.h>
48*7c478bd9Sstevel@tonic-gate #include <sys/policy.h>
49*7c478bd9Sstevel@tonic-gate 
50*7c478bd9Sstevel@tonic-gate /*
51*7c478bd9Sstevel@tonic-gate  * Calling pool_lock() protects the pools configuration, which includes
52*7c478bd9Sstevel@tonic-gate  * CPU partitions.  cpu_lock protects the CPU partition list, and prevents
53*7c478bd9Sstevel@tonic-gate  * partitions from being created or destroyed while the lock is held.
54*7c478bd9Sstevel@tonic-gate  * The lock ordering with respect to related locks is:
55*7c478bd9Sstevel@tonic-gate  *
56*7c478bd9Sstevel@tonic-gate  *    pool_lock() ---> cpu_lock  --->  pidlock  -->  p_lock
57*7c478bd9Sstevel@tonic-gate  *
58*7c478bd9Sstevel@tonic-gate  * Blocking memory allocations may be made while holding "pool_lock"
59*7c478bd9Sstevel@tonic-gate  * or cpu_lock.
60*7c478bd9Sstevel@tonic-gate  */
61*7c478bd9Sstevel@tonic-gate 
62*7c478bd9Sstevel@tonic-gate /*
63*7c478bd9Sstevel@tonic-gate  * The cp_default partition is allocated statically, but its lgroup load average
64*7c478bd9Sstevel@tonic-gate  * (lpl) list is allocated dynamically after kmem subsystem is initialized. This
65*7c478bd9Sstevel@tonic-gate  * saves some memory since the space allocated reflects the actual number of
66*7c478bd9Sstevel@tonic-gate  * lgroups supported by the platform. The lgrp facility provides a temporary
67*7c478bd9Sstevel@tonic-gate  * space to hold lpl information during system bootstrap.
68*7c478bd9Sstevel@tonic-gate  */
69*7c478bd9Sstevel@tonic-gate 
/* Head of the circular, doubly-linked list of all CPU partitions. */
70*7c478bd9Sstevel@tonic-gate cpupart_t		*cp_list_head;
/* The statically-allocated default partition (id CP_DEFAULT). */
71*7c478bd9Sstevel@tonic-gate cpupart_t		cp_default;
/* Next partition ID to hand out; seeded to CP_DEFAULT + 1 at boot. */
72*7c478bd9Sstevel@tonic-gate static cpupartid_t	cp_id_next;
/* Total number of partitions currently in existence. */
73*7c478bd9Sstevel@tonic-gate uint_t			cp_numparts;
/* Number of partitions that currently contain at least one CPU. */
74*7c478bd9Sstevel@tonic-gate uint_t			cp_numparts_nonempty;
75*7c478bd9Sstevel@tonic-gate 
76*7c478bd9Sstevel@tonic-gate /*
77*7c478bd9Sstevel@tonic-gate  * Need to limit total number of partitions to avoid slowing down the
78*7c478bd9Sstevel@tonic-gate  * clock code too much.  The clock code traverses the list of
79*7c478bd9Sstevel@tonic-gate  * partitions and needs to be able to execute in a reasonable amount
80*7c478bd9Sstevel@tonic-gate  * of time (less than 1/hz seconds).  The maximum is sized based on
81*7c478bd9Sstevel@tonic-gate  * max_ncpus so it shouldn't be a problem unless there are large
82*7c478bd9Sstevel@tonic-gate  * numbers of empty partitions.
83*7c478bd9Sstevel@tonic-gate  */
/* Upper bound on cp_numparts; 0 here means "compute default at boot". */
84*7c478bd9Sstevel@tonic-gate static uint_t		cp_max_numparts;
85*7c478bd9Sstevel@tonic-gate 
86*7c478bd9Sstevel@tonic-gate /*
87*7c478bd9Sstevel@tonic-gate  * Processor sets and CPU partitions are different but related concepts.
88*7c478bd9Sstevel@tonic-gate  * A processor set is a user-level abstraction allowing users to create
89*7c478bd9Sstevel@tonic-gate  * sets of CPUs and bind threads exclusively to those sets.  A CPU
90*7c478bd9Sstevel@tonic-gate  * partition is a kernel dispatcher object consisting of a set of CPUs
91*7c478bd9Sstevel@tonic-gate  * and a global dispatch queue.  The processor set abstraction is
92*7c478bd9Sstevel@tonic-gate  * implemented via a CPU partition, and currently there is a 1-1
93*7c478bd9Sstevel@tonic-gate  * mapping between processor sets and partitions (excluding the default
94*7c478bd9Sstevel@tonic-gate  * partition, which is not visible as a processor set).  Hence, the
95*7c478bd9Sstevel@tonic-gate  * numbering for processor sets and CPU partitions is identical.  This
96*7c478bd9Sstevel@tonic-gate  * may not always be true in the future, and these macros could become
97*7c478bd9Sstevel@tonic-gate  * less trivial if we support e.g. a processor set containing multiple
98*7c478bd9Sstevel@tonic-gate  * CPU partitions.
99*7c478bd9Sstevel@tonic-gate  */
/* Map a processor set ID to the CPU partition ID that implements it. */
100*7c478bd9Sstevel@tonic-gate #define	PSTOCP(psid)	((cpupartid_t)((psid) == PS_NONE ? CP_DEFAULT : (psid)))
/* Map a CPU partition ID back to its user-visible processor set ID. */
101*7c478bd9Sstevel@tonic-gate #define	CPTOPS(cpid)	((psetid_t)((cpid) == CP_DEFAULT ? PS_NONE : (cpid)))
102*7c478bd9Sstevel@tonic-gate 
103*7c478bd9Sstevel@tonic-gate 
104*7c478bd9Sstevel@tonic-gate /*
105*7c478bd9Sstevel@tonic-gate  * Find a CPU partition given a processor set ID.
106*7c478bd9Sstevel@tonic-gate  */
107*7c478bd9Sstevel@tonic-gate static cpupart_t *
108*7c478bd9Sstevel@tonic-gate cpupart_find_all(psetid_t psid)
109*7c478bd9Sstevel@tonic-gate {
110*7c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
111*7c478bd9Sstevel@tonic-gate 	cpupartid_t cpid = PSTOCP(psid);
112*7c478bd9Sstevel@tonic-gate 
113*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
114*7c478bd9Sstevel@tonic-gate 
115*7c478bd9Sstevel@tonic-gate 	/* default partition not visible as a processor set */
116*7c478bd9Sstevel@tonic-gate 	if (psid == CP_DEFAULT)
117*7c478bd9Sstevel@tonic-gate 		return (NULL);
118*7c478bd9Sstevel@tonic-gate 
119*7c478bd9Sstevel@tonic-gate 	if (psid == PS_MYID)
120*7c478bd9Sstevel@tonic-gate 		return (curthread->t_cpupart);
121*7c478bd9Sstevel@tonic-gate 
122*7c478bd9Sstevel@tonic-gate 	cp = cp_list_head;
123*7c478bd9Sstevel@tonic-gate 	do {
124*7c478bd9Sstevel@tonic-gate 		if (cp->cp_id == cpid)
125*7c478bd9Sstevel@tonic-gate 			return (cp);
126*7c478bd9Sstevel@tonic-gate 		cp = cp->cp_next;
127*7c478bd9Sstevel@tonic-gate 	} while (cp != cp_list_head);
128*7c478bd9Sstevel@tonic-gate 	return (NULL);
129*7c478bd9Sstevel@tonic-gate }
130*7c478bd9Sstevel@tonic-gate 
131*7c478bd9Sstevel@tonic-gate /*
132*7c478bd9Sstevel@tonic-gate  * Find a CPU partition given a processor set ID if the processor set
133*7c478bd9Sstevel@tonic-gate  * should be visible from the calling zone.
134*7c478bd9Sstevel@tonic-gate  */
135*7c478bd9Sstevel@tonic-gate cpupart_t *
136*7c478bd9Sstevel@tonic-gate cpupart_find(psetid_t psid)
137*7c478bd9Sstevel@tonic-gate {
138*7c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
139*7c478bd9Sstevel@tonic-gate 
140*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
141*7c478bd9Sstevel@tonic-gate 	cp = cpupart_find_all(psid);
142*7c478bd9Sstevel@tonic-gate 	if (cp != NULL && !INGLOBALZONE(curproc) && pool_pset_enabled() &&
143*7c478bd9Sstevel@tonic-gate 	    zone_pset_get(curproc->p_zone) != CPTOPS(cp->cp_id))
144*7c478bd9Sstevel@tonic-gate 			return (NULL);
145*7c478bd9Sstevel@tonic-gate 	return (cp);
146*7c478bd9Sstevel@tonic-gate }
147*7c478bd9Sstevel@tonic-gate 
148*7c478bd9Sstevel@tonic-gate static int
149*7c478bd9Sstevel@tonic-gate cpupart_kstat_update(kstat_t *ksp, int rw)
150*7c478bd9Sstevel@tonic-gate {
151*7c478bd9Sstevel@tonic-gate 	cpupart_t *cp = (cpupart_t *)ksp->ks_private;
152*7c478bd9Sstevel@tonic-gate 	cpupart_kstat_t *cpksp = ksp->ks_data;
153*7c478bd9Sstevel@tonic-gate 
154*7c478bd9Sstevel@tonic-gate 	if (rw == KSTAT_WRITE)
155*7c478bd9Sstevel@tonic-gate 		return (EACCES);
156*7c478bd9Sstevel@tonic-gate 
157*7c478bd9Sstevel@tonic-gate 	cpksp->cpk_updates.value.ui64 = cp->cp_updates;
158*7c478bd9Sstevel@tonic-gate 	cpksp->cpk_runnable.value.ui64 = cp->cp_nrunnable_cum;
159*7c478bd9Sstevel@tonic-gate 	cpksp->cpk_waiting.value.ui64 = cp->cp_nwaiting_cum;
160*7c478bd9Sstevel@tonic-gate 	cpksp->cpk_ncpus.value.ui32 = cp->cp_ncpus;
161*7c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_1min.value.ui32 = cp->cp_hp_avenrun[0] >>
162*7c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
163*7c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_5min.value.ui32 = cp->cp_hp_avenrun[1] >>
164*7c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
165*7c478bd9Sstevel@tonic-gate 	cpksp->cpk_avenrun_15min.value.ui32 = cp->cp_hp_avenrun[2] >>
166*7c478bd9Sstevel@tonic-gate 	    (16 - FSHIFT);
167*7c478bd9Sstevel@tonic-gate 	return (0);
168*7c478bd9Sstevel@tonic-gate }
169*7c478bd9Sstevel@tonic-gate 
170*7c478bd9Sstevel@tonic-gate static void
171*7c478bd9Sstevel@tonic-gate cpupart_kstat_create(cpupart_t *cp)
172*7c478bd9Sstevel@tonic-gate {
173*7c478bd9Sstevel@tonic-gate 	kstat_t *ksp;
174*7c478bd9Sstevel@tonic-gate 	zoneid_t zoneid;
175*7c478bd9Sstevel@tonic-gate 
176*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
177*7c478bd9Sstevel@tonic-gate 
178*7c478bd9Sstevel@tonic-gate 	/*
179*7c478bd9Sstevel@tonic-gate 	 * We have a bit of a chicken-egg problem since this code will
180*7c478bd9Sstevel@tonic-gate 	 * get called to create the kstats for CP_DEFAULT before the
181*7c478bd9Sstevel@tonic-gate 	 * pools framework gets initialized.  We circumvent the problem
182*7c478bd9Sstevel@tonic-gate 	 * by special-casing cp_default.
183*7c478bd9Sstevel@tonic-gate 	 */
184*7c478bd9Sstevel@tonic-gate 	if (cp != &cp_default && pool_pset_enabled())
185*7c478bd9Sstevel@tonic-gate 		zoneid = GLOBAL_ZONEID;
186*7c478bd9Sstevel@tonic-gate 	else
187*7c478bd9Sstevel@tonic-gate 		zoneid = ALL_ZONES;
188*7c478bd9Sstevel@tonic-gate 	ksp = kstat_create_zone("unix", cp->cp_id, "pset", "misc",
189*7c478bd9Sstevel@tonic-gate 	    KSTAT_TYPE_NAMED,
190*7c478bd9Sstevel@tonic-gate 	    sizeof (cpupart_kstat_t) / sizeof (kstat_named_t), 0, zoneid);
191*7c478bd9Sstevel@tonic-gate 	if (ksp != NULL) {
192*7c478bd9Sstevel@tonic-gate 		cpupart_kstat_t *cpksp = ksp->ks_data;
193*7c478bd9Sstevel@tonic-gate 
194*7c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_updates, "updates",
195*7c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
196*7c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_runnable, "runnable",
197*7c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
198*7c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_waiting, "waiting",
199*7c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT64);
200*7c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_ncpus, "ncpus",
201*7c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
202*7c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_1min, "avenrun_1min",
203*7c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
204*7c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_5min, "avenrun_5min",
205*7c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
206*7c478bd9Sstevel@tonic-gate 		kstat_named_init(&cpksp->cpk_avenrun_15min, "avenrun_15min",
207*7c478bd9Sstevel@tonic-gate 		    KSTAT_DATA_UINT32);
208*7c478bd9Sstevel@tonic-gate 
209*7c478bd9Sstevel@tonic-gate 		ksp->ks_update = cpupart_kstat_update;
210*7c478bd9Sstevel@tonic-gate 		ksp->ks_private = cp;
211*7c478bd9Sstevel@tonic-gate 
212*7c478bd9Sstevel@tonic-gate 		kstat_install(ksp);
213*7c478bd9Sstevel@tonic-gate 	}
214*7c478bd9Sstevel@tonic-gate 	cp->cp_kstat = ksp;
215*7c478bd9Sstevel@tonic-gate }
216*7c478bd9Sstevel@tonic-gate 
217*7c478bd9Sstevel@tonic-gate /*
218*7c478bd9Sstevel@tonic-gate  * Initialize the default partition and kpreempt disp queue.
219*7c478bd9Sstevel@tonic-gate  */
220*7c478bd9Sstevel@tonic-gate void
221*7c478bd9Sstevel@tonic-gate cpupart_initialize_default(void)
222*7c478bd9Sstevel@tonic-gate {
223*7c478bd9Sstevel@tonic-gate 	lgrp_id_t i;
224*7c478bd9Sstevel@tonic-gate 
225*7c478bd9Sstevel@tonic-gate 	cp_list_head = &cp_default;
226*7c478bd9Sstevel@tonic-gate 	cp_default.cp_next = &cp_default;
227*7c478bd9Sstevel@tonic-gate 	cp_default.cp_prev = &cp_default;
228*7c478bd9Sstevel@tonic-gate 	cp_default.cp_id = CP_DEFAULT;
229*7c478bd9Sstevel@tonic-gate 	cp_default.cp_kp_queue.disp_maxrunpri = -1;
230*7c478bd9Sstevel@tonic-gate 	cp_default.cp_kp_queue.disp_max_unbound_pri = -1;
231*7c478bd9Sstevel@tonic-gate 	cp_default.cp_kp_queue.disp_cpu = NULL;
232*7c478bd9Sstevel@tonic-gate 	cp_default.cp_gen = 0;
233*7c478bd9Sstevel@tonic-gate 	cp_default.cp_loadavg.lg_cur = 0;
234*7c478bd9Sstevel@tonic-gate 	cp_default.cp_loadavg.lg_len = 0;
235*7c478bd9Sstevel@tonic-gate 	cp_default.cp_loadavg.lg_total = 0;
236*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < S_LOADAVG_SZ; i++) {
237*7c478bd9Sstevel@tonic-gate 		cp_default.cp_loadavg.lg_loads[i] = 0;
238*7c478bd9Sstevel@tonic-gate 	}
239*7c478bd9Sstevel@tonic-gate 	CPUSET_ZERO(cp_default.cp_haltset);
240*7c478bd9Sstevel@tonic-gate 	DISP_LOCK_INIT(&cp_default.cp_kp_queue.disp_lock);
241*7c478bd9Sstevel@tonic-gate 	cp_id_next = CP_DEFAULT + 1;
242*7c478bd9Sstevel@tonic-gate 	cpupart_kstat_create(&cp_default);
243*7c478bd9Sstevel@tonic-gate 	cp_numparts = 1;
244*7c478bd9Sstevel@tonic-gate 	if (cp_max_numparts == 0)	/* allow for /etc/system tuning */
245*7c478bd9Sstevel@tonic-gate 		cp_max_numparts = max_ncpus * 2 + 1;
246*7c478bd9Sstevel@tonic-gate 	/*
247*7c478bd9Sstevel@tonic-gate 	 * Allocate space for cp_default list of lgrploads
248*7c478bd9Sstevel@tonic-gate 	 */
249*7c478bd9Sstevel@tonic-gate 	cp_default.cp_nlgrploads = lgrp_plat_max_lgrps();
250*7c478bd9Sstevel@tonic-gate 	cp_default.cp_lgrploads = kmem_zalloc(sizeof (lpl_t) *
251*7c478bd9Sstevel@tonic-gate 	    cp_default.cp_nlgrploads, KM_SLEEP);
252*7c478bd9Sstevel@tonic-gate 
253*7c478bd9Sstevel@tonic-gate 	/*
254*7c478bd9Sstevel@tonic-gate 	 * The initial lpl topology is created in a special lpl list
255*7c478bd9Sstevel@tonic-gate 	 * lpl_bootstrap. It should be copied to cp_default.
256*7c478bd9Sstevel@tonic-gate 	 * NOTE: lpl_topo_bootstrap() also updates CPU0 cpu_lpl pointer to point
257*7c478bd9Sstevel@tonic-gate 	 *	 to the correct lpl in the cp_default.cp_lgrploads list.
258*7c478bd9Sstevel@tonic-gate 	 */
259*7c478bd9Sstevel@tonic-gate 	lpl_topo_bootstrap(cp_default.cp_lgrploads,
260*7c478bd9Sstevel@tonic-gate 	    cp_default.cp_nlgrploads);
261*7c478bd9Sstevel@tonic-gate 
262*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < cp_default.cp_nlgrploads; i++) {
263*7c478bd9Sstevel@tonic-gate 		cp_default.cp_lgrploads[i].lpl_lgrpid = i;
264*7c478bd9Sstevel@tonic-gate 	}
265*7c478bd9Sstevel@tonic-gate 	cp_default.cp_attr = PSET_NOESCAPE;
266*7c478bd9Sstevel@tonic-gate 	cp_numparts_nonempty = 1;
267*7c478bd9Sstevel@tonic-gate 	/*
268*7c478bd9Sstevel@tonic-gate 	 * Set t0's home
269*7c478bd9Sstevel@tonic-gate 	 */
270*7c478bd9Sstevel@tonic-gate 	t0.t_lpl = &cp_default.cp_lgrploads[LGRP_ROOTID];
271*7c478bd9Sstevel@tonic-gate }
272*7c478bd9Sstevel@tonic-gate 
273*7c478bd9Sstevel@tonic-gate 
/*
 * Move CPU "cp" out of its current partition and into "newpp".  If
 * "forced" is set, processor bindings to the CPU are broken with
 * cpu_unbind(); otherwise bound threads make the move fail.  Threads
 * whose home lgroup or last-run CPU becomes invalid are rehomed while
 * all CPUs are paused.  Returns 0 on success or an errno value
 * (EBUSY if threads remain bound or a bound cyclic pins the CPU).
 * Caller must hold cpu_lock.
 */
274*7c478bd9Sstevel@tonic-gate static int
275*7c478bd9Sstevel@tonic-gate cpupart_move_cpu(cpu_t *cp, cpupart_t *newpp, int forced)
276*7c478bd9Sstevel@tonic-gate {
277*7c478bd9Sstevel@tonic-gate 	cpupart_t *oldpp;
278*7c478bd9Sstevel@tonic-gate 	cpu_t	*ncp, *newlist;
279*7c478bd9Sstevel@tonic-gate 	kthread_t *t;
280*7c478bd9Sstevel@tonic-gate 	int	move_threads = 1;
281*7c478bd9Sstevel@tonic-gate 	lgrp_id_t lgrpid;
282*7c478bd9Sstevel@tonic-gate 	proc_t 	*p;
283*7c478bd9Sstevel@tonic-gate 	int lgrp_diff_lpl;
284*7c478bd9Sstevel@tonic-gate 	lpl_t	*cpu_lpl;
285*7c478bd9Sstevel@tonic-gate 	int	ret;
286*7c478bd9Sstevel@tonic-gate 
287*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
288*7c478bd9Sstevel@tonic-gate 	ASSERT(newpp != NULL);
289*7c478bd9Sstevel@tonic-gate 
290*7c478bd9Sstevel@tonic-gate 	oldpp = cp->cpu_part;
291*7c478bd9Sstevel@tonic-gate 	ASSERT(oldpp != NULL);
292*7c478bd9Sstevel@tonic-gate 	ASSERT(oldpp->cp_ncpus > 0);
293*7c478bd9Sstevel@tonic-gate 
294*7c478bd9Sstevel@tonic-gate 	if (newpp == oldpp) {
295*7c478bd9Sstevel@tonic-gate 		/*
296*7c478bd9Sstevel@tonic-gate 		 * Don't need to do anything.
297*7c478bd9Sstevel@tonic-gate 		 */
298*7c478bd9Sstevel@tonic-gate 		return (0);
299*7c478bd9Sstevel@tonic-gate 	}
300*7c478bd9Sstevel@tonic-gate 
301*7c478bd9Sstevel@tonic-gate 	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_OUT);
302*7c478bd9Sstevel@tonic-gate 
303*7c478bd9Sstevel@tonic-gate 	if (!disp_bound_partition(cp, 0)) {
304*7c478bd9Sstevel@tonic-gate 		/*
305*7c478bd9Sstevel@tonic-gate 		 * Don't need to move threads if there are no threads in
306*7c478bd9Sstevel@tonic-gate 		 * the partition.  Note that threads can't enter the
307*7c478bd9Sstevel@tonic-gate 		 * partition while we're holding cpu_lock.
308*7c478bd9Sstevel@tonic-gate 		 */
309*7c478bd9Sstevel@tonic-gate 		move_threads = 0;
310*7c478bd9Sstevel@tonic-gate 	} else if (oldpp->cp_ncpus == 1) {
		/* Last CPU of a non-empty partition cannot leave. */
311*7c478bd9Sstevel@tonic-gate 		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
312*7c478bd9Sstevel@tonic-gate 		return (EBUSY);
313*7c478bd9Sstevel@tonic-gate 	}
314*7c478bd9Sstevel@tonic-gate 
315*7c478bd9Sstevel@tonic-gate 	if (forced && (ret = cpu_unbind(cp->cpu_id)) != 0) {
316*7c478bd9Sstevel@tonic-gate 		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
317*7c478bd9Sstevel@tonic-gate 		return (ret);
318*7c478bd9Sstevel@tonic-gate 	}
319*7c478bd9Sstevel@tonic-gate 
320*7c478bd9Sstevel@tonic-gate 	/*
321*7c478bd9Sstevel@tonic-gate 	 * Stop further threads weak binding to this cpu.
322*7c478bd9Sstevel@tonic-gate 	 */
323*7c478bd9Sstevel@tonic-gate 	cpu_inmotion = cp;
324*7c478bd9Sstevel@tonic-gate 	membar_enter();
325*7c478bd9Sstevel@tonic-gate 
/*
 * Retry point: reached again if bound threads reappear after the
 * CPUs have been paused (see the disp_bound_threads recheck below).
 */
326*7c478bd9Sstevel@tonic-gate again:
327*7c478bd9Sstevel@tonic-gate 	if (move_threads) {
328*7c478bd9Sstevel@tonic-gate 		int loop_count;
329*7c478bd9Sstevel@tonic-gate 		/*
330*7c478bd9Sstevel@tonic-gate 		 * Check for threads strong or weak bound to this CPU.
331*7c478bd9Sstevel@tonic-gate 		 */
332*7c478bd9Sstevel@tonic-gate 		for (loop_count = 0; disp_bound_threads(cp, 0); loop_count++) {
333*7c478bd9Sstevel@tonic-gate 			if (loop_count >= 5) {
334*7c478bd9Sstevel@tonic-gate 				cpu_state_change_notify(cp->cpu_id,
335*7c478bd9Sstevel@tonic-gate 				    CPU_CPUPART_IN);
336*7c478bd9Sstevel@tonic-gate 				cpu_inmotion = NULL;
337*7c478bd9Sstevel@tonic-gate 				return (EBUSY);	/* some threads still bound */
338*7c478bd9Sstevel@tonic-gate 			}
339*7c478bd9Sstevel@tonic-gate 			delay(1);
340*7c478bd9Sstevel@tonic-gate 		}
341*7c478bd9Sstevel@tonic-gate 	}
342*7c478bd9Sstevel@tonic-gate 
343*7c478bd9Sstevel@tonic-gate 	/*
344*7c478bd9Sstevel@tonic-gate 	 * Before we actually start changing data structures, notify
345*7c478bd9Sstevel@tonic-gate 	 * the cyclic subsystem that we want to move this CPU out of its
346*7c478bd9Sstevel@tonic-gate 	 * partition.
347*7c478bd9Sstevel@tonic-gate 	 */
348*7c478bd9Sstevel@tonic-gate 	if (!cyclic_move_out(cp)) {
349*7c478bd9Sstevel@tonic-gate 		/*
350*7c478bd9Sstevel@tonic-gate 		 * This CPU must be the last CPU in a processor set with
351*7c478bd9Sstevel@tonic-gate 		 * a bound cyclic.
352*7c478bd9Sstevel@tonic-gate 		 */
353*7c478bd9Sstevel@tonic-gate 		cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
354*7c478bd9Sstevel@tonic-gate 		cpu_inmotion = NULL;
355*7c478bd9Sstevel@tonic-gate 		return (EBUSY);
356*7c478bd9Sstevel@tonic-gate 	}
357*7c478bd9Sstevel@tonic-gate 
	/* Quiesce all other CPUs while the partition lists are rewritten. */
358*7c478bd9Sstevel@tonic-gate 	pause_cpus(cp);
359*7c478bd9Sstevel@tonic-gate 
360*7c478bd9Sstevel@tonic-gate 	if (move_threads) {
361*7c478bd9Sstevel@tonic-gate 		/*
362*7c478bd9Sstevel@tonic-gate 		 * The thread on cpu before the pause thread may have read
363*7c478bd9Sstevel@tonic-gate 		 * cpu_inmotion before we raised the barrier above.  Check
364*7c478bd9Sstevel@tonic-gate 		 * again.
365*7c478bd9Sstevel@tonic-gate 		 */
366*7c478bd9Sstevel@tonic-gate 		if (disp_bound_threads(cp, 1)) {
367*7c478bd9Sstevel@tonic-gate 			start_cpus();
368*7c478bd9Sstevel@tonic-gate 			goto again;
369*7c478bd9Sstevel@tonic-gate 		}
370*7c478bd9Sstevel@tonic-gate 
371*7c478bd9Sstevel@tonic-gate 	}
372*7c478bd9Sstevel@tonic-gate 
373*7c478bd9Sstevel@tonic-gate 	/*
374*7c478bd9Sstevel@tonic-gate 	 * Update the set of chip's being spanned
375*7c478bd9Sstevel@tonic-gate 	 */
376*7c478bd9Sstevel@tonic-gate 	chip_cpu_move_part(cp, oldpp, newpp);
377*7c478bd9Sstevel@tonic-gate 
378*7c478bd9Sstevel@tonic-gate 	/* save this cpu's lgroup -- it'll be the same in the new partition */
379*7c478bd9Sstevel@tonic-gate 	lgrpid = cp->cpu_lpl->lpl_lgrpid;
380*7c478bd9Sstevel@tonic-gate 
381*7c478bd9Sstevel@tonic-gate 	cpu_lpl = cp->cpu_lpl;
382*7c478bd9Sstevel@tonic-gate 	/*
383*7c478bd9Sstevel@tonic-gate 	 * let the lgroup framework know cp has left the partition
384*7c478bd9Sstevel@tonic-gate 	 */
385*7c478bd9Sstevel@tonic-gate 	lgrp_config(LGRP_CONFIG_CPUPART_DEL, (uintptr_t)cp, lgrpid);
386*7c478bd9Sstevel@tonic-gate 
387*7c478bd9Sstevel@tonic-gate 	/* move out of old partition */
388*7c478bd9Sstevel@tonic-gate 	oldpp->cp_ncpus--;
389*7c478bd9Sstevel@tonic-gate 	if (oldpp->cp_ncpus > 0) {
390*7c478bd9Sstevel@tonic-gate 
		/* Unlink cp from the old partition's circular CPU list. */
391*7c478bd9Sstevel@tonic-gate 		ncp = cp->cpu_prev_part->cpu_next_part = cp->cpu_next_part;
392*7c478bd9Sstevel@tonic-gate 		cp->cpu_next_part->cpu_prev_part = cp->cpu_prev_part;
393*7c478bd9Sstevel@tonic-gate 		if (oldpp->cp_cpulist == cp) {
394*7c478bd9Sstevel@tonic-gate 			oldpp->cp_cpulist = ncp;
395*7c478bd9Sstevel@tonic-gate 		}
396*7c478bd9Sstevel@tonic-gate 	} else {
397*7c478bd9Sstevel@tonic-gate 		ncp = oldpp->cp_cpulist = NULL;
398*7c478bd9Sstevel@tonic-gate 		cp_numparts_nonempty--;
399*7c478bd9Sstevel@tonic-gate 		ASSERT(cp_numparts_nonempty != 0);
400*7c478bd9Sstevel@tonic-gate 	}
401*7c478bd9Sstevel@tonic-gate 	oldpp->cp_gen++;
402*7c478bd9Sstevel@tonic-gate 
403*7c478bd9Sstevel@tonic-gate 	/* move into new partition */
404*7c478bd9Sstevel@tonic-gate 	newlist = newpp->cp_cpulist;
405*7c478bd9Sstevel@tonic-gate 	if (newlist == NULL) {
406*7c478bd9Sstevel@tonic-gate 		newpp->cp_cpulist = cp->cpu_next_part = cp->cpu_prev_part = cp;
407*7c478bd9Sstevel@tonic-gate 		cp_numparts_nonempty++;
408*7c478bd9Sstevel@tonic-gate 		ASSERT(cp_numparts_nonempty != 0);
409*7c478bd9Sstevel@tonic-gate 	} else {
410*7c478bd9Sstevel@tonic-gate 		cp->cpu_next_part = newlist;
411*7c478bd9Sstevel@tonic-gate 		cp->cpu_prev_part = newlist->cpu_prev_part;
412*7c478bd9Sstevel@tonic-gate 		newlist->cpu_prev_part->cpu_next_part = cp;
413*7c478bd9Sstevel@tonic-gate 		newlist->cpu_prev_part = cp;
414*7c478bd9Sstevel@tonic-gate 	}
415*7c478bd9Sstevel@tonic-gate 	cp->cpu_part = newpp;
416*7c478bd9Sstevel@tonic-gate 	newpp->cp_ncpus++;
417*7c478bd9Sstevel@tonic-gate 	newpp->cp_gen++;
418*7c478bd9Sstevel@tonic-gate 
	/* Neither partition may have halted CPUs at this point. */
419*7c478bd9Sstevel@tonic-gate 	ASSERT(CPUSET_ISNULL(newpp->cp_haltset));
420*7c478bd9Sstevel@tonic-gate 	ASSERT(CPUSET_ISNULL(oldpp->cp_haltset));
421*7c478bd9Sstevel@tonic-gate 
422*7c478bd9Sstevel@tonic-gate 	/*
423*7c478bd9Sstevel@tonic-gate 	 * let the lgroup framework know cp has entered the partition
424*7c478bd9Sstevel@tonic-gate 	 */
425*7c478bd9Sstevel@tonic-gate 	lgrp_config(LGRP_CONFIG_CPUPART_ADD, (uintptr_t)cp, lgrpid);
426*7c478bd9Sstevel@tonic-gate 
427*7c478bd9Sstevel@tonic-gate 	/*
428*7c478bd9Sstevel@tonic-gate 	 * If necessary, move threads off processor.
429*7c478bd9Sstevel@tonic-gate 	 */
430*7c478bd9Sstevel@tonic-gate 	if (move_threads) {
431*7c478bd9Sstevel@tonic-gate 		ASSERT(ncp != NULL);
432*7c478bd9Sstevel@tonic-gate 
433*7c478bd9Sstevel@tonic-gate 		/*
434*7c478bd9Sstevel@tonic-gate 		 * Walk thru the active process list to look for
435*7c478bd9Sstevel@tonic-gate 		 * threads that need to have a new home lgroup,
436*7c478bd9Sstevel@tonic-gate 		 * or the last CPU they run on is the same CPU
437*7c478bd9Sstevel@tonic-gate 		 * being moved out of the partition.
438*7c478bd9Sstevel@tonic-gate 		 */
439*7c478bd9Sstevel@tonic-gate 
440*7c478bd9Sstevel@tonic-gate 		for (p = practive; p != NULL; p = p->p_next) {
441*7c478bd9Sstevel@tonic-gate 
442*7c478bd9Sstevel@tonic-gate 			t = p->p_tlist;
443*7c478bd9Sstevel@tonic-gate 
444*7c478bd9Sstevel@tonic-gate 			if (t == NULL)
445*7c478bd9Sstevel@tonic-gate 				continue;
446*7c478bd9Sstevel@tonic-gate 
447*7c478bd9Sstevel@tonic-gate 			lgrp_diff_lpl = 0;
448*7c478bd9Sstevel@tonic-gate 
449*7c478bd9Sstevel@tonic-gate 			do {
450*7c478bd9Sstevel@tonic-gate 
451*7c478bd9Sstevel@tonic-gate 				ASSERT(t->t_lpl != NULL);
452*7c478bd9Sstevel@tonic-gate 
453*7c478bd9Sstevel@tonic-gate 				/*
454*7c478bd9Sstevel@tonic-gate 				 * Update the count of how many threads are
455*7c478bd9Sstevel@tonic-gate 				 * in this CPU's lgroup but have a different lpl
456*7c478bd9Sstevel@tonic-gate 				 */
457*7c478bd9Sstevel@tonic-gate 
458*7c478bd9Sstevel@tonic-gate 				if (t->t_lpl != cpu_lpl &&
459*7c478bd9Sstevel@tonic-gate 				    t->t_lpl->lpl_lgrpid == lgrpid)
460*7c478bd9Sstevel@tonic-gate 					lgrp_diff_lpl++;
461*7c478bd9Sstevel@tonic-gate 				/*
462*7c478bd9Sstevel@tonic-gate 				 * If the lgroup that t is assigned to no
463*7c478bd9Sstevel@tonic-gate 				 * longer has any CPUs in t's partition,
464*7c478bd9Sstevel@tonic-gate 				 * we'll have to choose a new lgroup for t.
465*7c478bd9Sstevel@tonic-gate 				 */
466*7c478bd9Sstevel@tonic-gate 
467*7c478bd9Sstevel@tonic-gate 				if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
468*7c478bd9Sstevel@tonic-gate 				    t->t_cpupart)) {
469*7c478bd9Sstevel@tonic-gate 					lgrp_move_thread(t,
470*7c478bd9Sstevel@tonic-gate 					    lgrp_choose(t, t->t_cpupart), 0);
471*7c478bd9Sstevel@tonic-gate 				}
472*7c478bd9Sstevel@tonic-gate 
473*7c478bd9Sstevel@tonic-gate 				/*
474*7c478bd9Sstevel@tonic-gate 				 * make sure lpl points to our own partition
475*7c478bd9Sstevel@tonic-gate 				 */
476*7c478bd9Sstevel@tonic-gate 				ASSERT(t->t_lpl >= t->t_cpupart->cp_lgrploads &&
477*7c478bd9Sstevel@tonic-gate 				    (t->t_lpl < t->t_cpupart->cp_lgrploads +
478*7c478bd9Sstevel@tonic-gate 					t->t_cpupart->cp_nlgrploads));
479*7c478bd9Sstevel@tonic-gate 
480*7c478bd9Sstevel@tonic-gate 				ASSERT(t->t_lpl->lpl_ncpu > 0);
481*7c478bd9Sstevel@tonic-gate 
482*7c478bd9Sstevel@tonic-gate 				/* Update CPU last ran on if it was this CPU */
483*7c478bd9Sstevel@tonic-gate 				if (t->t_cpu == cp && t->t_cpupart == oldpp &&
484*7c478bd9Sstevel@tonic-gate 				    t->t_bound_cpu != cp) {
485*7c478bd9Sstevel@tonic-gate 					t->t_cpu = disp_lowpri_cpu(ncp,
486*7c478bd9Sstevel@tonic-gate 					    t->t_lpl, t->t_pri, NULL);
487*7c478bd9Sstevel@tonic-gate 				}
488*7c478bd9Sstevel@tonic-gate 				t = t->t_forw;
489*7c478bd9Sstevel@tonic-gate 			} while (t != p->p_tlist);
490*7c478bd9Sstevel@tonic-gate 
491*7c478bd9Sstevel@tonic-gate 			/*
492*7c478bd9Sstevel@tonic-gate 			 * Didn't find any threads in the same lgroup as this
493*7c478bd9Sstevel@tonic-gate 			 * CPU with a different lpl, so remove the lgroup from
494*7c478bd9Sstevel@tonic-gate 			 * the process lgroup bitmask.
495*7c478bd9Sstevel@tonic-gate 			 */
496*7c478bd9Sstevel@tonic-gate 
497*7c478bd9Sstevel@tonic-gate 			if (lgrp_diff_lpl)
498*7c478bd9Sstevel@tonic-gate 				klgrpset_del(p->p_lgrpset, lgrpid);
499*7c478bd9Sstevel@tonic-gate 		}
500*7c478bd9Sstevel@tonic-gate 
501*7c478bd9Sstevel@tonic-gate 		/*
502*7c478bd9Sstevel@tonic-gate 		 * Walk thread list looking for threads that need to be
503*7c478bd9Sstevel@tonic-gate 		 * rehomed, since there are some threads that are not in
504*7c478bd9Sstevel@tonic-gate 		 * their process's p_tlist.
505*7c478bd9Sstevel@tonic-gate 		 */
506*7c478bd9Sstevel@tonic-gate 
507*7c478bd9Sstevel@tonic-gate 		t = curthread;
508*7c478bd9Sstevel@tonic-gate 
509*7c478bd9Sstevel@tonic-gate 		do {
510*7c478bd9Sstevel@tonic-gate 			ASSERT(t != NULL && t->t_lpl != NULL);
511*7c478bd9Sstevel@tonic-gate 
512*7c478bd9Sstevel@tonic-gate 			/*
513*7c478bd9Sstevel@tonic-gate 			 * If the lgroup that t is assigned to no
514*7c478bd9Sstevel@tonic-gate 			 * longer has any CPUs in t's partition,
515*7c478bd9Sstevel@tonic-gate 			 * we'll have to choose a new lgroup for t.
516*7c478bd9Sstevel@tonic-gate 			 * Also, choose best lgroup for home when
517*7c478bd9Sstevel@tonic-gate 			 * thread has specified lgroup affinities,
518*7c478bd9Sstevel@tonic-gate 			 * since there may be an lgroup with more
519*7c478bd9Sstevel@tonic-gate 			 * affinity available after moving CPUs
520*7c478bd9Sstevel@tonic-gate 			 * around.
521*7c478bd9Sstevel@tonic-gate 			 */
522*7c478bd9Sstevel@tonic-gate 			if (!LGRP_CPUS_IN_PART(t->t_lpl->lpl_lgrpid,
523*7c478bd9Sstevel@tonic-gate 			    t->t_cpupart) || t->t_lgrp_affinity) {
524*7c478bd9Sstevel@tonic-gate 				lgrp_move_thread(t,
525*7c478bd9Sstevel@tonic-gate 				    lgrp_choose(t, t->t_cpupart), 1);
526*7c478bd9Sstevel@tonic-gate 			}
527*7c478bd9Sstevel@tonic-gate 
528*7c478bd9Sstevel@tonic-gate 			/* make sure lpl points to our own partition */
529*7c478bd9Sstevel@tonic-gate 			ASSERT((t->t_lpl >= t->t_cpupart->cp_lgrploads) &&
530*7c478bd9Sstevel@tonic-gate 			    (t->t_lpl < t->t_cpupart->cp_lgrploads +
531*7c478bd9Sstevel@tonic-gate 				t->t_cpupart->cp_nlgrploads));
532*7c478bd9Sstevel@tonic-gate 
533*7c478bd9Sstevel@tonic-gate 			ASSERT(t->t_lpl->lpl_ncpu > 0);
534*7c478bd9Sstevel@tonic-gate 
535*7c478bd9Sstevel@tonic-gate 			/* Update CPU last ran on if it was this CPU */
536*7c478bd9Sstevel@tonic-gate 			if (t->t_cpu == cp && t->t_cpupart == oldpp &&
537*7c478bd9Sstevel@tonic-gate 			    t->t_bound_cpu != cp) {
538*7c478bd9Sstevel@tonic-gate 				t->t_cpu = disp_lowpri_cpu(ncp, t->t_lpl,
539*7c478bd9Sstevel@tonic-gate 				    t->t_pri, NULL);
540*7c478bd9Sstevel@tonic-gate 			}
541*7c478bd9Sstevel@tonic-gate 
542*7c478bd9Sstevel@tonic-gate 			t = t->t_next;
543*7c478bd9Sstevel@tonic-gate 		} while (t != curthread);
544*7c478bd9Sstevel@tonic-gate 
545*7c478bd9Sstevel@tonic-gate 		/*
546*7c478bd9Sstevel@tonic-gate 		 * Clear off the CPU's run queue, and the kp queue if the
547*7c478bd9Sstevel@tonic-gate 		 * partition is now empty.
548*7c478bd9Sstevel@tonic-gate 		 */
549*7c478bd9Sstevel@tonic-gate 		disp_cpu_inactive(cp);
550*7c478bd9Sstevel@tonic-gate 
551*7c478bd9Sstevel@tonic-gate 		/*
552*7c478bd9Sstevel@tonic-gate 		 * Make cp switch to a thread from the new partition.
553*7c478bd9Sstevel@tonic-gate 		 */
554*7c478bd9Sstevel@tonic-gate 		cp->cpu_runrun = 1;
555*7c478bd9Sstevel@tonic-gate 		cp->cpu_kprunrun = 1;
556*7c478bd9Sstevel@tonic-gate 	}
557*7c478bd9Sstevel@tonic-gate 
	/* Allow weak binding again and resume the paused CPUs. */
558*7c478bd9Sstevel@tonic-gate 	cpu_inmotion = NULL;
559*7c478bd9Sstevel@tonic-gate 	start_cpus();
560*7c478bd9Sstevel@tonic-gate 
561*7c478bd9Sstevel@tonic-gate 	/*
562*7c478bd9Sstevel@tonic-gate 	 * Let anyone interested know that cpu has been added to the set.
563*7c478bd9Sstevel@tonic-gate 	 */
564*7c478bd9Sstevel@tonic-gate 	cpu_state_change_notify(cp->cpu_id, CPU_CPUPART_IN);
565*7c478bd9Sstevel@tonic-gate 
566*7c478bd9Sstevel@tonic-gate 	/*
567*7c478bd9Sstevel@tonic-gate 	 * Now let the cyclic subsystem know that it can reshuffle cyclics
568*7c478bd9Sstevel@tonic-gate 	 * bound to the new processor set.
569*7c478bd9Sstevel@tonic-gate 	 */
570*7c478bd9Sstevel@tonic-gate 	cyclic_move_in(cp);
571*7c478bd9Sstevel@tonic-gate 
572*7c478bd9Sstevel@tonic-gate 	return (0);
573*7c478bd9Sstevel@tonic-gate }
574*7c478bd9Sstevel@tonic-gate 
575*7c478bd9Sstevel@tonic-gate /*
576*7c478bd9Sstevel@tonic-gate  * Check if thread can be moved to a new cpu partition.  Called by
577*7c478bd9Sstevel@tonic-gate  * cpupart_move_thread() and pset_bind_start().
578*7c478bd9Sstevel@tonic-gate  */
579*7c478bd9Sstevel@tonic-gate int
580*7c478bd9Sstevel@tonic-gate cpupart_movable_thread(kthread_id_t tp, cpupart_t *cp, int ignore)
581*7c478bd9Sstevel@tonic-gate {
582*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
583*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
584*7c478bd9Sstevel@tonic-gate 	ASSERT(cp != NULL);
585*7c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
586*7c478bd9Sstevel@tonic-gate 
587*7c478bd9Sstevel@tonic-gate 	/*
588*7c478bd9Sstevel@tonic-gate 	 * CPU-bound threads can't be moved.
589*7c478bd9Sstevel@tonic-gate 	 */
590*7c478bd9Sstevel@tonic-gate 	if (!ignore) {
591*7c478bd9Sstevel@tonic-gate 		cpu_t *boundcpu = tp->t_bound_cpu ? tp->t_bound_cpu :
592*7c478bd9Sstevel@tonic-gate 		    tp->t_weakbound_cpu;
593*7c478bd9Sstevel@tonic-gate 		if (boundcpu != NULL && boundcpu->cpu_part != cp)
594*7c478bd9Sstevel@tonic-gate 			return (EBUSY);
595*7c478bd9Sstevel@tonic-gate 	}
596*7c478bd9Sstevel@tonic-gate 	return (0);
597*7c478bd9Sstevel@tonic-gate }
598*7c478bd9Sstevel@tonic-gate 
599*7c478bd9Sstevel@tonic-gate /*
600*7c478bd9Sstevel@tonic-gate  * Move thread to new partition.  If ignore is non-zero, then CPU
601*7c478bd9Sstevel@tonic-gate  * bindings should be ignored (this is used when destroying a
602*7c478bd9Sstevel@tonic-gate  * partition).
603*7c478bd9Sstevel@tonic-gate  */
604*7c478bd9Sstevel@tonic-gate static int
605*7c478bd9Sstevel@tonic-gate cpupart_move_thread(kthread_id_t tp, cpupart_t *newpp, int ignore,
606*7c478bd9Sstevel@tonic-gate     void *projbuf, void *zonebuf)
607*7c478bd9Sstevel@tonic-gate {
608*7c478bd9Sstevel@tonic-gate 	cpupart_t *oldpp = tp->t_cpupart;
609*7c478bd9Sstevel@tonic-gate 	int ret;
610*7c478bd9Sstevel@tonic-gate 
611*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
612*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
613*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
614*7c478bd9Sstevel@tonic-gate 	ASSERT(newpp != NULL);
615*7c478bd9Sstevel@tonic-gate 
616*7c478bd9Sstevel@tonic-gate 	if (newpp->cp_cpulist == NULL)
617*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
618*7c478bd9Sstevel@tonic-gate 
619*7c478bd9Sstevel@tonic-gate 	/*
620*7c478bd9Sstevel@tonic-gate 	 * Check for errors first.
621*7c478bd9Sstevel@tonic-gate 	 */
622*7c478bd9Sstevel@tonic-gate 	thread_lock(tp);
623*7c478bd9Sstevel@tonic-gate 	if ((ret = cpupart_movable_thread(tp, newpp, ignore)) != 0) {
624*7c478bd9Sstevel@tonic-gate 		thread_unlock(tp);
625*7c478bd9Sstevel@tonic-gate 		return (ret);
626*7c478bd9Sstevel@tonic-gate 	}
627*7c478bd9Sstevel@tonic-gate 
628*7c478bd9Sstevel@tonic-gate 	/* move the thread */
629*7c478bd9Sstevel@tonic-gate 	if (oldpp != newpp) {
630*7c478bd9Sstevel@tonic-gate 		/*
631*7c478bd9Sstevel@tonic-gate 		 * Make the thread switch to the new partition.
632*7c478bd9Sstevel@tonic-gate 		 */
633*7c478bd9Sstevel@tonic-gate 		tp->t_cpupart = newpp;
634*7c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_lpl != NULL);
635*7c478bd9Sstevel@tonic-gate 		/*
636*7c478bd9Sstevel@tonic-gate 		 * Leave the thread on the same lgroup if possible; otherwise
637*7c478bd9Sstevel@tonic-gate 		 * choose a new lgroup for it.  In either case, update its
638*7c478bd9Sstevel@tonic-gate 		 * t_lpl.
639*7c478bd9Sstevel@tonic-gate 		 */
640*7c478bd9Sstevel@tonic-gate 		if (LGRP_CPUS_IN_PART(tp->t_lpl->lpl_lgrpid, newpp) &&
641*7c478bd9Sstevel@tonic-gate 		    tp->t_lgrp_affinity == NULL) {
642*7c478bd9Sstevel@tonic-gate 			/*
643*7c478bd9Sstevel@tonic-gate 			 * The thread's lgroup has CPUs in the thread's new
644*7c478bd9Sstevel@tonic-gate 			 * partition, so the thread can stay assigned to the
645*7c478bd9Sstevel@tonic-gate 			 * same lgroup.  Update its t_lpl to point to the
646*7c478bd9Sstevel@tonic-gate 			 * lpl_t for its lgroup in its new partition.
647*7c478bd9Sstevel@tonic-gate 			 */
648*7c478bd9Sstevel@tonic-gate 			lgrp_move_thread(tp, &tp->t_cpupart->\
649*7c478bd9Sstevel@tonic-gate 			    cp_lgrploads[tp->t_lpl->lpl_lgrpid], 1);
650*7c478bd9Sstevel@tonic-gate 		} else {
651*7c478bd9Sstevel@tonic-gate 			/*
652*7c478bd9Sstevel@tonic-gate 			 * The thread's lgroup has no cpus in its new
653*7c478bd9Sstevel@tonic-gate 			 * partition or it has specified lgroup affinities,
654*7c478bd9Sstevel@tonic-gate 			 * so choose the best lgroup for the thread and
655*7c478bd9Sstevel@tonic-gate 			 * assign it to that lgroup.
656*7c478bd9Sstevel@tonic-gate 			 */
657*7c478bd9Sstevel@tonic-gate 			lgrp_move_thread(tp, lgrp_choose(tp, tp->t_cpupart),
658*7c478bd9Sstevel@tonic-gate 			    1);
659*7c478bd9Sstevel@tonic-gate 		}
660*7c478bd9Sstevel@tonic-gate 		/*
661*7c478bd9Sstevel@tonic-gate 		 * make sure lpl points to our own partition
662*7c478bd9Sstevel@tonic-gate 		 */
663*7c478bd9Sstevel@tonic-gate 		ASSERT((tp->t_lpl >= tp->t_cpupart->cp_lgrploads) &&
664*7c478bd9Sstevel@tonic-gate 		    (tp->t_lpl < tp->t_cpupart->cp_lgrploads +
665*7c478bd9Sstevel@tonic-gate 			tp->t_cpupart->cp_nlgrploads));
666*7c478bd9Sstevel@tonic-gate 
667*7c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_lpl->lpl_ncpu > 0);
668*7c478bd9Sstevel@tonic-gate 
669*7c478bd9Sstevel@tonic-gate 		if (tp->t_state == TS_ONPROC) {
670*7c478bd9Sstevel@tonic-gate 			cpu_surrender(tp);
671*7c478bd9Sstevel@tonic-gate 		} else if (tp->t_state == TS_RUN) {
672*7c478bd9Sstevel@tonic-gate 			(void) dispdeq(tp);
673*7c478bd9Sstevel@tonic-gate 			setbackdq(tp);
674*7c478bd9Sstevel@tonic-gate 		}
675*7c478bd9Sstevel@tonic-gate 	}
676*7c478bd9Sstevel@tonic-gate 
677*7c478bd9Sstevel@tonic-gate 	/*
678*7c478bd9Sstevel@tonic-gate 	 * Our binding has changed; set TP_CHANGEBIND.
679*7c478bd9Sstevel@tonic-gate 	 */
680*7c478bd9Sstevel@tonic-gate 	tp->t_proc_flag |= TP_CHANGEBIND;
681*7c478bd9Sstevel@tonic-gate 	aston(tp);
682*7c478bd9Sstevel@tonic-gate 
683*7c478bd9Sstevel@tonic-gate 	thread_unlock(tp);
684*7c478bd9Sstevel@tonic-gate 	fss_changepset(tp, newpp, projbuf, zonebuf);
685*7c478bd9Sstevel@tonic-gate 
686*7c478bd9Sstevel@tonic-gate 	return (0);		/* success */
687*7c478bd9Sstevel@tonic-gate }
688*7c478bd9Sstevel@tonic-gate 
689*7c478bd9Sstevel@tonic-gate 
690*7c478bd9Sstevel@tonic-gate /*
691*7c478bd9Sstevel@tonic-gate  * This function binds a thread to a partition.  Must be called with the
692*7c478bd9Sstevel@tonic-gate  * p_lock of the containing process held (to keep the thread from going
693*7c478bd9Sstevel@tonic-gate  * away), and thus also with cpu_lock held (since cpu_lock must be
694*7c478bd9Sstevel@tonic-gate  * acquired before p_lock).  If ignore is non-zero, then CPU bindings
695*7c478bd9Sstevel@tonic-gate  * should be ignored (this is used when destroying a partition).
696*7c478bd9Sstevel@tonic-gate  */
697*7c478bd9Sstevel@tonic-gate int
698*7c478bd9Sstevel@tonic-gate cpupart_bind_thread(kthread_id_t tp, psetid_t psid, int ignore, void *projbuf,
699*7c478bd9Sstevel@tonic-gate     void *zonebuf)
700*7c478bd9Sstevel@tonic-gate {
701*7c478bd9Sstevel@tonic-gate 	cpupart_t	*newpp;
702*7c478bd9Sstevel@tonic-gate 
703*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
704*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
705*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&pidlock));
706*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&ttoproc(tp)->p_lock));
707*7c478bd9Sstevel@tonic-gate 
708*7c478bd9Sstevel@tonic-gate 	if (psid == PS_NONE)
709*7c478bd9Sstevel@tonic-gate 		newpp = &cp_default;
710*7c478bd9Sstevel@tonic-gate 	else {
711*7c478bd9Sstevel@tonic-gate 		newpp = cpupart_find(psid);
712*7c478bd9Sstevel@tonic-gate 		if (newpp == NULL) {
713*7c478bd9Sstevel@tonic-gate 			return (EINVAL);
714*7c478bd9Sstevel@tonic-gate 		}
715*7c478bd9Sstevel@tonic-gate 	}
716*7c478bd9Sstevel@tonic-gate 	return (cpupart_move_thread(tp, newpp, ignore, projbuf, zonebuf));
717*7c478bd9Sstevel@tonic-gate }
718*7c478bd9Sstevel@tonic-gate 
719*7c478bd9Sstevel@tonic-gate 
720*7c478bd9Sstevel@tonic-gate /*
721*7c478bd9Sstevel@tonic-gate  * Create a new partition.  On MP systems, this also allocates a
722*7c478bd9Sstevel@tonic-gate  * kpreempt disp queue for that partition.
723*7c478bd9Sstevel@tonic-gate  */
724*7c478bd9Sstevel@tonic-gate int
725*7c478bd9Sstevel@tonic-gate cpupart_create(psetid_t *psid)
726*7c478bd9Sstevel@tonic-gate {
727*7c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
728*7c478bd9Sstevel@tonic-gate 	lgrp_id_t	i;
729*7c478bd9Sstevel@tonic-gate 
730*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
731*7c478bd9Sstevel@tonic-gate 
732*7c478bd9Sstevel@tonic-gate 	pp = kmem_zalloc(sizeof (cpupart_t), KM_SLEEP);
733*7c478bd9Sstevel@tonic-gate 	pp->cp_nlgrploads = lgrp_plat_max_lgrps();
734*7c478bd9Sstevel@tonic-gate 	pp->cp_lgrploads = kmem_zalloc(sizeof (lpl_t) * pp->cp_nlgrploads,
735*7c478bd9Sstevel@tonic-gate 	    KM_SLEEP);
736*7c478bd9Sstevel@tonic-gate 
737*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
738*7c478bd9Sstevel@tonic-gate 	if (cp_numparts == cp_max_numparts) {
739*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
740*7c478bd9Sstevel@tonic-gate 		kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
741*7c478bd9Sstevel@tonic-gate 		pp->cp_lgrploads = NULL;
742*7c478bd9Sstevel@tonic-gate 		kmem_free(pp, sizeof (cpupart_t));
743*7c478bd9Sstevel@tonic-gate 		return (ENOMEM);
744*7c478bd9Sstevel@tonic-gate 	}
745*7c478bd9Sstevel@tonic-gate 	cp_numparts++;
746*7c478bd9Sstevel@tonic-gate 	/* find the next free partition ID */
747*7c478bd9Sstevel@tonic-gate 	while (cpupart_find(CPTOPS(cp_id_next)) != NULL)
748*7c478bd9Sstevel@tonic-gate 		cp_id_next++;
749*7c478bd9Sstevel@tonic-gate 	pp->cp_id = cp_id_next++;
750*7c478bd9Sstevel@tonic-gate 	pp->cp_ncpus = 0;
751*7c478bd9Sstevel@tonic-gate 	pp->cp_cpulist = NULL;
752*7c478bd9Sstevel@tonic-gate 	pp->cp_attr = 0;
753*7c478bd9Sstevel@tonic-gate 	klgrpset_clear(pp->cp_lgrpset);
754*7c478bd9Sstevel@tonic-gate 	pp->cp_kp_queue.disp_maxrunpri = -1;
755*7c478bd9Sstevel@tonic-gate 	pp->cp_kp_queue.disp_max_unbound_pri = -1;
756*7c478bd9Sstevel@tonic-gate 	pp->cp_kp_queue.disp_cpu = NULL;
757*7c478bd9Sstevel@tonic-gate 	pp->cp_gen = 0;
758*7c478bd9Sstevel@tonic-gate 	CPUSET_ZERO(pp->cp_haltset);
759*7c478bd9Sstevel@tonic-gate 	DISP_LOCK_INIT(&pp->cp_kp_queue.disp_lock);
760*7c478bd9Sstevel@tonic-gate 	*psid = CPTOPS(pp->cp_id);
761*7c478bd9Sstevel@tonic-gate 	disp_kp_alloc(&pp->cp_kp_queue, v.v_nglobpris);
762*7c478bd9Sstevel@tonic-gate 	cpupart_kstat_create(pp);
763*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < pp->cp_nlgrploads; i++) {
764*7c478bd9Sstevel@tonic-gate 		pp->cp_lgrploads[i].lpl_lgrpid = i;
765*7c478bd9Sstevel@tonic-gate 	}
766*7c478bd9Sstevel@tonic-gate 	CHIP_SET_ZERO(pp->cp_chipset);
767*7c478bd9Sstevel@tonic-gate 
768*7c478bd9Sstevel@tonic-gate 	/*
769*7c478bd9Sstevel@tonic-gate 	 * Pause all CPUs while changing the partition list, to make sure
770*7c478bd9Sstevel@tonic-gate 	 * the clock thread (which traverses the list without holding
771*7c478bd9Sstevel@tonic-gate 	 * cpu_lock) isn't running.
772*7c478bd9Sstevel@tonic-gate 	 */
773*7c478bd9Sstevel@tonic-gate 	pause_cpus(NULL);
774*7c478bd9Sstevel@tonic-gate 	pp->cp_next = cp_list_head;
775*7c478bd9Sstevel@tonic-gate 	pp->cp_prev = cp_list_head->cp_prev;
776*7c478bd9Sstevel@tonic-gate 	cp_list_head->cp_prev->cp_next = pp;
777*7c478bd9Sstevel@tonic-gate 	cp_list_head->cp_prev = pp;
778*7c478bd9Sstevel@tonic-gate 	start_cpus();
779*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
780*7c478bd9Sstevel@tonic-gate 
781*7c478bd9Sstevel@tonic-gate 	return (0);
782*7c478bd9Sstevel@tonic-gate }
783*7c478bd9Sstevel@tonic-gate 
784*7c478bd9Sstevel@tonic-gate 
785*7c478bd9Sstevel@tonic-gate /*
786*7c478bd9Sstevel@tonic-gate  * Destroy a partition.
787*7c478bd9Sstevel@tonic-gate  */
788*7c478bd9Sstevel@tonic-gate int
789*7c478bd9Sstevel@tonic-gate cpupart_destroy(psetid_t psid)
790*7c478bd9Sstevel@tonic-gate {
791*7c478bd9Sstevel@tonic-gate 	cpu_t	*cp, *first_cp;
792*7c478bd9Sstevel@tonic-gate 	cpupart_t *pp, *newpp;
793*7c478bd9Sstevel@tonic-gate 	int	err = 0;
794*7c478bd9Sstevel@tonic-gate 	void 	*projbuf, *zonebuf;
795*7c478bd9Sstevel@tonic-gate 	kthread_t *t;
796*7c478bd9Sstevel@tonic-gate 	proc_t	*p;
797*7c478bd9Sstevel@tonic-gate 
798*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
799*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
800*7c478bd9Sstevel@tonic-gate 
801*7c478bd9Sstevel@tonic-gate 	pp = cpupart_find(psid);
802*7c478bd9Sstevel@tonic-gate 	if (pp == NULL || pp == &cp_default) {
803*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
804*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
805*7c478bd9Sstevel@tonic-gate 	}
806*7c478bd9Sstevel@tonic-gate 
807*7c478bd9Sstevel@tonic-gate 	/*
808*7c478bd9Sstevel@tonic-gate 	 * Pre-allocate enough buffers for FSS for all active projects and
809*7c478bd9Sstevel@tonic-gate 	 * for all active zones on the system.  Unused buffers will be
810*7c478bd9Sstevel@tonic-gate 	 * freed later by fss_freebuf().
811*7c478bd9Sstevel@tonic-gate 	 */
812*7c478bd9Sstevel@tonic-gate 	projbuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_PROJ);
813*7c478bd9Sstevel@tonic-gate 	zonebuf = fss_allocbuf(FSS_NPROJ_BUF, FSS_ALLOC_ZONE);
814*7c478bd9Sstevel@tonic-gate 
815*7c478bd9Sstevel@tonic-gate 	/*
816*7c478bd9Sstevel@tonic-gate 	 * First need to unbind all the threads currently bound to the
817*7c478bd9Sstevel@tonic-gate 	 * partition.  Then do the actual destroy (which moves the CPUs).
818*7c478bd9Sstevel@tonic-gate 	 */
819*7c478bd9Sstevel@tonic-gate 	mutex_enter(&pidlock);
820*7c478bd9Sstevel@tonic-gate 	t = curthread;
821*7c478bd9Sstevel@tonic-gate 	do {
822*7c478bd9Sstevel@tonic-gate 		if (t->t_bind_pset == psid) {
823*7c478bd9Sstevel@tonic-gate again:			p = ttoproc(t);
824*7c478bd9Sstevel@tonic-gate 			mutex_enter(&p->p_lock);
825*7c478bd9Sstevel@tonic-gate 			if (ttoproc(t) != p) {
826*7c478bd9Sstevel@tonic-gate 				/*
827*7c478bd9Sstevel@tonic-gate 				 * lwp_exit has changed this thread's process
828*7c478bd9Sstevel@tonic-gate 				 * pointer before we grabbed its p_lock.
829*7c478bd9Sstevel@tonic-gate 				 */
830*7c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
831*7c478bd9Sstevel@tonic-gate 				goto again;
832*7c478bd9Sstevel@tonic-gate 			}
833*7c478bd9Sstevel@tonic-gate 			err = cpupart_bind_thread(t, PS_NONE, 1,
834*7c478bd9Sstevel@tonic-gate 			    projbuf, zonebuf);
835*7c478bd9Sstevel@tonic-gate 			if (err) {
836*7c478bd9Sstevel@tonic-gate 				mutex_exit(&p->p_lock);
837*7c478bd9Sstevel@tonic-gate 				mutex_exit(&pidlock);
838*7c478bd9Sstevel@tonic-gate 				mutex_exit(&cpu_lock);
839*7c478bd9Sstevel@tonic-gate 				fss_freebuf(projbuf, FSS_ALLOC_PROJ);
840*7c478bd9Sstevel@tonic-gate 				fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
841*7c478bd9Sstevel@tonic-gate 				return (err);
842*7c478bd9Sstevel@tonic-gate 			}
843*7c478bd9Sstevel@tonic-gate 			t->t_bind_pset = PS_NONE;
844*7c478bd9Sstevel@tonic-gate 			mutex_exit(&p->p_lock);
845*7c478bd9Sstevel@tonic-gate 		}
846*7c478bd9Sstevel@tonic-gate 		t = t->t_next;
847*7c478bd9Sstevel@tonic-gate 	} while (t != curthread);
848*7c478bd9Sstevel@tonic-gate 
849*7c478bd9Sstevel@tonic-gate 	mutex_exit(&pidlock);
850*7c478bd9Sstevel@tonic-gate 	fss_freebuf(projbuf, FSS_ALLOC_PROJ);
851*7c478bd9Sstevel@tonic-gate 	fss_freebuf(zonebuf, FSS_ALLOC_ZONE);
852*7c478bd9Sstevel@tonic-gate 
853*7c478bd9Sstevel@tonic-gate 	newpp = &cp_default;
854*7c478bd9Sstevel@tonic-gate 	while ((cp = pp->cp_cpulist) != NULL) {
855*7c478bd9Sstevel@tonic-gate 		if (err = cpupart_move_cpu(cp, newpp, 0)) {
856*7c478bd9Sstevel@tonic-gate 			mutex_exit(&cpu_lock);
857*7c478bd9Sstevel@tonic-gate 			return (err);
858*7c478bd9Sstevel@tonic-gate 		}
859*7c478bd9Sstevel@tonic-gate 	}
860*7c478bd9Sstevel@tonic-gate 
861*7c478bd9Sstevel@tonic-gate 	ASSERT(CHIP_SET_ISNULL(pp->cp_chipset));
862*7c478bd9Sstevel@tonic-gate 	ASSERT(CPUSET_ISNULL(pp->cp_haltset));
863*7c478bd9Sstevel@tonic-gate 
864*7c478bd9Sstevel@tonic-gate 	/*
865*7c478bd9Sstevel@tonic-gate 	 * Reset the pointers in any offline processors so they won't
866*7c478bd9Sstevel@tonic-gate 	 * try to rejoin the destroyed partition when they're turned
867*7c478bd9Sstevel@tonic-gate 	 * online.
868*7c478bd9Sstevel@tonic-gate 	 */
869*7c478bd9Sstevel@tonic-gate 	first_cp = cp = CPU;
870*7c478bd9Sstevel@tonic-gate 	do {
871*7c478bd9Sstevel@tonic-gate 		if (cp->cpu_part == pp) {
872*7c478bd9Sstevel@tonic-gate 			ASSERT(cp->cpu_flags & CPU_OFFLINE);
873*7c478bd9Sstevel@tonic-gate 			cp->cpu_part = newpp;
874*7c478bd9Sstevel@tonic-gate 		}
875*7c478bd9Sstevel@tonic-gate 		cp = cp->cpu_next;
876*7c478bd9Sstevel@tonic-gate 	} while (cp != first_cp);
877*7c478bd9Sstevel@tonic-gate 
878*7c478bd9Sstevel@tonic-gate 	/*
879*7c478bd9Sstevel@tonic-gate 	 * Pause all CPUs while changing the partition list, to make sure
880*7c478bd9Sstevel@tonic-gate 	 * the clock thread (which traverses the list without holding
881*7c478bd9Sstevel@tonic-gate 	 * cpu_lock) isn't running.
882*7c478bd9Sstevel@tonic-gate 	 */
883*7c478bd9Sstevel@tonic-gate 	pause_cpus(NULL);
884*7c478bd9Sstevel@tonic-gate 	pp->cp_prev->cp_next = pp->cp_next;
885*7c478bd9Sstevel@tonic-gate 	pp->cp_next->cp_prev = pp->cp_prev;
886*7c478bd9Sstevel@tonic-gate 	if (cp_list_head == pp)
887*7c478bd9Sstevel@tonic-gate 		cp_list_head = pp->cp_next;
888*7c478bd9Sstevel@tonic-gate 	start_cpus();
889*7c478bd9Sstevel@tonic-gate 
890*7c478bd9Sstevel@tonic-gate 	if (cp_id_next > pp->cp_id)
891*7c478bd9Sstevel@tonic-gate 		cp_id_next = pp->cp_id;
892*7c478bd9Sstevel@tonic-gate 
893*7c478bd9Sstevel@tonic-gate 	if (pp->cp_kstat)
894*7c478bd9Sstevel@tonic-gate 		kstat_delete(pp->cp_kstat);
895*7c478bd9Sstevel@tonic-gate 
896*7c478bd9Sstevel@tonic-gate 	cp_numparts--;
897*7c478bd9Sstevel@tonic-gate 
898*7c478bd9Sstevel@tonic-gate 	disp_kp_free(&pp->cp_kp_queue);
899*7c478bd9Sstevel@tonic-gate 	kmem_free(pp->cp_lgrploads, sizeof (lpl_t) * pp->cp_nlgrploads);
900*7c478bd9Sstevel@tonic-gate 	pp->cp_lgrploads = NULL;
901*7c478bd9Sstevel@tonic-gate 	kmem_free(pp, sizeof (cpupart_t));
902*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
903*7c478bd9Sstevel@tonic-gate 
904*7c478bd9Sstevel@tonic-gate 	return (err);
905*7c478bd9Sstevel@tonic-gate }
906*7c478bd9Sstevel@tonic-gate 
907*7c478bd9Sstevel@tonic-gate 
908*7c478bd9Sstevel@tonic-gate /*
909*7c478bd9Sstevel@tonic-gate  * Return the ID of the partition to which the specified processor belongs.
910*7c478bd9Sstevel@tonic-gate  */
911*7c478bd9Sstevel@tonic-gate psetid_t
912*7c478bd9Sstevel@tonic-gate cpupart_query_cpu(cpu_t *cp)
913*7c478bd9Sstevel@tonic-gate {
914*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
915*7c478bd9Sstevel@tonic-gate 
916*7c478bd9Sstevel@tonic-gate 	return (CPTOPS(cp->cpu_part->cp_id));
917*7c478bd9Sstevel@tonic-gate }
918*7c478bd9Sstevel@tonic-gate 
919*7c478bd9Sstevel@tonic-gate 
920*7c478bd9Sstevel@tonic-gate /*
921*7c478bd9Sstevel@tonic-gate  * Attach a processor to an existing partition.
922*7c478bd9Sstevel@tonic-gate  */
923*7c478bd9Sstevel@tonic-gate int
924*7c478bd9Sstevel@tonic-gate cpupart_attach_cpu(psetid_t psid, cpu_t *cp, int forced)
925*7c478bd9Sstevel@tonic-gate {
926*7c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
927*7c478bd9Sstevel@tonic-gate 	int		err;
928*7c478bd9Sstevel@tonic-gate 
929*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
930*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
931*7c478bd9Sstevel@tonic-gate 
932*7c478bd9Sstevel@tonic-gate 	pp = cpupart_find(psid);
933*7c478bd9Sstevel@tonic-gate 	if (pp == NULL)
934*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
935*7c478bd9Sstevel@tonic-gate 	if (cp->cpu_flags & CPU_OFFLINE)
936*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
937*7c478bd9Sstevel@tonic-gate 
938*7c478bd9Sstevel@tonic-gate 	err = cpupart_move_cpu(cp, pp, forced);
939*7c478bd9Sstevel@tonic-gate 	return (err);
940*7c478bd9Sstevel@tonic-gate }
941*7c478bd9Sstevel@tonic-gate 
942*7c478bd9Sstevel@tonic-gate /*
943*7c478bd9Sstevel@tonic-gate  * Get a list of cpus belonging to the partition.  If numcpus is NULL,
944*7c478bd9Sstevel@tonic-gate  * this just checks for a valid partition.  If numcpus is non-NULL but
945*7c478bd9Sstevel@tonic-gate  * cpulist is NULL, the current number of cpus is stored in *numcpus.
946*7c478bd9Sstevel@tonic-gate  * If both are non-NULL, the current number of cpus is stored in *numcpus,
947*7c478bd9Sstevel@tonic-gate  * and a list of those cpus up to the size originally in *numcpus is
948*7c478bd9Sstevel@tonic-gate  * stored in cpulist[].  Also, store the processor set id in *psid.
949*7c478bd9Sstevel@tonic-gate  * This is useful in case the processor set id passed in was PS_MYID.
950*7c478bd9Sstevel@tonic-gate  */
951*7c478bd9Sstevel@tonic-gate int
952*7c478bd9Sstevel@tonic-gate cpupart_get_cpus(psetid_t *psid, processorid_t *cpulist, uint_t *numcpus)
953*7c478bd9Sstevel@tonic-gate {
954*7c478bd9Sstevel@tonic-gate 	cpupart_t	*pp;
955*7c478bd9Sstevel@tonic-gate 	uint_t		ncpus;
956*7c478bd9Sstevel@tonic-gate 	cpu_t		*c;
957*7c478bd9Sstevel@tonic-gate 	int		i;
958*7c478bd9Sstevel@tonic-gate 
959*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
960*7c478bd9Sstevel@tonic-gate 	pp = cpupart_find(*psid);
961*7c478bd9Sstevel@tonic-gate 	if (pp == NULL) {
962*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
963*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
964*7c478bd9Sstevel@tonic-gate 	}
965*7c478bd9Sstevel@tonic-gate 	*psid = CPTOPS(pp->cp_id);
966*7c478bd9Sstevel@tonic-gate 	ncpus = pp->cp_ncpus;
967*7c478bd9Sstevel@tonic-gate 	if (numcpus) {
968*7c478bd9Sstevel@tonic-gate 		if (ncpus > *numcpus) {
969*7c478bd9Sstevel@tonic-gate 			/*
970*7c478bd9Sstevel@tonic-gate 			 * Only copy as many cpus as were passed in, but
971*7c478bd9Sstevel@tonic-gate 			 * pass back the real number.
972*7c478bd9Sstevel@tonic-gate 			 */
973*7c478bd9Sstevel@tonic-gate 			uint_t t = ncpus;
974*7c478bd9Sstevel@tonic-gate 			ncpus = *numcpus;
975*7c478bd9Sstevel@tonic-gate 			*numcpus = t;
976*7c478bd9Sstevel@tonic-gate 		} else
977*7c478bd9Sstevel@tonic-gate 			*numcpus = ncpus;
978*7c478bd9Sstevel@tonic-gate 
979*7c478bd9Sstevel@tonic-gate 		if (cpulist) {
980*7c478bd9Sstevel@tonic-gate 			c = pp->cp_cpulist;
981*7c478bd9Sstevel@tonic-gate 			for (i = 0; i < ncpus; i++) {
982*7c478bd9Sstevel@tonic-gate 				ASSERT(c != NULL);
983*7c478bd9Sstevel@tonic-gate 				cpulist[i] = c->cpu_id;
984*7c478bd9Sstevel@tonic-gate 				c = c->cpu_next_part;
985*7c478bd9Sstevel@tonic-gate 			}
986*7c478bd9Sstevel@tonic-gate 		}
987*7c478bd9Sstevel@tonic-gate 	}
988*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
989*7c478bd9Sstevel@tonic-gate 	return (0);
990*7c478bd9Sstevel@tonic-gate }
991*7c478bd9Sstevel@tonic-gate 
992*7c478bd9Sstevel@tonic-gate /*
993*7c478bd9Sstevel@tonic-gate  * Reallocate kpreempt queues for each CPU partition.  Called from
994*7c478bd9Sstevel@tonic-gate  * disp_setup when a new scheduling class is loaded that increases the
995*7c478bd9Sstevel@tonic-gate  * number of priorities in the system.
996*7c478bd9Sstevel@tonic-gate  */
997*7c478bd9Sstevel@tonic-gate void
998*7c478bd9Sstevel@tonic-gate cpupart_kpqalloc(pri_t npri)
999*7c478bd9Sstevel@tonic-gate {
1000*7c478bd9Sstevel@tonic-gate 	cpupart_t *cpp;
1001*7c478bd9Sstevel@tonic-gate 
1002*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1003*7c478bd9Sstevel@tonic-gate 	cpp = cp_list_head;
1004*7c478bd9Sstevel@tonic-gate 	do {
1005*7c478bd9Sstevel@tonic-gate 		disp_kp_alloc(&cpp->cp_kp_queue, npri);
1006*7c478bd9Sstevel@tonic-gate 		cpp = cpp->cp_next;
1007*7c478bd9Sstevel@tonic-gate 	} while (cpp != cp_list_head);
1008*7c478bd9Sstevel@tonic-gate }
1009*7c478bd9Sstevel@tonic-gate 
1010*7c478bd9Sstevel@tonic-gate int
1011*7c478bd9Sstevel@tonic-gate cpupart_get_loadavg(psetid_t psid, int *buf, int nelem)
1012*7c478bd9Sstevel@tonic-gate {
1013*7c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1014*7c478bd9Sstevel@tonic-gate 	int i;
1015*7c478bd9Sstevel@tonic-gate 
1016*7c478bd9Sstevel@tonic-gate 	ASSERT(nelem >= 0);
1017*7c478bd9Sstevel@tonic-gate 	ASSERT(nelem <= LOADAVG_NSTATS);
1018*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1019*7c478bd9Sstevel@tonic-gate 
1020*7c478bd9Sstevel@tonic-gate 	cp = cpupart_find(psid);
1021*7c478bd9Sstevel@tonic-gate 	if (cp == NULL)
1022*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
1023*7c478bd9Sstevel@tonic-gate 	for (i = 0; i < nelem; i++)
1024*7c478bd9Sstevel@tonic-gate 		buf[i] = cp->cp_hp_avenrun[i] >> (16 - FSHIFT);
1025*7c478bd9Sstevel@tonic-gate 
1026*7c478bd9Sstevel@tonic-gate 	return (0);
1027*7c478bd9Sstevel@tonic-gate }
1028*7c478bd9Sstevel@tonic-gate 
1029*7c478bd9Sstevel@tonic-gate 
1030*7c478bd9Sstevel@tonic-gate uint_t
1031*7c478bd9Sstevel@tonic-gate cpupart_list(psetid_t *list, uint_t nelem, int flag)
1032*7c478bd9Sstevel@tonic-gate {
1033*7c478bd9Sstevel@tonic-gate 	uint_t numpart = 0;
1034*7c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1035*7c478bd9Sstevel@tonic-gate 
1036*7c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
1037*7c478bd9Sstevel@tonic-gate 	ASSERT(flag == CP_ALL || flag == CP_NONEMPTY);
1038*7c478bd9Sstevel@tonic-gate 
1039*7c478bd9Sstevel@tonic-gate 	if (list != NULL) {
1040*7c478bd9Sstevel@tonic-gate 		cp = cp_list_head;
1041*7c478bd9Sstevel@tonic-gate 		do {
1042*7c478bd9Sstevel@tonic-gate 			if (((flag == CP_ALL) && (cp != &cp_default)) ||
1043*7c478bd9Sstevel@tonic-gate 			    ((flag == CP_NONEMPTY) && (cp->cp_ncpus != 0))) {
1044*7c478bd9Sstevel@tonic-gate 				if (numpart == nelem)
1045*7c478bd9Sstevel@tonic-gate 					break;
1046*7c478bd9Sstevel@tonic-gate 				list[numpart++] = CPTOPS(cp->cp_id);
1047*7c478bd9Sstevel@tonic-gate 			}
1048*7c478bd9Sstevel@tonic-gate 			cp = cp->cp_next;
1049*7c478bd9Sstevel@tonic-gate 		} while (cp != cp_list_head);
1050*7c478bd9Sstevel@tonic-gate 	}
1051*7c478bd9Sstevel@tonic-gate 
1052*7c478bd9Sstevel@tonic-gate 	ASSERT(numpart < cp_numparts);
1053*7c478bd9Sstevel@tonic-gate 
1054*7c478bd9Sstevel@tonic-gate 	if (flag == CP_ALL)
1055*7c478bd9Sstevel@tonic-gate 		numpart = cp_numparts - 1; /* leave out default partition */
1056*7c478bd9Sstevel@tonic-gate 	else if (flag == CP_NONEMPTY)
1057*7c478bd9Sstevel@tonic-gate 		numpart = cp_numparts_nonempty;
1058*7c478bd9Sstevel@tonic-gate 
1059*7c478bd9Sstevel@tonic-gate 	return (numpart);
1060*7c478bd9Sstevel@tonic-gate }
1061*7c478bd9Sstevel@tonic-gate 
1062*7c478bd9Sstevel@tonic-gate int
1063*7c478bd9Sstevel@tonic-gate cpupart_setattr(psetid_t psid, uint_t attr)
1064*7c478bd9Sstevel@tonic-gate {
1065*7c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1066*7c478bd9Sstevel@tonic-gate 
1067*7c478bd9Sstevel@tonic-gate 	ASSERT(pool_lock_held());
1068*7c478bd9Sstevel@tonic-gate 
1069*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
1070*7c478bd9Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
1071*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
1072*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
1073*7c478bd9Sstevel@tonic-gate 	}
1074*7c478bd9Sstevel@tonic-gate 	/*
1075*7c478bd9Sstevel@tonic-gate 	 * PSET_NOESCAPE attribute for default cpu partition is always set
1076*7c478bd9Sstevel@tonic-gate 	 */
1077*7c478bd9Sstevel@tonic-gate 	if (cp == &cp_default && !(attr & PSET_NOESCAPE)) {
1078*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
1079*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
1080*7c478bd9Sstevel@tonic-gate 	}
1081*7c478bd9Sstevel@tonic-gate 	cp->cp_attr = attr;
1082*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
1083*7c478bd9Sstevel@tonic-gate 	return (0);
1084*7c478bd9Sstevel@tonic-gate }
1085*7c478bd9Sstevel@tonic-gate 
1086*7c478bd9Sstevel@tonic-gate int
1087*7c478bd9Sstevel@tonic-gate cpupart_getattr(psetid_t psid, uint_t *attrp)
1088*7c478bd9Sstevel@tonic-gate {
1089*7c478bd9Sstevel@tonic-gate 	cpupart_t *cp;
1090*7c478bd9Sstevel@tonic-gate 
1091*7c478bd9Sstevel@tonic-gate 	mutex_enter(&cpu_lock);
1092*7c478bd9Sstevel@tonic-gate 	if ((cp = cpupart_find(psid)) == NULL) {
1093*7c478bd9Sstevel@tonic-gate 		mutex_exit(&cpu_lock);
1094*7c478bd9Sstevel@tonic-gate 		return (EINVAL);
1095*7c478bd9Sstevel@tonic-gate 	}
1096*7c478bd9Sstevel@tonic-gate 	*attrp = cp->cp_attr;
1097*7c478bd9Sstevel@tonic-gate 	mutex_exit(&cpu_lock);
1098*7c478bd9Sstevel@tonic-gate 	return (0);
1099*7c478bd9Sstevel@tonic-gate }
1100