1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/systm.h>
27 #include <sys/types.h>
28 #include <sys/param.h>
29 #include <sys/thread.h>
30 #include <sys/cpuvar.h>
31 #include <sys/cpupart.h>
32 #include <sys/cmn_err.h>
33 #include <sys/disp.h>
34 #include <sys/group.h>
35 #include <sys/bitset.h>
36 #include <sys/lgrp.h>
37 #include <sys/cmt.h>
38 
39 /*
40  * CMT dispatcher policies
41  *
42  * This file implements CMT dispatching policies using Processor Groups.
43  *
44  * The scheduler/dispatcher leverages knowledge of the performance
45  * relevant CMT sharing relationships existing between CPUs to implement
46  * load balancing, and coalescence thread placement policies.
47  *
48  * Load balancing policy seeks to improve performance by minimizing
49  * contention over shared processor resources / facilities. Coalescence
50  * policies improve resource utilization and ultimately power efficiency.
51  *
52  * On NUMA systems, the dispatcher will generally perform load balancing and
53  * coalescence within (and not across) lgroups. This is because there isn't
54  * much sense in trying to correct an imbalance by sending a thread outside
55  * of its home, if it would attempt to return home a short while later.
56  * The dispatcher will implement CMT policy across lgroups however, if
57  * it can do so with a thread homed to the root lgroup, since root homed
58  * threads have no lgroup affinity.
59  */
60 
61 /*
62  * Return non-zero if, given the policy, we should migrate from running
63  * somewhere "here" to somewhere "there".
64  */
65 static int
cmt_should_migrate(pg_cmt_t * here,pg_cmt_t * there,pg_cmt_policy_t policy,int self)66 cmt_should_migrate(pg_cmt_t *here, pg_cmt_t *there, pg_cmt_policy_t policy,
67     int self)
68 {
69 	uint32_t here_util, there_util;
70 
71 	here_util = here->cmt_utilization;
72 	there_util = there->cmt_utilization;
73 
74 	/*
75 	 * This assumes that curthread's utilization is "1"
76 	 */
77 	if (self && bitset_in_set(&here->cmt_cpus_actv_set, CPU->cpu_seqid))
78 		here_util--;	/* Ignore curthread's effect */
79 
80 	/*
81 	 * Load balancing and coalescence are conflicting policies
82 	 */
83 	ASSERT((policy & (CMT_BALANCE|CMT_COALESCE)) !=
84 	    (CMT_BALANCE|CMT_COALESCE));
85 
86 	if (policy & CMT_BALANCE) {
87 		/*
88 		 * Balance utilization
89 		 *
90 		 * If the target is comparatively underutilized
91 		 * (either in an absolute sense, or scaled by capacity),
92 		 * then choose to balance.
93 		 */
94 		if ((here_util > there_util) ||
95 		    (here_util == there_util &&
96 		    (CMT_CAPACITY(there) > CMT_CAPACITY(here)))) {
97 			return (1);
98 		}
99 	} else if (policy & CMT_COALESCE) {
100 		/*
101 		 * Attempt to drive group utilization up to capacity
102 		 */
103 		if (there_util > here_util &&
104 		    there_util < CMT_CAPACITY(there))
105 			return (1);
106 	}
107 	return (0);
108 }
109 
110 /*
111  * Perform multi-level CMT load balancing of running threads.
112  *
113  * tp is the thread being enqueued.
114  * cp is a hint CPU, against which CMT load balancing will be performed.
115  *
116  * Returns cp, or a CPU better than cp with respect to balancing
117  * running thread load.
118  */
119 cpu_t *
cmt_balance(kthread_t * tp,cpu_t * cp)120 cmt_balance(kthread_t *tp, cpu_t *cp)
121 {
122 	int		hint, i, cpu, nsiblings;
123 	int		self = 0;
124 	group_t		*cmt_pgs, *siblings;
125 	pg_cmt_t	*pg, *pg_tmp, *tpg = NULL;
126 	int		level = 0;
127 	cpu_t		*newcp;
128 	extern cmt_lgrp_t *cmt_root;
129 
130 	ASSERT(THREAD_LOCK_HELD(tp));
131 
132 	cmt_pgs = &cp->cpu_pg->cmt_pgs;
133 
134 	if (GROUP_SIZE(cmt_pgs) == 0)
135 		return (cp);	/* nothing to do */
136 
137 	if (tp == curthread)
138 		self = 1;
139 
140 	/*
141 	 * Balance across siblings in the CPUs CMT lineage
142 	 * If the thread is homed to the root lgroup, perform
143 	 * top level balancing against other top level PGs
144 	 * in the system. Otherwise, start with the default
145 	 * top level siblings group, which is within the leaf lgroup
146 	 */
147 	pg = GROUP_ACCESS(cmt_pgs, level);
148 	if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID)
149 		siblings = &cmt_root->cl_pgs;
150 	else
151 		siblings = pg->cmt_siblings;
152 
153 	/*
154 	 * Traverse down the lineage until we find a level that needs
155 	 * balancing, or we get to the end.
156 	 */
157 	for (;;) {
158 		nsiblings = GROUP_SIZE(siblings);	/* self inclusive */
159 		if (nsiblings == 1)
160 			goto next_level;
161 
162 		hint = CPU_PSEUDO_RANDOM() % nsiblings;
163 
164 		/*
165 		 * Find a balancing candidate from among our siblings
166 		 * "hint" is a hint for where to start looking
167 		 */
168 		i = hint;
169 		do {
170 			ASSERT(i < nsiblings);
171 			pg_tmp = GROUP_ACCESS(siblings, i);
172 
173 			/*
174 			 * The candidate must not be us, and must
175 			 * have some CPU resources in the thread's
176 			 * partition
177 			 */
178 			if (pg_tmp != pg &&
179 			    bitset_in_set(&tp->t_cpupart->cp_cmt_pgs,
180 			    ((pg_t *)pg_tmp)->pg_id)) {
181 				tpg = pg_tmp;
182 				break;
183 			}
184 
185 			if (++i >= nsiblings)
186 				i = 0;
187 		} while (i != hint);
188 
189 		if (!tpg)
190 			goto next_level; /* no candidates at this level */
191 
192 		/*
193 		 * Decide if we should migrate from the current PG to a
194 		 * target PG given a policy
195 		 */
196 		if (cmt_should_migrate(pg, tpg, pg->cmt_policy, self))
197 			break;
198 		tpg = NULL;
199 
200 next_level:
201 		if (++level == GROUP_SIZE(cmt_pgs))
202 			break;
203 
204 		pg = GROUP_ACCESS(cmt_pgs, level);
205 		siblings = pg->cmt_siblings;
206 	}
207 
208 	if (tpg) {
209 		uint_t	tgt_size = GROUP_SIZE(&tpg->cmt_cpus_actv);
210 
211 		/*
212 		 * Select an idle CPU from the target
213 		 */
214 		hint = CPU_PSEUDO_RANDOM() % tgt_size;
215 		cpu = hint;
216 		do {
217 			newcp = GROUP_ACCESS(&tpg->cmt_cpus_actv, cpu);
218 			if (newcp->cpu_part == tp->t_cpupart &&
219 			    newcp->cpu_dispatch_pri == -1) {
220 				cp = newcp;
221 				break;
222 			}
223 			if (++cpu == tgt_size)
224 				cpu = 0;
225 		} while (cpu != hint);
226 	}
227 
228 	return (cp);
229 }
230