/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ #include #include #include #include #include #include #include #include #include #include #include #include /* * CMT dispatcher policies * * This file implements CMT dispatching policies using Processor Groups. * * The scheduler/dispatcher leverages knowledge of the performance * relevant CMT sharing relationships existing between CPUs to implement * load balancing, and coalescence thread placement policies. * * Load balancing policy seeks to improve performance by minimizing * contention over shared processor resources / facilities. Coalescence * policies improve resource utilization and ultimately power efficiency. * * On NUMA systems, the dispatcher will generally perform load balancing and * coalescence within (and not across) lgroups. This is because there isn't * much sense in trying to correct an imbalance by sending a thread outside * of its home, if it would attempt to return home a short while later. 
* The dispatcher will implement CMT policy across lgroups however, if * it can do so with a thread homed to the root lgroup, since root homed * threads have no lgroup affinity. */ /* * Return non-zero if, given the policy, we should migrate from running * somewhere "here" to somewhere "there". */ static int cmt_should_migrate(pg_cmt_t *here, pg_cmt_t *there, pg_cmt_policy_t policy, int self) { uint32_t here_util, there_util; here_util = here->cmt_utilization; there_util = there->cmt_utilization; /* * This assumes that curthread's utilization is "1" */ if (self && bitset_in_set(&here->cmt_cpus_actv_set, CPU->cpu_seqid)) here_util--; /* Ignore curthread's effect */ /* * Load balancing and coalescence are conflicting policies */ ASSERT((policy & (CMT_BALANCE|CMT_COALESCE)) != (CMT_BALANCE|CMT_COALESCE)); if (policy & CMT_BALANCE) { /* * Balance utilization * * If the target is comparatively underutilized * (either in an absolute sense, or scaled by capacity), * then choose to balance. */ if ((here_util > there_util) || (here_util == there_util && (CMT_CAPACITY(there) > CMT_CAPACITY(here)))) { return (1); } } else if (policy & CMT_COALESCE) { /* * Attempt to drive group utilization up to capacity */ if (there_util > here_util && there_util < CMT_CAPACITY(there)) return (1); } return (0); } /* * Perform multi-level CMT load balancing of running threads. * * tp is the thread being enqueued. * cp is a hint CPU, against which CMT load balancing will be performed. * * Returns cp, or a CPU better than cp with respect to balancing * running thread load. 
 */
cpu_t *
cmt_balance(kthread_t *tp, cpu_t *cp)
{
	int		hint, i, cpu, nsiblings;
	int		self = 0;
	group_t		*cmt_pgs, *siblings;
	pg_cmt_t	*pg, *pg_tmp, *tpg = NULL;
	int		level = 0;
	cpu_t		*newcp;
	extern cmt_lgrp_t *cmt_root;

	ASSERT(THREAD_LOCK_HELD(tp));

	/* The hint CPU's CMT PG lineage, ordered from the top down */
	cmt_pgs = &cp->cpu_pg->cmt_pgs;

	if (GROUP_SIZE(cmt_pgs) == 0)
		return (cp);	/* nothing to do */

	/*
	 * "self" selects the curthread-discounting behavior in
	 * cmt_should_migrate(), since the enqueued thread's own
	 * utilization would otherwise bias the comparison.
	 */
	if (tp == curthread)
		self = 1;

	/*
	 * Balance across siblings in the CPUs CMT lineage
	 * If the thread is homed to the root lgroup, perform
	 * top level balancing against other top level PGs
	 * in the system. Otherwise, start with the default
	 * top level siblings group, which is within the leaf lgroup
	 */
	pg = GROUP_ACCESS(cmt_pgs, level);
	if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID)
		siblings = &cmt_root->cl_pgs;
	else
		siblings = pg->cmt_siblings;

	/*
	 * Traverse down the lineage until we find a level that needs
	 * balancing, or we get to the end.
	 */
	for (;;) {
		nsiblings = GROUP_SIZE(siblings);	/* self inclusive */
		if (nsiblings == 1)
			goto next_level;

		hint = CPU_PSEUDO_RANDOM() % nsiblings;

		/*
		 * Find a balancing candidate from among our siblings
		 * "hint" is a hint for where to start looking
		 */
		i = hint;
		do {
			ASSERT(i < nsiblings);
			pg_tmp = GROUP_ACCESS(siblings, i);

			/*
			 * The candidate must not be us, and must
			 * have some CPU resources in the thread's
			 * partition
			 */
			if (pg_tmp != pg &&
			    bitset_in_set(&tp->t_cpupart->cp_cmt_pgs,
			    ((pg_t *)pg_tmp)->pg_id)) {
				tpg = pg_tmp;
				break;
			}

			/* Wrap the scan around the end of the group */
			if (++i >= nsiblings)
				i = 0;
		} while (i != hint);

		if (!tpg)
			goto next_level; /* no candidates at this level */

		/*
		 * Decide if we should migrate from the current PG to a
		 * target PG given a policy
		 */
		if (cmt_should_migrate(pg, tpg, pg->cmt_policy, self))
			break;
		/* This level is balanced; discard candidate, go deeper */
		tpg = NULL;

next_level:
		if (++level == GROUP_SIZE(cmt_pgs))
			break;

		pg = GROUP_ACCESS(cmt_pgs, level);
		siblings = pg->cmt_siblings;
	}

	if (tpg) {
		uint_t	tgt_size = GROUP_SIZE(&tpg->cmt_cpus_actv);

		/*
		 * Select an idle CPU from the target
		 */
		hint = CPU_PSEUDO_RANDOM() % tgt_size;
		cpu = hint;
		do {
			newcp = GROUP_ACCESS(&tpg->cmt_cpus_actv, cpu);
			/*
			 * The CPU must belong to the thread's partition;
			 * cpu_dispatch_pri == -1 is the idleness test here.
			 * If no idle CPU is found after a full wrap-around
			 * scan, cp is returned unchanged.
			 */
			if (newcp->cpu_part == tp->t_cpupart &&
			    newcp->cpu_dispatch_pri == -1) {
				cp = newcp;
				break;
			}
			if (++cpu == tgt_size)
				cpu = 0;
		} while (cpu != hint);
	}
	return (cp);
}