17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate * CDDL HEADER START
37c478bd9Sstevel@tonic-gate *
47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the
5ab761399Sesaxe * Common Development and Distribution License (the "License").
6ab761399Sesaxe * You may not use this file except in compliance with the License.
77c478bd9Sstevel@tonic-gate *
87c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
97c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
107c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions
117c478bd9Sstevel@tonic-gate * and limitations under the License.
127c478bd9Sstevel@tonic-gate *
137c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
147c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
157c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
167c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
177c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
187c478bd9Sstevel@tonic-gate *
197c478bd9Sstevel@tonic-gate * CDDL HEADER END
207c478bd9Sstevel@tonic-gate */
217c478bd9Sstevel@tonic-gate /*
22d5d7cf4eSJonathan Chew * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
237c478bd9Sstevel@tonic-gate * Use is subject to license terms.
24c3377ee9SJohn Levon * Copyright 2019 Joyent, Inc.
257c478bd9Sstevel@tonic-gate */
267c478bd9Sstevel@tonic-gate
277c478bd9Sstevel@tonic-gate /*
287c478bd9Sstevel@tonic-gate * Basic NUMA support in terms of locality groups
297c478bd9Sstevel@tonic-gate *
307c478bd9Sstevel@tonic-gate * Solaris needs to know which CPUs, memory, etc. are near each other to
317c478bd9Sstevel@tonic-gate * provide good performance on NUMA machines by optimizing for locality.
327c478bd9Sstevel@tonic-gate * In order to do this, a new abstraction called a "locality group (lgroup)"
337c478bd9Sstevel@tonic-gate * has been introduced to keep track of which CPU-like and memory-like hardware
347c478bd9Sstevel@tonic-gate * resources are close to each other. Currently, latency is the only measure
357c478bd9Sstevel@tonic-gate * used to determine how to group hardware resources into lgroups, but this
367c478bd9Sstevel@tonic-gate * does not limit the groupings to be based solely on latency. Other factors
377c478bd9Sstevel@tonic-gate * may be used to determine the groupings in the future.
387c478bd9Sstevel@tonic-gate *
 * Lgroups are organized into a hierarchy or topology that represents the
407c478bd9Sstevel@tonic-gate * latency topology of the machine. There is always at least a root lgroup in
417c478bd9Sstevel@tonic-gate * the system. It represents all the hardware resources in the machine at a
427c478bd9Sstevel@tonic-gate * latency big enough that any hardware resource can at least access any other
437c478bd9Sstevel@tonic-gate * hardware resource within that latency. A Uniform Memory Access (UMA)
447c478bd9Sstevel@tonic-gate * machine is represented with one lgroup (the root). In contrast, a NUMA
457c478bd9Sstevel@tonic-gate * machine is represented at least by the root lgroup and some number of leaf
467c478bd9Sstevel@tonic-gate * lgroups where the leaf lgroups contain the hardware resources within the
477c478bd9Sstevel@tonic-gate * least latency of each other and the root lgroup still contains all the
487c478bd9Sstevel@tonic-gate * resources in the machine. Some number of intermediate lgroups may exist
497c478bd9Sstevel@tonic-gate * which represent more levels of locality than just the local latency of the
507c478bd9Sstevel@tonic-gate * leaf lgroups and the system latency of the root lgroup. Non-leaf lgroups
517c478bd9Sstevel@tonic-gate * (eg. root and intermediate lgroups) contain the next nearest resources to
527c478bd9Sstevel@tonic-gate * its children lgroups. Thus, the lgroup hierarchy from a given leaf lgroup
537c478bd9Sstevel@tonic-gate * to the root lgroup shows the hardware resources from closest to farthest
547c478bd9Sstevel@tonic-gate * from the leaf lgroup such that each successive ancestor lgroup contains
557c478bd9Sstevel@tonic-gate * the next nearest resources at the next level of locality from the previous.
567c478bd9Sstevel@tonic-gate *
577c478bd9Sstevel@tonic-gate * The kernel uses the lgroup abstraction to know how to allocate resources
587c478bd9Sstevel@tonic-gate * near a given process/thread. At fork() and lwp/thread_create() time, a
597c478bd9Sstevel@tonic-gate * "home" lgroup is chosen for a thread. This is done by picking the lgroup
607c478bd9Sstevel@tonic-gate * with the lowest load average. Binding to a processor or processor set will
617c478bd9Sstevel@tonic-gate * change the home lgroup for a thread. The scheduler has been modified to try
627c478bd9Sstevel@tonic-gate * to dispatch a thread on a CPU in its home lgroup. Physical memory
637c478bd9Sstevel@tonic-gate * allocation is lgroup aware too, so memory will be allocated from the current
647c478bd9Sstevel@tonic-gate * thread's home lgroup if possible. If the desired resources are not
657c478bd9Sstevel@tonic-gate * available, the kernel traverses the lgroup hierarchy going to the parent
667c478bd9Sstevel@tonic-gate * lgroup to find resources at the next level of locality until it reaches the
677c478bd9Sstevel@tonic-gate * root lgroup.
687c478bd9Sstevel@tonic-gate */
697c478bd9Sstevel@tonic-gate
707c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
717c478bd9Sstevel@tonic-gate #include <sys/lgrp_user.h>
727c478bd9Sstevel@tonic-gate #include <sys/types.h>
737c478bd9Sstevel@tonic-gate #include <sys/mman.h>
747c478bd9Sstevel@tonic-gate #include <sys/param.h>
757c478bd9Sstevel@tonic-gate #include <sys/var.h>
767c478bd9Sstevel@tonic-gate #include <sys/thread.h>
777c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
787c478bd9Sstevel@tonic-gate #include <sys/cpupart.h>
797c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
807c478bd9Sstevel@tonic-gate #include <vm/seg.h>
817c478bd9Sstevel@tonic-gate #include <vm/seg_kmem.h>
827c478bd9Sstevel@tonic-gate #include <vm/seg_spt.h>
837c478bd9Sstevel@tonic-gate #include <vm/seg_vn.h>
847c478bd9Sstevel@tonic-gate #include <vm/as.h>
857c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
867c478bd9Sstevel@tonic-gate #include <sys/systm.h>
877c478bd9Sstevel@tonic-gate #include <sys/errno.h>
887c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
897c478bd9Sstevel@tonic-gate #include <sys/kstat.h>
907c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
91fb2f18f8Sesaxe #include <sys/pg.h>
927c478bd9Sstevel@tonic-gate #include <sys/promif.h>
937c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
94c3377ee9SJohn Levon #include <sys/smt.h>
957c478bd9Sstevel@tonic-gate
967c478bd9Sstevel@tonic-gate lgrp_gen_t lgrp_gen = 0; /* generation of lgroup hierarchy */
977c478bd9Sstevel@tonic-gate lgrp_t *lgrp_table[NLGRPS_MAX]; /* table of all initialized lgrp_t structs */
987c478bd9Sstevel@tonic-gate /* indexed by lgrp_id */
997c478bd9Sstevel@tonic-gate int nlgrps; /* number of lgroups in machine */
1007c478bd9Sstevel@tonic-gate int lgrp_alloc_hint = -1; /* hint for where to try to allocate next */
1017c478bd9Sstevel@tonic-gate int lgrp_alloc_max = 0; /* max lgroup ID allocated so far */
1027c478bd9Sstevel@tonic-gate
1037c478bd9Sstevel@tonic-gate /*
1047c478bd9Sstevel@tonic-gate * Kstat data for lgroups.
1057c478bd9Sstevel@tonic-gate *
1067c478bd9Sstevel@tonic-gate * Actual kstat data is collected in lgrp_stats array.
1077c478bd9Sstevel@tonic-gate * The lgrp_kstat_data array of named kstats is used to extract data from
 * lgrp_stats and present it to the kstat framework. It is protected from parallel
1097c478bd9Sstevel@tonic-gate * modifications by lgrp_kstat_mutex. This may cause some contention when
1107c478bd9Sstevel@tonic-gate * several kstat commands run in parallel but this is not the
1117c478bd9Sstevel@tonic-gate * performance-critical path.
1127c478bd9Sstevel@tonic-gate */
1137c478bd9Sstevel@tonic-gate extern struct lgrp_stats lgrp_stats[]; /* table of per-lgrp stats */
1147c478bd9Sstevel@tonic-gate
1157c478bd9Sstevel@tonic-gate /*
1167c478bd9Sstevel@tonic-gate * Declare kstat names statically for enums as defined in the header file.
1177c478bd9Sstevel@tonic-gate */
1187c478bd9Sstevel@tonic-gate LGRP_KSTAT_NAMES;
1197c478bd9Sstevel@tonic-gate
1207c478bd9Sstevel@tonic-gate static void lgrp_kstat_init(void);
1217c478bd9Sstevel@tonic-gate static int lgrp_kstat_extract(kstat_t *, int);
1227c478bd9Sstevel@tonic-gate static void lgrp_kstat_reset(lgrp_id_t);
1237c478bd9Sstevel@tonic-gate
1247c478bd9Sstevel@tonic-gate static struct kstat_named lgrp_kstat_data[LGRP_NUM_STATS];
1257c478bd9Sstevel@tonic-gate static kmutex_t lgrp_kstat_mutex;
1267c478bd9Sstevel@tonic-gate
1277c478bd9Sstevel@tonic-gate
1287c478bd9Sstevel@tonic-gate /*
1297c478bd9Sstevel@tonic-gate * max number of lgroups supported by the platform
1307c478bd9Sstevel@tonic-gate */
1317c478bd9Sstevel@tonic-gate int nlgrpsmax = 0;
1327c478bd9Sstevel@tonic-gate
1337c478bd9Sstevel@tonic-gate /*
1347c478bd9Sstevel@tonic-gate * The root lgroup. Represents the set of resources at the system wide
1357c478bd9Sstevel@tonic-gate * level of locality.
1367c478bd9Sstevel@tonic-gate */
1377c478bd9Sstevel@tonic-gate lgrp_t *lgrp_root = NULL;
1387c478bd9Sstevel@tonic-gate
1397c478bd9Sstevel@tonic-gate /*
1407c478bd9Sstevel@tonic-gate * During system bootstrap cp_default does not contain the list of lgrp load
1417c478bd9Sstevel@tonic-gate * averages (cp_lgrploads). The list is allocated after the first CPU is brought
1427c478bd9Sstevel@tonic-gate * on-line when cp_default is initialized by cpupart_initialize_default().
1437c478bd9Sstevel@tonic-gate * Configuring CPU0 may create a two-level topology with root and one leaf node
1447c478bd9Sstevel@tonic-gate * containing CPU0. This topology is initially constructed in a special
1457c478bd9Sstevel@tonic-gate * statically allocated 2-element lpl list lpl_bootstrap_list and later cloned
1467c478bd9Sstevel@tonic-gate * to cp_default when cp_default is initialized. The lpl_bootstrap_list is used
1477c478bd9Sstevel@tonic-gate * for all lpl operations until cp_default is fully constructed.
1487c478bd9Sstevel@tonic-gate *
1497c478bd9Sstevel@tonic-gate * The lpl_bootstrap_list is maintained by the code in lgrp.c. Every other
1507c478bd9Sstevel@tonic-gate * consumer who needs default lpl should use lpl_bootstrap which is a pointer to
1517c478bd9Sstevel@tonic-gate * the first element of lpl_bootstrap_list.
152394b433dSesaxe *
153394b433dSesaxe * CPUs that are added to the system, but have not yet been assigned to an
154394b433dSesaxe * lgrp will use lpl_bootstrap as a default lpl. This is necessary because
155394b433dSesaxe * on some architectures (x86) it's possible for the slave CPU startup thread
156394b433dSesaxe * to enter the dispatcher or allocate memory before calling lgrp_cpu_init().
1577c478bd9Sstevel@tonic-gate */
1587c478bd9Sstevel@tonic-gate #define LPL_BOOTSTRAP_SIZE 2
1597c478bd9Sstevel@tonic-gate static lpl_t lpl_bootstrap_list[LPL_BOOTSTRAP_SIZE];
1607c478bd9Sstevel@tonic-gate lpl_t *lpl_bootstrap;
1616890d023SEric Saxe static lpl_t *lpl_bootstrap_rset[LPL_BOOTSTRAP_SIZE];
1626890d023SEric Saxe static int lpl_bootstrap_id2rset[LPL_BOOTSTRAP_SIZE];
1637c478bd9Sstevel@tonic-gate
164394b433dSesaxe /*
165394b433dSesaxe * If cp still references the bootstrap lpl, it has not yet been added to
166394b433dSesaxe * an lgrp. lgrp_mem_choose() uses this macro to detect the case where
167394b433dSesaxe * a thread is trying to allocate memory close to a CPU that has no lgrp.
168394b433dSesaxe */
169394b433dSesaxe #define LGRP_CPU_HAS_NO_LGRP(cp) ((cp)->cpu_lpl == lpl_bootstrap)
170394b433dSesaxe
1717c478bd9Sstevel@tonic-gate static lgrp_t lroot;
1727c478bd9Sstevel@tonic-gate
1737c478bd9Sstevel@tonic-gate /*
1747c478bd9Sstevel@tonic-gate * Size, in bytes, beyond which random memory allocation policy is applied
1757c478bd9Sstevel@tonic-gate * to non-shared memory. Default is the maximum size, so random memory
1767c478bd9Sstevel@tonic-gate * allocation won't be used for non-shared memory by default.
1777c478bd9Sstevel@tonic-gate */
1787c478bd9Sstevel@tonic-gate size_t lgrp_privm_random_thresh = (size_t)(-1);
1797c478bd9Sstevel@tonic-gate
/* the maximum effect that a single thread can have on its lgroup's load */
181c6402783Sakolb #define LGRP_LOADAVG_MAX_EFFECT(ncpu) \
182c6402783Sakolb ((lgrp_loadavg_max_effect) / (ncpu))
183c6402783Sakolb uint32_t lgrp_loadavg_max_effect = LGRP_LOADAVG_THREAD_MAX;
184c6402783Sakolb
185c6402783Sakolb
1867c478bd9Sstevel@tonic-gate /*
1877c478bd9Sstevel@tonic-gate * Size, in bytes, beyond which random memory allocation policy is applied to
1887c478bd9Sstevel@tonic-gate * shared memory. Default is 8MB (2 ISM pages).
1897c478bd9Sstevel@tonic-gate */
1907c478bd9Sstevel@tonic-gate size_t lgrp_shm_random_thresh = 8*1024*1024;
1917c478bd9Sstevel@tonic-gate
1927c478bd9Sstevel@tonic-gate /*
1937c478bd9Sstevel@tonic-gate * Whether to do processor set aware memory allocation by default
1947c478bd9Sstevel@tonic-gate */
1957c478bd9Sstevel@tonic-gate int lgrp_mem_pset_aware = 0;
1967c478bd9Sstevel@tonic-gate
1977c478bd9Sstevel@tonic-gate /*
1987c478bd9Sstevel@tonic-gate * Set the default memory allocation policy for root lgroup
1997c478bd9Sstevel@tonic-gate */
2007c478bd9Sstevel@tonic-gate lgrp_mem_policy_t lgrp_mem_policy_root = LGRP_MEM_POLICY_RANDOM;
2017c478bd9Sstevel@tonic-gate
2027c478bd9Sstevel@tonic-gate /*
2037c478bd9Sstevel@tonic-gate * Set the default memory allocation policy. For most platforms,
2047c478bd9Sstevel@tonic-gate * next touch is sufficient, but some platforms may wish to override
2057c478bd9Sstevel@tonic-gate * this.
2067c478bd9Sstevel@tonic-gate */
2077c478bd9Sstevel@tonic-gate lgrp_mem_policy_t lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT;
2087c478bd9Sstevel@tonic-gate
2097c478bd9Sstevel@tonic-gate
2107c478bd9Sstevel@tonic-gate /*
2117c478bd9Sstevel@tonic-gate * lgroup CPU event handlers
2127c478bd9Sstevel@tonic-gate */
2137c478bd9Sstevel@tonic-gate static void lgrp_cpu_init(struct cpu *);
2147c478bd9Sstevel@tonic-gate static void lgrp_cpu_fini(struct cpu *, lgrp_id_t);
2157c478bd9Sstevel@tonic-gate static lgrp_t *lgrp_cpu_to_lgrp(struct cpu *);
2167c478bd9Sstevel@tonic-gate
2177c478bd9Sstevel@tonic-gate /*
2187c478bd9Sstevel@tonic-gate * lgroup memory event handlers
2197c478bd9Sstevel@tonic-gate */
2207c478bd9Sstevel@tonic-gate static void lgrp_mem_init(int, lgrp_handle_t, boolean_t);
2217c478bd9Sstevel@tonic-gate static void lgrp_mem_fini(int, lgrp_handle_t, boolean_t);
2227c478bd9Sstevel@tonic-gate static void lgrp_mem_rename(int, lgrp_handle_t, lgrp_handle_t);
2237c478bd9Sstevel@tonic-gate
2247c478bd9Sstevel@tonic-gate /*
2257c478bd9Sstevel@tonic-gate * lgroup CPU partition event handlers
2267c478bd9Sstevel@tonic-gate */
2277c478bd9Sstevel@tonic-gate static void lgrp_part_add_cpu(struct cpu *, lgrp_id_t);
2287c478bd9Sstevel@tonic-gate static void lgrp_part_del_cpu(struct cpu *);
2297c478bd9Sstevel@tonic-gate
230d5d7cf4eSJonathan Chew /*
231d5d7cf4eSJonathan Chew * lgroup framework initialization
232d5d7cf4eSJonathan Chew */
233d5d7cf4eSJonathan Chew static void lgrp_main_init(void);
234d5d7cf4eSJonathan Chew static void lgrp_main_mp_init(void);
2357c478bd9Sstevel@tonic-gate static void lgrp_root_init(void);
236d5d7cf4eSJonathan Chew static void lgrp_setup(void);
2377c478bd9Sstevel@tonic-gate
2387c478bd9Sstevel@tonic-gate /*
2397c478bd9Sstevel@tonic-gate * lpl topology
2407c478bd9Sstevel@tonic-gate */
2417c478bd9Sstevel@tonic-gate static void lpl_init(lpl_t *, lpl_t *, lgrp_t *);
2427c478bd9Sstevel@tonic-gate static void lpl_clear(lpl_t *);
2437c478bd9Sstevel@tonic-gate static void lpl_leaf_insert(lpl_t *, struct cpupart *);
2447c478bd9Sstevel@tonic-gate static void lpl_leaf_remove(lpl_t *, struct cpupart *);
2457c478bd9Sstevel@tonic-gate static void lpl_rset_add(lpl_t *, lpl_t *);
2467c478bd9Sstevel@tonic-gate static void lpl_rset_del(lpl_t *, lpl_t *);
2477c478bd9Sstevel@tonic-gate static int lpl_rset_contains(lpl_t *, lpl_t *);
2487c478bd9Sstevel@tonic-gate static void lpl_cpu_adjcnt(lpl_act_t, struct cpu *);
2497c478bd9Sstevel@tonic-gate static void lpl_child_update(lpl_t *, struct cpupart *);
2507c478bd9Sstevel@tonic-gate static int lpl_pick(lpl_t *, lpl_t *);
2517c478bd9Sstevel@tonic-gate static void lpl_verify_wrapper(struct cpupart *);
2527c478bd9Sstevel@tonic-gate
2537c478bd9Sstevel@tonic-gate /*
2547c478bd9Sstevel@tonic-gate * defines for lpl topology verifier return codes
2557c478bd9Sstevel@tonic-gate */
2567c478bd9Sstevel@tonic-gate
2577c478bd9Sstevel@tonic-gate #define LPL_TOPO_CORRECT 0
2587c478bd9Sstevel@tonic-gate #define LPL_TOPO_PART_HAS_NO_LPL -1
2597c478bd9Sstevel@tonic-gate #define LPL_TOPO_CPUS_NOT_EMPTY -2
2607c478bd9Sstevel@tonic-gate #define LPL_TOPO_LGRP_MISMATCH -3
2617c478bd9Sstevel@tonic-gate #define LPL_TOPO_MISSING_PARENT -4
2627c478bd9Sstevel@tonic-gate #define LPL_TOPO_PARENT_MISMATCH -5
2637c478bd9Sstevel@tonic-gate #define LPL_TOPO_BAD_CPUCNT -6
2647c478bd9Sstevel@tonic-gate #define LPL_TOPO_RSET_MISMATCH -7
2657c478bd9Sstevel@tonic-gate #define LPL_TOPO_LPL_ORPHANED -8
2667c478bd9Sstevel@tonic-gate #define LPL_TOPO_LPL_BAD_NCPU -9
2677c478bd9Sstevel@tonic-gate #define LPL_TOPO_RSET_MSSNG_LF -10
2687c478bd9Sstevel@tonic-gate #define LPL_TOPO_CPU_HAS_BAD_LPL -11
2696890d023SEric Saxe #define LPL_TOPO_NONLEAF_HAS_CPUS -12
2706890d023SEric Saxe #define LPL_TOPO_LGRP_NOT_LEAF -13
2716890d023SEric Saxe #define LPL_TOPO_BAD_RSETCNT -14
2727c478bd9Sstevel@tonic-gate
2737c478bd9Sstevel@tonic-gate /*
2747c478bd9Sstevel@tonic-gate * Return whether lgroup optimizations should be enabled on this system
2757c478bd9Sstevel@tonic-gate */
2767c478bd9Sstevel@tonic-gate int
lgrp_optimizations(void)2777c478bd9Sstevel@tonic-gate lgrp_optimizations(void)
2787c478bd9Sstevel@tonic-gate {
2797c478bd9Sstevel@tonic-gate /*
2807c478bd9Sstevel@tonic-gate * System must have more than 2 lgroups to enable lgroup optimizations
2817c478bd9Sstevel@tonic-gate *
2827c478bd9Sstevel@tonic-gate * XXX This assumes that a 2 lgroup system has an empty root lgroup
2837c478bd9Sstevel@tonic-gate * with one child lgroup containing all the resources. A 2 lgroup
2847c478bd9Sstevel@tonic-gate * system with a root lgroup directly containing CPUs or memory might
2857c478bd9Sstevel@tonic-gate * need lgroup optimizations with its child lgroup, but there
2867c478bd9Sstevel@tonic-gate * isn't such a machine for now....
2877c478bd9Sstevel@tonic-gate */
2887c478bd9Sstevel@tonic-gate if (nlgrps > 2)
2897c478bd9Sstevel@tonic-gate return (1);
2907c478bd9Sstevel@tonic-gate
2917c478bd9Sstevel@tonic-gate return (0);
2927c478bd9Sstevel@tonic-gate }
2937c478bd9Sstevel@tonic-gate
/*
 * Setup root lgroup
 *
 * Creates the root lgrp_t (representing all hardware resources in the
 * machine at the system-wide level of locality), installs it in
 * lgrp_table[], and wires up the static bootstrap lpl list that stands
 * in for cp_default's lgrp loads until cpupart_initialize_default()
 * constructs the real thing.
 */
static void
lgrp_root_init(void)
{
	lgrp_handle_t hand;
	int i;
	lgrp_id_t id;

	/*
	 * Create the "root" lgroup
	 */
	ASSERT(nlgrps == 0);
	id = nlgrps++;

	lgrp_root = &lroot;

	lgrp_root->lgrp_cpu = NULL;
	lgrp_root->lgrp_mnodes = 0;
	lgrp_root->lgrp_nmnodes = 0;
	hand = lgrp_plat_root_hand();
	lgrp_root->lgrp_plathand = hand;

	lgrp_root->lgrp_id = id;
	lgrp_root->lgrp_cpucnt = 0;
	lgrp_root->lgrp_childcnt = 0;
	klgrpset_clear(lgrp_root->lgrp_children);
	klgrpset_clear(lgrp_root->lgrp_leaves);
	lgrp_root->lgrp_parent = NULL;
	/* Root latency is the platform's latency from the root to itself */
	lgrp_root->lgrp_latency = lgrp_plat_latency(hand, hand);

	/* Root starts out with every resource set empty */
	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(lgrp_root->lgrp_set[i]);

	lgrp_root->lgrp_kstat = NULL;

	lgrp_table[id] = lgrp_root;

	/*
	 * Setup initial lpl list for CPU0 and initial t0 home.
	 * The only lpl space we have so far is lpl_bootstrap. It is used for
	 * all topology operations until cp_default is initialized at which
	 * point t0.t_lpl will be updated.
	 */
	lpl_bootstrap = lpl_bootstrap_list;
	t0.t_lpl = lpl_bootstrap;
	cp_default.cp_nlgrploads = LPL_BOOTSTRAP_SIZE;
	/* Element 1 is the leaf lpl; give it lgroup ID 1 (root is ID 0) */
	lpl_bootstrap_list[1].lpl_lgrpid = 1;

	/*
	 * Set up the bootstrap rset
	 * Since the bootstrap topology has just the root, and a leaf,
	 * the rset contains just the leaf, and both lpls can use the same rset
	 */
	lpl_bootstrap_rset[0] = &lpl_bootstrap_list[1];
	lpl_bootstrap_list[0].lpl_rset_sz = 1;
	lpl_bootstrap_list[0].lpl_rset = lpl_bootstrap_rset;
	lpl_bootstrap_list[0].lpl_id2rset = lpl_bootstrap_id2rset;

	lpl_bootstrap_list[1].lpl_rset_sz = 1;
	lpl_bootstrap_list[1].lpl_rset = lpl_bootstrap_rset;
	lpl_bootstrap_list[1].lpl_id2rset = lpl_bootstrap_id2rset;

	cp_default.cp_lgrploads = lpl_bootstrap;
}
3607c478bd9Sstevel@tonic-gate
3617c478bd9Sstevel@tonic-gate /*
3627c478bd9Sstevel@tonic-gate * Initialize the lgroup framework and allow the platform to do the same
363d5d7cf4eSJonathan Chew *
364d5d7cf4eSJonathan Chew * This happens in stages during boot and is all funnelled through this routine
365d5d7cf4eSJonathan Chew * (see definition of lgrp_init_stages_t to see what happens at each stage and
366d5d7cf4eSJonathan Chew * when)
3677c478bd9Sstevel@tonic-gate */
3687c478bd9Sstevel@tonic-gate void
lgrp_init(lgrp_init_stages_t stage)369d5d7cf4eSJonathan Chew lgrp_init(lgrp_init_stages_t stage)
3707c478bd9Sstevel@tonic-gate {
3717c478bd9Sstevel@tonic-gate /*
3727c478bd9Sstevel@tonic-gate * Initialize the platform
3737c478bd9Sstevel@tonic-gate */
374d5d7cf4eSJonathan Chew lgrp_plat_init(stage);
3757c478bd9Sstevel@tonic-gate
376d5d7cf4eSJonathan Chew switch (stage) {
377d5d7cf4eSJonathan Chew case LGRP_INIT_STAGE1:
378d5d7cf4eSJonathan Chew /*
379d5d7cf4eSJonathan Chew * Set max number of lgroups supported on this platform which
380d5d7cf4eSJonathan Chew * must be less than the max number of lgroups supported by the
381d5d7cf4eSJonathan Chew * common lgroup framework (eg. NLGRPS_MAX is max elements in
382d5d7cf4eSJonathan Chew * lgrp_table[], etc.)
383d5d7cf4eSJonathan Chew */
384d5d7cf4eSJonathan Chew nlgrpsmax = lgrp_plat_max_lgrps();
385d5d7cf4eSJonathan Chew ASSERT(nlgrpsmax <= NLGRPS_MAX);
386d5d7cf4eSJonathan Chew break;
387d5d7cf4eSJonathan Chew
388d5d7cf4eSJonathan Chew case LGRP_INIT_STAGE2:
389d5d7cf4eSJonathan Chew lgrp_setup();
390d5d7cf4eSJonathan Chew break;
391d5d7cf4eSJonathan Chew
392d5d7cf4eSJonathan Chew case LGRP_INIT_STAGE4:
393d5d7cf4eSJonathan Chew lgrp_main_init();
394d5d7cf4eSJonathan Chew break;
395d5d7cf4eSJonathan Chew
396d5d7cf4eSJonathan Chew case LGRP_INIT_STAGE5:
397d5d7cf4eSJonathan Chew lgrp_main_mp_init();
398d5d7cf4eSJonathan Chew break;
399d5d7cf4eSJonathan Chew
400d5d7cf4eSJonathan Chew default:
401d5d7cf4eSJonathan Chew break;
402d5d7cf4eSJonathan Chew }
4037c478bd9Sstevel@tonic-gate }
4047c478bd9Sstevel@tonic-gate
/*
 * Create the root and cpu0's lgroup, and set t0's home.
 *
 * Called at LGRP_INIT_STAGE2 via lgrp_init().
 */
static void
lgrp_setup(void)
{
	/*
	 * Setup the root lgroup
	 */
	lgrp_root_init();

	/*
	 * Add cpu0 to an lgroup (ADD then ONLINE, mirroring the normal
	 * CPU configuration sequence)
	 */
	lgrp_config(LGRP_CONFIG_CPU_ADD, (uintptr_t)CPU, 0);
	lgrp_config(LGRP_CONFIG_CPU_ONLINE, (uintptr_t)CPU, 0);
}
4227c478bd9Sstevel@tonic-gate
4237c478bd9Sstevel@tonic-gate /*
4247c478bd9Sstevel@tonic-gate * true when lgrp initialization has been completed.
4257c478bd9Sstevel@tonic-gate */
4267c478bd9Sstevel@tonic-gate int lgrp_initialized = 0;
4277c478bd9Sstevel@tonic-gate
4287c478bd9Sstevel@tonic-gate /*
4297c478bd9Sstevel@tonic-gate * True when lgrp topology is constructed.
4307c478bd9Sstevel@tonic-gate */
4317c478bd9Sstevel@tonic-gate int lgrp_topo_initialized = 0;
4327c478bd9Sstevel@tonic-gate
/*
 * Init routine called after startup(), /etc/system has been processed,
 * and cpu0 has been added to an lgroup.
 *
 * Validates the default memory allocation policy, handles late disabling
 * of MPO (collapsing the topology to a single root lgroup), creates the
 * lgroup kstats, and marks the framework initialized.
 */
static void
lgrp_main_init(void)
{
	cpu_t *cp = CPU;
	lgrp_id_t lgrpid;
	int i;
	extern void pg_cpu0_reinit();

	/*
	 * Enforce a valid lgrp_mem_default_policy
	 */
	if ((lgrp_mem_default_policy <= LGRP_MEM_POLICY_DEFAULT) ||
	    (lgrp_mem_default_policy >= LGRP_NUM_MEM_POLICIES) ||
	    (lgrp_mem_default_policy == LGRP_MEM_POLICY_NEXT_SEG))
		lgrp_mem_default_policy = LGRP_MEM_POLICY_NEXT;

	/*
	 * See if mpo should be disabled.
	 * This may happen in the case of null proc LPA on Starcat.
	 * The platform won't be able to detect null proc LPA until after
	 * cpu0 and memory have already been added to lgroups.
	 * When and if it is detected, the Starcat platform will return
	 * a different platform handle for cpu0 which is what we check for
	 * here. If mpo should be disabled move cpu0 to its rightful place
	 * (the root), and destroy the remaining lgroups. This effectively
	 * provides an UMA lgroup topology.
	 */
	lgrpid = cp->cpu_lpl->lpl_lgrpid;
	if (lgrp_table[lgrpid]->lgrp_plathand !=
	    lgrp_plat_cpu_to_hand(cp->cpu_id)) {
		/* Detach cpu0 from its current lgroup and partition... */
		lgrp_part_del_cpu(cp);
		lgrp_cpu_fini(cp, lgrpid);

		/* ...and re-home it, which lands it in the root lgroup */
		lgrp_cpu_init(cp);
		lgrp_part_add_cpu(cp, cp->cpu_lpl->lpl_lgrpid);

		ASSERT(cp->cpu_lpl->lpl_lgrpid == LGRP_ROOTID);

		/*
		 * Notify the PG subsystem that the CPU's lgrp
		 * association has changed
		 */
		pg_cpu0_reinit();

		/*
		 * Destroy all lgroups except for root
		 */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (LGRP_EXISTS(lgrp_table[i]) &&
			    lgrp_table[i] != lgrp_root)
				lgrp_destroy(lgrp_table[i]);
		}

		/*
		 * Fix up root to point at itself for leaves and resources
		 * and not have any children
		 */
		lgrp_root->lgrp_childcnt = 0;
		klgrpset_clear(lgrp_root->lgrp_children);
		klgrpset_clear(lgrp_root->lgrp_leaves);
		klgrpset_add(lgrp_root->lgrp_leaves, LGRP_ROOTID);
		klgrpset_clear(lgrp_root->lgrp_set[LGRP_RSRC_MEM]);
		klgrpset_add(lgrp_root->lgrp_set[LGRP_RSRC_MEM], LGRP_ROOTID);
	}

	/*
	 * Initialize kstats framework.
	 */
	lgrp_kstat_init();
	/*
	 * cpu0 is finally where it should be, so create its lgroup's kstats
	 */
	mutex_enter(&cpu_lock);
	lgrp_kstat_create(cp);
	mutex_exit(&cpu_lock);

	lgrp_initialized = 1;
}
5157c478bd9Sstevel@tonic-gate
/*
 * Finish lgrp initialization after all CPUS are brought on-line.
 * This routine is called after start_other_cpus().
 */
static void
lgrp_main_mp_init(void)
{
	klgrpset_t changed;

	/* Perform SMT-related initialization now that all CPUs are online */
	smt_init();

	/*
	 * Update lgroup topology (if necessary)
	 */
	klgrpset_clear(changed);
	(void) lgrp_topo_update(lgrp_table, lgrp_alloc_max + 1, &changed);
	lgrp_topo_initialized = 1;
}
5347c478bd9Sstevel@tonic-gate
53503400a71Sjjc /*
53603400a71Sjjc * Change latency of lgroup with specified lgroup platform handle (if one is
53703400a71Sjjc * given) or change all lgroups with old latency to new latency
53803400a71Sjjc */
53903400a71Sjjc void
lgrp_latency_change(lgrp_handle_t hand,u_longlong_t oldtime,u_longlong_t newtime)54003400a71Sjjc lgrp_latency_change(lgrp_handle_t hand, u_longlong_t oldtime,
54103400a71Sjjc u_longlong_t newtime)
54203400a71Sjjc {
54303400a71Sjjc lgrp_t *lgrp;
54403400a71Sjjc int i;
54503400a71Sjjc
54603400a71Sjjc for (i = 0; i <= lgrp_alloc_max; i++) {
54703400a71Sjjc lgrp = lgrp_table[i];
54803400a71Sjjc
54903400a71Sjjc if (!LGRP_EXISTS(lgrp))
55003400a71Sjjc continue;
55103400a71Sjjc
55203400a71Sjjc if ((hand == LGRP_NULL_HANDLE &&
55303400a71Sjjc lgrp->lgrp_latency == oldtime) ||
55403400a71Sjjc (hand != LGRP_NULL_HANDLE && lgrp->lgrp_plathand == hand))
55503400a71Sjjc lgrp->lgrp_latency = (int)newtime;
55603400a71Sjjc }
55703400a71Sjjc }
55803400a71Sjjc
5597c478bd9Sstevel@tonic-gate /*
5607c478bd9Sstevel@tonic-gate * Handle lgroup (re)configuration events (eg. addition of CPU, etc.)
5617c478bd9Sstevel@tonic-gate */
5627c478bd9Sstevel@tonic-gate void
lgrp_config(lgrp_config_flag_t event,uintptr_t resource,uintptr_t where)5637c478bd9Sstevel@tonic-gate lgrp_config(lgrp_config_flag_t event, uintptr_t resource, uintptr_t where)
5647c478bd9Sstevel@tonic-gate {
5657c478bd9Sstevel@tonic-gate klgrpset_t changed;
5667c478bd9Sstevel@tonic-gate cpu_t *cp;
5677c478bd9Sstevel@tonic-gate lgrp_id_t id;
5687c478bd9Sstevel@tonic-gate int rc;
5697c478bd9Sstevel@tonic-gate
5707c478bd9Sstevel@tonic-gate switch (event) {
5717c478bd9Sstevel@tonic-gate /*
5727c478bd9Sstevel@tonic-gate * The following (re)configuration events are common code
5737c478bd9Sstevel@tonic-gate * initiated. lgrp_plat_config() is called here to inform the
5747c478bd9Sstevel@tonic-gate * platform of the reconfiguration event.
5757c478bd9Sstevel@tonic-gate */
5767c478bd9Sstevel@tonic-gate case LGRP_CONFIG_CPU_ADD:
577394b433dSesaxe cp = (cpu_t *)resource;
578394b433dSesaxe
579394b433dSesaxe /*
580394b433dSesaxe * Initialize the new CPU's lgrp related next/prev
581394b433dSesaxe * links, and give it a bootstrap lpl so that it can
582394b433dSesaxe * survive should it need to enter the dispatcher.
583394b433dSesaxe */
584394b433dSesaxe cp->cpu_next_lpl = cp;
585394b433dSesaxe cp->cpu_prev_lpl = cp;
586394b433dSesaxe cp->cpu_next_lgrp = cp;
587394b433dSesaxe cp->cpu_prev_lgrp = cp;
588394b433dSesaxe cp->cpu_lpl = lpl_bootstrap;
589394b433dSesaxe
5907c478bd9Sstevel@tonic-gate lgrp_plat_config(event, resource);
5911a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
5927c478bd9Sstevel@tonic-gate
5937c478bd9Sstevel@tonic-gate break;
5947c478bd9Sstevel@tonic-gate case LGRP_CONFIG_CPU_DEL:
5957c478bd9Sstevel@tonic-gate lgrp_plat_config(event, resource);
5961a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
5977c478bd9Sstevel@tonic-gate
5987c478bd9Sstevel@tonic-gate break;
5997c478bd9Sstevel@tonic-gate case LGRP_CONFIG_CPU_ONLINE:
6007c478bd9Sstevel@tonic-gate cp = (cpu_t *)resource;
6017c478bd9Sstevel@tonic-gate lgrp_cpu_init(cp);
6027c478bd9Sstevel@tonic-gate lgrp_part_add_cpu(cp, cp->cpu_lpl->lpl_lgrpid);
6037c478bd9Sstevel@tonic-gate rc = lpl_topo_verify(cp->cpu_part);
6047c478bd9Sstevel@tonic-gate if (rc != LPL_TOPO_CORRECT) {
6057c478bd9Sstevel@tonic-gate panic("lpl_topo_verify failed: %d", rc);
6067c478bd9Sstevel@tonic-gate }
6077c478bd9Sstevel@tonic-gate lgrp_plat_config(event, resource);
6081a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
6097c478bd9Sstevel@tonic-gate
6107c478bd9Sstevel@tonic-gate break;
6117c478bd9Sstevel@tonic-gate case LGRP_CONFIG_CPU_OFFLINE:
6127c478bd9Sstevel@tonic-gate cp = (cpu_t *)resource;
6137c478bd9Sstevel@tonic-gate id = cp->cpu_lpl->lpl_lgrpid;
6147c478bd9Sstevel@tonic-gate lgrp_part_del_cpu(cp);
6157c478bd9Sstevel@tonic-gate lgrp_cpu_fini(cp, id);
6167c478bd9Sstevel@tonic-gate rc = lpl_topo_verify(cp->cpu_part);
6177c478bd9Sstevel@tonic-gate if (rc != LPL_TOPO_CORRECT) {
6187c478bd9Sstevel@tonic-gate panic("lpl_topo_verify failed: %d", rc);
6197c478bd9Sstevel@tonic-gate }
6207c478bd9Sstevel@tonic-gate lgrp_plat_config(event, resource);
6211a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
6227c478bd9Sstevel@tonic-gate
6237c478bd9Sstevel@tonic-gate break;
6247c478bd9Sstevel@tonic-gate case LGRP_CONFIG_CPUPART_ADD:
6257c478bd9Sstevel@tonic-gate cp = (cpu_t *)resource;
6267c478bd9Sstevel@tonic-gate lgrp_part_add_cpu((cpu_t *)resource, (lgrp_id_t)where);
6277c478bd9Sstevel@tonic-gate rc = lpl_topo_verify(cp->cpu_part);
6287c478bd9Sstevel@tonic-gate if (rc != LPL_TOPO_CORRECT) {
6297c478bd9Sstevel@tonic-gate panic("lpl_topo_verify failed: %d", rc);
6307c478bd9Sstevel@tonic-gate }
6317c478bd9Sstevel@tonic-gate lgrp_plat_config(event, resource);
6327c478bd9Sstevel@tonic-gate
6337c478bd9Sstevel@tonic-gate break;
6347c478bd9Sstevel@tonic-gate case LGRP_CONFIG_CPUPART_DEL:
6357c478bd9Sstevel@tonic-gate cp = (cpu_t *)resource;
6367c478bd9Sstevel@tonic-gate lgrp_part_del_cpu((cpu_t *)resource);
6377c478bd9Sstevel@tonic-gate rc = lpl_topo_verify(cp->cpu_part);
6387c478bd9Sstevel@tonic-gate if (rc != LPL_TOPO_CORRECT) {
6397c478bd9Sstevel@tonic-gate panic("lpl_topo_verify failed: %d", rc);
6407c478bd9Sstevel@tonic-gate }
6417c478bd9Sstevel@tonic-gate lgrp_plat_config(event, resource);
6427c478bd9Sstevel@tonic-gate
6437c478bd9Sstevel@tonic-gate break;
6447c478bd9Sstevel@tonic-gate /*
6457c478bd9Sstevel@tonic-gate * The following events are initiated by the memnode
6467c478bd9Sstevel@tonic-gate * subsystem.
6477c478bd9Sstevel@tonic-gate */
6487c478bd9Sstevel@tonic-gate case LGRP_CONFIG_MEM_ADD:
6497c478bd9Sstevel@tonic-gate lgrp_mem_init((int)resource, where, B_FALSE);
6501a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
6517c478bd9Sstevel@tonic-gate
6527c478bd9Sstevel@tonic-gate break;
6537c478bd9Sstevel@tonic-gate case LGRP_CONFIG_MEM_DEL:
6547c478bd9Sstevel@tonic-gate lgrp_mem_fini((int)resource, where, B_FALSE);
6551a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
6567c478bd9Sstevel@tonic-gate
6577c478bd9Sstevel@tonic-gate break;
6587c478bd9Sstevel@tonic-gate case LGRP_CONFIG_MEM_RENAME: {
6597c478bd9Sstevel@tonic-gate lgrp_config_mem_rename_t *ren_arg =
6607c478bd9Sstevel@tonic-gate (lgrp_config_mem_rename_t *)where;
6617c478bd9Sstevel@tonic-gate
6627c478bd9Sstevel@tonic-gate lgrp_mem_rename((int)resource,
6637c478bd9Sstevel@tonic-gate ren_arg->lmem_rename_from,
6647c478bd9Sstevel@tonic-gate ren_arg->lmem_rename_to);
6651a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
6667c478bd9Sstevel@tonic-gate
6677c478bd9Sstevel@tonic-gate break;
6687c478bd9Sstevel@tonic-gate }
6697c478bd9Sstevel@tonic-gate case LGRP_CONFIG_GEN_UPDATE:
6701a5e258fSJosef 'Jeff' Sipek atomic_inc_32(&lgrp_gen);
6717c478bd9Sstevel@tonic-gate
6727c478bd9Sstevel@tonic-gate break;
6737c478bd9Sstevel@tonic-gate case LGRP_CONFIG_FLATTEN:
6747c478bd9Sstevel@tonic-gate if (where == 0)
6757c478bd9Sstevel@tonic-gate lgrp_topo_levels = (int)resource;
6767c478bd9Sstevel@tonic-gate else
6777c478bd9Sstevel@tonic-gate (void) lgrp_topo_flatten(resource,
6787c478bd9Sstevel@tonic-gate lgrp_table, lgrp_alloc_max, &changed);
6797c478bd9Sstevel@tonic-gate
6807c478bd9Sstevel@tonic-gate break;
6817c478bd9Sstevel@tonic-gate /*
68203400a71Sjjc * Update any lgroups with old latency to new latency
6837c478bd9Sstevel@tonic-gate */
68403400a71Sjjc case LGRP_CONFIG_LAT_CHANGE_ALL:
68503400a71Sjjc lgrp_latency_change(LGRP_NULL_HANDLE, (u_longlong_t)resource,
68603400a71Sjjc (u_longlong_t)where);
68703400a71Sjjc
68803400a71Sjjc break;
68903400a71Sjjc /*
69003400a71Sjjc * Update lgroup with specified lgroup platform handle to have
69103400a71Sjjc * new latency
69203400a71Sjjc */
69303400a71Sjjc case LGRP_CONFIG_LAT_CHANGE:
69403400a71Sjjc lgrp_latency_change((lgrp_handle_t)resource, 0,
6957c478bd9Sstevel@tonic-gate (u_longlong_t)where);
6967c478bd9Sstevel@tonic-gate
6977c478bd9Sstevel@tonic-gate break;
6987c478bd9Sstevel@tonic-gate case LGRP_CONFIG_NOP:
6997c478bd9Sstevel@tonic-gate
7007c478bd9Sstevel@tonic-gate break;
7017c478bd9Sstevel@tonic-gate default:
7027c478bd9Sstevel@tonic-gate break;
7037c478bd9Sstevel@tonic-gate }
7047c478bd9Sstevel@tonic-gate
7057c478bd9Sstevel@tonic-gate }
7067c478bd9Sstevel@tonic-gate
/*
 * Called to add lgrp info into cpu structure from cpu_add_unit;
 * do not assume cpu is in cpu[] yet!
 *
 * CPUs are brought online with all other CPUs paused so we can't
 * allocate memory or we could deadlock the system, so we rely on
 * the platform to statically allocate as much space as we need
 * for the lgrp structs and stats.
 */
static void
lgrp_cpu_init(struct cpu *cp)
{
	klgrpset_t	changed;
	int		count;
	lgrp_handle_t	hand;
	int		first_cpu;
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	struct cpu	*cptr;

	/*
	 * This is the first time through if the resource set
	 * for the root lgroup is empty. After cpu0 has been
	 * initially added to an lgroup, the root's CPU resource
	 * set can never be empty, since the system's last CPU
	 * cannot be offlined.
	 */
	if (klgrpset_isempty(lgrp_root->lgrp_set[LGRP_RSRC_CPU])) {
		/*
		 * First time through.
		 */
		first_cpu = 1;
	} else {
		/*
		 * If cpu0 needs to move lgroups, we may come
		 * through here again, at which time cpu_lock won't
		 * be held, and lgrp_initialized will be false.
		 */
		ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);
		ASSERT(cp->cpu_part != NULL);
		first_cpu = 0;
	}

	/* Map the CPU to its platform locality handle and leaf lgroup */
	hand = lgrp_plat_cpu_to_hand(cp->cpu_id);
	my_lgrp = lgrp_hand_to_lgrp(hand);

	if (my_lgrp == NULL) {
		/*
		 * Create new lgrp and add it to lgroup topology
		 */
		my_lgrp = lgrp_create();
		my_lgrp->lgrp_plathand = hand;
		my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);

		count = 0;
		klgrpset_clear(changed);
		count += lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
		/*
		 * May have added new intermediate lgroups, so need to add
		 * resources other than CPUs which are added below
		 */
		(void) lgrp_mnode_update(changed, NULL);
	} else if (my_lgrp->lgrp_latency == 0 && lgrp_plat_latency(hand, hand)
	    > 0) {
		/*
		 * Leaf lgroup was created, but latency wasn't available
		 * then. So, set latency for it and fill in rest of lgroup
		 * topology now that we know how far it is from other leaf
		 * lgroups.
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_clear(changed);
		if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
		    lgrpid))
			klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);

		/*
		 * May have added new intermediate lgroups, so need to add
		 * resources other than CPUs which are added below
		 */
		(void) lgrp_mnode_update(changed, NULL);
	} else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_CPU],
	    my_lgrp->lgrp_id)) {
		int	i;

		/*
		 * Update existing lgroup and lgroups containing it with CPU
		 * resource
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t		*lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !lgrp_rsets_member(lgrp->lgrp_set, lgrpid))
				continue;

			klgrpset_add(lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		}
	}

	lgrpid = my_lgrp->lgrp_id;
	cp->cpu_lpl = &cp->cpu_part->cp_lgrploads[lgrpid];

	/*
	 * For multi-lgroup systems, need to setup lpl for CPU0 or CPU0 will
	 * end up in lpl for lgroup 0 whether it is supposed to be in there or
	 * not since none of lgroup IDs in the lpl's have been set yet.
	 */
	if (first_cpu && nlgrpsmax > 1 && lgrpid != cp->cpu_lpl->lpl_lgrpid)
		cp->cpu_lpl->lpl_lgrpid = lgrpid;

	/*
	 * link the CPU into the lgrp's CPU list
	 */
	if (my_lgrp->lgrp_cpucnt == 0) {
		my_lgrp->lgrp_cpu = cp;
		cp->cpu_next_lgrp = cp->cpu_prev_lgrp = cp;
	} else {
		/* Splice cp in just before the current list head */
		cptr = my_lgrp->lgrp_cpu;
		cp->cpu_next_lgrp = cptr;
		cp->cpu_prev_lgrp = cptr->cpu_prev_lgrp;
		cptr->cpu_prev_lgrp->cpu_next_lgrp = cp;
		cptr->cpu_prev_lgrp = cp;
	}
	my_lgrp->lgrp_cpucnt++;
}
8427c478bd9Sstevel@tonic-gate
/*
 * Allocate (or recycle) an lgroup, assign it the next available lgroup ID,
 * reset its state, and insert it into the lgroup table.  Panics if the
 * platform cannot supply an lgroup or the table is full.
 *
 * NOTE(review): when lgrp_alloc_hint is set, this assumes the scan will
 * find a free slot; if it ever exhausted the table, lgrpid would remain 0
 * and my_lgrp would point at an in-use slot -- confirm callers guarantee
 * a free slot exists.
 */
lgrp_t *
lgrp_create(void)
{
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	int		i;

	ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));
	lgrpid = 0;

	/*
	 * Find an open slot in the lgroup table and recycle unused lgroup
	 * left there if any
	 */
	my_lgrp = NULL;
	if (lgrp_alloc_hint == -1)
		/*
		 * Allocate from end when hint not set yet because no lgroups
		 * have been deleted yet
		 */
		lgrpid = nlgrps++;
	else {
		/*
		 * Start looking for next open slot from hint and leave hint
		 * at slot allocated
		 */
		for (i = lgrp_alloc_hint; i < nlgrpsmax; i++) {
			my_lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(my_lgrp)) {
				lgrpid = i;
				nlgrps++;
				break;
			}
		}
		lgrp_alloc_hint = lgrpid;
	}

	/*
	 * Keep track of max lgroup ID allocated so far to cut down on searches
	 */
	if (lgrpid > lgrp_alloc_max)
		lgrp_alloc_max = lgrpid;

	/*
	 * Need to allocate new lgroup if next open slot didn't have one
	 * for recycling
	 */
	if (my_lgrp == NULL)
		my_lgrp = lgrp_plat_alloc(lgrpid);

	if (nlgrps > nlgrpsmax || my_lgrp == NULL)
		panic("Too many lgrps for platform (%d)", nlgrps);

	/* Reset all state in case this struct is a recycled lgroup */
	my_lgrp->lgrp_id = lgrpid;
	my_lgrp->lgrp_latency = 0;
	my_lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
	my_lgrp->lgrp_parent = NULL;
	my_lgrp->lgrp_childcnt = 0;
	my_lgrp->lgrp_mnodes = (mnodeset_t)0;
	my_lgrp->lgrp_nmnodes = 0;
	klgrpset_clear(my_lgrp->lgrp_children);
	klgrpset_clear(my_lgrp->lgrp_leaves);
	for (i = 0; i < LGRP_RSRC_COUNT; i++)
		klgrpset_clear(my_lgrp->lgrp_set[i]);

	my_lgrp->lgrp_cpu = NULL;
	my_lgrp->lgrp_cpucnt = 0;

	/* Zero any kstats left over from a previous incarnation */
	if (my_lgrp->lgrp_kstat != NULL)
		lgrp_kstat_reset(lgrpid);

	lgrp_table[my_lgrp->lgrp_id] = my_lgrp;

	return (my_lgrp);
}
9187c478bd9Sstevel@tonic-gate
9197c478bd9Sstevel@tonic-gate void
lgrp_destroy(lgrp_t * lgrp)9207c478bd9Sstevel@tonic-gate lgrp_destroy(lgrp_t *lgrp)
9217c478bd9Sstevel@tonic-gate {
9227c478bd9Sstevel@tonic-gate int i;
9237c478bd9Sstevel@tonic-gate
9247c478bd9Sstevel@tonic-gate /*
9257c478bd9Sstevel@tonic-gate * Unless this lgroup is being destroyed on behalf of
9267c478bd9Sstevel@tonic-gate * the boot CPU, cpu_lock must be held
9277c478bd9Sstevel@tonic-gate */
9287c478bd9Sstevel@tonic-gate ASSERT(!lgrp_initialized || MUTEX_HELD(&cpu_lock));
9297c478bd9Sstevel@tonic-gate
9307c478bd9Sstevel@tonic-gate if (nlgrps == 1)
9317c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "Can't destroy only lgroup!");
9327c478bd9Sstevel@tonic-gate
9337c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp))
9347c478bd9Sstevel@tonic-gate return;
9357c478bd9Sstevel@tonic-gate
9367c478bd9Sstevel@tonic-gate /*
9377c478bd9Sstevel@tonic-gate * Set hint to lgroup being deleted and try to keep lower numbered
9387c478bd9Sstevel@tonic-gate * hints to facilitate finding empty slots
9397c478bd9Sstevel@tonic-gate */
9407c478bd9Sstevel@tonic-gate if (lgrp_alloc_hint == -1 || lgrp->lgrp_id < lgrp_alloc_hint)
9417c478bd9Sstevel@tonic-gate lgrp_alloc_hint = lgrp->lgrp_id;
9427c478bd9Sstevel@tonic-gate
9437c478bd9Sstevel@tonic-gate /*
9447c478bd9Sstevel@tonic-gate * Mark this lgroup to be recycled by setting its lgroup ID to
9457c478bd9Sstevel@tonic-gate * LGRP_NONE and clear relevant fields
9467c478bd9Sstevel@tonic-gate */
9477c478bd9Sstevel@tonic-gate lgrp->lgrp_id = LGRP_NONE;
9487c478bd9Sstevel@tonic-gate lgrp->lgrp_latency = 0;
9497c478bd9Sstevel@tonic-gate lgrp->lgrp_plathand = LGRP_NULL_HANDLE;
9507c478bd9Sstevel@tonic-gate lgrp->lgrp_parent = NULL;
9517c478bd9Sstevel@tonic-gate lgrp->lgrp_childcnt = 0;
9527c478bd9Sstevel@tonic-gate
9537c478bd9Sstevel@tonic-gate klgrpset_clear(lgrp->lgrp_children);
9547c478bd9Sstevel@tonic-gate klgrpset_clear(lgrp->lgrp_leaves);
9557c478bd9Sstevel@tonic-gate for (i = 0; i < LGRP_RSRC_COUNT; i++)
9567c478bd9Sstevel@tonic-gate klgrpset_clear(lgrp->lgrp_set[i]);
9577c478bd9Sstevel@tonic-gate
9587c478bd9Sstevel@tonic-gate lgrp->lgrp_mnodes = (mnodeset_t)0;
9597c478bd9Sstevel@tonic-gate lgrp->lgrp_nmnodes = 0;
9607c478bd9Sstevel@tonic-gate
9617c478bd9Sstevel@tonic-gate lgrp->lgrp_cpu = NULL;
9627c478bd9Sstevel@tonic-gate lgrp->lgrp_cpucnt = 0;
9637c478bd9Sstevel@tonic-gate
9647c478bd9Sstevel@tonic-gate nlgrps--;
9657c478bd9Sstevel@tonic-gate }
9667c478bd9Sstevel@tonic-gate
9677c478bd9Sstevel@tonic-gate /*
9687c478bd9Sstevel@tonic-gate * Initialize kstat data. Called from lgrp intialization code.
9697c478bd9Sstevel@tonic-gate */
9707c478bd9Sstevel@tonic-gate static void
lgrp_kstat_init(void)9717c478bd9Sstevel@tonic-gate lgrp_kstat_init(void)
9727c478bd9Sstevel@tonic-gate {
9737c478bd9Sstevel@tonic-gate lgrp_stat_t stat;
9747c478bd9Sstevel@tonic-gate
9757c478bd9Sstevel@tonic-gate mutex_init(&lgrp_kstat_mutex, NULL, MUTEX_DEFAULT, NULL);
9767c478bd9Sstevel@tonic-gate
9777c478bd9Sstevel@tonic-gate for (stat = 0; stat < LGRP_NUM_STATS; stat++)
9787c478bd9Sstevel@tonic-gate kstat_named_init(&lgrp_kstat_data[stat],
9797c478bd9Sstevel@tonic-gate lgrp_kstat_names[stat], KSTAT_DATA_INT64);
9807c478bd9Sstevel@tonic-gate }
9817c478bd9Sstevel@tonic-gate
9827c478bd9Sstevel@tonic-gate /*
9837c478bd9Sstevel@tonic-gate * initialize an lgrp's kstats if needed
9847c478bd9Sstevel@tonic-gate * called with cpu_lock held but not with cpus paused.
9857c478bd9Sstevel@tonic-gate * we don't tear these down now because we don't know about
9867c478bd9Sstevel@tonic-gate * memory leaving the lgrp yet...
9877c478bd9Sstevel@tonic-gate */
9887c478bd9Sstevel@tonic-gate
9897c478bd9Sstevel@tonic-gate void
lgrp_kstat_create(cpu_t * cp)9907c478bd9Sstevel@tonic-gate lgrp_kstat_create(cpu_t *cp)
9917c478bd9Sstevel@tonic-gate {
9927c478bd9Sstevel@tonic-gate kstat_t *lgrp_kstat;
9937c478bd9Sstevel@tonic-gate lgrp_id_t lgrpid;
9947c478bd9Sstevel@tonic-gate lgrp_t *my_lgrp;
9957c478bd9Sstevel@tonic-gate
9967c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock));
9977c478bd9Sstevel@tonic-gate
9987c478bd9Sstevel@tonic-gate lgrpid = cp->cpu_lpl->lpl_lgrpid;
9997c478bd9Sstevel@tonic-gate my_lgrp = lgrp_table[lgrpid];
10007c478bd9Sstevel@tonic-gate
10017c478bd9Sstevel@tonic-gate if (my_lgrp->lgrp_kstat != NULL)
10027c478bd9Sstevel@tonic-gate return; /* already initialized */
10037c478bd9Sstevel@tonic-gate
10047c478bd9Sstevel@tonic-gate lgrp_kstat = kstat_create("lgrp", lgrpid, NULL, "misc",
10057c478bd9Sstevel@tonic-gate KSTAT_TYPE_NAMED, LGRP_NUM_STATS,
10067c478bd9Sstevel@tonic-gate KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
10077c478bd9Sstevel@tonic-gate
10087c478bd9Sstevel@tonic-gate if (lgrp_kstat != NULL) {
10097c478bd9Sstevel@tonic-gate lgrp_kstat->ks_lock = &lgrp_kstat_mutex;
10107c478bd9Sstevel@tonic-gate lgrp_kstat->ks_private = my_lgrp;
10117c478bd9Sstevel@tonic-gate lgrp_kstat->ks_data = &lgrp_kstat_data;
10127c478bd9Sstevel@tonic-gate lgrp_kstat->ks_update = lgrp_kstat_extract;
10137c478bd9Sstevel@tonic-gate my_lgrp->lgrp_kstat = lgrp_kstat;
10147c478bd9Sstevel@tonic-gate kstat_install(lgrp_kstat);
10157c478bd9Sstevel@tonic-gate }
10167c478bd9Sstevel@tonic-gate }
10177c478bd9Sstevel@tonic-gate
/*
 * this will do something when we manage to remove now unused lgrps
 */

/* ARGSUSED */
void
lgrp_kstat_destroy(cpu_t *cp)
{
	/* Currently a no-op; caller must still hold cpu_lock. */
	ASSERT(MUTEX_HELD(&cpu_lock));
}
10287c478bd9Sstevel@tonic-gate
/*
 * Called when a CPU is off-lined.
 *
 * Unlinks the CPU from its lgroup's circular CPU list and, if this was
 * the lgroup's last CPU, removes the lgroup's CPU resource and possibly
 * the lgroup itself from the topology.
 */
static void
lgrp_cpu_fini(struct cpu *cp, lgrp_id_t lgrpid)
{
	lgrp_t		*my_lgrp;
	struct cpu	*prev;
	struct cpu	*next;

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	/* Unlink cp from the circular doubly-linked CPU list */
	prev = cp->cpu_prev_lgrp;
	next = cp->cpu_next_lgrp;

	prev->cpu_next_lgrp = next;
	next->cpu_prev_lgrp = prev;

	/*
	 * just because I'm paranoid doesn't mean...
	 */

	cp->cpu_next_lgrp = cp->cpu_prev_lgrp = NULL;

	my_lgrp = lgrp_table[lgrpid];
	my_lgrp->lgrp_cpucnt--;

	/*
	 * Removing last CPU in lgroup, so update lgroup topology
	 */
	if (my_lgrp->lgrp_cpucnt == 0) {
		klgrpset_t	changed;
		int		count;
		int		i;

		my_lgrp->lgrp_cpu = NULL;

		/*
		 * Remove this lgroup from its lgroup CPU resources and remove
		 * lgroup from lgroup topology if it doesn't have any more
		 * resources in it now
		 */
		klgrpset_del(my_lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
			count = 0;
			klgrpset_clear(changed);
			count += lgrp_leaf_delete(my_lgrp, lgrp_table,
			    lgrp_alloc_max + 1, &changed);
			return;
		}

		/*
		 * This lgroup isn't empty, so just remove it from CPU
		 * resources of any lgroups that contain it as such
		 */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t		*lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_CPU],
			    lgrpid))
				continue;

			klgrpset_del(lgrp->lgrp_set[LGRP_RSRC_CPU], lgrpid);
		}
		return;
	}

	/* If the departing CPU was the list head, advance the head */
	if (my_lgrp->lgrp_cpu == cp)
		my_lgrp->lgrp_cpu = next;

}
11027c478bd9Sstevel@tonic-gate
11037c478bd9Sstevel@tonic-gate /*
11047c478bd9Sstevel@tonic-gate * Update memory nodes in target lgroups and return ones that get changed
11057c478bd9Sstevel@tonic-gate */
11067c478bd9Sstevel@tonic-gate int
lgrp_mnode_update(klgrpset_t target,klgrpset_t * changed)11077c478bd9Sstevel@tonic-gate lgrp_mnode_update(klgrpset_t target, klgrpset_t *changed)
11087c478bd9Sstevel@tonic-gate {
11097c478bd9Sstevel@tonic-gate int count;
11107c478bd9Sstevel@tonic-gate int i;
11117c478bd9Sstevel@tonic-gate int j;
11127c478bd9Sstevel@tonic-gate lgrp_t *lgrp;
11137c478bd9Sstevel@tonic-gate lgrp_t *lgrp_rsrc;
11147c478bd9Sstevel@tonic-gate
11157c478bd9Sstevel@tonic-gate count = 0;
11167c478bd9Sstevel@tonic-gate if (changed)
11177c478bd9Sstevel@tonic-gate klgrpset_clear(*changed);
11187c478bd9Sstevel@tonic-gate
11197c478bd9Sstevel@tonic-gate if (klgrpset_isempty(target))
11207c478bd9Sstevel@tonic-gate return (0);
11217c478bd9Sstevel@tonic-gate
11227c478bd9Sstevel@tonic-gate /*
11237c478bd9Sstevel@tonic-gate * Find each lgroup in target lgroups
11247c478bd9Sstevel@tonic-gate */
11257c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
11267c478bd9Sstevel@tonic-gate /*
11277c478bd9Sstevel@tonic-gate * Skip any lgroups that don't exist or aren't in target group
11287c478bd9Sstevel@tonic-gate */
11297c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i];
11307c478bd9Sstevel@tonic-gate if (!klgrpset_ismember(target, i) || !LGRP_EXISTS(lgrp)) {
11317c478bd9Sstevel@tonic-gate continue;
11327c478bd9Sstevel@tonic-gate }
11337c478bd9Sstevel@tonic-gate
11347c478bd9Sstevel@tonic-gate /*
11357c478bd9Sstevel@tonic-gate * Initialize memnodes for intermediate lgroups to 0
11367c478bd9Sstevel@tonic-gate * and update them from scratch since they may have completely
11377c478bd9Sstevel@tonic-gate * changed
11387c478bd9Sstevel@tonic-gate */
11397c478bd9Sstevel@tonic-gate if (lgrp->lgrp_childcnt && lgrp != lgrp_root) {
11407c478bd9Sstevel@tonic-gate lgrp->lgrp_mnodes = (mnodeset_t)0;
11417c478bd9Sstevel@tonic-gate lgrp->lgrp_nmnodes = 0;
11427c478bd9Sstevel@tonic-gate }
11437c478bd9Sstevel@tonic-gate
11447c478bd9Sstevel@tonic-gate /*
11457c478bd9Sstevel@tonic-gate * Update memory nodes of of target lgroup with memory nodes
11467c478bd9Sstevel@tonic-gate * from each lgroup in its lgroup memory resource set
11477c478bd9Sstevel@tonic-gate */
11487c478bd9Sstevel@tonic-gate for (j = 0; j <= lgrp_alloc_max; j++) {
11497c478bd9Sstevel@tonic-gate int k;
11507c478bd9Sstevel@tonic-gate
11517c478bd9Sstevel@tonic-gate /*
11527c478bd9Sstevel@tonic-gate * Skip any lgroups that don't exist or aren't in
11537c478bd9Sstevel@tonic-gate * memory resources of target lgroup
11547c478bd9Sstevel@tonic-gate */
11557c478bd9Sstevel@tonic-gate lgrp_rsrc = lgrp_table[j];
11567c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp_rsrc) ||
11577c478bd9Sstevel@tonic-gate !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM],
11587c478bd9Sstevel@tonic-gate j))
11597c478bd9Sstevel@tonic-gate continue;
11607c478bd9Sstevel@tonic-gate
11617c478bd9Sstevel@tonic-gate /*
11627c478bd9Sstevel@tonic-gate * Update target lgroup's memnodes to include memnodes
11637c478bd9Sstevel@tonic-gate * of this lgroup
11647c478bd9Sstevel@tonic-gate */
11657c478bd9Sstevel@tonic-gate for (k = 0; k < sizeof (mnodeset_t) * NBBY; k++) {
11667c478bd9Sstevel@tonic-gate mnodeset_t mnode_mask;
11677c478bd9Sstevel@tonic-gate
11687c478bd9Sstevel@tonic-gate mnode_mask = (mnodeset_t)1 << k;
11697c478bd9Sstevel@tonic-gate if ((lgrp_rsrc->lgrp_mnodes & mnode_mask) &&
11707c478bd9Sstevel@tonic-gate !(lgrp->lgrp_mnodes & mnode_mask)) {
11717c478bd9Sstevel@tonic-gate lgrp->lgrp_mnodes |= mnode_mask;
11727c478bd9Sstevel@tonic-gate lgrp->lgrp_nmnodes++;
11737c478bd9Sstevel@tonic-gate }
11747c478bd9Sstevel@tonic-gate }
11757c478bd9Sstevel@tonic-gate count++;
11767c478bd9Sstevel@tonic-gate if (changed)
11777c478bd9Sstevel@tonic-gate klgrpset_add(*changed, lgrp->lgrp_id);
11787c478bd9Sstevel@tonic-gate }
11797c478bd9Sstevel@tonic-gate }
11807c478bd9Sstevel@tonic-gate
11817c478bd9Sstevel@tonic-gate return (count);
11827c478bd9Sstevel@tonic-gate }
11837c478bd9Sstevel@tonic-gate
11847c478bd9Sstevel@tonic-gate /*
11857c478bd9Sstevel@tonic-gate * Memory copy-rename. Called when the "mnode" containing the kernel cage memory
11867c478bd9Sstevel@tonic-gate * is moved from one board to another. The "from" and "to" arguments specify the
11877c478bd9Sstevel@tonic-gate * source and the destination of the move.
11887c478bd9Sstevel@tonic-gate *
11897c478bd9Sstevel@tonic-gate * See plat_lgrp_config() for a detailed description of the copy-rename
11907c478bd9Sstevel@tonic-gate * semantics.
11917c478bd9Sstevel@tonic-gate *
11927c478bd9Sstevel@tonic-gate * The lgrp_mem_rename() is called by the platform copy-rename code to update
11937c478bd9Sstevel@tonic-gate * the lgroup topology which is changing as memory moves from one lgroup to
11947c478bd9Sstevel@tonic-gate * another. It removes the mnode from the source lgroup and re-inserts it in the
11957c478bd9Sstevel@tonic-gate * target lgroup.
11967c478bd9Sstevel@tonic-gate *
11977c478bd9Sstevel@tonic-gate * The lgrp_mem_rename() function passes a flag to lgrp_mem_init() and
11987c478bd9Sstevel@tonic-gate * lgrp_mem_fini() telling that the insertion and deletion are part of a DR
11997c478bd9Sstevel@tonic-gate * copy-rename operation.
12007c478bd9Sstevel@tonic-gate *
12017c478bd9Sstevel@tonic-gate * There is one case which requires special handling. If the system contains
12027c478bd9Sstevel@tonic-gate * only two boards (mnodes), the lgrp_mem_fini() removes the only mnode from the
12037c478bd9Sstevel@tonic-gate * lgroup hierarchy. This mnode is soon re-inserted back in the hierarchy by
12047c478bd9Sstevel@tonic-gate * lgrp_mem_init(), but there is a window when the system has no memory in the
12057c478bd9Sstevel@tonic-gate * lgroup hierarchy. If another thread tries to allocate memory during this
12067c478bd9Sstevel@tonic-gate * window, the allocation will fail, although the system has physical memory.
12077c478bd9Sstevel@tonic-gate * This may cause a system panic or a deadlock (some sleeping memory allocations
12087c478bd9Sstevel@tonic-gate * happen with cpu_lock held which prevents lgrp_mem_init() from re-inserting
12097c478bd9Sstevel@tonic-gate * the mnode back).
12107c478bd9Sstevel@tonic-gate *
12117c478bd9Sstevel@tonic-gate * The lgrp_memnode_choose() function walks the lgroup hierarchy looking for the
12127c478bd9Sstevel@tonic-gate * lgrp with non-empty lgrp_mnodes. To deal with the special case above,
12137c478bd9Sstevel@tonic-gate * lgrp_mem_fini() does not remove the last mnode from lgrp_root->lgrp_mnodes,
12147c478bd9Sstevel@tonic-gate * but it updates the rest of the lgroup topology as if the mnode was actually
12157c478bd9Sstevel@tonic-gate * removed. The lgrp_mem_init() function recognizes that the mnode being
12167c478bd9Sstevel@tonic-gate * inserted represents such a special case and updates the topology
12177c478bd9Sstevel@tonic-gate * appropriately.
12187c478bd9Sstevel@tonic-gate */
void
lgrp_mem_rename(int mnode, lgrp_handle_t from, lgrp_handle_t to)
{
	/*
	 * Remove the memory from the source node and add it to the destination
	 * node.
	 *
	 * Both calls pass B_TRUE for is_copy_rename so that lgrp_mem_fini()
	 * leaves the last mnode in the root lgroup's lgrp_mnodes set and
	 * lgrp_mem_init() recognizes the re-insertion as the special DR
	 * copy-rename case (see the block comment above).  The fini/init
	 * ordering is the contract: the mnode must be logically removed from
	 * "from" before it can be inserted under "to".
	 */
	lgrp_mem_fini(mnode, from, B_TRUE);
	lgrp_mem_init(mnode, to, B_TRUE);
}
12297c478bd9Sstevel@tonic-gate
12307c478bd9Sstevel@tonic-gate /*
12317c478bd9Sstevel@tonic-gate * Called to indicate that the lgrp with platform handle "hand" now
12327c478bd9Sstevel@tonic-gate * contains the memory identified by "mnode".
12337c478bd9Sstevel@tonic-gate *
12347c478bd9Sstevel@tonic-gate * LOCKING for this routine is a bit tricky. Usually it is called without
12357c478bd9Sstevel@tonic-gate * cpu_lock and it must grab cpu_lock here to prevent racing with other
12367c478bd9Sstevel@tonic-gate * callers. During DR of the board containing the caged memory it may be called
12377c478bd9Sstevel@tonic-gate * with cpu_lock already held and CPUs paused.
12387c478bd9Sstevel@tonic-gate *
12397c478bd9Sstevel@tonic-gate * If the insertion is part of the DR copy-rename and the inserted mnode (and
12407c478bd9Sstevel@tonic-gate * only this mnode) is already present in the lgrp_root->lgrp_mnodes set, we are
12417c478bd9Sstevel@tonic-gate * dealing with the special case of DR copy-rename described in
12427c478bd9Sstevel@tonic-gate * lgrp_mem_rename().
12437c478bd9Sstevel@tonic-gate */
void
lgrp_mem_init(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
	klgrpset_t	changed;
	int		count;
	int		i;
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	mnodeset_t	mnodes_mask = ((mnodeset_t)1 << mnode);
	boolean_t	drop_lock = B_FALSE;	/* did we take cpu_lock here? */
	boolean_t	need_synch = B_FALSE;	/* must we pause cpus ourselves? */

	/*
	 * Grab CPU lock (if we haven't already)
	 */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		drop_lock = B_TRUE;
	}

	/*
	 * This routine may be called from a context where we already
	 * hold cpu_lock, and have already paused cpus.
	 */
	if (!cpus_paused())
		need_synch = B_TRUE;

	/*
	 * Check if this mnode is already configured and return immediately if
	 * it is.
	 *
	 * NOTE: in special case of copy-rename of the only remaining mnode,
	 * lgrp_mem_fini() refuses to remove the last mnode from the root, so we
	 * recognize this case and continue as usual, but skip the update to
	 * the lgrp_mnodes and the lgrp_nmnodes. This restores the inconsistency
	 * in topology, temporarily introduced by lgrp_mem_fini().
	 */
	if (! (is_copy_rename && (lgrp_root->lgrp_mnodes == mnodes_mask)) &&
	    lgrp_root->lgrp_mnodes & mnodes_mask) {
		if (drop_lock)
			mutex_exit(&cpu_lock);
		return;
	}

	/*
	 * Update lgroup topology with new memory resources, keeping track of
	 * which lgroups change
	 */
	count = 0;
	klgrpset_clear(changed);
	my_lgrp = lgrp_hand_to_lgrp(hand);
	if (my_lgrp == NULL) {
		/* new lgrp */
		my_lgrp = lgrp_create();
		lgrpid = my_lgrp->lgrp_id;
		my_lgrp->lgrp_plathand = hand;
		my_lgrp->lgrp_latency = lgrp_plat_latency(hand, hand);
		klgrpset_add(my_lgrp->lgrp_leaves, lgrpid);
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);

		/* CPUs must be quiesced while the topology is rewired. */
		if (need_synch)
			pause_cpus(NULL, NULL);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
		if (need_synch)
			start_cpus();
	} else if (my_lgrp->lgrp_latency == 0 && lgrp_plat_latency(hand, hand)
	    > 0) {
		/*
		 * Leaf lgroup was created, but latency wasn't available
		 * then. So, set latency for it and fill in rest of lgroup
		 * topology now that we know how far it is from other leaf
		 * lgroups.
		 */
		klgrpset_clear(changed);
		lgrpid = my_lgrp->lgrp_id;
		if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
		    lgrpid))
			klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		if (need_synch)
			pause_cpus(NULL, NULL);
		count = lgrp_leaf_add(my_lgrp, lgrp_table, lgrp_alloc_max + 1,
		    &changed);
		if (need_synch)
			start_cpus();
	} else if (!klgrpset_ismember(my_lgrp->lgrp_set[LGRP_RSRC_MEM],
	    my_lgrp->lgrp_id)) {
		/*
		 * Add new lgroup memory resource to existing lgroup
		 */
		lgrpid = my_lgrp->lgrp_id;
		klgrpset_add(my_lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		klgrpset_add(changed, lgrpid);
		count++;
		/*
		 * Propagate the new memory resource to every lgroup whose
		 * resource sets already reference this leaf.
		 */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t *lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !lgrp_rsets_member(lgrp->lgrp_set, lgrpid))
				continue;

			klgrpset_add(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
			klgrpset_add(changed, lgrp->lgrp_id);
			count++;
		}
	} else {
		/*
		 * The lgroup already exists and already lists itself as a
		 * memory resource -- nothing to update.
		 */
		if (drop_lock)
			mutex_exit(&cpu_lock);
		return;
	}

	/*
	 * Add memory node to lgroup and remove lgroup from ones that need
	 * to be updated
	 *
	 * NOTE: every path that reaches here has assigned lgrpid above; the
	 * only path that leaves it unset returns early.  In the copy-rename
	 * special case the mnode is still present in my_lgrp->lgrp_mnodes,
	 * so the guarded update below is skipped, restoring consistency.
	 */
	if (!(my_lgrp->lgrp_mnodes & mnodes_mask)) {
		my_lgrp->lgrp_mnodes |= mnodes_mask;
		my_lgrp->lgrp_nmnodes++;
	}
	klgrpset_del(changed, lgrpid);

	/*
	 * Update memory node information for all lgroups that changed and
	 * contain new memory node as a resource
	 */
	if (count)
		(void) lgrp_mnode_update(changed, NULL);

	if (drop_lock)
		mutex_exit(&cpu_lock);
}
13767c478bd9Sstevel@tonic-gate
13777c478bd9Sstevel@tonic-gate /*
13787c478bd9Sstevel@tonic-gate * Called to indicate that the lgroup associated with the platform
13797c478bd9Sstevel@tonic-gate * handle "hand" no longer contains given memory node
13807c478bd9Sstevel@tonic-gate *
13817c478bd9Sstevel@tonic-gate * LOCKING for this routine is a bit tricky. Usually it is called without
13827c478bd9Sstevel@tonic-gate * cpu_lock and it must grab cpu_lock here to prevent racing with other
13837c478bd9Sstevel@tonic-gate * callers. During DR of the board containing the caged memory it may be called
13847c478bd9Sstevel@tonic-gate * with cpu_lock already held and CPUs paused.
13857c478bd9Sstevel@tonic-gate *
13867c478bd9Sstevel@tonic-gate * If the deletion is part of the DR copy-rename and the deleted mnode is the
13877c478bd9Sstevel@tonic-gate * only one present in the lgrp_root->lgrp_mnodes, all the topology is updated,
13887c478bd9Sstevel@tonic-gate * but lgrp_root->lgrp_mnodes is left intact. Later, lgrp_mem_init() will insert
13897c478bd9Sstevel@tonic-gate * the same mnode back into the topology. See lgrp_mem_rename() and
13907c478bd9Sstevel@tonic-gate * lgrp_mem_init() for additional details.
13917c478bd9Sstevel@tonic-gate */
void
lgrp_mem_fini(int mnode, lgrp_handle_t hand, boolean_t is_copy_rename)
{
	klgrpset_t	changed;
	int		count;
	int		i;
	lgrp_t		*my_lgrp;
	lgrp_id_t	lgrpid;
	mnodeset_t	mnodes_mask;
	boolean_t	drop_lock = B_FALSE;	/* did we take cpu_lock here? */
	boolean_t	need_synch = B_FALSE;	/* must we pause cpus ourselves? */

	/*
	 * Grab CPU lock (if we haven't already)
	 */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		drop_lock = B_TRUE;
	}

	/*
	 * This routine may be called from a context where we already
	 * hold cpu_lock and have already paused cpus.
	 */
	if (!cpus_paused())
		need_synch = B_TRUE;

	my_lgrp = lgrp_hand_to_lgrp(hand);

	/*
	 * The lgrp *must* be pre-existing
	 */
	ASSERT(my_lgrp != NULL);

	/*
	 * Delete memory node from lgroups which contain it
	 */
	mnodes_mask = ((mnodeset_t)1 << mnode);
	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_t *lgrp = lgrp_table[i];
		/*
		 * Skip any non-existent lgroups and any lgroups that don't
		 * contain leaf lgroup of memory as a memory resource
		 */
		if (!LGRP_EXISTS(lgrp) ||
		    !(lgrp->lgrp_mnodes & mnodes_mask))
			continue;

		/*
		 * Avoid removing the last mnode from the root in the DR
		 * copy-rename case. See lgrp_mem_rename() for details.
		 */
		if (is_copy_rename &&
		    (lgrp == lgrp_root) && (lgrp->lgrp_mnodes == mnodes_mask))
			continue;

		/*
		 * Remove memory node from lgroup.
		 */
		lgrp->lgrp_mnodes &= ~mnodes_mask;
		ASSERT(lgrp->lgrp_nmnodes > 0);
		lgrp->lgrp_nmnodes--;
	}
	/* The root must always retain at least one memory node. */
	ASSERT(lgrp_root->lgrp_nmnodes > 0);

	/*
	 * Don't need to update lgroup topology if this lgroup still has memory.
	 *
	 * In the special case of DR copy-rename with the only mnode being
	 * removed, the lgrp_mnodes for the root is always non-zero, but we
	 * still need to update the lgroup topology.
	 */
	if ((my_lgrp->lgrp_nmnodes > 0) &&
	    !(is_copy_rename && (my_lgrp == lgrp_root) &&
	    (my_lgrp->lgrp_mnodes == mnodes_mask))) {
		if (drop_lock)
			mutex_exit(&cpu_lock);
		return;
	}

	/*
	 * This lgroup does not contain any memory now
	 */
	klgrpset_clear(my_lgrp->lgrp_set[LGRP_RSRC_MEM]);

	/*
	 * Remove this lgroup from lgroup topology if it does not contain any
	 * resources now
	 */
	lgrpid = my_lgrp->lgrp_id;
	count = 0;
	klgrpset_clear(changed);
	if (lgrp_rsets_empty(my_lgrp->lgrp_set)) {
		/*
		 * Delete lgroup when no more resources
		 */
		if (need_synch)
			pause_cpus(NULL, NULL);
		count = lgrp_leaf_delete(my_lgrp, lgrp_table,
		    lgrp_alloc_max + 1, &changed);
		ASSERT(count > 0);
		if (need_synch)
			start_cpus();
	} else {
		/*
		 * Remove lgroup from memory resources of any lgroups that
		 * contain it as such
		 */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			lgrp_t *lgrp;

			lgrp = lgrp_table[i];
			if (!LGRP_EXISTS(lgrp) ||
			    !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM],
			    lgrpid))
				continue;

			klgrpset_del(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid);
		}
	}
	if (drop_lock)
		mutex_exit(&cpu_lock);
}
15157c478bd9Sstevel@tonic-gate
15167c478bd9Sstevel@tonic-gate /*
15177c478bd9Sstevel@tonic-gate * Return lgroup with given platform handle
15187c478bd9Sstevel@tonic-gate */
15197c478bd9Sstevel@tonic-gate lgrp_t *
lgrp_hand_to_lgrp(lgrp_handle_t hand)15207c478bd9Sstevel@tonic-gate lgrp_hand_to_lgrp(lgrp_handle_t hand)
15217c478bd9Sstevel@tonic-gate {
15227c478bd9Sstevel@tonic-gate int i;
15237c478bd9Sstevel@tonic-gate lgrp_t *lgrp;
15247c478bd9Sstevel@tonic-gate
15257c478bd9Sstevel@tonic-gate if (hand == LGRP_NULL_HANDLE)
15267c478bd9Sstevel@tonic-gate return (NULL);
15277c478bd9Sstevel@tonic-gate
15287c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
15297c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i];
15307c478bd9Sstevel@tonic-gate if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand)
15317c478bd9Sstevel@tonic-gate return (lgrp);
15327c478bd9Sstevel@tonic-gate }
15337c478bd9Sstevel@tonic-gate return (NULL);
15347c478bd9Sstevel@tonic-gate }
15357c478bd9Sstevel@tonic-gate
15367c478bd9Sstevel@tonic-gate /*
15377c478bd9Sstevel@tonic-gate * Return the home lgroup of the current thread.
15387c478bd9Sstevel@tonic-gate * We must do this with kernel preemption disabled, since we don't want our
15397c478bd9Sstevel@tonic-gate * thread to be re-homed while we're poking around with its lpl, and the lpl
15407c478bd9Sstevel@tonic-gate * should never be NULL.
15417c478bd9Sstevel@tonic-gate *
15427c478bd9Sstevel@tonic-gate * NOTE: Can't guarantee that lgroup will be valid once kernel preemption
15437c478bd9Sstevel@tonic-gate * is enabled because of DR. Callers can use disable kernel preemption
15447c478bd9Sstevel@tonic-gate * around this call to guarantee that the lgroup will be valid beyond this
15457c478bd9Sstevel@tonic-gate * routine, since kernel preemption can be recursive.
15467c478bd9Sstevel@tonic-gate */
15477c478bd9Sstevel@tonic-gate lgrp_t *
lgrp_home_lgrp(void)15487c478bd9Sstevel@tonic-gate lgrp_home_lgrp(void)
15497c478bd9Sstevel@tonic-gate {
15507c478bd9Sstevel@tonic-gate lgrp_t *lgrp;
15517c478bd9Sstevel@tonic-gate lpl_t *lpl;
15527c478bd9Sstevel@tonic-gate
15537c478bd9Sstevel@tonic-gate kpreempt_disable();
15547c478bd9Sstevel@tonic-gate
15557c478bd9Sstevel@tonic-gate lpl = curthread->t_lpl;
15567c478bd9Sstevel@tonic-gate ASSERT(lpl != NULL);
15577c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_lgrpid >= 0 && lpl->lpl_lgrpid <= lgrp_alloc_max);
15587c478bd9Sstevel@tonic-gate ASSERT(LGRP_EXISTS(lgrp_table[lpl->lpl_lgrpid]));
15597c478bd9Sstevel@tonic-gate lgrp = lgrp_table[lpl->lpl_lgrpid];
15607c478bd9Sstevel@tonic-gate
15617c478bd9Sstevel@tonic-gate kpreempt_enable();
15627c478bd9Sstevel@tonic-gate
15637c478bd9Sstevel@tonic-gate return (lgrp);
15647c478bd9Sstevel@tonic-gate }
15657c478bd9Sstevel@tonic-gate
15667c478bd9Sstevel@tonic-gate /*
15677c478bd9Sstevel@tonic-gate * Return ID of home lgroup for given thread
15687c478bd9Sstevel@tonic-gate * (See comments for lgrp_home_lgrp() for special care and handling
15697c478bd9Sstevel@tonic-gate * instructions)
15707c478bd9Sstevel@tonic-gate */
15717c478bd9Sstevel@tonic-gate lgrp_id_t
lgrp_home_id(kthread_t * t)15727c478bd9Sstevel@tonic-gate lgrp_home_id(kthread_t *t)
15737c478bd9Sstevel@tonic-gate {
15747c478bd9Sstevel@tonic-gate lgrp_id_t lgrp;
15757c478bd9Sstevel@tonic-gate lpl_t *lpl;
15767c478bd9Sstevel@tonic-gate
15777c478bd9Sstevel@tonic-gate ASSERT(t != NULL);
15787c478bd9Sstevel@tonic-gate /*
15797c478bd9Sstevel@tonic-gate * We'd like to ASSERT(MUTEX_HELD(&ttoproc(t)->p_lock)), but we
15807c478bd9Sstevel@tonic-gate * cannot since the HAT layer can call into this routine to
15817c478bd9Sstevel@tonic-gate * determine the locality for its data structures in the context
15827c478bd9Sstevel@tonic-gate * of a page fault.
15837c478bd9Sstevel@tonic-gate */
15847c478bd9Sstevel@tonic-gate
15857c478bd9Sstevel@tonic-gate kpreempt_disable();
15867c478bd9Sstevel@tonic-gate
15877c478bd9Sstevel@tonic-gate lpl = t->t_lpl;
15887c478bd9Sstevel@tonic-gate ASSERT(lpl != NULL);
15897c478bd9Sstevel@tonic-gate ASSERT(lpl->lpl_lgrpid >= 0 && lpl->lpl_lgrpid <= lgrp_alloc_max);
15907c478bd9Sstevel@tonic-gate lgrp = lpl->lpl_lgrpid;
15917c478bd9Sstevel@tonic-gate
15927c478bd9Sstevel@tonic-gate kpreempt_enable();
15937c478bd9Sstevel@tonic-gate
15947c478bd9Sstevel@tonic-gate return (lgrp);
15957c478bd9Sstevel@tonic-gate }
15967c478bd9Sstevel@tonic-gate
15977c478bd9Sstevel@tonic-gate /*
15987c478bd9Sstevel@tonic-gate * Return lgroup containing the physical memory for the given page frame number
15997c478bd9Sstevel@tonic-gate */
16007c478bd9Sstevel@tonic-gate lgrp_t *
lgrp_pfn_to_lgrp(pfn_t pfn)16017c478bd9Sstevel@tonic-gate lgrp_pfn_to_lgrp(pfn_t pfn)
16027c478bd9Sstevel@tonic-gate {
16037c478bd9Sstevel@tonic-gate lgrp_handle_t hand;
16047c478bd9Sstevel@tonic-gate int i;
16057c478bd9Sstevel@tonic-gate lgrp_t *lgrp;
16067c478bd9Sstevel@tonic-gate
16077c478bd9Sstevel@tonic-gate hand = lgrp_plat_pfn_to_hand(pfn);
16087c478bd9Sstevel@tonic-gate if (hand != LGRP_NULL_HANDLE)
16097c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
16107c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i];
16117c478bd9Sstevel@tonic-gate if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand)
16127c478bd9Sstevel@tonic-gate return (lgrp);
16137c478bd9Sstevel@tonic-gate }
16147c478bd9Sstevel@tonic-gate return (NULL);
16157c478bd9Sstevel@tonic-gate }
16167c478bd9Sstevel@tonic-gate
16177c478bd9Sstevel@tonic-gate /*
16187c478bd9Sstevel@tonic-gate * Return lgroup containing the physical memory for the given page frame number
16197c478bd9Sstevel@tonic-gate */
16207c478bd9Sstevel@tonic-gate lgrp_t *
lgrp_phys_to_lgrp(u_longlong_t physaddr)16217c478bd9Sstevel@tonic-gate lgrp_phys_to_lgrp(u_longlong_t physaddr)
16227c478bd9Sstevel@tonic-gate {
16237c478bd9Sstevel@tonic-gate lgrp_handle_t hand;
16247c478bd9Sstevel@tonic-gate int i;
16257c478bd9Sstevel@tonic-gate lgrp_t *lgrp;
16267c478bd9Sstevel@tonic-gate pfn_t pfn;
16277c478bd9Sstevel@tonic-gate
16287c478bd9Sstevel@tonic-gate pfn = btop(physaddr);
16297c478bd9Sstevel@tonic-gate hand = lgrp_plat_pfn_to_hand(pfn);
16307c478bd9Sstevel@tonic-gate if (hand != LGRP_NULL_HANDLE)
16317c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
16327c478bd9Sstevel@tonic-gate lgrp = lgrp_table[i];
16337c478bd9Sstevel@tonic-gate if (LGRP_EXISTS(lgrp) && lgrp->lgrp_plathand == hand)
16347c478bd9Sstevel@tonic-gate return (lgrp);
16357c478bd9Sstevel@tonic-gate }
16367c478bd9Sstevel@tonic-gate return (NULL);
16377c478bd9Sstevel@tonic-gate }
16387c478bd9Sstevel@tonic-gate
16397c478bd9Sstevel@tonic-gate /*
16407c478bd9Sstevel@tonic-gate * Return the leaf lgroup containing the given CPU
1641394b433dSesaxe *
1642394b433dSesaxe * The caller needs to take precautions necessary to prevent
1643fb2f18f8Sesaxe * "cpu", and it's lpl from going away across a call to this function.
1644394b433dSesaxe * hint: kpreempt_disable()/kpreempt_enable()
16457c478bd9Sstevel@tonic-gate */
static lgrp_t *
lgrp_cpu_to_lgrp(cpu_t *cpu)
{
	/* The CPU's lpl carries a cached pointer back to its leaf lgroup. */
	return (cpu->cpu_lpl->lpl_lgrp);
}
16517c478bd9Sstevel@tonic-gate
16527c478bd9Sstevel@tonic-gate /*
16537c478bd9Sstevel@tonic-gate * Return the sum of the partition loads in an lgrp divided by
16547c478bd9Sstevel@tonic-gate * the number of CPUs in the lgrp. This is our best approximation
16557c478bd9Sstevel@tonic-gate * of an 'lgroup load average' for a useful per-lgroup kstat.
16567c478bd9Sstevel@tonic-gate */
16577c478bd9Sstevel@tonic-gate static uint64_t
lgrp_sum_loadavgs(lgrp_t * lgrp)16587c478bd9Sstevel@tonic-gate lgrp_sum_loadavgs(lgrp_t *lgrp)
16597c478bd9Sstevel@tonic-gate {
16607c478bd9Sstevel@tonic-gate cpu_t *cpu;
16617c478bd9Sstevel@tonic-gate int ncpu;
16627c478bd9Sstevel@tonic-gate uint64_t loads = 0;
16637c478bd9Sstevel@tonic-gate
16647c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock);
16657c478bd9Sstevel@tonic-gate
16667c478bd9Sstevel@tonic-gate cpu = lgrp->lgrp_cpu;
16677c478bd9Sstevel@tonic-gate ncpu = lgrp->lgrp_cpucnt;
16687c478bd9Sstevel@tonic-gate
16697c478bd9Sstevel@tonic-gate if (cpu == NULL || ncpu == 0) {
16707c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
16717c478bd9Sstevel@tonic-gate return (0ull);
16727c478bd9Sstevel@tonic-gate }
16737c478bd9Sstevel@tonic-gate
16747c478bd9Sstevel@tonic-gate do {
16757c478bd9Sstevel@tonic-gate loads += cpu->cpu_lpl->lpl_loadavg;
16767c478bd9Sstevel@tonic-gate cpu = cpu->cpu_next_lgrp;
16777c478bd9Sstevel@tonic-gate } while (cpu != lgrp->lgrp_cpu);
16787c478bd9Sstevel@tonic-gate
16797c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
16807c478bd9Sstevel@tonic-gate
16817c478bd9Sstevel@tonic-gate return (loads / ncpu);
16827c478bd9Sstevel@tonic-gate }
16837c478bd9Sstevel@tonic-gate
16847c478bd9Sstevel@tonic-gate void
lgrp_stat_add(lgrp_id_t lgrpid,lgrp_stat_t stat,int64_t val)16857c478bd9Sstevel@tonic-gate lgrp_stat_add(lgrp_id_t lgrpid, lgrp_stat_t stat, int64_t val)
16867c478bd9Sstevel@tonic-gate {
16877c478bd9Sstevel@tonic-gate struct lgrp_stats *pstats;
16887c478bd9Sstevel@tonic-gate
16897c478bd9Sstevel@tonic-gate /*
16907c478bd9Sstevel@tonic-gate * Verify that the caller isn't trying to add to
16917c478bd9Sstevel@tonic-gate * a statistic for an lgroup that has gone away
16927c478bd9Sstevel@tonic-gate */
16937c478bd9Sstevel@tonic-gate if (lgrpid < 0 || lgrpid > lgrp_alloc_max)
16947c478bd9Sstevel@tonic-gate return;
16957c478bd9Sstevel@tonic-gate
16967c478bd9Sstevel@tonic-gate pstats = &lgrp_stats[lgrpid];
16977c478bd9Sstevel@tonic-gate atomic_add_64((uint64_t *)LGRP_STAT_WRITE_PTR(pstats, stat), val);
16987c478bd9Sstevel@tonic-gate }
16997c478bd9Sstevel@tonic-gate
17007c478bd9Sstevel@tonic-gate int64_t
lgrp_stat_read(lgrp_id_t lgrpid,lgrp_stat_t stat)17017c478bd9Sstevel@tonic-gate lgrp_stat_read(lgrp_id_t lgrpid, lgrp_stat_t stat)
17027c478bd9Sstevel@tonic-gate {
17037c478bd9Sstevel@tonic-gate uint64_t val;
17047c478bd9Sstevel@tonic-gate struct lgrp_stats *pstats;
17057c478bd9Sstevel@tonic-gate
17067c478bd9Sstevel@tonic-gate if (lgrpid < 0 || lgrpid > lgrp_alloc_max)
17077c478bd9Sstevel@tonic-gate return ((int64_t)0);
17087c478bd9Sstevel@tonic-gate
17097c478bd9Sstevel@tonic-gate pstats = &lgrp_stats[lgrpid];
17107c478bd9Sstevel@tonic-gate LGRP_STAT_READ(pstats, stat, val);
17117c478bd9Sstevel@tonic-gate return (val);
17127c478bd9Sstevel@tonic-gate }
17137c478bd9Sstevel@tonic-gate
17147c478bd9Sstevel@tonic-gate /*
17157c478bd9Sstevel@tonic-gate * Reset all kstats for lgrp specified by its lgrpid.
17167c478bd9Sstevel@tonic-gate */
17177c478bd9Sstevel@tonic-gate static void
lgrp_kstat_reset(lgrp_id_t lgrpid)17187c478bd9Sstevel@tonic-gate lgrp_kstat_reset(lgrp_id_t lgrpid)
17197c478bd9Sstevel@tonic-gate {
17207c478bd9Sstevel@tonic-gate lgrp_stat_t stat;
17217c478bd9Sstevel@tonic-gate
17227c478bd9Sstevel@tonic-gate if (lgrpid < 0 || lgrpid > lgrp_alloc_max)
17237c478bd9Sstevel@tonic-gate return;
17247c478bd9Sstevel@tonic-gate
17257c478bd9Sstevel@tonic-gate for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
17267c478bd9Sstevel@tonic-gate LGRP_STAT_RESET(&lgrp_stats[lgrpid], stat);
17277c478bd9Sstevel@tonic-gate }
17287c478bd9Sstevel@tonic-gate }
17297c478bd9Sstevel@tonic-gate
17307c478bd9Sstevel@tonic-gate /*
17317c478bd9Sstevel@tonic-gate * Collect all per-lgrp statistics for the lgrp associated with this
17327c478bd9Sstevel@tonic-gate * kstat, and store them in the ks_data array.
17337c478bd9Sstevel@tonic-gate *
17347c478bd9Sstevel@tonic-gate * The superuser can reset all the running counter statistics for an
17357c478bd9Sstevel@tonic-gate * lgrp by writing to any of the lgrp's stats.
17367c478bd9Sstevel@tonic-gate */
static int
lgrp_kstat_extract(kstat_t *ksp, int rw)
{
	lgrp_stat_t stat;
	struct kstat_named *ksd;
	lgrp_t *lgrp;
	lgrp_id_t lgrpid;

	/* The lgrp this kstat reports on was stashed in ks_private. */
	lgrp = (lgrp_t *)ksp->ks_private;

	ksd = (struct kstat_named *)ksp->ks_data;
	ASSERT(ksd == (struct kstat_named *)&lgrp_kstat_data);

	lgrpid = lgrp->lgrp_id;

	if (lgrpid == LGRP_NONE) {
		/*
		 * Return all zeroes as stats for freed lgrp.
		 */
		for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
			ksd[stat].value.i64 = 0;
		}
		/*
		 * After the loop, stat == LGRP_NUM_COUNTER_STATS; the
		 * snapshot stats are addressed relative to that base.
		 * NOTE(review): LGRP_LOADAVG_SCALE is not zeroed here
		 * although the read path below fills it in -- confirm
		 * whether the stale value is acceptable for a freed lgrp.
		 */
		ksd[stat + LGRP_NUM_CPUS].value.i64 = 0;
		ksd[stat + LGRP_NUM_PG_INSTALL].value.i64 = 0;
		ksd[stat + LGRP_NUM_PG_AVAIL].value.i64 = 0;
		ksd[stat + LGRP_NUM_PG_FREE].value.i64 = 0;
		ksd[stat + LGRP_LOADAVG].value.i64 = 0;
	} else if (rw != KSTAT_WRITE) {
		/*
		 * Handle counter stats
		 */
		for (stat = 0; stat < LGRP_NUM_COUNTER_STATS; stat++) {
			ksd[stat].value.i64 = lgrp_stat_read(lgrpid, stat);
		}

		/*
		 * Handle kernel data snapshot stats
		 */
		ksd[stat + LGRP_NUM_CPUS].value.i64 = lgrp->lgrp_cpucnt;
		ksd[stat + LGRP_NUM_PG_INSTALL].value.i64 =
		    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_INSTALL);
		ksd[stat + LGRP_NUM_PG_AVAIL].value.i64 =
		    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_AVAIL);
		ksd[stat + LGRP_NUM_PG_FREE].value.i64 =
		    lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE);
		ksd[stat + LGRP_LOADAVG].value.i64 = lgrp_sum_loadavgs(lgrp);
		ksd[stat + LGRP_LOADAVG_SCALE].value.i64 =
		    lgrp_loadavg_max_effect;
	} else {
		/* A write to any of the lgrp's stats resets the counters. */
		lgrp_kstat_reset(lgrpid);
	}

	return (0);
}
17917c478bd9Sstevel@tonic-gate
17927c478bd9Sstevel@tonic-gate int
lgrp_query_cpu(processorid_t id,lgrp_id_t * lp)17937c478bd9Sstevel@tonic-gate lgrp_query_cpu(processorid_t id, lgrp_id_t *lp)
17947c478bd9Sstevel@tonic-gate {
17957c478bd9Sstevel@tonic-gate cpu_t *cp;
17967c478bd9Sstevel@tonic-gate
17977c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock);
17987c478bd9Sstevel@tonic-gate
17997c478bd9Sstevel@tonic-gate if ((cp = cpu_get(id)) == NULL) {
18007c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
18017c478bd9Sstevel@tonic-gate return (EINVAL);
18027c478bd9Sstevel@tonic-gate }
18037c478bd9Sstevel@tonic-gate
18047c478bd9Sstevel@tonic-gate if (cpu_is_offline(cp) || cpu_is_poweredoff(cp)) {
18057c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
18067c478bd9Sstevel@tonic-gate return (EINVAL);
18077c478bd9Sstevel@tonic-gate }
18087c478bd9Sstevel@tonic-gate
18097c478bd9Sstevel@tonic-gate ASSERT(cp->cpu_lpl != NULL);
18107c478bd9Sstevel@tonic-gate
18117c478bd9Sstevel@tonic-gate *lp = cp->cpu_lpl->lpl_lgrpid;
18127c478bd9Sstevel@tonic-gate
18137c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
18147c478bd9Sstevel@tonic-gate
18157c478bd9Sstevel@tonic-gate return (0);
18167c478bd9Sstevel@tonic-gate }
18177c478bd9Sstevel@tonic-gate
18187c478bd9Sstevel@tonic-gate int
lgrp_query_load(processorid_t id,lgrp_load_t * lp)18197c478bd9Sstevel@tonic-gate lgrp_query_load(processorid_t id, lgrp_load_t *lp)
18207c478bd9Sstevel@tonic-gate {
18217c478bd9Sstevel@tonic-gate cpu_t *cp;
18227c478bd9Sstevel@tonic-gate
18237c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock);
18247c478bd9Sstevel@tonic-gate
18257c478bd9Sstevel@tonic-gate if ((cp = cpu_get(id)) == NULL) {
18267c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
18277c478bd9Sstevel@tonic-gate return (EINVAL);
18287c478bd9Sstevel@tonic-gate }
18297c478bd9Sstevel@tonic-gate
18307c478bd9Sstevel@tonic-gate ASSERT(cp->cpu_lpl != NULL);
18317c478bd9Sstevel@tonic-gate
18327c478bd9Sstevel@tonic-gate *lp = cp->cpu_lpl->lpl_loadavg;
18337c478bd9Sstevel@tonic-gate
18347c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock);
18357c478bd9Sstevel@tonic-gate
18367c478bd9Sstevel@tonic-gate return (0);
18377c478bd9Sstevel@tonic-gate }
18387c478bd9Sstevel@tonic-gate
18397c478bd9Sstevel@tonic-gate /*
18407c478bd9Sstevel@tonic-gate * Add a resource named by lpl_leaf to rset of lpl_target
18417c478bd9Sstevel@tonic-gate *
18427c478bd9Sstevel@tonic-gate * This routine also adjusts ncpu and nrset if the call succeeds in adding a
18437c478bd9Sstevel@tonic-gate * resource. It is adjusted here, as this is presently the only place that we
18447c478bd9Sstevel@tonic-gate * can be certain a resource addition has succeeded.
18457c478bd9Sstevel@tonic-gate *
18467c478bd9Sstevel@tonic-gate * We keep the list of rsets sorted so that the dispatcher can quickly walk the
18477c478bd9Sstevel@tonic-gate * list in order until it reaches a NULL. (This list is required to be NULL
18487c478bd9Sstevel@tonic-gate * terminated, too). This is done so that we can mark start pos + 1, so that
18497c478bd9Sstevel@tonic-gate * each lpl is traversed sequentially, but in a different order. We hope this
18507c478bd9Sstevel@tonic-gate * will improve performance a bit. (Hopefully, less read-to-own traffic...)
18517c478bd9Sstevel@tonic-gate */
18527c478bd9Sstevel@tonic-gate
void
lpl_rset_add(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
	int i;
	int entry_slot = 0;

	/*
	 * Scan the (sorted, NULL-terminated) rset: return if leaf is
	 * already present, otherwise stop at the first entry with a
	 * larger lgrp id -- that index is where the leaf belongs.
	 */
	for (i = 0; i < lpl_target->lpl_nrset; i++) {
		if (lpl_target->lpl_rset[i] == lpl_leaf) {
			return;
		}

		if (lpl_target->lpl_rset[i]->lpl_lgrpid >
		    lpl_leaf->lpl_lgrpid) {
			break;
		}
	}

	/* insert leaf, update counts */
	entry_slot = i;
	i = lpl_target->lpl_nrset++;

	/*
	 * Start at the end of the rset array and work backwards towards the
	 * slot into which the new lpl will be inserted. This effectively
	 * preserves the current ordering by scooting everybody over one entry,
	 * and placing the new entry into the space created.
	 * The lgrpid -> rset index map is kept in step with each move.
	 */
	while (i-- > entry_slot) {
		lpl_target->lpl_rset[i + 1] = lpl_target->lpl_rset[i];
		lpl_target->lpl_id2rset[lpl_target->lpl_rset[i]->lpl_lgrpid] =
		    i + 1;
	}

	lpl_target->lpl_rset[entry_slot] = lpl_leaf;
	lpl_target->lpl_id2rset[lpl_leaf->lpl_lgrpid] = entry_slot;

	/* The target now covers all of the leaf's CPUs as well. */
	lpl_target->lpl_ncpu += lpl_leaf->lpl_ncpu;
}
18927c478bd9Sstevel@tonic-gate
18937c478bd9Sstevel@tonic-gate /*
18946890d023SEric Saxe * Update each of lpl_parent's children with a reference to their parent.
18957c478bd9Sstevel@tonic-gate * The lgrp topology is used as the reference since it is fully
18967c478bd9Sstevel@tonic-gate * consistent and correct at this point.
18977c478bd9Sstevel@tonic-gate * This should be called after any potential change in lpl_parent's
18987c478bd9Sstevel@tonic-gate * rset.
18997c478bd9Sstevel@tonic-gate */
19007c478bd9Sstevel@tonic-gate static void
lpl_child_update(lpl_t * lpl_parent,struct cpupart * cp)19017c478bd9Sstevel@tonic-gate lpl_child_update(lpl_t *lpl_parent, struct cpupart *cp)
19027c478bd9Sstevel@tonic-gate {
19036890d023SEric Saxe klgrpset_t children;
19046890d023SEric Saxe int i;
19057c478bd9Sstevel@tonic-gate
19067c478bd9Sstevel@tonic-gate children = lgrp_table[lpl_parent->lpl_lgrpid]->lgrp_children;
19077c478bd9Sstevel@tonic-gate if (klgrpset_isempty(children))
19087c478bd9Sstevel@tonic-gate return; /* nothing to do */
19097c478bd9Sstevel@tonic-gate
19107c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
19117c478bd9Sstevel@tonic-gate if (klgrpset_ismember(children, i)) {
19127c478bd9Sstevel@tonic-gate /*
19137c478bd9Sstevel@tonic-gate * (Re)set the parent. It may be incorrect if
19147c478bd9Sstevel@tonic-gate * lpl_parent is new in the topology.
19157c478bd9Sstevel@tonic-gate */
19167c478bd9Sstevel@tonic-gate cp->cp_lgrploads[i].lpl_parent = lpl_parent;
19177c478bd9Sstevel@tonic-gate }
19187c478bd9Sstevel@tonic-gate }
19197c478bd9Sstevel@tonic-gate }
19207c478bd9Sstevel@tonic-gate
19217c478bd9Sstevel@tonic-gate /*
19227c478bd9Sstevel@tonic-gate * Delete resource lpl_leaf from rset of lpl_target, assuming it's there.
19237c478bd9Sstevel@tonic-gate *
19247c478bd9Sstevel@tonic-gate * This routine also adjusts ncpu and nrset if the call succeeds in deleting a
19257c478bd9Sstevel@tonic-gate * resource. The values are adjusted here, as this is the only place that we can
19267c478bd9Sstevel@tonic-gate * be certain a resource was successfully deleted.
19277c478bd9Sstevel@tonic-gate */
void
lpl_rset_del(lpl_t *lpl_target, lpl_t *lpl_leaf)
{
	int i;
	lpl_t *leaf;

	/* An empty rset cannot contain the leaf. */
	if (lpl_target->lpl_nrset == 0)
		return;

	/* find leaf in intermediate node */
	for (i = 0; i < lpl_target->lpl_nrset; i++) {
		if (lpl_target->lpl_rset[i] == lpl_leaf)
			break;
	}

	/* return if leaf not found */
	if (lpl_target->lpl_rset[i] != lpl_leaf)
		return;

	/*
	 * Prune leaf, compress array.  The post-decrement NULLs the slot
	 * just past the last valid entry, preserving the NULL terminator
	 * the dispatcher relies on while shrinking nrset.
	 */
	lpl_target->lpl_rset[lpl_target->lpl_nrset--] = NULL;
	lpl_target->lpl_id2rset[lpl_leaf->lpl_lgrpid] = -1;
	lpl_target->lpl_ncpu--;
	do {
		/* Shift each successor down one slot over the hole. */
		lpl_target->lpl_rset[i] = lpl_target->lpl_rset[i + 1];
		/*
		 * Update the lgrp id <=> rset mapping
		 */
		if ((leaf = lpl_target->lpl_rset[i]) != NULL) {
			lpl_target->lpl_id2rset[leaf->lpl_lgrpid] = i;
		}
	} while (i++ < lpl_target->lpl_nrset);
}
19617c478bd9Sstevel@tonic-gate
19627c478bd9Sstevel@tonic-gate /*
19637c478bd9Sstevel@tonic-gate * Check to see if the resource set of the target lpl contains the
19647c478bd9Sstevel@tonic-gate * supplied leaf lpl. This returns 1 if the lpl is found, 0 if it is not.
19657c478bd9Sstevel@tonic-gate */
19667c478bd9Sstevel@tonic-gate
19677c478bd9Sstevel@tonic-gate int
lpl_rset_contains(lpl_t * lpl_target,lpl_t * lpl_leaf)19687c478bd9Sstevel@tonic-gate lpl_rset_contains(lpl_t *lpl_target, lpl_t *lpl_leaf)
19697c478bd9Sstevel@tonic-gate {
19707c478bd9Sstevel@tonic-gate int i;
19717c478bd9Sstevel@tonic-gate
19727c478bd9Sstevel@tonic-gate for (i = 0; i < lpl_target->lpl_nrset; i++) {
19737c478bd9Sstevel@tonic-gate if (lpl_target->lpl_rset[i] == lpl_leaf)
19747c478bd9Sstevel@tonic-gate return (1);
19757c478bd9Sstevel@tonic-gate }
19767c478bd9Sstevel@tonic-gate
19777c478bd9Sstevel@tonic-gate return (0);
19787c478bd9Sstevel@tonic-gate }
19797c478bd9Sstevel@tonic-gate
19807c478bd9Sstevel@tonic-gate /*
19817c478bd9Sstevel@tonic-gate * Called when we change cpu lpl membership. This increments or decrements the
19827c478bd9Sstevel@tonic-gate * per-cpu counter in every lpl in which our leaf appears.
19837c478bd9Sstevel@tonic-gate */
19847c478bd9Sstevel@tonic-gate void
lpl_cpu_adjcnt(lpl_act_t act,cpu_t * cp)19857c478bd9Sstevel@tonic-gate lpl_cpu_adjcnt(lpl_act_t act, cpu_t *cp)
19867c478bd9Sstevel@tonic-gate {
19877c478bd9Sstevel@tonic-gate cpupart_t *cpupart;
19887c478bd9Sstevel@tonic-gate lgrp_t *lgrp_leaf;
19897c478bd9Sstevel@tonic-gate lgrp_t *lgrp_cur;
19907c478bd9Sstevel@tonic-gate lpl_t *lpl_leaf;
19917c478bd9Sstevel@tonic-gate lpl_t *lpl_cur;
19927c478bd9Sstevel@tonic-gate int i;
19937c478bd9Sstevel@tonic-gate
19947c478bd9Sstevel@tonic-gate ASSERT(act == LPL_DECREMENT || act == LPL_INCREMENT);
19957c478bd9Sstevel@tonic-gate
19967c478bd9Sstevel@tonic-gate cpupart = cp->cpu_part;
19977c478bd9Sstevel@tonic-gate lpl_leaf = cp->cpu_lpl;
19987c478bd9Sstevel@tonic-gate lgrp_leaf = lgrp_table[lpl_leaf->lpl_lgrpid];
19997c478bd9Sstevel@tonic-gate
20007c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
20017c478bd9Sstevel@tonic-gate lgrp_cur = lgrp_table[i];
20027c478bd9Sstevel@tonic-gate
20037c478bd9Sstevel@tonic-gate /*
20047c478bd9Sstevel@tonic-gate * Don't adjust if the lgrp isn't there, if we're the leaf lpl
20057c478bd9Sstevel@tonic-gate * for the cpu in question, or if the current lgrp and leaf
20067c478bd9Sstevel@tonic-gate * don't share the same resources.
20077c478bd9Sstevel@tonic-gate */
20087c478bd9Sstevel@tonic-gate
20097c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp_cur) || (lgrp_cur == lgrp_leaf) ||
20107c478bd9Sstevel@tonic-gate !klgrpset_intersects(lgrp_leaf->lgrp_set[LGRP_RSRC_CPU],
20117c478bd9Sstevel@tonic-gate lgrp_cur->lgrp_set[LGRP_RSRC_CPU]))
20127c478bd9Sstevel@tonic-gate continue;
20137c478bd9Sstevel@tonic-gate
20147c478bd9Sstevel@tonic-gate
20157c478bd9Sstevel@tonic-gate lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];
20167c478bd9Sstevel@tonic-gate
20177c478bd9Sstevel@tonic-gate if (lpl_cur->lpl_nrset > 0) {
20187c478bd9Sstevel@tonic-gate if (act == LPL_INCREMENT) {
20197c478bd9Sstevel@tonic-gate lpl_cur->lpl_ncpu++;
20207c478bd9Sstevel@tonic-gate } else if (act == LPL_DECREMENT) {
20217c478bd9Sstevel@tonic-gate lpl_cur->lpl_ncpu--;
20227c478bd9Sstevel@tonic-gate }
20237c478bd9Sstevel@tonic-gate }
20247c478bd9Sstevel@tonic-gate }
20257c478bd9Sstevel@tonic-gate }
20267c478bd9Sstevel@tonic-gate
20277c478bd9Sstevel@tonic-gate /*
20287c478bd9Sstevel@tonic-gate * Initialize lpl with given resources and specified lgrp
20297c478bd9Sstevel@tonic-gate */
20307c478bd9Sstevel@tonic-gate void
lpl_init(lpl_t * lpl,lpl_t * lpl_leaf,lgrp_t * lgrp)20317c478bd9Sstevel@tonic-gate lpl_init(lpl_t *lpl, lpl_t *lpl_leaf, lgrp_t *lgrp)
20327c478bd9Sstevel@tonic-gate {
20337c478bd9Sstevel@tonic-gate lpl->lpl_lgrpid = lgrp->lgrp_id;
20347c478bd9Sstevel@tonic-gate lpl->lpl_loadavg = 0;
20357c478bd9Sstevel@tonic-gate if (lpl == lpl_leaf)
20367c478bd9Sstevel@tonic-gate lpl->lpl_ncpu = 1;
20377c478bd9Sstevel@tonic-gate else
20387c478bd9Sstevel@tonic-gate lpl->lpl_ncpu = lpl_leaf->lpl_ncpu;
20397c478bd9Sstevel@tonic-gate lpl->lpl_nrset = 1;
20407c478bd9Sstevel@tonic-gate lpl->lpl_rset[0] = lpl_leaf;
20416890d023SEric Saxe lpl->lpl_id2rset[lpl_leaf->lpl_lgrpid] = 0;
20427c478bd9Sstevel@tonic-gate lpl->lpl_lgrp = lgrp;
20437c478bd9Sstevel@tonic-gate lpl->lpl_parent = NULL; /* set by lpl_leaf_insert() */
20447c478bd9Sstevel@tonic-gate lpl->lpl_cpus = NULL; /* set by lgrp_part_add_cpu() */
20457c478bd9Sstevel@tonic-gate }
20467c478bd9Sstevel@tonic-gate
20477c478bd9Sstevel@tonic-gate /*
20487c478bd9Sstevel@tonic-gate * Clear an unused lpl
20497c478bd9Sstevel@tonic-gate */
20507c478bd9Sstevel@tonic-gate void
lpl_clear(lpl_t * lpl)20517c478bd9Sstevel@tonic-gate lpl_clear(lpl_t *lpl)
20527c478bd9Sstevel@tonic-gate {
20536890d023SEric Saxe /*
20546890d023SEric Saxe * Clear out all fields in the lpl except:
20556890d023SEric Saxe * lpl_lgrpid - to facilitate debugging
20566890d023SEric Saxe * lpl_rset, lpl_rset_sz, lpl_id2rset - rset array references / size
20576890d023SEric Saxe *
20586890d023SEric Saxe * Note that the lpl's rset and id2rset mapping are cleared as well.
20596890d023SEric Saxe */
20606890d023SEric Saxe lpl->lpl_loadavg = 0;
20616890d023SEric Saxe lpl->lpl_ncpu = 0;
20626890d023SEric Saxe lpl->lpl_lgrp = NULL;
20636890d023SEric Saxe lpl->lpl_parent = NULL;
20646890d023SEric Saxe lpl->lpl_cpus = NULL;
20656890d023SEric Saxe lpl->lpl_nrset = 0;
20666890d023SEric Saxe lpl->lpl_homed_time = 0;
20676890d023SEric Saxe bzero(lpl->lpl_rset, sizeof (lpl->lpl_rset[0]) * lpl->lpl_rset_sz);
20686890d023SEric Saxe bzero(lpl->lpl_id2rset,
20696890d023SEric Saxe sizeof (lpl->lpl_id2rset[0]) * lpl->lpl_rset_sz);
20707c478bd9Sstevel@tonic-gate }
20717c478bd9Sstevel@tonic-gate
20727c478bd9Sstevel@tonic-gate /*
20737c478bd9Sstevel@tonic-gate * Given a CPU-partition, verify that the lpl topology in the CPU-partition
20747c478bd9Sstevel@tonic-gate * is in sync with the lgroup toplogy in the system. The lpl topology may not
20757c478bd9Sstevel@tonic-gate * make full use of all of the lgroup topology, but this checks to make sure
20767c478bd9Sstevel@tonic-gate * that for the parts that it does use, it has correctly understood the
20777c478bd9Sstevel@tonic-gate * relationships that exist. This function returns
20787c478bd9Sstevel@tonic-gate * 0 if the topology is correct, and a non-zero error code, for non-debug
20797c478bd9Sstevel@tonic-gate * kernels if incorrect. Asserts are spread throughout the code to aid in
20807c478bd9Sstevel@tonic-gate * debugging on a DEBUG kernel.
20817c478bd9Sstevel@tonic-gate */
int
lpl_topo_verify(cpupart_t *cpupart)
{
	lgrp_t *lgrp;
	lpl_t *lpl;
	klgrpset_t rset;
	klgrpset_t cset;
	cpu_t *cpu;
	cpu_t *cp_start;
	int i;
	int j;
	int sum;

	/* topology can't be incorrect if it doesn't exist */
	if (!lgrp_topo_initialized || !lgrp_initialized)
		return (LPL_TOPO_CORRECT);

	ASSERT(cpupart != NULL);

	/*
	 * Walk every allocated lgrp and cross-check it against the
	 * partition's corresponding lpl.  Each ASSERT is paired with a
	 * non-DEBUG return of a distinct error code so callers can tell
	 * which invariant failed.
	 */
	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp = lgrp_table[i];
		lpl = NULL;
		/* make sure lpls are allocated */
		ASSERT(cpupart->cp_lgrploads);
		if (!cpupart->cp_lgrploads)
			return (LPL_TOPO_PART_HAS_NO_LPL);

		lpl = &cpupart->cp_lgrploads[i];
		/* make sure our index is good */
		ASSERT(i < cpupart->cp_nlgrploads);

		/* if lgroup doesn't exist, make sure lpl is empty */
		if (!LGRP_EXISTS(lgrp)) {
			ASSERT(lpl->lpl_ncpu == 0);
			if (lpl->lpl_ncpu > 0) {
				return (LPL_TOPO_CPUS_NOT_EMPTY);
			} else {
				continue;
			}
		}

		/* verify that lgroup and lpl are identically numbered */
		ASSERT(lgrp->lgrp_id == lpl->lpl_lgrpid);

		/* if lgroup isn't in our partition, make sure lpl is empty */
		if (!klgrpset_intersects(lgrp->lgrp_leaves,
		    cpupart->cp_lgrpset)) {
			ASSERT(lpl->lpl_ncpu == 0);
			if (lpl->lpl_ncpu > 0) {
				return (LPL_TOPO_CPUS_NOT_EMPTY);
			}
			/*
			 * lpl is empty, and lgroup isn't in partition. verify
			 * that lpl doesn't show up in anyone else's rsets (in
			 * this partition, anyway)
			 */
			for (j = 0; j < cpupart->cp_nlgrploads; j++) {
				lpl_t *i_lpl; /* lpl we're iterating over */

				i_lpl = &cpupart->cp_lgrploads[j];

				ASSERT(!lpl_rset_contains(i_lpl, lpl));
				if (lpl_rset_contains(i_lpl, lpl)) {
					return (LPL_TOPO_LPL_ORPHANED);
				}
			}
			/* lgroup is empty, and everything is ok. continue */
			continue;
		}


		/* lgroup is in this partition, now check it against lpl */

		/* do both have matching lgrps? */
		ASSERT(lgrp == lpl->lpl_lgrp);
		if (lgrp != lpl->lpl_lgrp) {
			return (LPL_TOPO_LGRP_MISMATCH);
		}

		/* do the parent lgroups exist and do they match? */
		if (lgrp->lgrp_parent) {
			ASSERT(lpl->lpl_parent != NULL &&
			    lgrp->lgrp_parent->lgrp_id ==
			    lpl->lpl_parent->lpl_lgrpid);

			if (!lpl->lpl_parent) {
				return (LPL_TOPO_MISSING_PARENT);
			} else if (lgrp->lgrp_parent->lgrp_id !=
			    lpl->lpl_parent->lpl_lgrpid) {
				return (LPL_TOPO_PARENT_MISMATCH);
			}
		}

		/* only leaf lgroups keep a cpucnt, only check leaves */
		if ((lpl->lpl_nrset == 1) && (lpl == lpl->lpl_rset[0])) {

			/* verify that lgrp is also a leaf */
			ASSERT((lgrp->lgrp_childcnt == 0) &&
			    (klgrpset_ismember(lgrp->lgrp_leaves,
			    lpl->lpl_lgrpid)));

			if ((lgrp->lgrp_childcnt > 0) ||
			    (!klgrpset_ismember(lgrp->lgrp_leaves,
			    lpl->lpl_lgrpid))) {
				return (LPL_TOPO_LGRP_NOT_LEAF);
			}

			ASSERT((lgrp->lgrp_cpucnt >= lpl->lpl_ncpu) &&
			    (lpl->lpl_ncpu > 0));
			if ((lgrp->lgrp_cpucnt < lpl->lpl_ncpu) ||
			    (lpl->lpl_ncpu <= 0)) {
				return (LPL_TOPO_BAD_CPUCNT);
			}

			/*
			 * Check that lpl_ncpu also matches the number of
			 * cpus in the lpl's linked list. This only exists in
			 * leaves, but they should always match.
			 */
			j = 0;
			cpu = cp_start = lpl->lpl_cpus;
			while (cpu != NULL) {
				j++;

				/* check to make sure cpu's lpl is leaf lpl */
				ASSERT(cpu->cpu_lpl == lpl);
				if (cpu->cpu_lpl != lpl) {
					return (LPL_TOPO_CPU_HAS_BAD_LPL);
				}

				/*
				 * Advance around the circular cpu_next_lpl
				 * list; NULL out cpu once we wrap back to
				 * the starting point to end the walk.
				 */
				if ((cpu = cpu->cpu_next_lpl) != cp_start) {
					continue;
				} else {
					cpu = NULL;
				}
			}

			ASSERT(j == lpl->lpl_ncpu);
			if (j != lpl->lpl_ncpu) {
				return (LPL_TOPO_LPL_BAD_NCPU);
			}

			/*
			 * Also, check that leaf lpl is contained in all
			 * intermediate lpls that name the leaf as a descendant
			 */
			for (j = 0; j <= lgrp_alloc_max; j++) {
				klgrpset_t intersect;
				lgrp_t *lgrp_cand;
				lpl_t *lpl_cand;

				lgrp_cand = lgrp_table[j];
				intersect = klgrpset_intersects(
				    lgrp_cand->lgrp_set[LGRP_RSRC_CPU],
				    cpupart->cp_lgrpset);

				if (!LGRP_EXISTS(lgrp_cand) ||
				    !klgrpset_intersects(lgrp_cand->lgrp_leaves,
				    cpupart->cp_lgrpset) ||
				    (intersect == 0))
					continue;

				lpl_cand =
				    &cpupart->cp_lgrploads[lgrp_cand->lgrp_id];

				if (klgrpset_ismember(intersect,
				    lgrp->lgrp_id)) {
					ASSERT(lpl_rset_contains(lpl_cand,
					    lpl));

					if (!lpl_rset_contains(lpl_cand, lpl)) {
						return (LPL_TOPO_RSET_MSSNG_LF);
					}
				}
			}

		} else { /* non-leaf specific checks */

			/*
			 * Non-leaf lpls should have lpl_cpus == NULL
			 * verify that this is so
			 */
			ASSERT(lpl->lpl_cpus == NULL);
			if (lpl->lpl_cpus != NULL) {
				return (LPL_TOPO_NONLEAF_HAS_CPUS);
			}

			/*
			 * verify that the sum of the cpus in the leaf resources
			 * is equal to the total ncpu in the intermediate
			 */
			for (j = sum = 0; j < lpl->lpl_nrset; j++) {
				sum += lpl->lpl_rset[j]->lpl_ncpu;
			}

			ASSERT(sum == lpl->lpl_ncpu);
			if (sum != lpl->lpl_ncpu) {
				return (LPL_TOPO_LPL_BAD_NCPU);
			}
		}

		/*
		 * Check the rset of the lpl in question. Make sure that each
		 * rset contains a subset of the resources in
		 * lgrp_set[LGRP_RSRC_CPU] and in cp_lgrpset. This also makes
		 * sure that each rset doesn't include resources that are
		 * outside of that set. (Which would be resources somehow not
		 * accounted for).
		 */
		klgrpset_clear(rset);
		for (j = 0; j < lpl->lpl_nrset; j++) {
			klgrpset_add(rset, lpl->lpl_rset[j]->lpl_lgrpid);
		}
		klgrpset_copy(cset, rset);
		/* make sure lpl rset matches lgrp rset */
		klgrpset_diff(rset, lgrp->lgrp_set[LGRP_RSRC_CPU]);
		/* make sure rset is contained with in partition, too */
		klgrpset_diff(cset, cpupart->cp_lgrpset);

		ASSERT(klgrpset_isempty(rset) && klgrpset_isempty(cset));
		if (!klgrpset_isempty(rset) || !klgrpset_isempty(cset)) {
			return (LPL_TOPO_RSET_MISMATCH);
		}

		/*
		 * check to make sure lpl_nrset matches the number of rsets
		 * contained in the lpl
		 */
		for (j = 0; j < lpl->lpl_nrset; j++) {
			if (lpl->lpl_rset[j] == NULL)
				break;
		}

		ASSERT(j == lpl->lpl_nrset);
		if (j != lpl->lpl_nrset) {
			return (LPL_TOPO_BAD_RSETCNT);
		}

	}
	return (LPL_TOPO_CORRECT);
}
23247c478bd9Sstevel@tonic-gate
23257c478bd9Sstevel@tonic-gate /*
23267c478bd9Sstevel@tonic-gate * Flatten lpl topology to given number of levels. This is presently only
23277c478bd9Sstevel@tonic-gate * implemented for a flatten to 2 levels, which will prune out the intermediates
23287c478bd9Sstevel@tonic-gate * and home the leaf lpls to the root lpl.
23297c478bd9Sstevel@tonic-gate */
23307c478bd9Sstevel@tonic-gate int
lpl_topo_flatten(int levels)23317c478bd9Sstevel@tonic-gate lpl_topo_flatten(int levels)
23327c478bd9Sstevel@tonic-gate {
23337c478bd9Sstevel@tonic-gate int i;
23347c478bd9Sstevel@tonic-gate uint_t sum;
23357c478bd9Sstevel@tonic-gate lgrp_t *lgrp_cur;
23367c478bd9Sstevel@tonic-gate lpl_t *lpl_cur;
23377c478bd9Sstevel@tonic-gate lpl_t *lpl_root;
23387c478bd9Sstevel@tonic-gate cpupart_t *cp;
23397c478bd9Sstevel@tonic-gate
23407c478bd9Sstevel@tonic-gate if (levels != 2)
23417c478bd9Sstevel@tonic-gate return (0);
23427c478bd9Sstevel@tonic-gate
23437c478bd9Sstevel@tonic-gate /* called w/ cpus paused - grab no locks! */
23447c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
23457c478bd9Sstevel@tonic-gate !lgrp_initialized);
23467c478bd9Sstevel@tonic-gate
23477c478bd9Sstevel@tonic-gate cp = cp_list_head;
23487c478bd9Sstevel@tonic-gate do {
23497c478bd9Sstevel@tonic-gate lpl_root = &cp->cp_lgrploads[lgrp_root->lgrp_id];
23507c478bd9Sstevel@tonic-gate ASSERT(LGRP_EXISTS(lgrp_root) && (lpl_root->lpl_ncpu > 0));
23517c478bd9Sstevel@tonic-gate
23527c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
23537c478bd9Sstevel@tonic-gate lgrp_cur = lgrp_table[i];
23547c478bd9Sstevel@tonic-gate lpl_cur = &cp->cp_lgrploads[i];
23557c478bd9Sstevel@tonic-gate
23567c478bd9Sstevel@tonic-gate if ((lgrp_cur == lgrp_root) ||
23577c478bd9Sstevel@tonic-gate (!LGRP_EXISTS(lgrp_cur) &&
23587c478bd9Sstevel@tonic-gate (lpl_cur->lpl_ncpu == 0)))
23597c478bd9Sstevel@tonic-gate continue;
23607c478bd9Sstevel@tonic-gate
23617c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp_cur) && (lpl_cur->lpl_ncpu > 0)) {
23627c478bd9Sstevel@tonic-gate /*
23637c478bd9Sstevel@tonic-gate * this should be a deleted intermediate, so
23647c478bd9Sstevel@tonic-gate * clear it
23657c478bd9Sstevel@tonic-gate */
23667c478bd9Sstevel@tonic-gate lpl_clear(lpl_cur);
23677c478bd9Sstevel@tonic-gate } else if ((lpl_cur->lpl_nrset == 1) &&
23687c478bd9Sstevel@tonic-gate (lpl_cur->lpl_rset[0] == lpl_cur) &&
23697c478bd9Sstevel@tonic-gate ((lpl_cur->lpl_parent->lpl_ncpu == 0) ||
23707c478bd9Sstevel@tonic-gate (!LGRP_EXISTS(lpl_cur->lpl_parent->lpl_lgrp)))) {
23717c478bd9Sstevel@tonic-gate /*
23727c478bd9Sstevel@tonic-gate * this is a leaf whose parent was deleted, or
23737c478bd9Sstevel@tonic-gate * whose parent had their lgrp deleted. (And
23747c478bd9Sstevel@tonic-gate * whose parent will soon be deleted). Point
23757c478bd9Sstevel@tonic-gate * this guy back to the root lpl.
23767c478bd9Sstevel@tonic-gate */
23777c478bd9Sstevel@tonic-gate lpl_cur->lpl_parent = lpl_root;
23787c478bd9Sstevel@tonic-gate lpl_rset_add(lpl_root, lpl_cur);
23797c478bd9Sstevel@tonic-gate }
23807c478bd9Sstevel@tonic-gate
23817c478bd9Sstevel@tonic-gate }
23827c478bd9Sstevel@tonic-gate
23837c478bd9Sstevel@tonic-gate /*
23847c478bd9Sstevel@tonic-gate * Now that we're done, make sure the count on the root lpl is
23857c478bd9Sstevel@tonic-gate * correct, and update the hints of the children for the sake of
23867c478bd9Sstevel@tonic-gate * thoroughness
23877c478bd9Sstevel@tonic-gate */
23887c478bd9Sstevel@tonic-gate for (i = sum = 0; i < lpl_root->lpl_nrset; i++) {
23897c478bd9Sstevel@tonic-gate sum += lpl_root->lpl_rset[i]->lpl_ncpu;
23907c478bd9Sstevel@tonic-gate }
23917c478bd9Sstevel@tonic-gate lpl_root->lpl_ncpu = sum;
23927c478bd9Sstevel@tonic-gate lpl_child_update(lpl_root, cp);
23937c478bd9Sstevel@tonic-gate
23947c478bd9Sstevel@tonic-gate cp = cp->cp_next;
23957c478bd9Sstevel@tonic-gate } while (cp != cp_list_head);
23967c478bd9Sstevel@tonic-gate
23977c478bd9Sstevel@tonic-gate return (levels);
23987c478bd9Sstevel@tonic-gate }
23997c478bd9Sstevel@tonic-gate
24007c478bd9Sstevel@tonic-gate /*
24017c478bd9Sstevel@tonic-gate * Insert a lpl into the resource hierarchy and create any additional lpls that
24027c478bd9Sstevel@tonic-gate * are necessary to represent the varying states of locality for the cpu
 * resources newly added to the partition.
24047c478bd9Sstevel@tonic-gate *
24057c478bd9Sstevel@tonic-gate * This routine is clever enough that it can correctly add resources from the
24067c478bd9Sstevel@tonic-gate * new leaf into both direct and indirect resource sets in the hierarchy. (Ie,
24077c478bd9Sstevel@tonic-gate * those for which the lpl is a leaf as opposed to simply a named equally local
24087c478bd9Sstevel@tonic-gate * resource). The one special case that needs additional processing is when a
24097c478bd9Sstevel@tonic-gate * new intermediate lpl is introduced. Since the main loop only traverses
24107c478bd9Sstevel@tonic-gate * looking to add the leaf resource where it does not yet exist, additional work
24117c478bd9Sstevel@tonic-gate * is necessary to add other leaf resources that may need to exist in the newly
24127c478bd9Sstevel@tonic-gate * created intermediate. This is performed by the second inner loop, and is
24137c478bd9Sstevel@tonic-gate * only done when the check for more than one overlapping resource succeeds.
24147c478bd9Sstevel@tonic-gate */
24157c478bd9Sstevel@tonic-gate
void
lpl_leaf_insert(lpl_t *lpl_leaf, cpupart_t *cpupart)
{
	int		i;
	int		j;
	int		rset_num_intersect;
	lgrp_t		*lgrp_cur;
	lpl_t		*lpl_cur;
	lpl_t		*lpl_parent;
	lgrp_id_t	parent_id;
	klgrpset_t	rset_intersect; /* resources in cpupart and lgrp */

	/*
	 * Walk every lgrp and hook the new leaf into each lpl whose lgrp
	 * names the leaf among its CPU resources.
	 */
	for (i = 0; i <= lgrp_alloc_max; i++) {
		lgrp_cur = lgrp_table[i];

		/*
		 * Don't insert if the lgrp isn't there, if the leaf isn't
		 * contained within the current lgrp, or if the current lgrp has
		 * no leaves in this partition
		 */

		if (!LGRP_EXISTS(lgrp_cur) ||
		    !klgrpset_ismember(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
		    lpl_leaf->lpl_lgrpid) ||
		    !klgrpset_intersects(lgrp_cur->lgrp_leaves,
		    cpupart->cp_lgrpset))
			continue;

		lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];
		if (lgrp_cur->lgrp_parent != NULL) {
			/* if lgrp has a parent, assign it properly */
			parent_id = lgrp_cur->lgrp_parent->lgrp_id;
			lpl_parent = &cpupart->cp_lgrploads[parent_id];
		} else {
			/* if not, make sure parent ptr gets set to null */
			lpl_parent = NULL;
		}

		if (lpl_cur == lpl_leaf) {
			/*
			 * Almost all leaf state was initialized elsewhere. The
			 * only thing left to do is to set the parent.
			 */
			lpl_cur->lpl_parent = lpl_parent;
			continue;
		}

		/*
		 * Non-leaf lpl: (re)initialize it with the new leaf as its
		 * first resource.  Note the clear must precede the init.
		 */
		lpl_clear(lpl_cur);
		lpl_init(lpl_cur, lpl_leaf, lgrp_cur);

		lpl_cur->lpl_parent = lpl_parent;

		/* does new lpl need to be populated with other resources? */
		rset_intersect =
		    klgrpset_intersects(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
		    cpupart->cp_lgrpset);
		klgrpset_nlgrps(rset_intersect, rset_num_intersect);

		if (rset_num_intersect > 1) {
			/*
			 * If so, figure out what lpls have resources that
			 * intersect this one, and add them.
			 */
			for (j = 0; j <= lgrp_alloc_max; j++) {
				lgrp_t	*lgrp_cand;	/* candidate lgrp */
				lpl_t	*lpl_cand;	/* candidate lpl */

				lgrp_cand = lgrp_table[j];
				if (!LGRP_EXISTS(lgrp_cand) ||
				    !klgrpset_ismember(rset_intersect,
				    lgrp_cand->lgrp_id))
					continue;
				lpl_cand =
				    &cpupart->cp_lgrploads[lgrp_cand->lgrp_id];
				lpl_rset_add(lpl_cur, lpl_cand);
			}
		}
		/*
		 * This lpl's rset has changed. Update the hint in its
		 * children.
		 */
		lpl_child_update(lpl_cur, cpupart);
	}
}
25007c478bd9Sstevel@tonic-gate
25017c478bd9Sstevel@tonic-gate /*
25027c478bd9Sstevel@tonic-gate * remove a lpl from the hierarchy of resources, clearing its state when
25037c478bd9Sstevel@tonic-gate * finished. If the lpls at the intermediate levels of the hierarchy have no
25047c478bd9Sstevel@tonic-gate * remaining resources, or no longer name a leaf resource in the cpu-partition,
25057c478bd9Sstevel@tonic-gate * delete them as well.
25067c478bd9Sstevel@tonic-gate */
25077c478bd9Sstevel@tonic-gate
25087c478bd9Sstevel@tonic-gate void
lpl_leaf_remove(lpl_t * lpl_leaf,cpupart_t * cpupart)25097c478bd9Sstevel@tonic-gate lpl_leaf_remove(lpl_t *lpl_leaf, cpupart_t *cpupart)
25107c478bd9Sstevel@tonic-gate {
25117c478bd9Sstevel@tonic-gate int i;
25127c478bd9Sstevel@tonic-gate lgrp_t *lgrp_cur;
25137c478bd9Sstevel@tonic-gate lpl_t *lpl_cur;
25147c478bd9Sstevel@tonic-gate klgrpset_t leaf_intersect; /* intersection of leaves */
25157c478bd9Sstevel@tonic-gate
25167c478bd9Sstevel@tonic-gate for (i = 0; i <= lgrp_alloc_max; i++) {
25177c478bd9Sstevel@tonic-gate lgrp_cur = lgrp_table[i];
25187c478bd9Sstevel@tonic-gate
25197c478bd9Sstevel@tonic-gate /*
25207c478bd9Sstevel@tonic-gate * Don't attempt to remove from lgrps that aren't there, that
25217c478bd9Sstevel@tonic-gate * don't contain our leaf, or from the leaf itself. (We do that
25227c478bd9Sstevel@tonic-gate * later)
25237c478bd9Sstevel@tonic-gate */
25247c478bd9Sstevel@tonic-gate
25257c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp_cur))
25267c478bd9Sstevel@tonic-gate continue;
25277c478bd9Sstevel@tonic-gate
25287c478bd9Sstevel@tonic-gate lpl_cur = &cpupart->cp_lgrploads[lgrp_cur->lgrp_id];
25297c478bd9Sstevel@tonic-gate
25307c478bd9Sstevel@tonic-gate if (!klgrpset_ismember(lgrp_cur->lgrp_set[LGRP_RSRC_CPU],
25317c478bd9Sstevel@tonic-gate lpl_leaf->lpl_lgrpid) ||
25327c478bd9Sstevel@tonic-gate (lpl_cur == lpl_leaf)) {
25337c478bd9Sstevel@tonic-gate continue;
25347c478bd9Sstevel@tonic-gate }
25357c478bd9Sstevel@tonic-gate
25367c478bd9Sstevel@tonic-gate /*
25377c478bd9Sstevel@tonic-gate * This is a slightly sleazy simplification in that we have
25387c478bd9Sstevel@tonic-gate * already marked the cp_lgrpset as no longer containing the
25397c478bd9Sstevel@tonic-gate * leaf we've deleted. Any lpls that pass the above checks
25407c478bd9Sstevel@tonic-gate * based upon lgrp membership but not necessarily cpu-part
25417c478bd9Sstevel@tonic-gate * membership also get cleared by the checks below. Currently
25427c478bd9Sstevel@tonic-gate * this is harmless, as the lpls should be empty anyway.
25437c478bd9Sstevel@tonic-gate *
25447c478bd9Sstevel@tonic-gate * In particular, we want to preserve lpls that have additional
25457c478bd9Sstevel@tonic-gate * leaf resources, even though we don't yet have a processor
25467c478bd9Sstevel@tonic-gate * architecture that represents resources this way.
25477c478bd9Sstevel@tonic-gate */
25487c478bd9Sstevel@tonic-gate
25497c478bd9Sstevel@tonic-gate leaf_intersect = klgrpset_intersects(lgrp_cur->lgrp_leaves,
25507c478bd9Sstevel@tonic-gate cpupart->cp_lgrpset);
25517c478bd9Sstevel@tonic-gate
25527c478bd9Sstevel@tonic-gate lpl_rset_del(lpl_cur, lpl_leaf);
25537c478bd9Sstevel@tonic-gate if ((lpl_cur->lpl_nrset == 0) || (!leaf_intersect)) {
25547c478bd9Sstevel@tonic-gate lpl_clear(lpl_cur);
25557c478bd9Sstevel@tonic-gate } else {
25567c478bd9Sstevel@tonic-gate /*
25577c478bd9Sstevel@tonic-gate * Update this lpl's children
25587c478bd9Sstevel@tonic-gate */
25597c478bd9Sstevel@tonic-gate lpl_child_update(lpl_cur, cpupart);
25607c478bd9Sstevel@tonic-gate }
25617c478bd9Sstevel@tonic-gate }
25627c478bd9Sstevel@tonic-gate lpl_clear(lpl_leaf);
25637c478bd9Sstevel@tonic-gate }
25647c478bd9Sstevel@tonic-gate
25657c478bd9Sstevel@tonic-gate /*
 * add a cpu to a partition in terms of lgrp load avg bookkeeping
25677c478bd9Sstevel@tonic-gate *
25687c478bd9Sstevel@tonic-gate * The lpl (cpu partition load average information) is now arranged in a
25697c478bd9Sstevel@tonic-gate * hierarchical fashion whereby resources that are closest, ie. most local, to
25707c478bd9Sstevel@tonic-gate * the cpu in question are considered to be leaves in a tree of resources.
 * There are two general cases for cpu addition:
25727c478bd9Sstevel@tonic-gate *
25737c478bd9Sstevel@tonic-gate * 1. A lpl structure that contains resources already in the hierarchy tree.
25747c478bd9Sstevel@tonic-gate * In this case, all of the associated lpl relationships have been defined, and
25757c478bd9Sstevel@tonic-gate * all that is necessary is that we link the new cpu into the per-lpl list of
25767c478bd9Sstevel@tonic-gate * cpus, and increment the ncpu count of all places where this cpu resource will
25777c478bd9Sstevel@tonic-gate * be accounted for. lpl_cpu_adjcnt updates the cpu count, and the cpu pointer
25787c478bd9Sstevel@tonic-gate * pushing is accomplished by this routine.
25797c478bd9Sstevel@tonic-gate *
25807c478bd9Sstevel@tonic-gate * 2. The lpl to contain the resources in this cpu-partition for this lgrp does
25817c478bd9Sstevel@tonic-gate * not exist yet. In this case, it is necessary to build the leaf lpl, and
 * construct the hierarchy of state necessary to name its more distant
25837c478bd9Sstevel@tonic-gate * resources, if they should exist. The leaf structure is initialized by this
25847c478bd9Sstevel@tonic-gate * routine, as is the cpu-partition state for the lgrp membership. This routine
25857c478bd9Sstevel@tonic-gate * also calls lpl_leaf_insert() which inserts the named lpl into the hierarchy
 * and builds all of the "ancestral" state necessary to identify resources at
25877c478bd9Sstevel@tonic-gate * differing levels of locality.
25887c478bd9Sstevel@tonic-gate */
25897c478bd9Sstevel@tonic-gate void
lgrp_part_add_cpu(cpu_t * cp,lgrp_id_t lgrpid)25907c478bd9Sstevel@tonic-gate lgrp_part_add_cpu(cpu_t *cp, lgrp_id_t lgrpid)
25917c478bd9Sstevel@tonic-gate {
25927c478bd9Sstevel@tonic-gate cpupart_t *cpupart;
25937c478bd9Sstevel@tonic-gate lgrp_t *lgrp_leaf;
25947c478bd9Sstevel@tonic-gate lpl_t *lpl_leaf;
25957c478bd9Sstevel@tonic-gate
25967c478bd9Sstevel@tonic-gate /* called sometimes w/ cpus paused - grab no locks */
25977c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);
25987c478bd9Sstevel@tonic-gate
25997c478bd9Sstevel@tonic-gate cpupart = cp->cpu_part;
26007c478bd9Sstevel@tonic-gate lgrp_leaf = lgrp_table[lgrpid];
26017c478bd9Sstevel@tonic-gate
26027c478bd9Sstevel@tonic-gate /* don't add non-existent lgrp */
26037c478bd9Sstevel@tonic-gate ASSERT(LGRP_EXISTS(lgrp_leaf));
26047c478bd9Sstevel@tonic-gate lpl_leaf = &cpupart->cp_lgrploads[lgrpid];
26057c478bd9Sstevel@tonic-gate cp->cpu_lpl = lpl_leaf;
26067c478bd9Sstevel@tonic-gate
26077c478bd9Sstevel@tonic-gate /* only leaf lpls contain cpus */
26087c478bd9Sstevel@tonic-gate
26097c478bd9Sstevel@tonic-gate if (lpl_leaf->lpl_ncpu++ == 0) {
26107c478bd9Sstevel@tonic-gate lpl_init(lpl_leaf, lpl_leaf, lgrp_leaf);
26117c478bd9Sstevel@tonic-gate klgrpset_add(cpupart->cp_lgrpset, lgrpid);
26127c478bd9Sstevel@tonic-gate lpl_leaf_insert(lpl_leaf, cpupart);
26137c478bd9Sstevel@tonic-gate } else {
26147c478bd9Sstevel@tonic-gate /*
26157c478bd9Sstevel@tonic-gate * the lpl should already exist in the parent, so just update
26167c478bd9Sstevel@tonic-gate * the count of available CPUs
26177c478bd9Sstevel@tonic-gate */
26187c478bd9Sstevel@tonic-gate lpl_cpu_adjcnt(LPL_INCREMENT, cp);
26197c478bd9Sstevel@tonic-gate }
26207c478bd9Sstevel@tonic-gate
26217c478bd9Sstevel@tonic-gate /* link cpu into list of cpus in lpl */
26227c478bd9Sstevel@tonic-gate
26237c478bd9Sstevel@tonic-gate if (lpl_leaf->lpl_cpus) {
26247c478bd9Sstevel@tonic-gate cp->cpu_next_lpl = lpl_leaf->lpl_cpus;
26257c478bd9Sstevel@tonic-gate cp->cpu_prev_lpl = lpl_leaf->lpl_cpus->cpu_prev_lpl;
26267c478bd9Sstevel@tonic-gate lpl_leaf->lpl_cpus->cpu_prev_lpl->cpu_next_lpl = cp;
26277c478bd9Sstevel@tonic-gate lpl_leaf->lpl_cpus->cpu_prev_lpl = cp;
26287c478bd9Sstevel@tonic-gate } else {
26297c478bd9Sstevel@tonic-gate /*
26307c478bd9Sstevel@tonic-gate * We increment ncpu immediately after we create a new leaf
26317c478bd9Sstevel@tonic-gate * lpl, so assert that ncpu == 1 for the case where we don't
26327c478bd9Sstevel@tonic-gate * have any cpu pointers yet.
26337c478bd9Sstevel@tonic-gate */
26347c478bd9Sstevel@tonic-gate ASSERT(lpl_leaf->lpl_ncpu == 1);
26357c478bd9Sstevel@tonic-gate lpl_leaf->lpl_cpus = cp->cpu_next_lpl = cp->cpu_prev_lpl = cp;
26367c478bd9Sstevel@tonic-gate }
26377c478bd9Sstevel@tonic-gate
26387c478bd9Sstevel@tonic-gate }
26397c478bd9Sstevel@tonic-gate
26407c478bd9Sstevel@tonic-gate
26417c478bd9Sstevel@tonic-gate /*
 * remove a cpu from a partition in terms of lgrp load avg bookkeeping
26437c478bd9Sstevel@tonic-gate *
26447c478bd9Sstevel@tonic-gate * The lpl (cpu partition load average information) is now arranged in a
26457c478bd9Sstevel@tonic-gate * hierarchical fashion whereby resources that are closest, ie. most local, to
26467c478bd9Sstevel@tonic-gate * the cpu in question are considered to be leaves in a tree of resources.
26477c478bd9Sstevel@tonic-gate * There are two removal cases in question:
26487c478bd9Sstevel@tonic-gate *
26497c478bd9Sstevel@tonic-gate * 1. Removal of the resource in the leaf leaves other resources remaining in
26507c478bd9Sstevel@tonic-gate * that leaf. (Another cpu still exists at this level of locality). In this
 * case, the count of available cpus is decremented in all associated lpls by
26527c478bd9Sstevel@tonic-gate * calling lpl_adj_cpucnt(), and the pointer to the removed cpu is pruned
26537c478bd9Sstevel@tonic-gate * from the per-cpu lpl list.
26547c478bd9Sstevel@tonic-gate *
26557c478bd9Sstevel@tonic-gate * 2. Removal of the resource results in the lpl containing no resources. (It's
26567c478bd9Sstevel@tonic-gate * empty) In this case, all of what has occurred for the first step must take
26577c478bd9Sstevel@tonic-gate * place; however, additionally we must remove the lpl structure itself, prune
26587c478bd9Sstevel@tonic-gate * out any stranded lpls that do not directly name a leaf resource, and mark the
26597c478bd9Sstevel@tonic-gate * cpu partition in question as no longer containing resources from the lgrp of
 * the lpl that has been deleted. Cpu-partition changes are handled by this
26617c478bd9Sstevel@tonic-gate * method, but the lpl_leaf_remove function deals with the details of pruning
26627c478bd9Sstevel@tonic-gate * out the empty lpl and any of its orphaned direct ancestors.
26637c478bd9Sstevel@tonic-gate */
void
lgrp_part_del_cpu(cpu_t *cp)
{
	lpl_t		*lpl;
	lpl_t		*leaf_lpl;
	lgrp_t		*lgrp_leaf;

	/* called sometimes w/ cpus paused - grab no locks */

	ASSERT(MUTEX_HELD(&cpu_lock) || !lgrp_initialized);

	lpl = leaf_lpl = cp->cpu_lpl;
	lgrp_leaf = leaf_lpl->lpl_lgrp;

	/* don't delete a leaf that isn't there */
	ASSERT(LGRP_EXISTS(lgrp_leaf));

	/* no double-deletes */
	ASSERT(lpl->lpl_ncpu);
	if (--lpl->lpl_ncpu == 0) {
		/*
		 * This was the last cpu in this lgroup for this partition,
		 * clear its bit in the partition's lgroup bitmask
		 */
		klgrpset_del(cp->cpu_part->cp_lgrpset, lpl->lpl_lgrpid);

		/* eliminate remaining lpl link pointers in cpu, lpl */
		lpl->lpl_cpus = cp->cpu_next_lpl = cp->cpu_prev_lpl = NULL;

		/*
		 * Prune the now-empty leaf (and any orphaned intermediate
		 * lpls) out of the partition's hierarchy.  Note this relies
		 * on cp_lgrpset having been updated above first.
		 */
		lpl_leaf_remove(leaf_lpl, cp->cpu_part);
	} else {

		/* unlink cpu from lists of cpus in lpl */
		cp->cpu_prev_lpl->cpu_next_lpl = cp->cpu_next_lpl;
		cp->cpu_next_lpl->cpu_prev_lpl = cp->cpu_prev_lpl;
		if (lpl->lpl_cpus == cp) {
			/* cp was the list head; advance to the next cpu */
			lpl->lpl_cpus = cp->cpu_next_lpl;
		}

		/*
		 * Update the cpu count in the lpls associated with parent
		 * lgroups.
		 */
		lpl_cpu_adjcnt(LPL_DECREMENT, cp);

	}
	/* clear cpu's lpl ptr when we're all done */
	cp->cpu_lpl = NULL;
}
27137c478bd9Sstevel@tonic-gate
27147c478bd9Sstevel@tonic-gate /*
27157c478bd9Sstevel@tonic-gate * Recompute load average for the specified partition/lgrp fragment.
27167c478bd9Sstevel@tonic-gate *
27177c478bd9Sstevel@tonic-gate * We rely on the fact that this routine is called from the clock thread
27187c478bd9Sstevel@tonic-gate * at a point before the clock thread can block (i.e. before its first
27197c478bd9Sstevel@tonic-gate * lock request). Since the clock thread can not be preempted (since it
27207c478bd9Sstevel@tonic-gate * runs at highest priority), we know that cpu partitions can not change
27217c478bd9Sstevel@tonic-gate * (since doing so would require either the repartition requester or the
27227c478bd9Sstevel@tonic-gate * cpu_pause thread to run on this cpu), so we can update the cpu's load
27237c478bd9Sstevel@tonic-gate * without grabbing cpu_lock.
27247c478bd9Sstevel@tonic-gate */
void
lgrp_loadavg(lpl_t *lpl, uint_t nrcpus, int ageflag)
{
	uint_t ncpu;
	int64_t old, new, f;

	/*
	 * 1 - exp(-1/(20 * ncpu)) << 13 = 400 for 1 cpu...
	 * (exponential-decay factors, indexed by cpu count)
	 */
	static short expval[] = {
	    0, 3196, 1618, 1083,
	    814, 652, 543, 466,
	    408, 363, 326, 297,
	    272, 251, 233, 218,
	    204, 192, 181, 172,
	    163, 155, 148, 142,
	    136, 130, 125, 121,
	    116, 112, 109, 105
	};

	/* ASSERT (called from clock level) */

	if ((lpl == NULL) ||	/* we're booting - this is easiest for now */
	    ((ncpu = lpl->lpl_ncpu) == 0)) {
		return;
	}

	/* Walk from the given lpl up through its ancestors to the root. */
	for (;;) {

		if (ncpu >= sizeof (expval) / sizeof (expval[0]))
			f = expval[1]/ncpu; /* good approx. for large ncpu */
		else
			f = expval[ncpu];

		/*
		 * Modify the load average atomically to avoid losing
		 * anticipatory load updates (see lgrp_move_thread()).
		 */
		if (ageflag) {
			/*
			 * We're supposed to both update and age the load.
			 * This happens 10 times/sec. per cpu. We do a
			 * little hoop-jumping to avoid integer overflow.
			 */
			int64_t q, r;

			do {
				old = new = lpl->lpl_loadavg;
				/* split old load into 16.16-style pieces */
				q = (old >> 16) << 7;
				r = (old & 0xffff) << 7;
				new += ((long long)(nrcpus - q) * f -
				    ((r * f) >> 16)) >> 7;

				/*
				 * Check for overflow
				 */
				if (new > LGRP_LOADAVG_MAX)
					new = LGRP_LOADAVG_MAX;
				else if (new < 0)
					new = 0;
				/*
				 * CAS retry: re-read and recompute if another
				 * cpu changed lpl_loadavg meanwhile.
				 * NOTE(review): the cast assumes lgrp_load_t
				 * is 32 bits wide — confirm against the
				 * lpl_loadavg declaration.
				 */
			} while (atomic_cas_32((lgrp_load_t *)&lpl->lpl_loadavg,
			    old, new) != old);
		} else {
			/*
			 * We're supposed to update the load, but not age it.
			 * This option is used to update the load (which either
			 * has already been aged in this 1/10 sec. interval or
			 * soon will be) to account for a remotely executing
			 * thread.
			 */
			do {
				old = new = lpl->lpl_loadavg;
				new += f;
				/*
				 * Check for overflow
				 * Underflow not possible here
				 */
				if (new < old)
					new = LGRP_LOADAVG_MAX;
			} while (atomic_cas_32((lgrp_load_t *)&lpl->lpl_loadavg,
			    old, new) != old);
		}

		/*
		 * Do the same for this lpl's parent
		 */
		if ((lpl = lpl->lpl_parent) == NULL)
			break;
		ncpu = lpl->lpl_ncpu;
	}
}
28167c478bd9Sstevel@tonic-gate
28177c478bd9Sstevel@tonic-gate /*
28187c478bd9Sstevel@tonic-gate * Initialize lpl topology in the target based on topology currently present in
28197c478bd9Sstevel@tonic-gate * lpl_bootstrap.
28207c478bd9Sstevel@tonic-gate *
28217c478bd9Sstevel@tonic-gate * lpl_topo_bootstrap is only called once from cpupart_initialize_default() to
28227c478bd9Sstevel@tonic-gate * initialize cp_default list of lpls. Up to this point all topology operations
28237c478bd9Sstevel@tonic-gate * were performed using lpl_bootstrap. Now cp_default has its own list of lpls
28247c478bd9Sstevel@tonic-gate * and all subsequent lpl operations should use it instead of lpl_bootstrap. The
28257c478bd9Sstevel@tonic-gate * `target' points to the list of lpls in cp_default and `size' is the size of
28267c478bd9Sstevel@tonic-gate * this list.
28277c478bd9Sstevel@tonic-gate *
 * This function walks the lpl topology in lpl_bootstrap and does four things:
28297c478bd9Sstevel@tonic-gate *
28307c478bd9Sstevel@tonic-gate * 1) Copies all fields from lpl_bootstrap to the target.
28317c478bd9Sstevel@tonic-gate *
28327c478bd9Sstevel@tonic-gate * 2) Sets CPU0 lpl pointer to the correct element of the target list.
28337c478bd9Sstevel@tonic-gate *
28347c478bd9Sstevel@tonic-gate * 3) Updates lpl_parent pointers to point to the lpls in the target list
28357c478bd9Sstevel@tonic-gate * instead of lpl_bootstrap.
28367c478bd9Sstevel@tonic-gate *
28377c478bd9Sstevel@tonic-gate * 4) Updates pointers in the resource list of the target to point to the lpls
28387c478bd9Sstevel@tonic-gate * in the target list instead of lpl_bootstrap.
28397c478bd9Sstevel@tonic-gate *
28407c478bd9Sstevel@tonic-gate * After lpl_topo_bootstrap() completes, target contains the same information
28417c478bd9Sstevel@tonic-gate * that would be present there if it were used during boot instead of
28427c478bd9Sstevel@tonic-gate * lpl_bootstrap. There is no need in information in lpl_bootstrap after this
28437c478bd9Sstevel@tonic-gate * and it is bzeroed.
28447c478bd9Sstevel@tonic-gate */
void
lpl_topo_bootstrap(lpl_t *target, int size)
{
	lpl_t	*lpl = lpl_bootstrap;
	lpl_t	*target_lpl = target;
	lpl_t	**rset;
	int	*id2rset;
	int	sz;
	int	howmany;
	int	id;
	int	i;

	/*
	 * The only target that should be passed here is cp_default lpl list.
	 */
	ASSERT(target == cp_default.cp_lgrploads);
	ASSERT(size == cp_default.cp_nlgrploads);
	ASSERT(!lgrp_topo_initialized);
	ASSERT(ncpus == 1);

	howmany = MIN(LPL_BOOTSTRAP_SIZE, size);
	for (i = 0; i < howmany; i++, lpl++, target_lpl++) {
		/*
		 * Copy all fields from lpl, except for the rset,
		 * lgrp id <=> rset mapping storage,
		 * and amount of storage.  Those three belong to the target
		 * lpl (they were set up when cp_default was created), so
		 * save them across the struct assignment and restore them.
		 */
		rset = target_lpl->lpl_rset;
		id2rset = target_lpl->lpl_id2rset;
		sz = target_lpl->lpl_rset_sz;

		*target_lpl = *lpl;

		target_lpl->lpl_rset_sz = sz;
		target_lpl->lpl_rset = rset;
		target_lpl->lpl_id2rset = id2rset;

		/*
		 * Substitute CPU0 lpl pointer with one relative to target.
		 */
		if (lpl->lpl_cpus == CPU) {
			ASSERT(CPU->cpu_lpl == lpl);
			CPU->cpu_lpl = target_lpl;
		}

		/*
		 * Substitute parent information with parent relative to
		 * target.  The parent is relocated by applying the offset
		 * of the parent within the bootstrap array to the target
		 * array's base address.
		 */
		if (lpl->lpl_parent != NULL)
			target_lpl->lpl_parent = (lpl_t *)
			    (((uintptr_t)lpl->lpl_parent -
			    (uintptr_t)lpl_bootstrap) +
			    (uintptr_t)target);

		/*
		 * Walk over resource set substituting pointers relative to
		 * lpl_bootstrap's rset to pointers relative to target's.
		 * Same base-plus-offset relocation as for lpl_parent above.
		 */
		ASSERT(lpl->lpl_nrset <= 1);

		for (id = 0; id < lpl->lpl_nrset; id++) {
			if (lpl->lpl_rset[id] != NULL) {
				target_lpl->lpl_rset[id] = (lpl_t *)
				    (((uintptr_t)lpl->lpl_rset[id] -
				    (uintptr_t)lpl_bootstrap) +
				    (uintptr_t)target);
			}
			target_lpl->lpl_id2rset[id] =
			    lpl->lpl_id2rset[id];
		}
	}

	/*
	 * Clean up the bootstrap lpls since we have switched over to the
	 * actual lpl array in the default cpu partition.
	 *
	 * We still need to keep one empty lpl around for newly starting
	 * slave CPUs to reference should they need to make it through the
	 * dispatcher prior to their lgrp/lpl initialization.
	 *
	 * The lpl related dispatcher code has been designed to work properly
	 * (and without extra checks) for this special case of a zero'ed
	 * bootstrap lpl. Such an lpl appears to the dispatcher as an lpl
	 * with lgrpid 0 and an empty resource set. Iteration over the rset
	 * array by the dispatcher is also NULL terminated for this reason.
	 *
	 * This provides the desired behaviour for an uninitialized CPU.
	 * It shouldn't see any other CPU to either dispatch to or steal
	 * from until it is properly initialized.
	 */
	bzero(lpl_bootstrap_list, sizeof (lpl_bootstrap_list));
	bzero(lpl_bootstrap_id2rset, sizeof (lpl_bootstrap_id2rset));
	bzero(lpl_bootstrap_rset, sizeof (lpl_bootstrap_rset));

	/* Re-hook the (now zeroed) storage for the one remaining empty lpl */
	lpl_bootstrap_list[0].lpl_rset = lpl_bootstrap_rset;
	lpl_bootstrap_list[0].lpl_id2rset = lpl_bootstrap_id2rset;
}
29427c478bd9Sstevel@tonic-gate
/*
 * If the lowest load among the lgroups a process' threads are currently
 * spread across is greater than lgrp_expand_proc_thresh, we'll consider
 * expanding the process to a new lgroup.
 */
#define	LGRP_EXPAND_PROC_THRESH_DEFAULT 62250
lgrp_load_t	lgrp_expand_proc_thresh = LGRP_EXPAND_PROC_THRESH_DEFAULT;

/* Scale the tunable by the number of CPUs in the lgroup being considered */
#define	LGRP_EXPAND_PROC_THRESH(ncpu) \
	((lgrp_expand_proc_thresh) / (ncpu))
29537c478bd9Sstevel@tonic-gate
/*
 * A process will be expanded to a new lgroup only if the difference between
 * the lowest load on the lgroups the process' threads are currently spread
 * across and the lowest load on the other lgroups in the process' partition
 * is greater than lgrp_expand_proc_diff.
 */
#define	LGRP_EXPAND_PROC_DIFF_DEFAULT 60000
lgrp_load_t	lgrp_expand_proc_diff = LGRP_EXPAND_PROC_DIFF_DEFAULT;

/* Scale the tunable by the number of CPUs in the lgroup being considered */
#define	LGRP_EXPAND_PROC_DIFF(ncpu) \
	((lgrp_expand_proc_diff) / (ncpu))
29657c478bd9Sstevel@tonic-gate
/*
 * The loadavg tolerance accounts for "noise" inherent in the load, which may
 * be present due to impreciseness of the load average decay algorithm.
 *
 * The default tolerance is lgrp_loadavg_max_effect. Note that the tunable
 * tolerance is scaled by the number of cpus in the lgroup just like
 * lgrp_loadavg_max_effect. For example, if lgrp_loadavg_tolerance = 0x10000,
 * and ncpu = 4, then lgrp_choose will consider differences in lgroup loads
 * of: 0x10000 / 4 => 0x4000 or greater to be significant.
 */
uint32_t	lgrp_loadavg_tolerance = LGRP_LOADAVG_THREAD_MAX;

/* Per-lgroup tolerance; used by lpl_pick() when comparing candidate lpls */
#define	LGRP_LOADAVG_TOLERANCE(ncpu)	\
	((lgrp_loadavg_tolerance) / ncpu)
29797c478bd9Sstevel@tonic-gate
/*
 * lgrp_choose() will choose root lgroup as home when lowest lgroup load
 * average is above this threshold
 */
uint32_t	lgrp_load_thresh = UINT32_MAX;	/* UINT32_MAX disables this */

/*
 * lgrp_choose() will try to skip any lgroups with less memory
 * than this free when choosing a home lgroup
 */
pgcnt_t	lgrp_mem_free_thresh = 0;	/* 0 disables the free-memory check */
29917c478bd9Sstevel@tonic-gate
/*
 * When choosing between similarly loaded lgroups, lgrp_choose() will pick
 * one based on one of the following policies:
 * - Random selection
 * - Pseudo round robin placement
 * - Longest time since a thread was last placed
 */
#define	LGRP_CHOOSE_RANDOM	1
#define	LGRP_CHOOSE_RR		2
#define	LGRP_CHOOSE_TIME	3

/* Default policy: break ties toward the lgroup least recently homed to */
int	lgrp_choose_policy = LGRP_CHOOSE_TIME;
30047c478bd9Sstevel@tonic-gate
30057c478bd9Sstevel@tonic-gate /*
30067c478bd9Sstevel@tonic-gate * Choose a suitable leaf lgroup for a kthread. The kthread is assumed not to
30077c478bd9Sstevel@tonic-gate * be bound to a CPU or processor set.
30087c478bd9Sstevel@tonic-gate *
30097c478bd9Sstevel@tonic-gate * Arguments:
30107c478bd9Sstevel@tonic-gate * t The thread
30117c478bd9Sstevel@tonic-gate * cpupart The partition the thread belongs to.
30127c478bd9Sstevel@tonic-gate *
30137c478bd9Sstevel@tonic-gate * NOTE: Should at least be called with the cpu_lock held, kernel preemption
30147c478bd9Sstevel@tonic-gate * disabled, or thread_lock held (at splhigh) to protect against the CPU
30157c478bd9Sstevel@tonic-gate * partitions changing out from under us and assumes that given thread is
30167c478bd9Sstevel@tonic-gate * protected. Also, called sometimes w/ cpus paused or kernel preemption
30177c478bd9Sstevel@tonic-gate * disabled, so don't grab any locks because we should never block under
30187c478bd9Sstevel@tonic-gate * those conditions.
30197c478bd9Sstevel@tonic-gate */
lpl_t *
lgrp_choose(kthread_t *t, cpupart_t *cpupart)
{
	lgrp_load_t	bestload, bestrload;
	int		lgrpid_offset, lgrp_count;
	lgrp_id_t	lgrpid, lgrpid_start;
	lpl_t		*lpl, *bestlpl, *bestrlpl;
	klgrpset_t	lgrpset;
	proc_t		*p;

	ASSERT(t != NULL);
	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    THREAD_LOCK_HELD(t));
	ASSERT(cpupart != NULL);

	p = t->t_procp;

	/* A process should always be in an active partition */
	ASSERT(!klgrpset_isempty(cpupart->cp_lgrpset));

	bestlpl = bestrlpl = NULL;
	bestload = bestrload = LGRP_LOADAVG_MAX;
	lgrpset = cpupart->cp_lgrpset;

	/*
	 * Pick the starting point for the search below.  The search visits
	 * every lgroup in the partition regardless; the starting point
	 * determines tie-breaking order and the last-resort fallback lgroup.
	 */
	switch (lgrp_choose_policy) {
	case LGRP_CHOOSE_RR:
		/*
		 * Round robin: start at the lgroup after the previous hint,
		 * wrapping past lgrp_alloc_max back to 0, skipping lgroups
		 * not in this partition.
		 */
		lgrpid = cpupart->cp_lgrp_hint;
		do {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;
		} while (!klgrpset_ismember(lgrpset, lgrpid));

		break;
	default:
	case LGRP_CHOOSE_TIME:
	case LGRP_CHOOSE_RANDOM:
		/*
		 * Pick a pseudo-random member of the partition's lgroup set,
		 * using low-order bits of the high-resolution clock as the
		 * randomness source.  lgrpid_offset is in [1, lgrp_count],
		 * so the unbounded for loop always terminates on a member.
		 */
		klgrpset_nlgrps(lgrpset, lgrp_count);
		lgrpid_offset =
		    (((ushort_t)(gethrtime() >> 4)) % lgrp_count) + 1;
		for (lgrpid = 0; ; lgrpid++) {
			if (klgrpset_ismember(lgrpset, lgrpid)) {
				if (--lgrpid_offset == 0)
					break;
			}
		}
		break;
	}

	lgrpid_start = lgrpid;

	DTRACE_PROBE2(lgrp_choose_start, lgrp_id_t, lgrpid_start,
	    lgrp_id_t, cpupart->cp_lgrp_hint);

	/*
	 * Use lgroup affinities (if any) to choose best lgroup
	 *
	 * NOTE: Assumes that thread is protected from going away and its
	 *	 lgroup affinities won't change (ie. p_lock, or
	 *	 thread_lock() being held and/or CPUs paused)
	 */
	if (t->t_lgrp_affinity) {
		lpl = lgrp_affinity_best(t, cpupart, lgrpid_start, B_FALSE);
		if (lpl != NULL)
			return (lpl);
	}

	ASSERT(klgrpset_ismember(lgrpset, lgrpid_start));

	/*
	 * Walk every lgroup in the partition once, starting at lgrpid_start,
	 * tracking two candidates:
	 *
	 * bestlpl/bestload	best leaf lgroup the process already spans
	 *			(any leaf qualifies for a brand-new process)
	 * bestrlpl/bestrload	best of the remaining leaf lgroups, used only
	 *			if expanding the process is justified below
	 */
	do {
		pgcnt_t	npgs;

		/*
		 * Skip any lgroups outside of thread's pset
		 */
		if (!klgrpset_ismember(lgrpset, lgrpid)) {
			if (++lgrpid > lgrp_alloc_max)
				lgrpid = 0;	/* wrap the search */
			continue;
		}

		/*
		 * Skip any non-leaf lgroups
		 *
		 * NOTE(review): unlike the other skip branches, this one does
		 * not advance lgrpid before the continue; it relies on
		 * cp_lgrpset containing only leaf lgroups.  If a non-leaf
		 * lgroup other than lgrpid_start were ever a member of the
		 * set, this loop would not terminate -- confirm.
		 */
		if (lgrp_table[lgrpid]->lgrp_childcnt != 0)
			continue;

		/*
		 * Skip any lgroups without enough free memory
		 * (when threshold set to nonzero positive value)
		 */
		if (lgrp_mem_free_thresh > 0) {
			npgs = lgrp_mem_size(lgrpid, LGRP_MEM_SIZE_FREE);
			if (npgs < lgrp_mem_free_thresh) {
				if (++lgrpid > lgrp_alloc_max)
					lgrpid = 0;	/* wrap the search */
				continue;
			}
		}

		lpl = &cpupart->cp_lgrploads[lgrpid];
		if (klgrpset_isempty(p->p_lgrpset) ||
		    klgrpset_ismember(p->p_lgrpset, lgrpid)) {
			/*
			 * Either this is a new process or the process already
			 * has threads on this lgrp, so this is a preferred
			 * lgroup for the thread.
			 */
			if (bestlpl == NULL ||
			    lpl_pick(lpl, bestlpl)) {
				bestload = lpl->lpl_loadavg;
				bestlpl = lpl;
			}
		} else {
			/*
			 * The process doesn't have any threads on this lgrp,
			 * but we're willing to consider this lgrp if the load
			 * difference is big enough to justify splitting up
			 * the process' threads.
			 */
			if (bestrlpl == NULL ||
			    lpl_pick(lpl, bestrlpl)) {
				bestrload = lpl->lpl_loadavg;
				bestrlpl = lpl;
			}
		}
		if (++lgrpid > lgrp_alloc_max)
			lgrpid = 0;	/* wrap the search */
	} while (lgrpid != lgrpid_start);

	/*
	 * Return root lgroup if threshold isn't set to maximum value and
	 * lowest lgroup load average more than a certain threshold
	 */
	if (lgrp_load_thresh != UINT32_MAX &&
	    bestload >= lgrp_load_thresh && bestrload >= lgrp_load_thresh)
		return (&cpupart->cp_lgrploads[lgrp_root->lgrp_id]);

	/*
	 * If all the lgroups over which the thread's process is spread are
	 * heavily loaded, or otherwise undesirable, we'll consider placing
	 * the thread on one of the other leaf lgroups in the thread's
	 * partition.
	 *
	 * When bestrlpl is NULL, bestrload is still LGRP_LOADAVG_MAX, so
	 * (bestrload < bestload) fails and bestrlpl is never dereferenced
	 * (short-circuit evaluation).
	 */
	if ((bestlpl == NULL) ||
	    ((bestload > LGRP_EXPAND_PROC_THRESH(bestlpl->lpl_ncpu)) &&
	    (bestrload < bestload) &&	/* paranoid about wraparound */
	    (bestrload + LGRP_EXPAND_PROC_DIFF(bestrlpl->lpl_ncpu) <
	    bestload))) {
		bestlpl = bestrlpl;
	}

	if (bestlpl == NULL) {
		/*
		 * No lgroup looked particularly good, but we still
		 * have to pick something. Go with the randomly selected
		 * legal lgroup we started with above.
		 */
		bestlpl = &cpupart->cp_lgrploads[lgrpid_start];
	}

	/* Record the choice as the hint for the next LGRP_CHOOSE_RR pass */
	cpupart->cp_lgrp_hint = bestlpl->lpl_lgrpid;
	bestlpl->lpl_homed_time = gethrtime_unscaled();

	ASSERT(bestlpl->lpl_ncpu > 0);
	return (bestlpl);
}
31867c478bd9Sstevel@tonic-gate
31877c478bd9Sstevel@tonic-gate /*
3188ab761399Sesaxe * Decide if lpl1 is a better candidate than lpl2 for lgrp homing.
3189ab761399Sesaxe * Returns non-zero if lpl1 is a better candidate, and 0 otherwise.
31907c478bd9Sstevel@tonic-gate */
31917c478bd9Sstevel@tonic-gate static int
lpl_pick(lpl_t * lpl1,lpl_t * lpl2)31927c478bd9Sstevel@tonic-gate lpl_pick(lpl_t *lpl1, lpl_t *lpl2)
31937c478bd9Sstevel@tonic-gate {
31947c478bd9Sstevel@tonic-gate lgrp_load_t l1, l2;
31957c478bd9Sstevel@tonic-gate lgrp_load_t tolerance = LGRP_LOADAVG_TOLERANCE(lpl1->lpl_ncpu);
31967c478bd9Sstevel@tonic-gate
31977c478bd9Sstevel@tonic-gate l1 = lpl1->lpl_loadavg;
31987c478bd9Sstevel@tonic-gate l2 = lpl2->lpl_loadavg;
31997c478bd9Sstevel@tonic-gate
32007c478bd9Sstevel@tonic-gate if ((l1 + tolerance < l2) && (l1 < l2)) {
32017c478bd9Sstevel@tonic-gate /* lpl1 is significantly less loaded than lpl2 */
32027c478bd9Sstevel@tonic-gate return (1);
32037c478bd9Sstevel@tonic-gate }
32047c478bd9Sstevel@tonic-gate
32057c478bd9Sstevel@tonic-gate if (lgrp_choose_policy == LGRP_CHOOSE_TIME &&
32067c478bd9Sstevel@tonic-gate l1 + tolerance >= l2 && l1 < l2 &&
32077c478bd9Sstevel@tonic-gate lpl1->lpl_homed_time < lpl2->lpl_homed_time) {
32087c478bd9Sstevel@tonic-gate /*
32097c478bd9Sstevel@tonic-gate * lpl1's load is within the tolerance of lpl2. We're
32107c478bd9Sstevel@tonic-gate * willing to consider it be to better however if
32117c478bd9Sstevel@tonic-gate * it has been longer since we last homed a thread there
32127c478bd9Sstevel@tonic-gate */
32137c478bd9Sstevel@tonic-gate return (1);
32147c478bd9Sstevel@tonic-gate }
32157c478bd9Sstevel@tonic-gate
32167c478bd9Sstevel@tonic-gate return (0);
32177c478bd9Sstevel@tonic-gate }
32187c478bd9Sstevel@tonic-gate
/*
 * lgrp_trthr_moves counts the number of times the main thread (t_tid = 1) of a
 * process that uses text replication changed home lgrp. This info is used by
 * the segvn asynchronous thread to detect if it needs to recheck what lgrps
 * should be used for text replication.
 */
32252cb27123Saguzovsk static uint64_t lgrp_trthr_moves = 0;
32262cb27123Saguzovsk
/*
 * Return the current count of home-lgroup migrations by the main threads of
 * text-replicating processes (see lgrp_trthr_moves above).
 */
uint64_t
lgrp_get_trthr_migrations(void)
{
	return (lgrp_trthr_moves);
}
32322cb27123Saguzovsk
/*
 * Atomically bump lgrp_trthr_moves by incr.  Called (with incr == 1) from
 * lgrp_move_thread() when the main thread of a text-replicating process is
 * rehomed to a different lgroup.
 */
void
lgrp_update_trthr_migrations(uint64_t incr)
{
	atomic_add_64(&lgrp_trthr_moves, incr);
}
32382cb27123Saguzovsk
/*
 * An LWP is expected to be assigned to an lgroup for at least this long
 * for its anticipatory load to be justified. NOTE that this value should
 * not be set extremely huge (say, larger than 100 years), to avoid problems
 * with overflow in the calculation that uses it.
 *
 * lgrp_move_thread() backs the anticipatory load out again if the thread
 * leaves its lgroup before this much time has passed.
 */
#define	LGRP_MIN_NSEC	(NANOSEC / 10)		/* 1/10 of a second */
hrtime_t	lgrp_min_nsec = LGRP_MIN_NSEC;
32477c478bd9Sstevel@tonic-gate
32487c478bd9Sstevel@tonic-gate /*
32497c478bd9Sstevel@tonic-gate * Routine to change a thread's lgroup affiliation. This routine updates
32507c478bd9Sstevel@tonic-gate * the thread's kthread_t struct and its process' proc_t struct to note the
32517c478bd9Sstevel@tonic-gate * thread's new lgroup affiliation, and its lgroup affinities.
32527c478bd9Sstevel@tonic-gate *
32537c478bd9Sstevel@tonic-gate * Note that this is the only routine that modifies a thread's t_lpl field,
32547c478bd9Sstevel@tonic-gate * and that adds in or removes anticipatory load.
32557c478bd9Sstevel@tonic-gate *
32567c478bd9Sstevel@tonic-gate * If the thread is exiting, newlpl is NULL.
32577c478bd9Sstevel@tonic-gate *
32587c478bd9Sstevel@tonic-gate * Locking:
32597c478bd9Sstevel@tonic-gate * The following lock must be held on entry:
32607c478bd9Sstevel@tonic-gate * cpu_lock, kpreempt_disable(), or thread_lock -- to assure t's new lgrp
32617c478bd9Sstevel@tonic-gate * doesn't get removed from t's partition
32627c478bd9Sstevel@tonic-gate *
32637c478bd9Sstevel@tonic-gate * This routine is not allowed to grab any locks, since it may be called
32647c478bd9Sstevel@tonic-gate * with cpus paused (such as from cpu_offline).
32657c478bd9Sstevel@tonic-gate */
void
lgrp_move_thread(kthread_t *t, lpl_t *newlpl, int do_lgrpset_delete)
{
	proc_t		*p;
	lpl_t		*lpl, *oldlpl;
	lgrp_id_t	oldid;
	kthread_t	*tp;
	uint_t		ncpu;
	lgrp_load_t	old, new;

	ASSERT(t);
	ASSERT(MUTEX_HELD(&cpu_lock) || curthread->t_preempt > 0 ||
	    THREAD_LOCK_HELD(t));

	/*
	 * If not changing lpls, just return
	 */
	if ((oldlpl = t->t_lpl) == newlpl)
		return;

	/*
	 * Make sure the thread's lwp hasn't exited (if so, this thread is now
	 * associated with process 0 rather than with its original process).
	 */
	if (t->t_proc_flag & TP_LWPEXIT) {
		if (newlpl != NULL) {
			t->t_lpl = newlpl;
		}
		return;
	}

	p = ttoproc(t);

	/*
	 * If the thread had a previous lgroup, update its process' p_lgrpset
	 * to account for it being moved from its old lgroup.
	 */
	if ((oldlpl != NULL) &&	/* thread had a previous lgroup */
	    (p->p_tlist != NULL)) {
		oldid = oldlpl->lpl_lgrpid;

		/* Count this as a migration unless the thread is exiting */
		if (newlpl != NULL)
			lgrp_stat_add(oldid, LGRP_NUM_MIGR, 1);

		if ((do_lgrpset_delete) &&
		    (klgrpset_ismember(p->p_lgrpset, oldid))) {
			for (tp = p->p_tlist->t_forw; ; tp = tp->t_forw) {
				/*
				 * Check if a thread other than the thread
				 * that's moving is assigned to the same
				 * lgroup as the thread that's moving. Note
				 * that we have to compare lgroup IDs, rather
				 * than simply comparing t_lpl's, since the
				 * threads may belong to different partitions
				 * but be assigned to the same lgroup.
				 */
				ASSERT(tp->t_lpl != NULL);

				if ((tp != t) &&
				    (tp->t_lpl->lpl_lgrpid == oldid)) {
					/*
					 * Another thread is assigned to the
					 * same lgroup as the thread that's
					 * moving, p_lgrpset doesn't change.
					 */
					break;
				} else if (tp == p->p_tlist) {
					/*
					 * No other thread is assigned to the
					 * same lgroup as the exiting thread,
					 * clear the lgroup's bit in p_lgrpset.
					 */
					klgrpset_del(p->p_lgrpset, oldid);
					break;
				}
			}
		}

		/*
		 * If this thread was assigned to its old lgroup for such a
		 * short amount of time that the anticipatory load that was
		 * added on its behalf has aged very little, remove that
		 * anticipatory load.
		 */
		if ((t->t_anttime + lgrp_min_nsec > gethrtime()) &&
		    ((ncpu = oldlpl->lpl_ncpu) > 0)) {
			lpl = oldlpl;
			/*
			 * Walk from the old leaf lpl up through its parents,
			 * removing the anticipatory load from each level via
			 * a CAS retry loop (lockless; may race with load
			 * decay elsewhere).
			 */
			for (;;) {
				do {
					old = new = lpl->lpl_loadavg;
					new -= LGRP_LOADAVG_MAX_EFFECT(ncpu);
					if (new > old) {
						/*
						 * this can happen if the load
						 * average was aged since we
						 * added in the anticipatory
						 * load
						 */
						new = 0;
					}
				} while (atomic_cas_32(
				    (lgrp_load_t *)&lpl->lpl_loadavg, old,
				    new) != old);

				lpl = lpl->lpl_parent;
				if (lpl == NULL)
					break;

				ncpu = lpl->lpl_ncpu;
				ASSERT(ncpu > 0);
			}
		}
	}
	/*
	 * If the thread has a new lgroup (i.e. it's not exiting), update its
	 * t_lpl and its process' p_lgrpset, and apply an anticipatory load
	 * to its new lgroup to account for its move to its new lgroup.
	 */
	if (newlpl != NULL) {
		/*
		 * This thread is moving to a new lgroup
		 */
		t->t_lpl = newlpl;
		if (t->t_tid == 1 && p->p_t1_lgrpid != newlpl->lpl_lgrpid) {
			p->p_t1_lgrpid = newlpl->lpl_lgrpid;
			/*
			 * Make the new p_t1_lgrpid visible before any
			 * lockless reader compares it against p_tr_lgrpid.
			 */
			membar_producer();
			if (p->p_tr_lgrpid != LGRP_NONE &&
			    p->p_tr_lgrpid != p->p_t1_lgrpid) {
				lgrp_update_trthr_migrations(1);
			}
		}

		/*
		 * Reflect move in load average of new lgroup
		 * unless it is root lgroup
		 */
		if (lgrp_table[newlpl->lpl_lgrpid] == lgrp_root)
			return;

		if (!klgrpset_ismember(p->p_lgrpset, newlpl->lpl_lgrpid)) {
			klgrpset_add(p->p_lgrpset, newlpl->lpl_lgrpid);
		}

		/*
		 * It'll take some time for the load on the new lgroup
		 * to reflect this thread's placement on it. We'd
		 * like not, however, to have all threads between now
		 * and then also piling on to this lgroup. To avoid
		 * this pileup, we anticipate the load this thread
		 * will generate on its new lgroup. The goal is to
		 * make the lgroup's load appear as though the thread
		 * had been there all along. We're very conservative
		 * in calculating this anticipatory load, we assume
		 * the worst case (100% CPU-bound thread). This
		 * may be modified in the future to be more accurate.
		 */
		lpl = newlpl;
		for (;;) {
			ncpu = lpl->lpl_ncpu;
			ASSERT(ncpu > 0);
			do {
				old = new = lpl->lpl_loadavg;
				new += LGRP_LOADAVG_MAX_EFFECT(ncpu);
				/*
				 * Check for overflow
				 * Underflow not possible here
				 */
				if (new < old)
					new = UINT32_MAX;
			} while (atomic_cas_32((lgrp_load_t *)&lpl->lpl_loadavg,
			    old, new) != old);

			lpl = lpl->lpl_parent;
			if (lpl == NULL)
				break;
		}
		/* Timestamp used above to decide when to back this load out */
		t->t_anttime = gethrtime();
	}
}
34457c478bd9Sstevel@tonic-gate
34467c478bd9Sstevel@tonic-gate /*
34477c478bd9Sstevel@tonic-gate * Return lgroup memory allocation policy given advice from madvise(3C)
34487c478bd9Sstevel@tonic-gate */
34497c478bd9Sstevel@tonic-gate lgrp_mem_policy_t
lgrp_madv_to_policy(uchar_t advice,size_t size,int type)34507c478bd9Sstevel@tonic-gate lgrp_madv_to_policy(uchar_t advice, size_t size, int type)
34517c478bd9Sstevel@tonic-gate {
34527c478bd9Sstevel@tonic-gate switch (advice) {
34537c478bd9Sstevel@tonic-gate case MADV_ACCESS_LWP:
34547c478bd9Sstevel@tonic-gate return (LGRP_MEM_POLICY_NEXT);
34557c478bd9Sstevel@tonic-gate case MADV_ACCESS_MANY:
34567c478bd9Sstevel@tonic-gate return (LGRP_MEM_POLICY_RANDOM);
34577c478bd9Sstevel@tonic-gate default:
34587c478bd9Sstevel@tonic-gate return (lgrp_mem_policy_default(size, type));
34597c478bd9Sstevel@tonic-gate }
34607c478bd9Sstevel@tonic-gate }
34617c478bd9Sstevel@tonic-gate
34627c478bd9Sstevel@tonic-gate /*
34637c478bd9Sstevel@tonic-gate * Figure out default policy
34647c478bd9Sstevel@tonic-gate */
34657c478bd9Sstevel@tonic-gate lgrp_mem_policy_t
lgrp_mem_policy_default(size_t size,int type)34667c478bd9Sstevel@tonic-gate lgrp_mem_policy_default(size_t size, int type)
34677c478bd9Sstevel@tonic-gate {
34687c478bd9Sstevel@tonic-gate cpupart_t *cp;
34697c478bd9Sstevel@tonic-gate lgrp_mem_policy_t policy;
34707c478bd9Sstevel@tonic-gate size_t pset_mem_size;
34717c478bd9Sstevel@tonic-gate
34727c478bd9Sstevel@tonic-gate /*
34737c478bd9Sstevel@tonic-gate * Randomly allocate memory across lgroups for shared memory
34747c478bd9Sstevel@tonic-gate * beyond a certain threshold
34757c478bd9Sstevel@tonic-gate */
34767c478bd9Sstevel@tonic-gate if ((type != MAP_SHARED && size > lgrp_privm_random_thresh) ||
34777c478bd9Sstevel@tonic-gate (type == MAP_SHARED && size > lgrp_shm_random_thresh)) {
34787c478bd9Sstevel@tonic-gate /*
34797c478bd9Sstevel@tonic-gate * Get total memory size of current thread's pset
34807c478bd9Sstevel@tonic-gate */
34817c478bd9Sstevel@tonic-gate kpreempt_disable();
34827c478bd9Sstevel@tonic-gate cp = curthread->t_cpupart;
34837c478bd9Sstevel@tonic-gate klgrpset_totalsize(cp->cp_lgrpset, pset_mem_size);
34847c478bd9Sstevel@tonic-gate kpreempt_enable();
34857c478bd9Sstevel@tonic-gate
34867c478bd9Sstevel@tonic-gate /*
34877c478bd9Sstevel@tonic-gate * Choose policy to randomly allocate memory across
34887c478bd9Sstevel@tonic-gate * lgroups in pset if it will fit and is not default
34897c478bd9Sstevel@tonic-gate * partition. Otherwise, allocate memory randomly
34907c478bd9Sstevel@tonic-gate * across machine.
34917c478bd9Sstevel@tonic-gate */
34927c478bd9Sstevel@tonic-gate if (lgrp_mem_pset_aware && size < pset_mem_size)
34937c478bd9Sstevel@tonic-gate policy = LGRP_MEM_POLICY_RANDOM_PSET;
34947c478bd9Sstevel@tonic-gate else
34957c478bd9Sstevel@tonic-gate policy = LGRP_MEM_POLICY_RANDOM;
34967c478bd9Sstevel@tonic-gate } else
34977c478bd9Sstevel@tonic-gate /*
34987c478bd9Sstevel@tonic-gate * Apply default policy for private memory and
34997c478bd9Sstevel@tonic-gate * shared memory under the respective random
35007c478bd9Sstevel@tonic-gate * threshold.
35017c478bd9Sstevel@tonic-gate */
35027c478bd9Sstevel@tonic-gate policy = lgrp_mem_default_policy;
35037c478bd9Sstevel@tonic-gate
35047c478bd9Sstevel@tonic-gate return (policy);
35057c478bd9Sstevel@tonic-gate }
35067c478bd9Sstevel@tonic-gate
35077c478bd9Sstevel@tonic-gate /*
35087c478bd9Sstevel@tonic-gate * Get memory allocation policy for this segment
35097c478bd9Sstevel@tonic-gate */
35107c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *
lgrp_mem_policy_get(struct seg * seg,caddr_t vaddr)35117c478bd9Sstevel@tonic-gate lgrp_mem_policy_get(struct seg *seg, caddr_t vaddr)
35127c478bd9Sstevel@tonic-gate {
35137c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *policy_info;
35147c478bd9Sstevel@tonic-gate extern struct seg_ops segspt_ops;
35157c478bd9Sstevel@tonic-gate extern struct seg_ops segspt_shmops;
35167c478bd9Sstevel@tonic-gate
35177c478bd9Sstevel@tonic-gate /*
35187c478bd9Sstevel@tonic-gate * This is for binary compatibility to protect against third party
35197c478bd9Sstevel@tonic-gate * segment drivers which haven't recompiled to allow for
35207c478bd9Sstevel@tonic-gate * SEGOP_GETPOLICY()
35217c478bd9Sstevel@tonic-gate */
35227c478bd9Sstevel@tonic-gate if (seg->s_ops != &segvn_ops && seg->s_ops != &segspt_ops &&
35237c478bd9Sstevel@tonic-gate seg->s_ops != &segspt_shmops)
35247c478bd9Sstevel@tonic-gate return (NULL);
35257c478bd9Sstevel@tonic-gate
35267c478bd9Sstevel@tonic-gate policy_info = NULL;
35277c478bd9Sstevel@tonic-gate if (seg->s_ops->getpolicy != NULL)
35287c478bd9Sstevel@tonic-gate policy_info = SEGOP_GETPOLICY(seg, vaddr);
35297c478bd9Sstevel@tonic-gate
35307c478bd9Sstevel@tonic-gate return (policy_info);
35317c478bd9Sstevel@tonic-gate }
35327c478bd9Sstevel@tonic-gate
35337c478bd9Sstevel@tonic-gate /*
35347c478bd9Sstevel@tonic-gate * Set policy for allocating private memory given desired policy, policy info,
35357c478bd9Sstevel@tonic-gate * size in bytes of memory that policy is being applied.
35367c478bd9Sstevel@tonic-gate * Return 0 if policy wasn't set already and 1 if policy was set already
35377c478bd9Sstevel@tonic-gate */
35387c478bd9Sstevel@tonic-gate int
lgrp_privm_policy_set(lgrp_mem_policy_t policy,lgrp_mem_policy_info_t * policy_info,size_t size)35397c478bd9Sstevel@tonic-gate lgrp_privm_policy_set(lgrp_mem_policy_t policy,
35407c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *policy_info, size_t size)
35417c478bd9Sstevel@tonic-gate {
35427c478bd9Sstevel@tonic-gate
35437c478bd9Sstevel@tonic-gate ASSERT(policy_info != NULL);
35447c478bd9Sstevel@tonic-gate
35457c478bd9Sstevel@tonic-gate if (policy == LGRP_MEM_POLICY_DEFAULT)
35467c478bd9Sstevel@tonic-gate policy = lgrp_mem_policy_default(size, MAP_PRIVATE);
35477c478bd9Sstevel@tonic-gate
35487c478bd9Sstevel@tonic-gate /*
35497c478bd9Sstevel@tonic-gate * Policy set already?
35507c478bd9Sstevel@tonic-gate */
35517c478bd9Sstevel@tonic-gate if (policy == policy_info->mem_policy)
35527c478bd9Sstevel@tonic-gate return (1);
35537c478bd9Sstevel@tonic-gate
35547c478bd9Sstevel@tonic-gate /*
35557c478bd9Sstevel@tonic-gate * Set policy
35567c478bd9Sstevel@tonic-gate */
35577c478bd9Sstevel@tonic-gate policy_info->mem_policy = policy;
35582cb27123Saguzovsk policy_info->mem_lgrpid = LGRP_NONE;
35597c478bd9Sstevel@tonic-gate
35607c478bd9Sstevel@tonic-gate return (0);
35617c478bd9Sstevel@tonic-gate }
35627c478bd9Sstevel@tonic-gate
35637c478bd9Sstevel@tonic-gate
35647c478bd9Sstevel@tonic-gate /*
35657c478bd9Sstevel@tonic-gate * Get shared memory allocation policy with given tree and offset
35667c478bd9Sstevel@tonic-gate */
35677c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *
lgrp_shm_policy_get(struct anon_map * amp,ulong_t anon_index,vnode_t * vp,u_offset_t vn_off)35687c478bd9Sstevel@tonic-gate lgrp_shm_policy_get(struct anon_map *amp, ulong_t anon_index, vnode_t *vp,
35697c478bd9Sstevel@tonic-gate u_offset_t vn_off)
35707c478bd9Sstevel@tonic-gate {
35717c478bd9Sstevel@tonic-gate u_offset_t off;
35727c478bd9Sstevel@tonic-gate lgrp_mem_policy_info_t *policy_info;
35737c478bd9Sstevel@tonic-gate lgrp_shm_policy_seg_t *policy_seg;
35747c478bd9Sstevel@tonic-gate lgrp_shm_locality_t *shm_locality;
35757c478bd9Sstevel@tonic-gate avl_tree_t *tree;
35767c478bd9Sstevel@tonic-gate avl_index_t where;
35777c478bd9Sstevel@tonic-gate
3578584b574aSToomas Soome shm_locality = NULL;
3579584b574aSToomas Soome tree = NULL;
35807c478bd9Sstevel@tonic-gate /*
35817c478bd9Sstevel@tonic-gate * Get policy segment tree from anon_map or vnode and use specified
35827c478bd9Sstevel@tonic-gate * anon index or vnode offset as offset
35837c478bd9Sstevel@tonic-gate *
35847c478bd9Sstevel@tonic-gate * Assume that no lock needs to be held on anon_map or vnode, since
35857c478bd9Sstevel@tonic-gate * they should be protected by their reference count which must be
35867c478bd9Sstevel@tonic-gate * nonzero for an existing segment
35877c478bd9Sstevel@tonic-gate */
35887c478bd9Sstevel@tonic-gate if (amp) {
35897c478bd9Sstevel@tonic-gate ASSERT(amp->refcnt != 0);
35907c478bd9Sstevel@tonic-gate shm_locality = amp->locality;
35917c478bd9Sstevel@tonic-gate if (shm_locality == NULL)
35927c478bd9Sstevel@tonic-gate return (NULL);
35937c478bd9Sstevel@tonic-gate tree = shm_locality->loc_tree;
35947c478bd9Sstevel@tonic-gate off = ptob(anon_index);
35957c478bd9Sstevel@tonic-gate } else if (vp) {
35967c478bd9Sstevel@tonic-gate shm_locality = vp->v_locality;
35977c478bd9Sstevel@tonic-gate if (shm_locality == NULL)
35987c478bd9Sstevel@tonic-gate return (NULL);
35997c478bd9Sstevel@tonic-gate ASSERT(shm_locality->loc_count != 0);
36007c478bd9Sstevel@tonic-gate tree = shm_locality->loc_tree;
36017c478bd9Sstevel@tonic-gate off = vn_off;
36027c478bd9Sstevel@tonic-gate }
36037c478bd9Sstevel@tonic-gate
36047c478bd9Sstevel@tonic-gate if (tree == NULL)
36057c478bd9Sstevel@tonic-gate return (NULL);
36067c478bd9Sstevel@tonic-gate
36077c478bd9Sstevel@tonic-gate /*
36087c478bd9Sstevel@tonic-gate * Lookup policy segment for offset into shared object and return
36097c478bd9Sstevel@tonic-gate * policy info
36107c478bd9Sstevel@tonic-gate */
36117c478bd9Sstevel@tonic-gate rw_enter(&shm_locality->loc_lock, RW_READER);
36127c478bd9Sstevel@tonic-gate policy_info = NULL;
36137c478bd9Sstevel@tonic-gate policy_seg = avl_find(tree, &off, &where);
36147c478bd9Sstevel@tonic-gate if (policy_seg)
36157c478bd9Sstevel@tonic-gate policy_info = &policy_seg->shm_policy;
36167c478bd9Sstevel@tonic-gate rw_exit(&shm_locality->loc_lock);
36177c478bd9Sstevel@tonic-gate
36187c478bd9Sstevel@tonic-gate return (policy_info);
36197c478bd9Sstevel@tonic-gate }
36207c478bd9Sstevel@tonic-gate
/*
 * Default memory allocation policy for kernel segmap pages: spread them
 * randomly across the lgroups with memory.
 */
lgrp_mem_policy_t lgrp_segmap_default_policy = LGRP_MEM_POLICY_RANDOM;
3625611ffe8aSesaxe
/*
 * Return lgroup to use for allocating memory
 * given the segment and address
 *
 * There isn't any mutual exclusion that exists between calls
 * to this routine and DR, so this routine and whomever calls it
 * should be mindful of the possibility that the lgrp returned
 * may be deleted. If this happens, dereferences of the lgrp
 * pointer will still be safe, but the resources in the lgrp will
 * be gone, and LGRP_EXISTS() will no longer be true.
 */
lgrp_t *
lgrp_mem_choose(struct seg *seg, caddr_t vaddr, size_t pgsz)
{
	int			i;
	lgrp_t			*lgrp;
	klgrpset_t		lgrpset;
	int			lgrps_spanned;
	unsigned long		off;
	lgrp_mem_policy_t	policy;
	lgrp_mem_policy_info_t	*policy_info;
	ushort_t		random;
	int			stat = 0;
	extern struct seg	*segkmap;

	/*
	 * Just return null if the lgrp framework hasn't finished
	 * initializing or if this is a UMA machine.
	 */
	if (nlgrps == 1 || !lgrp_initialized)
		return (lgrp_root);

	/*
	 * Get memory allocation policy for this segment
	 */
	policy = lgrp_mem_default_policy;
	if (seg != NULL) {
		if (seg->s_as == &kas) {
			/*
			 * Kernel segments: segmap gets its own default
			 * policy, and per-process random policies make no
			 * sense for kernel memory, so degrade them to
			 * plain random.
			 */
			if (seg == segkmap)
				policy = lgrp_segmap_default_policy;
			if (policy == LGRP_MEM_POLICY_RANDOM_PROC ||
			    policy == LGRP_MEM_POLICY_RANDOM_PSET)
				policy = LGRP_MEM_POLICY_RANDOM;
		} else {
			policy_info = lgrp_mem_policy_get(seg, vaddr);
			if (policy_info != NULL) {
				policy = policy_info->mem_policy;
				if (policy == LGRP_MEM_POLICY_NEXT_SEG) {
					/*
					 * NEXT_SEG names a specific lgroup
					 * in the policy info; use it directly
					 * if it still exists, otherwise fall
					 * back to next-touch.
					 */
					lgrp_id_t id = policy_info->mem_lgrpid;
					ASSERT(id != LGRP_NONE);
					ASSERT(id < NLGRPS_MAX);
					lgrp = lgrp_table[id];
					if (!LGRP_EXISTS(lgrp)) {
						policy = LGRP_MEM_POLICY_NEXT;
					} else {
						lgrp_stat_add(id,
						    LGRP_NUM_NEXT_SEG, 1);
						return (lgrp);
					}
				}
			}
		}
	}
	lgrpset = 0;

	/*
	 * Initialize lgroup to home by default
	 */
	lgrp = lgrp_home_lgrp();

	/*
	 * When homing threads on root lgrp, override default memory
	 * allocation policies with root lgroup memory allocation policy
	 */
	if (lgrp == lgrp_root)
		policy = lgrp_mem_policy_root;

	/*
	 * Implement policy
	 */
	switch (policy) {
	case LGRP_MEM_POLICY_NEXT_CPU:

		/*
		 * Return lgroup of current CPU which faulted on memory
		 * If the CPU isn't currently in an lgrp, then opt to
		 * allocate from the root.
		 *
		 * Kernel preemption needs to be disabled here to prevent
		 * the current CPU from going away before lgrp is found.
		 */
		if (LGRP_CPU_HAS_NO_LGRP(CPU)) {
			lgrp = lgrp_root;
		} else {
			kpreempt_disable();
			lgrp = lgrp_cpu_to_lgrp(CPU);
			kpreempt_enable();
		}
		break;

	case LGRP_MEM_POLICY_NEXT:
	case LGRP_MEM_POLICY_DEFAULT:
	default:

		/*
		 * Just return current thread's home lgroup
		 * for default policy (next touch)
		 * If the thread is homed to the root,
		 * then the default policy is random across lgroups.
		 * Fallthrough to the random case.
		 */
		if (lgrp != lgrp_root) {
			if (policy == LGRP_MEM_POLICY_NEXT)
				lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_NEXT, 1);
			else
				lgrp_stat_add(lgrp->lgrp_id,
				    LGRP_NUM_DEFAULT, 1);
			break;
		}
		/* FALLTHROUGH */
	case LGRP_MEM_POLICY_RANDOM:

		/*
		 * Return a random leaf lgroup with memory
		 */
		lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM];
		/*
		 * Count how many lgroups are spanned
		 */
		klgrpset_nlgrps(lgrpset, lgrps_spanned);

		/*
		 * There may be no memnodes in the root lgroup during DR copy
		 * rename on a system with only two boards (memnodes)
		 * configured. In this case just return the root lgrp.
		 */
		if (lgrps_spanned == 0) {
			lgrp = lgrp_root;
			break;
		}

		/*
		 * Pick a random offset within lgroups spanned
		 * and return lgroup at that offset
		 */
		random = (ushort_t)gethrtime() >> 4;
		off = random % lgrps_spanned;
		ASSERT(off <= lgrp_alloc_max);

		/*
		 * Walk the set, counting "off" down to the chosen member.
		 */
		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (!klgrpset_ismember(lgrpset, i))
				continue;
			if (off)
				off--;
			else {
				lgrp = lgrp_table[i];
				lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_RANDOM,
				    1);
				break;
			}
		}
		break;

	case LGRP_MEM_POLICY_RANDOM_PROC:

		/*
		 * Grab copy of bitmask of lgroups spanned by
		 * this process
		 */
		klgrpset_copy(lgrpset, curproc->p_lgrpset);
		stat = LGRP_NUM_RANDOM_PROC;

		/* FALLTHROUGH */
	case LGRP_MEM_POLICY_RANDOM_PSET:

		if (!stat)
			stat = LGRP_NUM_RANDOM_PSET;

		if (klgrpset_isempty(lgrpset)) {
			/*
			 * Grab copy of bitmask of lgroups spanned by
			 * this processor set
			 */
			kpreempt_disable();
			klgrpset_copy(lgrpset,
			    curthread->t_cpupart->cp_lgrpset);
			kpreempt_enable();
		}

		/*
		 * Count how many lgroups are spanned
		 */
		klgrpset_nlgrps(lgrpset, lgrps_spanned);
		ASSERT(lgrps_spanned <= nlgrps);

		/*
		 * Probably lgrps_spanned should be always non-zero, but to be
		 * on the safe side we return lgrp_root if it is empty.
		 */
		if (lgrps_spanned == 0) {
			lgrp = lgrp_root;
			break;
		}

		/*
		 * Pick a random offset within lgroups spanned
		 * and return lgroup at that offset
		 */
		random = (ushort_t)gethrtime() >> 4;
		off = random % lgrps_spanned;
		ASSERT(off <= lgrp_alloc_max);

		for (i = 0; i <= lgrp_alloc_max; i++) {
			if (!klgrpset_ismember(lgrpset, i))
				continue;
			if (off)
				off--;
			else {
				lgrp = lgrp_table[i];
				lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_RANDOM,
				    1);
				break;
			}
		}
		break;

	case LGRP_MEM_POLICY_ROUNDROBIN:

		/*
		 * Use offset within segment to determine
		 * offset from home lgroup to choose for
		 * next lgroup to allocate memory from
		 */
		off = ((unsigned long)(vaddr - seg->s_base) / pgsz) %
		    (lgrp_alloc_max + 1);

		kpreempt_disable();
		lgrpset = lgrp_root->lgrp_set[LGRP_RSRC_MEM];
		i = lgrp->lgrp_id;
		kpreempt_enable();

		while (off > 0) {
			i = (i + 1) % (lgrp_alloc_max + 1);
			lgrp = lgrp_table[i];
			if (klgrpset_ismember(lgrpset, i))
				off--;
		}
		lgrp_stat_add(lgrp->lgrp_id, LGRP_NUM_ROUNDROBIN, 1);

		break;
	}

	ASSERT(lgrp != NULL);
	return (lgrp);
}
38817c478bd9Sstevel@tonic-gate
38827c478bd9Sstevel@tonic-gate /*
38837c478bd9Sstevel@tonic-gate * Return the number of pages in an lgroup
38847c478bd9Sstevel@tonic-gate *
38857c478bd9Sstevel@tonic-gate * NOTE: NUMA test (numat) driver uses this, so changing arguments or semantics
38867c478bd9Sstevel@tonic-gate * could cause tests that rely on the numat driver to fail....
38877c478bd9Sstevel@tonic-gate */
38887c478bd9Sstevel@tonic-gate pgcnt_t
lgrp_mem_size(lgrp_id_t lgrpid,lgrp_mem_query_t query)38897c478bd9Sstevel@tonic-gate lgrp_mem_size(lgrp_id_t lgrpid, lgrp_mem_query_t query)
38907c478bd9Sstevel@tonic-gate {
38917c478bd9Sstevel@tonic-gate lgrp_t *lgrp;
38927c478bd9Sstevel@tonic-gate
38937c478bd9Sstevel@tonic-gate lgrp = lgrp_table[lgrpid];
38947c478bd9Sstevel@tonic-gate if (!LGRP_EXISTS(lgrp) ||
38957c478bd9Sstevel@tonic-gate klgrpset_isempty(lgrp->lgrp_set[LGRP_RSRC_MEM]) ||
38967c478bd9Sstevel@tonic-gate !klgrpset_ismember(lgrp->lgrp_set[LGRP_RSRC_MEM], lgrpid))
38977c478bd9Sstevel@tonic-gate return (0);
38987c478bd9Sstevel@tonic-gate
38997c478bd9Sstevel@tonic-gate return (lgrp_plat_mem_size(lgrp->lgrp_plathand, query));
39007c478bd9Sstevel@tonic-gate }
39017c478bd9Sstevel@tonic-gate
39027c478bd9Sstevel@tonic-gate /*
39037c478bd9Sstevel@tonic-gate * Initialize lgroup shared memory allocation policy support
39047c478bd9Sstevel@tonic-gate */
39057c478bd9Sstevel@tonic-gate void
lgrp_shm_policy_init(struct anon_map * amp,vnode_t * vp)39067c478bd9Sstevel@tonic-gate lgrp_shm_policy_init(struct anon_map *amp, vnode_t *vp)
39077c478bd9Sstevel@tonic-gate {
39087c478bd9Sstevel@tonic-gate lgrp_shm_locality_t *shm_locality;
39097c478bd9Sstevel@tonic-gate
39107c478bd9Sstevel@tonic-gate /*
39117c478bd9Sstevel@tonic-gate * Initialize locality field in anon_map
39127c478bd9Sstevel@tonic-gate * Don't need any locks because this is called when anon_map is
39137c478bd9Sstevel@tonic-gate * allocated, but not used anywhere yet.
39147c478bd9Sstevel@tonic-gate */
39157c478bd9Sstevel@tonic-gate if (amp) {
39167c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
39177c478bd9Sstevel@tonic-gate if (amp->locality == NULL) {
39187c478bd9Sstevel@tonic-gate /*
39197c478bd9Sstevel@tonic-gate * Allocate and initialize shared memory locality info
39207c478bd9Sstevel@tonic-gate * and set anon_map locality pointer to it
39217c478bd9Sstevel@tonic-gate * Drop lock across kmem_alloc(KM_SLEEP)
39227c478bd9Sstevel@tonic-gate */
39237c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock);
39247c478bd9Sstevel@tonic-gate shm_locality = kmem_alloc(sizeof (*shm_locality),
39257c478bd9Sstevel@tonic-gate KM_SLEEP);
39267c478bd9Sstevel@tonic-gate rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT,
39277c478bd9Sstevel@tonic-gate NULL);
39287c478bd9Sstevel@tonic-gate shm_locality->loc_count = 1; /* not used for amp */
39297c478bd9Sstevel@tonic-gate shm_locality->loc_tree = NULL;
39307c478bd9Sstevel@tonic-gate
39317c478bd9Sstevel@tonic-gate /*
39327c478bd9Sstevel@tonic-gate * Reacquire lock and check to see whether anyone beat
39337c478bd9Sstevel@tonic-gate * us to initializing the locality info
39347c478bd9Sstevel@tonic-gate */
39357c478bd9Sstevel@tonic-gate ANON_LOCK_ENTER(&->a_rwlock, RW_WRITER);
39367c478bd9Sstevel@tonic-gate if (amp->locality != NULL) {
39377c478bd9Sstevel@tonic-gate rw_destroy(&shm_locality->loc_lock);
39387c478bd9Sstevel@tonic-gate kmem_free(shm_locality,
39397c478bd9Sstevel@tonic-gate sizeof (*shm_locality));
39407c478bd9Sstevel@tonic-gate } else
39417c478bd9Sstevel@tonic-gate amp->locality = shm_locality;
39427c478bd9Sstevel@tonic-gate }
39437c478bd9Sstevel@tonic-gate ANON_LOCK_EXIT(&->a_rwlock);
39447c478bd9Sstevel@tonic-gate return;
39457c478bd9Sstevel@tonic-gate }
39467c478bd9Sstevel@tonic-gate
39477c478bd9Sstevel@tonic-gate /*
39487c478bd9Sstevel@tonic-gate * Allocate shared vnode policy info if vnode is not locality aware yet
39497c478bd9Sstevel@tonic-gate */
39507c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
39517c478bd9Sstevel@tonic-gate if ((vp->v_flag & V_LOCALITY) == 0) {
39527c478bd9Sstevel@tonic-gate /*
39537c478bd9Sstevel@tonic-gate * Allocate and initialize shared memory locality info
39547c478bd9Sstevel@tonic-gate */
39557c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
39567c478bd9Sstevel@tonic-gate shm_locality = kmem_alloc(sizeof (*shm_locality), KM_SLEEP);
39577c478bd9Sstevel@tonic-gate rw_init(&shm_locality->loc_lock, NULL, RW_DEFAULT, NULL);
39587c478bd9Sstevel@tonic-gate shm_locality->loc_count = 1;
39597c478bd9Sstevel@tonic-gate shm_locality->loc_tree = NULL;
39607c478bd9Sstevel@tonic-gate
39617c478bd9Sstevel@tonic-gate /*
39627c478bd9Sstevel@tonic-gate * Point vnode locality field at shared vnode policy info
39637c478bd9Sstevel@tonic-gate * and set locality aware flag in vnode
39647c478bd9Sstevel@tonic-gate */
39657c478bd9Sstevel@tonic-gate mutex_enter(&vp->v_lock);
39667c478bd9Sstevel@tonic-gate if ((vp->v_flag & V_LOCALITY) == 0) {
39677c478bd9Sstevel@tonic-gate vp->v_locality = shm_locality;
39687c478bd9Sstevel@tonic-gate vp->v_flag |= V_LOCALITY;
39697c478bd9Sstevel@tonic-gate } else {
39707c478bd9Sstevel@tonic-gate /*
39717c478bd9Sstevel@tonic-gate * Lost race so free locality info and increment count.
39727c478bd9Sstevel@tonic-gate */
39737c478bd9Sstevel@tonic-gate rw_destroy(&shm_locality->loc_lock);
39747c478bd9Sstevel@tonic-gate kmem_free(shm_locality, sizeof (*shm_locality));
39757c478bd9Sstevel@tonic-gate shm_locality = vp->v_locality;
39767c478bd9Sstevel@tonic-gate shm_locality->loc_count++;
39777c478bd9Sstevel@tonic-gate }
39787c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
39797c478bd9Sstevel@tonic-gate
39807c478bd9Sstevel@tonic-gate return;
39817c478bd9Sstevel@tonic-gate }
39827c478bd9Sstevel@tonic-gate
39837c478bd9Sstevel@tonic-gate /*
39847c478bd9Sstevel@tonic-gate * Increment reference count of number of segments mapping this vnode
39857c478bd9Sstevel@tonic-gate * shared
39867c478bd9Sstevel@tonic-gate */
39877c478bd9Sstevel@tonic-gate shm_locality = vp->v_locality;
39887c478bd9Sstevel@tonic-gate shm_locality->loc_count++;
39897c478bd9Sstevel@tonic-gate mutex_exit(&vp->v_lock);
39907c478bd9Sstevel@tonic-gate }
39917c478bd9Sstevel@tonic-gate
39927c478bd9Sstevel@tonic-gate /*
39937c478bd9Sstevel@tonic-gate * Destroy the given shared memory policy segment tree
39947c478bd9Sstevel@tonic-gate */
39957c478bd9Sstevel@tonic-gate void
lgrp_shm_policy_tree_destroy(avl_tree_t * tree)39967c478bd9Sstevel@tonic-gate lgrp_shm_policy_tree_destroy(avl_tree_t *tree)
39977c478bd9Sstevel@tonic-gate {
39987c478bd9Sstevel@tonic-gate lgrp_shm_policy_seg_t *cur;
39997c478bd9Sstevel@tonic-gate lgrp_shm_policy_seg_t *next;
40007c478bd9Sstevel@tonic-gate
40017c478bd9Sstevel@tonic-gate if (tree == NULL)
40027c478bd9Sstevel@tonic-gate return;
40037c478bd9Sstevel@tonic-gate
40047c478bd9Sstevel@tonic-gate cur = (lgrp_shm_policy_seg_t *)avl_first(tree);
40057c478bd9Sstevel@tonic-gate while (cur != NULL) {
40067c478bd9Sstevel@tonic-gate next = AVL_NEXT(tree, cur);
40077c478bd9Sstevel@tonic-gate avl_remove(tree, cur);
40087c478bd9Sstevel@tonic-gate kmem_free(cur, sizeof (*cur));
40097c478bd9Sstevel@tonic-gate cur = next;
40107c478bd9Sstevel@tonic-gate }
40117c478bd9Sstevel@tonic-gate kmem_free(tree, sizeof (avl_tree_t));
40127c478bd9Sstevel@tonic-gate }
40137c478bd9Sstevel@tonic-gate
/*
 * Uninitialize lgroup shared memory allocation policy support
 */
void
lgrp_shm_policy_fini(struct anon_map *amp, vnode_t *vp)
{
	lgrp_shm_locality_t	*shm_locality;

	/*
	 * For anon_map, deallocate shared memory policy tree and
	 * zero locality field
	 * Don't need any locks because anon_map is being freed
	 */
	if (amp) {
		if (amp->locality == NULL)
			return;
		shm_locality = amp->locality;
		shm_locality->loc_count = 0;	/* not really used for amp */
		rw_destroy(&shm_locality->loc_lock);
		lgrp_shm_policy_tree_destroy(shm_locality->loc_tree);
		kmem_free(shm_locality, sizeof (*shm_locality));
		amp->locality = 0;
		return;
	}

	/*
	 * For vnode, decrement reference count of segments mapping this vnode
	 * shared and delete locality info if reference count drops to 0
	 * (v_lock serializes the decrement against concurrent init/fini)
	 */
	mutex_enter(&vp->v_lock);
	shm_locality = vp->v_locality;
	shm_locality->loc_count--;

	if (shm_locality->loc_count == 0) {
		/* last mapping gone: tear down lock, tree, and info */
		rw_destroy(&shm_locality->loc_lock);
		lgrp_shm_policy_tree_destroy(shm_locality->loc_tree);
		kmem_free(shm_locality, sizeof (*shm_locality));
		vp->v_locality = 0;
		vp->v_flag &= ~V_LOCALITY;
	}
	mutex_exit(&vp->v_lock);
}
40567c478bd9Sstevel@tonic-gate
40577c478bd9Sstevel@tonic-gate /*
40587c478bd9Sstevel@tonic-gate * Compare two shared memory policy segments
40597c478bd9Sstevel@tonic-gate * Used by AVL tree code for searching
40607c478bd9Sstevel@tonic-gate */
40617c478bd9Sstevel@tonic-gate int
lgrp_shm_policy_compar(const void * x,const void * y)40627c478bd9Sstevel@tonic-gate lgrp_shm_policy_compar(const void *x, const void *y)
40637c478bd9Sstevel@tonic-gate {
40647c478bd9Sstevel@tonic-gate lgrp_shm_policy_seg_t *a = (lgrp_shm_policy_seg_t *)x;
40657c478bd9Sstevel@tonic-gate lgrp_shm_policy_seg_t *b = (lgrp_shm_policy_seg_t *)y;
40667c478bd9Sstevel@tonic-gate
40677c478bd9Sstevel@tonic-gate if (a->shm_off < b->shm_off)
40687c478bd9Sstevel@tonic-gate return (-1);
40697c478bd9Sstevel@tonic-gate if (a->shm_off >= b->shm_off + b->shm_size)
40707c478bd9Sstevel@tonic-gate return (1);
40717c478bd9Sstevel@tonic-gate return (0);
40727c478bd9Sstevel@tonic-gate }
40737c478bd9Sstevel@tonic-gate
40747c478bd9Sstevel@tonic-gate /*
40757c478bd9Sstevel@tonic-gate * Concatenate seg1 with seg2 and remove seg2
40767c478bd9Sstevel@tonic-gate */
40777c478bd9Sstevel@tonic-gate static int
lgrp_shm_policy_concat(avl_tree_t * tree,lgrp_shm_policy_seg_t * seg1,lgrp_shm_policy_seg_t * seg2)40787c478bd9Sstevel@tonic-gate lgrp_shm_policy_concat(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg1,
40797c478bd9Sstevel@tonic-gate lgrp_shm_policy_seg_t *seg2)
40807c478bd9Sstevel@tonic-gate {
40817c478bd9Sstevel@tonic-gate if (!seg1 || !seg2 ||
40827c478bd9Sstevel@tonic-gate seg1->shm_off + seg1->shm_size != seg2->shm_off ||
40837c478bd9Sstevel@tonic-gate seg1->shm_policy.mem_policy != seg2->shm_policy.mem_policy)
40847c478bd9Sstevel@tonic-gate return (-1);
40857c478bd9Sstevel@tonic-gate
40867c478bd9Sstevel@tonic-gate seg1->shm_size += seg2->shm_size;
40877c478bd9Sstevel@tonic-gate avl_remove(tree, seg2);
40887c478bd9Sstevel@tonic-gate kmem_free(seg2, sizeof (*seg2));
40897c478bd9Sstevel@tonic-gate return (0);
40907c478bd9Sstevel@tonic-gate }
40917c478bd9Sstevel@tonic-gate
40927c478bd9Sstevel@tonic-gate /*
40937c478bd9Sstevel@tonic-gate * Split segment at given offset and return rightmost (uppermost) segment
40947c478bd9Sstevel@tonic-gate * Assumes that there are no overlapping segments
40957c478bd9Sstevel@tonic-gate */
40967c478bd9Sstevel@tonic-gate static lgrp_shm_policy_seg_t *
lgrp_shm_policy_split(avl_tree_t * tree,lgrp_shm_policy_seg_t * seg,u_offset_t off)40977c478bd9Sstevel@tonic-gate lgrp_shm_policy_split(avl_tree_t *tree, lgrp_shm_policy_seg_t *seg,
40987c478bd9Sstevel@tonic-gate u_offset_t off)
40997c478bd9Sstevel@tonic-gate {
41007c478bd9Sstevel@tonic-gate lgrp_shm_policy_seg_t *newseg;
41017c478bd9Sstevel@tonic-gate avl_index_t where;
41027c478bd9Sstevel@tonic-gate
4103*3df2e8b2SRobert Mustacchi ASSERT(seg != NULL && (off >= seg->shm_off &&
4104*3df2e8b2SRobert Mustacchi off <= seg->shm_off + seg->shm_size));
41057c478bd9Sstevel@tonic-gate
4106*3df2e8b2SRobert Mustacchi if (!seg || off < seg->shm_off ||
4107*3df2e8b2SRobert Mustacchi off > seg->shm_off + seg->shm_size) {
41087c478bd9Sstevel@tonic-gate return (NULL);
4109*3df2e8b2SRobert Mustacchi }
41107c478bd9Sstevel@tonic-gate
41117c478bd9Sstevel@tonic-gate if (off == seg->shm_off || off == seg->shm_off + seg->shm_size)
41127c478bd9Sstevel@tonic-gate return (seg);
41137c478bd9Sstevel@tonic-gate
41147c478bd9Sstevel@tonic-gate /*
41157c478bd9Sstevel@tonic-gate * Adjust size of left segment and allocate new (right) segment
41167c478bd9Sstevel@tonic-gate */
41177c478bd9Sstevel@tonic-gate newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t), KM_SLEEP);
41187c478bd9Sstevel@tonic-gate newseg->shm_policy = seg->shm_policy;
41197c478bd9Sstevel@tonic-gate newseg->shm_off = off;
41207c478bd9Sstevel@tonic-gate newseg->shm_size = seg->shm_size - (off - seg->shm_off);
41217c478bd9Sstevel@tonic-gate seg->shm_size = off - seg->shm_off;
41227c478bd9Sstevel@tonic-gate
41237c478bd9Sstevel@tonic-gate /*
41247c478bd9Sstevel@tonic-gate * Find where to insert new segment in AVL tree and insert it
41257c478bd9Sstevel@tonic-gate */
41267c478bd9Sstevel@tonic-gate (void) avl_find(tree, &off, &where);
41277c478bd9Sstevel@tonic-gate avl_insert(tree, newseg, where);
41287c478bd9Sstevel@tonic-gate
41297c478bd9Sstevel@tonic-gate return (newseg);
41307c478bd9Sstevel@tonic-gate }
41317c478bd9Sstevel@tonic-gate
/*
 * Set shared memory allocation policy on specified shared object at given
 * offset and length
 *
 * The object is identified either by an anon_map (with anon_index giving
 * the starting page) or a vnode (with vn_off giving the byte offset).
 * Policy segments covering [off, off + len) are created, split, merged,
 * and updated so that the range ends up with the requested policy.
 *
 * Return 0 if policy wasn't set already, 1 if policy was set already, and
 * -1 if can't set policy.
 */
int
lgrp_shm_policy_set(lgrp_mem_policy_t policy, struct anon_map *amp,
    ulong_t anon_index, vnode_t *vp, u_offset_t vn_off, size_t len)
{
	u_offset_t		eoff;
	lgrp_shm_policy_seg_t	*next;
	lgrp_shm_policy_seg_t	*newseg;
	u_offset_t		off;
	u_offset_t		oldeoff;
	lgrp_shm_policy_seg_t	*prev;
	int			retval;
	lgrp_shm_policy_seg_t	*seg;
	lgrp_shm_locality_t	*shm_locality;
	avl_tree_t		*tree;
	avl_index_t		where;

	ASSERT(amp || vp);
	ASSERT((len & PAGEOFFSET) == 0);	/* must be page aligned */

	if (len == 0)
		return (-1);

	retval = 0;

	/*
	 * Get locality info and starting offset into shared object
	 * Try anon map first and then vnode
	 * Assume that no locks need to be held on anon_map or vnode, since
	 * it should be protected by its reference count which must be nonzero
	 * for an existing segment.
	 */
	if (amp) {
		/*
		 * Get policy info from anon_map
		 *
		 */
		ASSERT(amp->refcnt != 0);
		if (amp->locality == NULL)
			lgrp_shm_policy_init(amp, NULL);
		shm_locality = amp->locality;
		off = ptob(anon_index);		/* page index -> byte offset */
	} else if (vp) {
		/*
		 * Get policy info from vnode
		 */
		if ((vp->v_flag & V_LOCALITY) == 0 || vp->v_locality == NULL)
			lgrp_shm_policy_init(NULL, vp);
		shm_locality = vp->v_locality;
		ASSERT(shm_locality->loc_count != 0);
		off = vn_off;
	} else
		return (-1);

	ASSERT((off & PAGEOFFSET) == 0);

	/*
	 * Figure out default policy
	 */
	if (policy == LGRP_MEM_POLICY_DEFAULT)
		policy = lgrp_mem_policy_default(len, MAP_SHARED);

	/*
	 * Create AVL tree if there isn't one yet
	 * and set locality field to point at it
	 */
	rw_enter(&shm_locality->loc_lock, RW_WRITER);
	tree = shm_locality->loc_tree;
	if (!tree) {
		/*
		 * Drop the lock across the (possibly sleeping) allocation,
		 * then recheck: another thread may have set up the tree
		 * while we slept.
		 */
		rw_exit(&shm_locality->loc_lock);

		tree = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);

		rw_enter(&shm_locality->loc_lock, RW_WRITER);
		if (shm_locality->loc_tree == NULL) {
			avl_create(tree, lgrp_shm_policy_compar,
			    sizeof (lgrp_shm_policy_seg_t),
			    offsetof(lgrp_shm_policy_seg_t, shm_tree));
			shm_locality->loc_tree = tree;
		} else {
			/*
			 * Another thread managed to set up the tree
			 * before we could. Free the tree we allocated
			 * and use the one that's already there.
			 */
			kmem_free(tree, sizeof (*tree));
			tree = shm_locality->loc_tree;
		}
	}

	/*
	 * Set policy
	 *
	 * Need to maintain hold on writer's lock to keep tree from
	 * changing out from under us
	 */
	while (len != 0) {
		/*
		 * Find policy segment for specified offset into shared object
		 */
		seg = avl_find(tree, &off, &where);

		/*
		 * Didn't find any existing segment that contains specified
		 * offset, so allocate new segment, insert it, and concatenate
		 * with adjacent segments if possible
		 */
		if (seg == NULL) {
			newseg = kmem_alloc(sizeof (lgrp_shm_policy_seg_t),
			    KM_SLEEP);
			newseg->shm_policy.mem_policy = policy;
			newseg->shm_policy.mem_lgrpid = LGRP_NONE;
			newseg->shm_off = off;
			avl_insert(tree, newseg, where);

			/*
			 * Check to see whether new segment overlaps with next
			 * one, set length of new segment accordingly, and
			 * calculate remaining length and next offset
			 */
			seg = AVL_NEXT(tree, newseg);
			if (seg == NULL || off + len <= seg->shm_off) {
				newseg->shm_size = len;
				len = 0;
			} else {
				newseg->shm_size = seg->shm_off - off;
				off = seg->shm_off;
				len -= newseg->shm_size;
			}

			/*
			 * Try to concatenate new segment with next and
			 * previous ones, since they might have the same policy
			 * now. Grab previous and next segments first because
			 * they will change on concatenation.
			 */
			prev = AVL_PREV(tree, newseg);
			next = AVL_NEXT(tree, newseg);
			(void) lgrp_shm_policy_concat(tree, newseg, next);
			(void) lgrp_shm_policy_concat(tree, prev, newseg);

			continue;
		}

		eoff = off + len;			/* end of requested range */
		oldeoff = seg->shm_off + seg->shm_size;	/* end of found segment */

		/*
		 * Policy set already?
		 */
		if (policy == seg->shm_policy.mem_policy) {
			/*
			 * Nothing left to do if offset and length
			 * fall within this segment
			 */
			if (eoff <= oldeoff) {
				retval = 1;
				break;
			} else {
				/* Advance past this segment and keep going */
				len = eoff - oldeoff;
				off = oldeoff;
				continue;
			}
		}

		/*
		 * Specified offset and length match existing segment exactly
		 */
		if (off == seg->shm_off && len == seg->shm_size) {
			/*
			 * Set policy and update current length
			 */
			seg->shm_policy.mem_policy = policy;
			seg->shm_policy.mem_lgrpid = LGRP_NONE;
			len = 0;

			/*
			 * Try concatenating new segment with previous and next
			 * segments, since they might have the same policy now.
			 * Grab previous and next segments first because they
			 * will change on concatenation.
			 */
			prev = AVL_PREV(tree, seg);
			next = AVL_NEXT(tree, seg);
			(void) lgrp_shm_policy_concat(tree, seg, next);
			(void) lgrp_shm_policy_concat(tree, prev, seg);
		} else {
			/*
			 * Specified offset and length only apply to part of
			 * existing segment
			 */

			/*
			 * New segment starts in middle of old one, so split
			 * new one off near beginning of old one
			 */
			newseg = NULL;
			if (off > seg->shm_off) {
				newseg = lgrp_shm_policy_split(tree, seg, off);

				/*
				 * New segment ends where old one did, so try
				 * to concatenate with next segment
				 */
				if (eoff == oldeoff) {
					newseg->shm_policy.mem_policy = policy;
					newseg->shm_policy.mem_lgrpid =
					    LGRP_NONE;
					(void) lgrp_shm_policy_concat(tree,
					    newseg, AVL_NEXT(tree, newseg));
					break;
				}
			}

			/*
			 * New segment ends before old one, so split off end of
			 * old one
			 */
			if (eoff < oldeoff) {
				if (newseg) {
					(void) lgrp_shm_policy_split(tree,
					    newseg, eoff);
					newseg->shm_policy.mem_policy = policy;
					newseg->shm_policy.mem_lgrpid =
					    LGRP_NONE;
				} else {
					(void) lgrp_shm_policy_split(tree, seg,
					    eoff);
					seg->shm_policy.mem_policy = policy;
					seg->shm_policy.mem_lgrpid = LGRP_NONE;
				}

				if (off == seg->shm_off)
					(void) lgrp_shm_policy_concat(tree,
					    AVL_PREV(tree, seg), seg);
				break;
			}

			/*
			 * Calculate remaining length and next offset
			 */
			len = eoff - oldeoff;
			off = oldeoff;
		}
	}

	rw_exit(&shm_locality->loc_lock);
	return (retval);
}
43877c478bd9Sstevel@tonic-gate
/*
 * Return the best memnode from which to allocate memory given
 * an lgroup.
 *
 * "c" is for cookie, which is good enough for me.
 * It references a cookie struct that should be zero'ed to initialize.
 * The cookie should live on the caller's stack.
 *
 * The routine returns -1 when:
 *	- traverse is 0, and all the memnodes in "lgrp" have been returned.
 *	- traverse is 1, and all the memnodes in the system have been
 *	  returned.
 */
int
lgrp_memnode_choose(lgrp_mnode_cookie_t *c)
{
	lgrp_t		*lp = c->lmc_lgrp;
	mnodeset_t	nodes = c->lmc_nodes;	/* candidate memnode bitmask */
	int		cnt = c->lmc_cnt;	/* count of candidates left */
	int		offset, mnode;

	extern int	max_mem_nodes;

	/*
	 * If the set is empty, and the caller is willing, traverse
	 * up the hierarchy until we find a non-empty set.
	 */
	while (nodes == (mnodeset_t)0 || cnt <= 0) {
		if (c->lmc_scope == LGRP_SRCH_LOCAL ||
		    ((lp = lp->lgrp_parent) == NULL))
			return (-1);

		/* Exclude memnodes already handed out from this cookie */
		nodes = lp->lgrp_mnodes & ~(c->lmc_tried);
		cnt = lp->lgrp_nmnodes - c->lmc_ntried;
	}

	/*
	 * Select a memnode by picking one at a "random" offset.
	 * Because of DR, memnodes can come and go at any time.
	 * This code must be able to cope with the possibility
	 * that the nodes count "cnt" is inconsistent with respect
	 * to the number of elements actually in "nodes", and
	 * therefore that the offset chosen could be greater than
	 * the number of elements in the set (some memnodes may
	 * have dissapeared just before cnt was read).
	 * If this happens, the search simply wraps back to the
	 * beginning of the set.
	 */
	ASSERT(nodes != (mnodeset_t)0 && cnt > 0);
	offset = c->lmc_rand % cnt;
	do {
		/*
		 * Walk the set bits of "nodes", counting down "offset";
		 * the inner break fires on the (offset+1)-th set bit.
		 * If offset exceeds the population of "nodes", the for
		 * loop runs off the end (mnode == max_mem_nodes) and the
		 * do-while wraps around with the remaining offset.
		 */
		for (mnode = 0; mnode < max_mem_nodes; mnode++)
			if (nodes & ((mnodeset_t)1 << mnode))
				if (!offset--)
					break;
	} while (mnode >= max_mem_nodes);

	/* Found a node. Store state before returning. */
	c->lmc_lgrp = lp;
	c->lmc_nodes = (nodes & ~((mnodeset_t)1 << mnode));
	c->lmc_cnt = cnt - 1;
	c->lmc_tried = (c->lmc_tried | ((mnodeset_t)1 << mnode));
	c->lmc_ntried++;

	return (mnode);
}
4454