17c478bd9Sstevel@tonic-gate /* 27c478bd9Sstevel@tonic-gate * CDDL HEADER START 37c478bd9Sstevel@tonic-gate * 47c478bd9Sstevel@tonic-gate * The contents of this file are subject to the terms of the 57c478bd9Sstevel@tonic-gate * Common Development and Distribution License, Version 1.0 only 67c478bd9Sstevel@tonic-gate * (the "License"). You may not use this file except in compliance 77c478bd9Sstevel@tonic-gate * with the License. 87c478bd9Sstevel@tonic-gate * 97c478bd9Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 107c478bd9Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing. 117c478bd9Sstevel@tonic-gate * See the License for the specific language governing permissions 127c478bd9Sstevel@tonic-gate * and limitations under the License. 137c478bd9Sstevel@tonic-gate * 147c478bd9Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each 157c478bd9Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 167c478bd9Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the 177c478bd9Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying 187c478bd9Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner] 197c478bd9Sstevel@tonic-gate * 207c478bd9Sstevel@tonic-gate * CDDL HEADER END 217c478bd9Sstevel@tonic-gate */ 227c478bd9Sstevel@tonic-gate /* 237c478bd9Sstevel@tonic-gate * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 247c478bd9Sstevel@tonic-gate * Use is subject to license terms. 
257c478bd9Sstevel@tonic-gate */ 267c478bd9Sstevel@tonic-gate 277c478bd9Sstevel@tonic-gate /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ 287c478bd9Sstevel@tonic-gate /* All Rights Reserved */ 297c478bd9Sstevel@tonic-gate 307c478bd9Sstevel@tonic-gate 317c478bd9Sstevel@tonic-gate #pragma ident "%Z%%M% %I% %E% SMI" /* from SVr4.0 1.30 */ 327c478bd9Sstevel@tonic-gate 337c478bd9Sstevel@tonic-gate #include <sys/types.h> 347c478bd9Sstevel@tonic-gate #include <sys/param.h> 357c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h> 367c478bd9Sstevel@tonic-gate #include <sys/signal.h> 377c478bd9Sstevel@tonic-gate #include <sys/user.h> 387c478bd9Sstevel@tonic-gate #include <sys/systm.h> 397c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h> 407c478bd9Sstevel@tonic-gate #include <sys/var.h> 417c478bd9Sstevel@tonic-gate #include <sys/errno.h> 427c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h> 437c478bd9Sstevel@tonic-gate #include <sys/debug.h> 447c478bd9Sstevel@tonic-gate #include <sys/inline.h> 457c478bd9Sstevel@tonic-gate #include <sys/disp.h> 467c478bd9Sstevel@tonic-gate #include <sys/class.h> 477c478bd9Sstevel@tonic-gate #include <sys/bitmap.h> 487c478bd9Sstevel@tonic-gate #include <sys/kmem.h> 497c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h> 507c478bd9Sstevel@tonic-gate #include <sys/vtrace.h> 517c478bd9Sstevel@tonic-gate #include <sys/tnf.h> 527c478bd9Sstevel@tonic-gate #include <sys/cpupart.h> 537c478bd9Sstevel@tonic-gate #include <sys/lgrp.h> 547c478bd9Sstevel@tonic-gate #include <sys/chip.h> 557c478bd9Sstevel@tonic-gate #include <sys/schedctl.h> 567c478bd9Sstevel@tonic-gate #include <sys/atomic.h> 577c478bd9Sstevel@tonic-gate #include <sys/dtrace.h> 587c478bd9Sstevel@tonic-gate #include <sys/sdt.h> 597c478bd9Sstevel@tonic-gate 607c478bd9Sstevel@tonic-gate #include <vm/as.h> 617c478bd9Sstevel@tonic-gate 627c478bd9Sstevel@tonic-gate #define BOUND_CPU 0x1 637c478bd9Sstevel@tonic-gate #define BOUND_PARTITION 0x2 647c478bd9Sstevel@tonic-gate #define BOUND_INTR 0x4 

/* Dispatch queue allocation structure and functions */
struct disp_queue_info {
	disp_t	*dp;		/* dispatch queue being resized */
	dispq_t	*olddispq;	/* previous queue array (freed after swap) */
	dispq_t	*newdispq;	/* replacement queue array */
	ulong_t	*olddqactmap;	/* previous active-queue bitmap */
	ulong_t	*newdqactmap;	/* replacement active-queue bitmap */
	int	oldnglobpris;	/* previous number of global priorities */
};
static void	disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
    disp_t *dp);
static void	disp_dq_assign(struct disp_queue_info *dptr, int numpris);
static void	disp_dq_free(struct disp_queue_info *dptr);

/* platform-specific routine to call when processor is idle */
static void	generic_idle_cpu();
void		(*idle_cpu)() = generic_idle_cpu;

/* routines invoked when a CPU enters/exits the idle loop */
static void	idle_enter();
static void	idle_exit();

/* platform-specific routine to call when thread is enqueued */
static void	generic_enq_thread(cpu_t *, int);
void		(*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;

pri_t	kpreemptpri;		/* priority where kernel preemption applies */
pri_t	upreemptpri = 0;	/* priority where normal preemption applies */
pri_t	intr_pri;		/* interrupt thread priority base level */

#define	KPQPRI	-1		/* priority where cpu affinity is
				   dropped for kp queue */
pri_t	kpqpri = KPQPRI;	/* can be set in /etc/system */
disp_t	cpu0_disp;		/* boot CPU's dispatch queue */
disp_lock_t	swapped_lock;	/* lock swapped threads and swap queue */
int	nswapped;		/* total number of swapped threads */
void	disp_swapped_enq(kthread_t *tp);
static void	disp_swapped_setrun(kthread_t *tp);
static void	cpu_resched(cpu_t *cp, pri_t tpri);

/*
 * If this is set, only interrupt threads will cause kernel preemptions.
 * This is done by changing the value of kpreemptpri.  kpreemptpri
 * will either be the max sysclass pri + 1 or the min interrupt pri.
 */
int	only_intr_kpreempt;

extern void	set_idle_cpu(int cpun);
extern void	unset_idle_cpu(int cpun);
static void	setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0
#define	SETKP_FRONT	1
/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is in hertz.
 *
 * The platform may define a per physical processor adjustment of
 * this parameter.  For efficiency, the effective rechoose interval
 * (rechoose_interval + per chip adjustment) is maintained in the
 * cpu structures.  See cpu_choose()
 */
int	rechoose_interval = RECHOOSE_INTERVAL;

static cpu_t	*cpu_choose(kthread_t *, pri_t);

id_t	defaultcid;	/* system "default" class; see dispadmin(1M) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */

static void	cpu_dispqalloc(int numpris);

static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);

/*
 * dispatcher and scheduler initialization
 */

/*
 * disp_setup - Common code to calculate and allocate dispatcher
 * variables and structures based on the maximum priority.
1517c478bd9Sstevel@tonic-gate */ 1527c478bd9Sstevel@tonic-gate static void 1537c478bd9Sstevel@tonic-gate disp_setup(pri_t maxglobpri, pri_t oldnglobpris) 1547c478bd9Sstevel@tonic-gate { 1557c478bd9Sstevel@tonic-gate pri_t newnglobpris; 1567c478bd9Sstevel@tonic-gate 1577c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock)); 1587c478bd9Sstevel@tonic-gate 1597c478bd9Sstevel@tonic-gate newnglobpris = maxglobpri + 1 + LOCK_LEVEL; 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate if (newnglobpris > oldnglobpris) { 1627c478bd9Sstevel@tonic-gate /* 1637c478bd9Sstevel@tonic-gate * Allocate new kp queues for each CPU partition. 1647c478bd9Sstevel@tonic-gate */ 1657c478bd9Sstevel@tonic-gate cpupart_kpqalloc(newnglobpris); 1667c478bd9Sstevel@tonic-gate 1677c478bd9Sstevel@tonic-gate /* 1687c478bd9Sstevel@tonic-gate * Allocate new dispatch queues for each CPU. 1697c478bd9Sstevel@tonic-gate */ 1707c478bd9Sstevel@tonic-gate cpu_dispqalloc(newnglobpris); 1717c478bd9Sstevel@tonic-gate 1727c478bd9Sstevel@tonic-gate /* 1737c478bd9Sstevel@tonic-gate * compute new interrupt thread base priority 1747c478bd9Sstevel@tonic-gate */ 1757c478bd9Sstevel@tonic-gate intr_pri = maxglobpri; 1767c478bd9Sstevel@tonic-gate if (only_intr_kpreempt) { 1777c478bd9Sstevel@tonic-gate kpreemptpri = intr_pri + 1; 1787c478bd9Sstevel@tonic-gate if (kpqpri == KPQPRI) 1797c478bd9Sstevel@tonic-gate kpqpri = kpreemptpri; 1807c478bd9Sstevel@tonic-gate } 1817c478bd9Sstevel@tonic-gate v.v_nglobpris = newnglobpris; 1827c478bd9Sstevel@tonic-gate } 1837c478bd9Sstevel@tonic-gate } 1847c478bd9Sstevel@tonic-gate 1857c478bd9Sstevel@tonic-gate /* 1867c478bd9Sstevel@tonic-gate * dispinit - Called to initialize all loaded classes and the 1877c478bd9Sstevel@tonic-gate * dispatcher framework. 
1887c478bd9Sstevel@tonic-gate */ 1897c478bd9Sstevel@tonic-gate void 1907c478bd9Sstevel@tonic-gate dispinit(void) 1917c478bd9Sstevel@tonic-gate { 1927c478bd9Sstevel@tonic-gate id_t cid; 1937c478bd9Sstevel@tonic-gate pri_t maxglobpri; 1947c478bd9Sstevel@tonic-gate pri_t cl_maxglobpri; 1957c478bd9Sstevel@tonic-gate 1967c478bd9Sstevel@tonic-gate maxglobpri = -1; 1977c478bd9Sstevel@tonic-gate 1987c478bd9Sstevel@tonic-gate /* 1997c478bd9Sstevel@tonic-gate * Initialize transition lock, which will always be set. 2007c478bd9Sstevel@tonic-gate */ 2017c478bd9Sstevel@tonic-gate DISP_LOCK_INIT(&transition_lock); 2027c478bd9Sstevel@tonic-gate disp_lock_enter_high(&transition_lock); 2037c478bd9Sstevel@tonic-gate DISP_LOCK_INIT(&stop_lock); 2047c478bd9Sstevel@tonic-gate 2057c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock); 2067c478bd9Sstevel@tonic-gate CPU->cpu_disp->disp_maxrunpri = -1; 2077c478bd9Sstevel@tonic-gate CPU->cpu_disp->disp_max_unbound_pri = -1; 2087c478bd9Sstevel@tonic-gate /* 2097c478bd9Sstevel@tonic-gate * Initialize the default CPU partition. 2107c478bd9Sstevel@tonic-gate */ 2117c478bd9Sstevel@tonic-gate cpupart_initialize_default(); 2127c478bd9Sstevel@tonic-gate /* 2137c478bd9Sstevel@tonic-gate * Call the class specific initialization functions for 2147c478bd9Sstevel@tonic-gate * all pre-installed schedulers. 2157c478bd9Sstevel@tonic-gate * 2167c478bd9Sstevel@tonic-gate * We pass the size of a class specific parameter 2177c478bd9Sstevel@tonic-gate * buffer to each of the initialization functions 2187c478bd9Sstevel@tonic-gate * to try to catch problems with backward compatibility 2197c478bd9Sstevel@tonic-gate * of class modules. 2207c478bd9Sstevel@tonic-gate * 2217c478bd9Sstevel@tonic-gate * For example a new class module running on an old system 2227c478bd9Sstevel@tonic-gate * which didn't provide sufficiently large parameter buffers 2237c478bd9Sstevel@tonic-gate * would be bad news. 
Class initialization modules can check for 2247c478bd9Sstevel@tonic-gate * this and take action if they detect a problem. 2257c478bd9Sstevel@tonic-gate */ 2267c478bd9Sstevel@tonic-gate 2277c478bd9Sstevel@tonic-gate for (cid = 0; cid < nclass; cid++) { 2287c478bd9Sstevel@tonic-gate sclass_t *sc; 2297c478bd9Sstevel@tonic-gate 2307c478bd9Sstevel@tonic-gate sc = &sclass[cid]; 2317c478bd9Sstevel@tonic-gate if (SCHED_INSTALLED(sc)) { 2327c478bd9Sstevel@tonic-gate cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ, 2337c478bd9Sstevel@tonic-gate &sc->cl_funcs); 2347c478bd9Sstevel@tonic-gate if (cl_maxglobpri > maxglobpri) 2357c478bd9Sstevel@tonic-gate maxglobpri = cl_maxglobpri; 2367c478bd9Sstevel@tonic-gate } 2377c478bd9Sstevel@tonic-gate } 2387c478bd9Sstevel@tonic-gate kpreemptpri = (pri_t)v.v_maxsyspri + 1; 2397c478bd9Sstevel@tonic-gate if (kpqpri == KPQPRI) 2407c478bd9Sstevel@tonic-gate kpqpri = kpreemptpri; 2417c478bd9Sstevel@tonic-gate 2427c478bd9Sstevel@tonic-gate ASSERT(maxglobpri >= 0); 2437c478bd9Sstevel@tonic-gate disp_setup(maxglobpri, 0); 2447c478bd9Sstevel@tonic-gate 2457c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 2467c478bd9Sstevel@tonic-gate 2477c478bd9Sstevel@tonic-gate /* 2487c478bd9Sstevel@tonic-gate * Get the default class ID; this may be later modified via 2497c478bd9Sstevel@tonic-gate * dispadmin(1M). This will load the class (normally TS) and that will 2507c478bd9Sstevel@tonic-gate * call disp_add(), which is why we had to drop cpu_lock first. 
2517c478bd9Sstevel@tonic-gate */ 2527c478bd9Sstevel@tonic-gate if (getcid(defaultclass, &defaultcid) != 0) { 2537c478bd9Sstevel@tonic-gate cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'", 2547c478bd9Sstevel@tonic-gate defaultclass); 2557c478bd9Sstevel@tonic-gate } 2567c478bd9Sstevel@tonic-gate } 2577c478bd9Sstevel@tonic-gate 2587c478bd9Sstevel@tonic-gate /* 2597c478bd9Sstevel@tonic-gate * disp_add - Called with class pointer to initialize the dispatcher 2607c478bd9Sstevel@tonic-gate * for a newly loaded class. 2617c478bd9Sstevel@tonic-gate */ 2627c478bd9Sstevel@tonic-gate void 2637c478bd9Sstevel@tonic-gate disp_add(sclass_t *clp) 2647c478bd9Sstevel@tonic-gate { 2657c478bd9Sstevel@tonic-gate pri_t maxglobpri; 2667c478bd9Sstevel@tonic-gate pri_t cl_maxglobpri; 2677c478bd9Sstevel@tonic-gate 2687c478bd9Sstevel@tonic-gate mutex_enter(&cpu_lock); 2697c478bd9Sstevel@tonic-gate /* 2707c478bd9Sstevel@tonic-gate * Initialize the scheduler class. 2717c478bd9Sstevel@tonic-gate */ 2727c478bd9Sstevel@tonic-gate maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1); 2737c478bd9Sstevel@tonic-gate cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs); 2747c478bd9Sstevel@tonic-gate if (cl_maxglobpri > maxglobpri) 2757c478bd9Sstevel@tonic-gate maxglobpri = cl_maxglobpri; 2767c478bd9Sstevel@tonic-gate 2777c478bd9Sstevel@tonic-gate /* 2787c478bd9Sstevel@tonic-gate * Save old queue information. Since we're initializing a 2797c478bd9Sstevel@tonic-gate * new scheduling class which has just been loaded, then 2807c478bd9Sstevel@tonic-gate * the size of the dispq may have changed. We need to handle 2817c478bd9Sstevel@tonic-gate * that here. 
2827c478bd9Sstevel@tonic-gate */ 2837c478bd9Sstevel@tonic-gate disp_setup(maxglobpri, v.v_nglobpris); 2847c478bd9Sstevel@tonic-gate 2857c478bd9Sstevel@tonic-gate mutex_exit(&cpu_lock); 2867c478bd9Sstevel@tonic-gate } 2877c478bd9Sstevel@tonic-gate 2887c478bd9Sstevel@tonic-gate 2897c478bd9Sstevel@tonic-gate /* 2907c478bd9Sstevel@tonic-gate * For each CPU, allocate new dispatch queues 2917c478bd9Sstevel@tonic-gate * with the stated number of priorities. 2927c478bd9Sstevel@tonic-gate */ 2937c478bd9Sstevel@tonic-gate static void 2947c478bd9Sstevel@tonic-gate cpu_dispqalloc(int numpris) 2957c478bd9Sstevel@tonic-gate { 2967c478bd9Sstevel@tonic-gate cpu_t *cpup; 2977c478bd9Sstevel@tonic-gate struct disp_queue_info *disp_mem; 2987c478bd9Sstevel@tonic-gate int i, num; 2997c478bd9Sstevel@tonic-gate 3007c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock)); 3017c478bd9Sstevel@tonic-gate 3027c478bd9Sstevel@tonic-gate disp_mem = kmem_zalloc(NCPU * 3037c478bd9Sstevel@tonic-gate sizeof (struct disp_queue_info), KM_SLEEP); 3047c478bd9Sstevel@tonic-gate 3057c478bd9Sstevel@tonic-gate /* 3067c478bd9Sstevel@tonic-gate * This routine must allocate all of the memory before stopping 3077c478bd9Sstevel@tonic-gate * the cpus because it must not sleep in kmem_alloc while the 3087c478bd9Sstevel@tonic-gate * CPUs are stopped. Locks they hold will not be freed until they 3097c478bd9Sstevel@tonic-gate * are restarted. 
3107c478bd9Sstevel@tonic-gate */ 3117c478bd9Sstevel@tonic-gate i = 0; 3127c478bd9Sstevel@tonic-gate cpup = cpu_list; 3137c478bd9Sstevel@tonic-gate do { 3147c478bd9Sstevel@tonic-gate disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp); 3157c478bd9Sstevel@tonic-gate i++; 3167c478bd9Sstevel@tonic-gate cpup = cpup->cpu_next; 3177c478bd9Sstevel@tonic-gate } while (cpup != cpu_list); 3187c478bd9Sstevel@tonic-gate num = i; 3197c478bd9Sstevel@tonic-gate 3207c478bd9Sstevel@tonic-gate pause_cpus(NULL); 3217c478bd9Sstevel@tonic-gate for (i = 0; i < num; i++) 3227c478bd9Sstevel@tonic-gate disp_dq_assign(&disp_mem[i], numpris); 3237c478bd9Sstevel@tonic-gate start_cpus(); 3247c478bd9Sstevel@tonic-gate 3257c478bd9Sstevel@tonic-gate /* 3267c478bd9Sstevel@tonic-gate * I must free all of the memory after starting the cpus because 3277c478bd9Sstevel@tonic-gate * I can not risk sleeping in kmem_free while the cpus are stopped. 3287c478bd9Sstevel@tonic-gate */ 3297c478bd9Sstevel@tonic-gate for (i = 0; i < num; i++) 3307c478bd9Sstevel@tonic-gate disp_dq_free(&disp_mem[i]); 3317c478bd9Sstevel@tonic-gate 3327c478bd9Sstevel@tonic-gate kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info)); 3337c478bd9Sstevel@tonic-gate } 3347c478bd9Sstevel@tonic-gate 3357c478bd9Sstevel@tonic-gate static void 3367c478bd9Sstevel@tonic-gate disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t *dp) 3377c478bd9Sstevel@tonic-gate { 3387c478bd9Sstevel@tonic-gate dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP); 3397c478bd9Sstevel@tonic-gate dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) * 3407c478bd9Sstevel@tonic-gate sizeof (long), KM_SLEEP); 3417c478bd9Sstevel@tonic-gate dptr->dp = dp; 3427c478bd9Sstevel@tonic-gate } 3437c478bd9Sstevel@tonic-gate 3447c478bd9Sstevel@tonic-gate static void 3457c478bd9Sstevel@tonic-gate disp_dq_assign(struct disp_queue_info *dptr, int numpris) 3467c478bd9Sstevel@tonic-gate { 3477c478bd9Sstevel@tonic-gate disp_t *dp; 
3487c478bd9Sstevel@tonic-gate 3497c478bd9Sstevel@tonic-gate dp = dptr->dp; 3507c478bd9Sstevel@tonic-gate dptr->olddispq = dp->disp_q; 3517c478bd9Sstevel@tonic-gate dptr->olddqactmap = dp->disp_qactmap; 3527c478bd9Sstevel@tonic-gate dptr->oldnglobpris = dp->disp_npri; 3537c478bd9Sstevel@tonic-gate 3547c478bd9Sstevel@tonic-gate ASSERT(dptr->oldnglobpris < numpris); 3557c478bd9Sstevel@tonic-gate 3567c478bd9Sstevel@tonic-gate if (dptr->olddispq != NULL) { 3577c478bd9Sstevel@tonic-gate /* 3587c478bd9Sstevel@tonic-gate * Use kcopy because bcopy is platform-specific 3597c478bd9Sstevel@tonic-gate * and could block while we might have paused the cpus. 3607c478bd9Sstevel@tonic-gate */ 3617c478bd9Sstevel@tonic-gate (void) kcopy(dptr->olddispq, dptr->newdispq, 3627c478bd9Sstevel@tonic-gate dptr->oldnglobpris * sizeof (dispq_t)); 3637c478bd9Sstevel@tonic-gate (void) kcopy(dptr->olddqactmap, dptr->newdqactmap, 3647c478bd9Sstevel@tonic-gate ((dptr->oldnglobpris / BT_NBIPUL) + 1) * 3657c478bd9Sstevel@tonic-gate sizeof (long)); 3667c478bd9Sstevel@tonic-gate } 3677c478bd9Sstevel@tonic-gate dp->disp_q = dptr->newdispq; 3687c478bd9Sstevel@tonic-gate dp->disp_qactmap = dptr->newdqactmap; 3697c478bd9Sstevel@tonic-gate dp->disp_q_limit = &dptr->newdispq[numpris]; 3707c478bd9Sstevel@tonic-gate dp->disp_npri = numpris; 3717c478bd9Sstevel@tonic-gate } 3727c478bd9Sstevel@tonic-gate 3737c478bd9Sstevel@tonic-gate static void 3747c478bd9Sstevel@tonic-gate disp_dq_free(struct disp_queue_info *dptr) 3757c478bd9Sstevel@tonic-gate { 3767c478bd9Sstevel@tonic-gate if (dptr->olddispq != NULL) 3777c478bd9Sstevel@tonic-gate kmem_free(dptr->olddispq, 3787c478bd9Sstevel@tonic-gate dptr->oldnglobpris * sizeof (dispq_t)); 3797c478bd9Sstevel@tonic-gate if (dptr->olddqactmap != NULL) 3807c478bd9Sstevel@tonic-gate kmem_free(dptr->olddqactmap, 3817c478bd9Sstevel@tonic-gate ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long)); 3827c478bd9Sstevel@tonic-gate } 3837c478bd9Sstevel@tonic-gate 
3847c478bd9Sstevel@tonic-gate /* 3857c478bd9Sstevel@tonic-gate * For a newly created CPU, initialize the dispatch queue. 3867c478bd9Sstevel@tonic-gate * This is called before the CPU is known through cpu[] or on any lists. 3877c478bd9Sstevel@tonic-gate */ 3887c478bd9Sstevel@tonic-gate void 3897c478bd9Sstevel@tonic-gate disp_cpu_init(cpu_t *cp) 3907c478bd9Sstevel@tonic-gate { 3917c478bd9Sstevel@tonic-gate disp_t *dp; 3927c478bd9Sstevel@tonic-gate dispq_t *newdispq; 3937c478bd9Sstevel@tonic-gate ulong_t *newdqactmap; 3947c478bd9Sstevel@tonic-gate 3957c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock)); /* protect dispatcher queue sizes */ 3967c478bd9Sstevel@tonic-gate 3977c478bd9Sstevel@tonic-gate if (cp == cpu0_disp.disp_cpu) 3987c478bd9Sstevel@tonic-gate dp = &cpu0_disp; 3997c478bd9Sstevel@tonic-gate else 4007c478bd9Sstevel@tonic-gate dp = kmem_alloc(sizeof (disp_t), KM_SLEEP); 4017c478bd9Sstevel@tonic-gate bzero(dp, sizeof (disp_t)); 4027c478bd9Sstevel@tonic-gate cp->cpu_disp = dp; 4037c478bd9Sstevel@tonic-gate dp->disp_cpu = cp; 4047c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = -1; 4057c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = -1; 4067c478bd9Sstevel@tonic-gate DISP_LOCK_INIT(&cp->cpu_thread_lock); 4077c478bd9Sstevel@tonic-gate /* 4087c478bd9Sstevel@tonic-gate * Allocate memory for the dispatcher queue headers 4097c478bd9Sstevel@tonic-gate * and the active queue bitmap. 
4107c478bd9Sstevel@tonic-gate */ 4117c478bd9Sstevel@tonic-gate newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP); 4127c478bd9Sstevel@tonic-gate newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) * 4137c478bd9Sstevel@tonic-gate sizeof (long), KM_SLEEP); 4147c478bd9Sstevel@tonic-gate dp->disp_q = newdispq; 4157c478bd9Sstevel@tonic-gate dp->disp_qactmap = newdqactmap; 4167c478bd9Sstevel@tonic-gate dp->disp_q_limit = &newdispq[v.v_nglobpris]; 4177c478bd9Sstevel@tonic-gate dp->disp_npri = v.v_nglobpris; 4187c478bd9Sstevel@tonic-gate } 4197c478bd9Sstevel@tonic-gate 4207c478bd9Sstevel@tonic-gate void 4217c478bd9Sstevel@tonic-gate disp_cpu_fini(cpu_t *cp) 4227c478bd9Sstevel@tonic-gate { 4237c478bd9Sstevel@tonic-gate ASSERT(MUTEX_HELD(&cpu_lock)); 4247c478bd9Sstevel@tonic-gate 4257c478bd9Sstevel@tonic-gate disp_kp_free(cp->cpu_disp); 4267c478bd9Sstevel@tonic-gate if (cp->cpu_disp != &cpu0_disp) 4277c478bd9Sstevel@tonic-gate kmem_free(cp->cpu_disp, sizeof (disp_t)); 4287c478bd9Sstevel@tonic-gate } 4297c478bd9Sstevel@tonic-gate 4307c478bd9Sstevel@tonic-gate /* 4317c478bd9Sstevel@tonic-gate * Allocate new, larger kpreempt dispatch queue to replace the old one. 4327c478bd9Sstevel@tonic-gate */ 4337c478bd9Sstevel@tonic-gate void 4347c478bd9Sstevel@tonic-gate disp_kp_alloc(disp_t *dq, pri_t npri) 4357c478bd9Sstevel@tonic-gate { 4367c478bd9Sstevel@tonic-gate struct disp_queue_info mem_info; 4377c478bd9Sstevel@tonic-gate 4387c478bd9Sstevel@tonic-gate if (npri > dq->disp_npri) { 4397c478bd9Sstevel@tonic-gate /* 4407c478bd9Sstevel@tonic-gate * Allocate memory for the new array. 4417c478bd9Sstevel@tonic-gate */ 4427c478bd9Sstevel@tonic-gate disp_dq_alloc(&mem_info, npri, dq); 4437c478bd9Sstevel@tonic-gate 4447c478bd9Sstevel@tonic-gate /* 4457c478bd9Sstevel@tonic-gate * We need to copy the old structures to the new 4467c478bd9Sstevel@tonic-gate * and free the old. 
4477c478bd9Sstevel@tonic-gate */ 4487c478bd9Sstevel@tonic-gate disp_dq_assign(&mem_info, npri); 4497c478bd9Sstevel@tonic-gate disp_dq_free(&mem_info); 4507c478bd9Sstevel@tonic-gate } 4517c478bd9Sstevel@tonic-gate } 4527c478bd9Sstevel@tonic-gate 4537c478bd9Sstevel@tonic-gate /* 4547c478bd9Sstevel@tonic-gate * Free dispatch queue. 4557c478bd9Sstevel@tonic-gate * Used for the kpreempt queues for a removed CPU partition and 4567c478bd9Sstevel@tonic-gate * for the per-CPU queues of deleted CPUs. 4577c478bd9Sstevel@tonic-gate */ 4587c478bd9Sstevel@tonic-gate void 4597c478bd9Sstevel@tonic-gate disp_kp_free(disp_t *dq) 4607c478bd9Sstevel@tonic-gate { 4617c478bd9Sstevel@tonic-gate struct disp_queue_info mem_info; 4627c478bd9Sstevel@tonic-gate 4637c478bd9Sstevel@tonic-gate mem_info.olddispq = dq->disp_q; 4647c478bd9Sstevel@tonic-gate mem_info.olddqactmap = dq->disp_qactmap; 4657c478bd9Sstevel@tonic-gate mem_info.oldnglobpris = dq->disp_npri; 4667c478bd9Sstevel@tonic-gate disp_dq_free(&mem_info); 4677c478bd9Sstevel@tonic-gate } 4687c478bd9Sstevel@tonic-gate 4697c478bd9Sstevel@tonic-gate /* 4707c478bd9Sstevel@tonic-gate * End dispatcher and scheduler initialization. 4717c478bd9Sstevel@tonic-gate */ 4727c478bd9Sstevel@tonic-gate 4737c478bd9Sstevel@tonic-gate /* 4747c478bd9Sstevel@tonic-gate * See if there's anything to do other than remain idle. 4757c478bd9Sstevel@tonic-gate * Return non-zero if there is. 4767c478bd9Sstevel@tonic-gate * 4777c478bd9Sstevel@tonic-gate * This function must be called with high spl, or with 4787c478bd9Sstevel@tonic-gate * kernel preemption disabled to prevent the partition's 4797c478bd9Sstevel@tonic-gate * active cpu list from changing while being traversed. 
4807c478bd9Sstevel@tonic-gate * 4817c478bd9Sstevel@tonic-gate */ 4827c478bd9Sstevel@tonic-gate int 4837c478bd9Sstevel@tonic-gate disp_anywork(void) 4847c478bd9Sstevel@tonic-gate { 4857c478bd9Sstevel@tonic-gate cpu_t *cp = CPU; 4867c478bd9Sstevel@tonic-gate cpu_t *ocp; 4877c478bd9Sstevel@tonic-gate 4887c478bd9Sstevel@tonic-gate if (cp->cpu_disp->disp_nrunnable != 0) 4897c478bd9Sstevel@tonic-gate return (1); 4907c478bd9Sstevel@tonic-gate 4917c478bd9Sstevel@tonic-gate if (!(cp->cpu_flags & CPU_OFFLINE)) { 4927c478bd9Sstevel@tonic-gate if (CP_MAXRUNPRI(cp->cpu_part) >= 0) 4937c478bd9Sstevel@tonic-gate return (1); 4947c478bd9Sstevel@tonic-gate 4957c478bd9Sstevel@tonic-gate /* 4967c478bd9Sstevel@tonic-gate * Work can be taken from another CPU if: 4977c478bd9Sstevel@tonic-gate * - There is unbound work on the run queue 4987c478bd9Sstevel@tonic-gate * - That work isn't a thread undergoing a 4997c478bd9Sstevel@tonic-gate * - context switch on an otherwise empty queue. 5007c478bd9Sstevel@tonic-gate * - The CPU isn't running the idle loop. 
5017c478bd9Sstevel@tonic-gate */ 5027c478bd9Sstevel@tonic-gate for (ocp = cp->cpu_next_part; ocp != cp; 5037c478bd9Sstevel@tonic-gate ocp = ocp->cpu_next_part) { 5047c478bd9Sstevel@tonic-gate ASSERT(CPU_ACTIVE(ocp)); 5057c478bd9Sstevel@tonic-gate 5067c478bd9Sstevel@tonic-gate if (ocp->cpu_disp->disp_max_unbound_pri != -1 && 5077c478bd9Sstevel@tonic-gate !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) && 5087c478bd9Sstevel@tonic-gate ocp->cpu_disp->disp_nrunnable == 1) && 5097c478bd9Sstevel@tonic-gate ocp->cpu_dispatch_pri != -1) 5107c478bd9Sstevel@tonic-gate return (1); 5117c478bd9Sstevel@tonic-gate } 5127c478bd9Sstevel@tonic-gate } 5137c478bd9Sstevel@tonic-gate return (0); 5147c478bd9Sstevel@tonic-gate } 5157c478bd9Sstevel@tonic-gate 5167c478bd9Sstevel@tonic-gate /* 5177c478bd9Sstevel@tonic-gate * Called when CPU enters the idle loop 5187c478bd9Sstevel@tonic-gate */ 5197c478bd9Sstevel@tonic-gate static void 5207c478bd9Sstevel@tonic-gate idle_enter() 5217c478bd9Sstevel@tonic-gate { 5227c478bd9Sstevel@tonic-gate cpu_t *cp = CPU; 5237c478bd9Sstevel@tonic-gate 524*eda89462Sesolom new_cpu_mstate(CMS_IDLE, gethrtime_unscaled()); 5257c478bd9Sstevel@tonic-gate CPU_STATS_ADDQ(cp, sys, idlethread, 1); 5267c478bd9Sstevel@tonic-gate set_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 5277c478bd9Sstevel@tonic-gate } 5287c478bd9Sstevel@tonic-gate 5297c478bd9Sstevel@tonic-gate /* 5307c478bd9Sstevel@tonic-gate * Called when CPU exits the idle loop 5317c478bd9Sstevel@tonic-gate */ 5327c478bd9Sstevel@tonic-gate static void 5337c478bd9Sstevel@tonic-gate idle_exit() 5347c478bd9Sstevel@tonic-gate { 5357c478bd9Sstevel@tonic-gate cpu_t *cp = CPU; 5367c478bd9Sstevel@tonic-gate 537*eda89462Sesolom new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled()); 5387c478bd9Sstevel@tonic-gate unset_idle_cpu(cp->cpu_id); /* arch-dependent hook */ 5397c478bd9Sstevel@tonic-gate } 5407c478bd9Sstevel@tonic-gate 5417c478bd9Sstevel@tonic-gate /* 5427c478bd9Sstevel@tonic-gate * Idle loop. 
 */
void
idle()
{
	struct cpu	*cp = CPU;	/* pointer to this CPU */
	kthread_t	*t;		/* taken thread */

	idle_enter();

	/*
	 * Uniprocessor version of idle loop.
	 * Do this until notified that we're on an actual multiprocessor.
	 */
	while (ncpus == 1) {
		if (cp->cpu_disp->disp_nrunnable == 0) {
			(*idle_cpu)();
			continue;
		}
		idle_exit();
		swtch();

		idle_enter(); /* returned from swtch */
	}

	/*
	 * Multiprocessor idle loop.
	 */
	for (;;) {
		/*
		 * If CPU is completely quiesced by p_online(2), just wait
		 * here with minimal bus traffic until put online.
		 */
		while (cp->cpu_flags & CPU_QUIESCED)
			(*idle_cpu)();

		if (cp->cpu_disp->disp_nrunnable != 0) {
			idle_exit();
			swtch();
		} else {
			/* An offline CPU may not steal work from others. */
			if (cp->cpu_flags & CPU_OFFLINE)
				continue;
			if ((t = disp_getwork(cp)) == NULL) {
				if (cp->cpu_chosen_level != -1) {
					disp_t *dp = cp->cpu_disp;
					disp_t *kpq;

					disp_lock_enter(&dp->disp_lock);
					/*
					 * Set kpq under lock to prevent
					 * migration between partitions.
					 */
					kpq = &cp->cpu_part->cp_kp_queue;
					if (kpq->disp_maxrunpri == -1)
						cp->cpu_chosen_level = -1;
					disp_lock_exit(&dp->disp_lock);
				}
				(*idle_cpu)();
				continue;
			}
			idle_exit();
			restore_mstate(t);
			swtch_to(t);
		}
		idle_enter(); /* returned from swtch/swtch_to */
	}
}


/*
 * Preempt the currently running thread in favor of the highest
 * priority thread.
The class of the current thread controls
 * where it goes on the dispatcher queues.  If panicking, turn
 * preemption off.
 */
void
preempt()
{
	kthread_t 	*t = curthread;
	klwp_t 		*lwp = ttolwp(curthread);

	/* Preemption is disabled while panicking. */
	if (panicstr)
		return;

	TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start");

	thread_lock(t);

	if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) {
		/*
		 * this thread has already been chosen to be run on
		 * another CPU. Clear kprunrun on this CPU since we're
		 * already headed for swtch().
		 */
		CPU->cpu_kprunrun = 0;
		thread_unlock_nopreempt(t);
		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");
	} else {
		if (lwp != NULL)
			lwp->lwp_ru.nivcsw++;	/* involuntary ctx switch */
		CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1);
		THREAD_TRANSITION(t);
		/* let the scheduling class requeue the thread */
		CL_PREEMPT(t);
		DTRACE_SCHED(preempt);
		thread_unlock_nopreempt(t);

		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");

		swtch();		/* clears CPU->cpu_runrun via disp() */
	}
}

extern kthread_t *thread_unpin();

/*
 * disp() - find the highest priority thread for this processor to run, and
 * set it in TS_ONPROC state so that resume() can be called to run it.
 */
static kthread_t *
disp()
{
	cpu_t		*cpup;
	disp_t		*dp;
	kthread_t	*tp;
	dispq_t		*dq;
	int		maxrunword;
	pri_t		pri;
	disp_t		*kpq;

	TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start");

	cpup = CPU;
	/*
	 * Find the highest priority loaded, runnable thread.
	 */
	dp = cpup->cpu_disp;

reschedule:
	/*
	 * If there is more important work on the global queue with a better
	 * priority than the maximum on this CPU, take it now.
	 */
	kpq = &cpup->cpu_part->cp_kp_queue;
	while ((pri = kpq->disp_maxrunpri) >= 0 &&
	    pri >= dp->disp_maxrunpri &&
	    (cpup->cpu_flags & CPU_OFFLINE) == 0 &&
	    (tp = disp_getbest(kpq)) != NULL) {
		/* disp_ratify() rejects tp if a better choice appeared */
		if (disp_ratify(tp, kpq) != NULL) {
			TRACE_1(TR_FAC_DISP, TR_DISP_END,
			    "disp_end:tid %p", tp);
			restore_mstate(tp);
			return (tp);
		}
	}

	disp_lock_enter(&dp->disp_lock);
	pri = dp->disp_maxrunpri;

	/*
	 * If there is nothing to run, look at what's runnable on other queues.
	 * Choose the idle thread if the CPU is quiesced.
	 * Note that CPUs that have the CPU_OFFLINE flag set can still run
	 * interrupt threads, which will be the only threads on the CPU's own
	 * queue, but cannot run threads from other queues.
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL) {
				/* nothing anywhere; run the idle thread */
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			/* offline CPU: keep spl raised, choose idle thread */
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
			"disp_end:tid %p", tp);
		restore_mstate(tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it so remove it from queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.   If nrunnable != 0 just took the
		 * last runnable thread off the
		 * highest queue, so recompute disp_maxrunpri.
		 */
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
		"disp_end:tid %p", tp);

	/* If a better thread appeared meanwhile, start the search over. */
	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	restore_mstate(tp);
	return (tp);
}

/*
 * swtch()
 *	Find best runnable thread and run it.
 *	Called with the current thread already switched to a new state,
 *	on a sleep queue, run queue, stopped, and not zombied.
 *	May be called at any spl level less than or equal to LOCK_LEVEL.
 *	Always drops spl to the base level (spl0()).
 */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Setup and return
		 * the interrupted thread to be resumed.
		 */
		(void) splhigh();	/* block other scheduler action */
		cp = CPU;		/* now protected against migration */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */
		CPU_STATS_ADDQ(cp, sys, pswitch, 1);
		CPU_STATS_ADDQ(cp, sys, intrblk, 1);
		next = thread_unpin();
		TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
		resume_from_intr(next);
	} else {
#ifdef	DEBUG
		if (t->t_state == TS_ONPROC &&
		    t->t_disp_queue->disp_cpu == CPU &&
		    t->t_preempt == 0) {
			thread_lock(t);
			ASSERT(t->t_state != TS_ONPROC ||
			    t->t_disp_queue->disp_cpu != CPU ||
			    t->t_preempt != 0);	/* cannot migrate */
			thread_unlock_nopreempt(t);
		}
#endif	/* DEBUG */
		cp = CPU;
		next = disp();		/* returns with spl high */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */

		/* OK to steal anything left on run queue */
		cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

		if (next != t) {
			/* track chip-level running-thread count */
			if (t == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, 1);
			} else if (next == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, -1);
			}

			CPU_STATS_ADDQ(cp, sys, pswitch, 1);
			cp->cpu_last_swtch = t->t_disp_time = lbolt;
			TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

			if (dtrace_vtime_active)
				dtrace_vtime_switch(next);

			resume(next);
			/*
			 * The TR_RESUME_END and TR_SWTCH_END trace points
			 * appear at the end of resume(), because we may not
			 * return here
			 */
		} else {
			/* disp() chose us again; stay on this CPU */
			if (t->t_flag & T_INTR_THREAD)
				cpu_intr_swtch_exit(t);

			DTRACE_SCHED(remain__cpu);
			TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end");
			(void) spl0();
		}
	}
}

/*
 * swtch_from_zombie()
 *	Special case of swtch(), which allows checks for TS_ZOMB to be
 *	eliminated from normal resume.
 *	Find best runnable thread and run it.
 *	Called with the current thread zombied.
 *	Zombies cannot migrate, so CPU references are safe.
 */
void
swtch_from_zombie()
{
	kthread_t	*next;
	cpu_t		*cpu = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	ASSERT(curthread->t_state == TS_ZOMB);

	next = disp();			/* returns with spl high */
	ASSERT(CPU_ON_INTR(CPU) == 0);	/* not called with PIL > 10 */
	CPU_STATS_ADDQ(CPU, sys, pswitch, 1);
	ASSERT(next != curthread);
	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	if (next == cpu->cpu_idle_thread)
		CHIP_NRUNNING(cpu->cpu_chip, -1);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume_from_zombie(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we
certainly will not 9177c478bd9Sstevel@tonic-gate * return here 9187c478bd9Sstevel@tonic-gate */ 9197c478bd9Sstevel@tonic-gate } 9207c478bd9Sstevel@tonic-gate 9217c478bd9Sstevel@tonic-gate #if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint)) 9227c478bd9Sstevel@tonic-gate static int 9237c478bd9Sstevel@tonic-gate thread_on_queue(kthread_t *tp) 9247c478bd9Sstevel@tonic-gate { 9257c478bd9Sstevel@tonic-gate cpu_t *cp; 9267c478bd9Sstevel@tonic-gate cpu_t *self; 9277c478bd9Sstevel@tonic-gate disp_t *dp; 9287c478bd9Sstevel@tonic-gate 9297c478bd9Sstevel@tonic-gate self = CPU; 9307c478bd9Sstevel@tonic-gate cp = self->cpu_next_onln; 9317c478bd9Sstevel@tonic-gate dp = cp->cpu_disp; 9327c478bd9Sstevel@tonic-gate for (;;) { 9337c478bd9Sstevel@tonic-gate dispq_t *dq; 9347c478bd9Sstevel@tonic-gate dispq_t *eq; 9357c478bd9Sstevel@tonic-gate 9367c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 9377c478bd9Sstevel@tonic-gate for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) { 9387c478bd9Sstevel@tonic-gate kthread_t *rp; 9397c478bd9Sstevel@tonic-gate 9407c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL || 9417c478bd9Sstevel@tonic-gate dq->dq_last->t_link == NULL); 9427c478bd9Sstevel@tonic-gate for (rp = dq->dq_first; rp; rp = rp->t_link) 9437c478bd9Sstevel@tonic-gate if (tp == rp) { 9447c478bd9Sstevel@tonic-gate disp_lock_exit_high(&dp->disp_lock); 9457c478bd9Sstevel@tonic-gate return (1); 9467c478bd9Sstevel@tonic-gate } 9477c478bd9Sstevel@tonic-gate } 9487c478bd9Sstevel@tonic-gate disp_lock_exit_high(&dp->disp_lock); 9497c478bd9Sstevel@tonic-gate if (cp == NULL) 9507c478bd9Sstevel@tonic-gate break; 9517c478bd9Sstevel@tonic-gate if (cp == self) { 9527c478bd9Sstevel@tonic-gate cp = NULL; 9537c478bd9Sstevel@tonic-gate dp = &cp->cpu_part->cp_kp_queue; 9547c478bd9Sstevel@tonic-gate } else { 9557c478bd9Sstevel@tonic-gate cp = cp->cpu_next_onln; 9567c478bd9Sstevel@tonic-gate dp = cp->cpu_disp; 9577c478bd9Sstevel@tonic-gate } 9587c478bd9Sstevel@tonic-gate } 
9597c478bd9Sstevel@tonic-gate return (0); 9607c478bd9Sstevel@tonic-gate } /* end of thread_on_queue */ 9617c478bd9Sstevel@tonic-gate #else 9627c478bd9Sstevel@tonic-gate 9637c478bd9Sstevel@tonic-gate #define thread_on_queue(tp) 0 /* ASSERT must be !thread_on_queue */ 9647c478bd9Sstevel@tonic-gate 9657c478bd9Sstevel@tonic-gate #endif /* DEBUG */ 9667c478bd9Sstevel@tonic-gate 9677c478bd9Sstevel@tonic-gate /* 9687c478bd9Sstevel@tonic-gate * like swtch(), but switch to a specified thread taken from another CPU. 9697c478bd9Sstevel@tonic-gate * called with spl high.. 9707c478bd9Sstevel@tonic-gate */ 9717c478bd9Sstevel@tonic-gate void 9727c478bd9Sstevel@tonic-gate swtch_to(kthread_t *next) 9737c478bd9Sstevel@tonic-gate { 9747c478bd9Sstevel@tonic-gate cpu_t *cp = CPU; 9757c478bd9Sstevel@tonic-gate 9767c478bd9Sstevel@tonic-gate TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start"); 9777c478bd9Sstevel@tonic-gate 9787c478bd9Sstevel@tonic-gate /* 9797c478bd9Sstevel@tonic-gate * Update context switch statistics. 
	 */
	CPU_STATS_ADDQ(cp, sys, pswitch, 1);

	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	/* this CPU is leaving idle for "next" */
	if (curthread == cp->cpu_idle_thread)
		CHIP_NRUNNING(cp->cpu_chip, 1);

	/* OK to steal anything left on run queue */
	cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

	/* record last execution time */
	cp->cpu_last_swtch = curthread->t_disp_time = lbolt;

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we may not
	 * return here
	 */
}



#define	CPU_IDLING(pri)	((pri) == -1)

/*
 * cpu_resched()
 *	Flag CPU "cp" for a reschedule because a thread of priority "tpri"
 *	became runnable for it.  Sets cpu_runrun (user preemption) and/or
 *	cpu_kprunrun (kernel preemption), then pokes the target CPU if it is
 *	not the current one.  The membar_enter() below must follow the flag
 *	stores so other CPUs see them before any poke arrives.
 */
static void
cpu_resched(cpu_t *cp, pri_t tpri)
{
	int	call_poke_cpu = 0;
	pri_t   cpupri = cp->cpu_dispatch_pri;

	if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
		TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
		    "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
		if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
			cp->cpu_runrun = 1;
			aston(cp->cpu_dispthread);
			if (tpri < kpreemptpri && cp != CPU)
				call_poke_cpu = 1;
		}
		if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
			cp->cpu_kprunrun = 1;
			if (cp != CPU)
				call_poke_cpu = 1;
		}
	}

	/*
	 * Propagate cpu_runrun, and cpu_kprunrun to global visibility.
	 */
	membar_enter();

	if (call_poke_cpu)
		poke_cpu(cp->cpu_id);
}

/*
 * Routine used by setbackdq() to balance load across the physical
 * processors. Returns a CPU of a lesser loaded chip in the lgroup
 * if balancing is necessary, or the "hint" CPU if it's not.
 *
 *	- tp is the thread being enqueued
 *	- cp is a hint CPU (chosen by cpu_choose()).
 *	- curchip (if not NULL) is the chip on which the current thread
 *	  is running.
 *
 * The thread lock for "tp" must be held while calling this routine.
 */
static cpu_t *
chip_balance(kthread_t *tp, cpu_t *cp, chip_t *curchip)
{
	int	chp_nrun, ochp_nrun;
	chip_t	*chp, *nchp;

	chp = cp->cpu_chip;
	chp_nrun = chp->chip_nrunning;

	if (chp == curchip)
		chp_nrun--;	/* Ignore curthread */

	/*
	 * If this chip isn't at all idle, then let
	 * run queue balancing do the work.
	 */
	if (chp_nrun == chp->chip_ncpu)
		return (cp);

	/*
	 * Walk the lgroup's chip list starting from this chip's balance
	 * rotor, looking for a less-loaded chip in tp's partition.
	 */
	nchp = chp->chip_balance;
	do {
		if (nchp == chp ||
		    !CHIP_IN_CPUPART(nchp, tp->t_cpupart))
			continue;

		ochp_nrun = nchp->chip_nrunning;

		/*
		 * If the other chip is running less threads,
		 * or if it's running the same number of threads, but
		 * has more online logical CPUs, then choose to balance.
		 */
		if (chp_nrun > ochp_nrun ||
		    (chp_nrun == ochp_nrun &&
		    nchp->chip_ncpu > chp->chip_ncpu)) {
			cp = nchp->chip_cpus;
			nchp->chip_cpus = cp->cpu_next_chip;

			/*
			 * Find a CPU on the chip in the correct
			 * partition.  We know at least one exists
			 * because of the CHIP_IN_CPUPART() check above.
			 */
			while (cp->cpu_part != tp->t_cpupart)
				cp = cp->cpu_next_chip;
		}
		/* advance the rotor so the next call starts elsewhere */
		chp->chip_balance = nchp->chip_next_lgrp;
		break;
	} while ((nchp = nchp->chip_next_lgrp) != chp->chip_balance);

	ASSERT(CHIP_IN_CPUPART(cp->cpu_chip, tp->t_cpupart));
	return (cp);
}

/*
 * setbackdq() keeps runqs balanced such that the difference in length
 * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
 * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths
 * must match.  When per-thread TS_RUNQMATCH flag is set, setbackdq() will
 * try to keep runqs perfectly balanced regardless of the thread priority.
 */
#define	RUNQ_MATCH_PRI	16	/* pri below which queue lengths must match */
#define	RUNQ_MAX_DIFF	2	/* maximum runq length difference */
#define	RUNQ_LEN(cp, pri)	((cp)->cpu_disp->disp_q[pri].dq_sruncnt)

/*
 * Put the specified thread on the back of the dispatcher
 * queue corresponding to its current priority.
 *
 * Called with the thread in transition, onproc or stopped state
 * and locked (transition implies locked) and at high spl.
 * Returns with the thread in TS_RUN state and still locked.
 */
void
setbackdq(kthread_t *tp)
{
	dispq_t	*dq;
	disp_t		*dp;
	chip_t		*curchip = NULL;
	cpu_t		*cp;
	pri_t		tpri;
	int		bound;

	ASSERT(THREAD_LOCK_HELD(tp));
	ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);

	/*
	 * Update the thread's CPU-usage percentage and stamp the time it
	 * started waiting on a run queue (t_waitrq), if not already set.
	 */
	if (tp->t_waitrq == 0) {
		hrtime_t curtime;

		curtime = gethrtime_unscaled();
		(void) cpu_update_pct(tp, curtime);
		tp->t_waitrq = curtime;
	} else {
		(void) cpu_update_pct(tp, gethrtime_unscaled());
	}

	ASSERT(!thread_on_queue(tp));	/* make sure tp isn't on a runq */

	/*
	 * If thread is "swapped" or on the swap queue don't
	 * queue it, but wake sched.
	 */
	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
		disp_swapped_setrun(tp);
		return;
	}

	tpri = DISP_PRIO(tp);
	if (tp == curthread) {
		curchip = CPU->cpu_chip;
	}

	if (ncpus == 1)
		cp = tp->t_cpu;
	else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) {
		/* high-priority threads go on the partition's kp queue */
		if (tpri >= kpqpri) {
			setkpdq(tp, SETKP_BACK);
			return;
		}
		/*
		 * Let cpu_choose suggest a CPU.
		 */
		cp = cpu_choose(tp, tpri);

		if (tp->t_cpupart == cp->cpu_part) {
			int	qlen;

			/*
			 * Select another CPU if we need
			 * to do some load balancing across the
			 * physical processors.
			 */
			if (CHIP_SHOULD_BALANCE(cp->cpu_chip))
				cp = chip_balance(tp, cp, curchip);

			/*
			 * Balance across the run queues
			 */
			qlen = RUNQ_LEN(cp, tpri);
			if (tpri >= RUNQ_MATCH_PRI &&
			    !(tp->t_schedflag & TS_RUNQMATCH))
				qlen -= RUNQ_MAX_DIFF;
			if (qlen > 0) {
				cpu_t	*np;

				if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID)
					np = cp->cpu_next_part;
				else {
					if ((np = cp->cpu_next_lpl) == cp)
						np = cp->cpu_next_part;
				}
				if (RUNQ_LEN(np, tpri) < qlen)
					cp = np;
			}
		} else {
			/*
			 * Migrate to a cpu in the new partition.
			 */
			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
			    tp->t_lpl, tp->t_pri, NULL);
		}
		bound = 0;
		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
	} else {
		/*
		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
		 * a short time until weak binding that existed when the
		 * strong binding was established has dropped) so we must
		 * favour weak binding over strong.
		 */
		cp = tp->t_weakbound_cpu ?
		    tp->t_weakbound_cpu : tp->t_bound_cpu;
		bound = 1;
	}
	dp = cp->cpu_disp;
	disp_lock_enter_high(&dp->disp_lock);

	DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0);
	TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
		tpri, cp, tp);

#ifndef NPROBE
	/* Kernel probe */
	if (tnf_tracing_active)
		tnf_thread_queue(tp, cp, tpri);
#endif /* NPROBE */

	ASSERT(tpri >= 0 && tpri < dp->disp_npri);

	THREAD_RUN(tp, &dp->disp_lock);		/* set t_state to TS_RUN */
	tp->t_disp_queue = dp;
	tp->t_link = NULL;

	dq = &dp->disp_q[tpri];
	dp->disp_nrunnable++;
	/* make the updated count globally visible before linking tp in */
	membar_enter();

	if (dq->dq_sruncnt++ != 0) {
		/* queue non-empty: append to the tail */
		ASSERT(dq->dq_first != NULL);
		dq->dq_last->t_link = tp;
		dq->dq_last = tp;
	} else {
		/* first thread at this priority: set bitmap, maybe resched */
		ASSERT(dq->dq_first == NULL);
		ASSERT(dq->dq_last == NULL);
		dq->dq_first = dq->dq_last = tp;
		BT_SET(dp->disp_qactmap, tpri);
		if (tpri > dp->disp_maxrunpri) {
			dp->disp_maxrunpri = tpri;
			membar_enter();
			cpu_resched(cp, tpri);
		}
	}

	if (!bound && tpri > dp->disp_max_unbound_pri) {
		if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
		    cp == CPU) {
			/*
			 * If there are no other unbound threads on the
			 * run queue, don't allow other CPUs to steal
			 * this thread while we are in the middle of a
			 * context switch. We may just switch to it
			 * again right away. CPU_DISP_DONTSTEAL is cleared
			 * in swtch and swtch_to.
12757c478bd9Sstevel@tonic-gate */ 12767c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 12777c478bd9Sstevel@tonic-gate } 12787c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 12797c478bd9Sstevel@tonic-gate } 12807c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound); 12817c478bd9Sstevel@tonic-gate } 12827c478bd9Sstevel@tonic-gate 12837c478bd9Sstevel@tonic-gate /* 12847c478bd9Sstevel@tonic-gate * Put the specified thread on the front of the dispatcher 12857c478bd9Sstevel@tonic-gate * queue corresponding to its current priority. 12867c478bd9Sstevel@tonic-gate * 12877c478bd9Sstevel@tonic-gate * Called with the thread in transition, onproc or stopped state 12887c478bd9Sstevel@tonic-gate * and locked (transition implies locked) and at high spl. 12897c478bd9Sstevel@tonic-gate * Returns with the thread in TS_RUN state and still locked. 12907c478bd9Sstevel@tonic-gate */ 12917c478bd9Sstevel@tonic-gate void 12927c478bd9Sstevel@tonic-gate setfrontdq(kthread_t *tp) 12937c478bd9Sstevel@tonic-gate { 12947c478bd9Sstevel@tonic-gate disp_t *dp; 12957c478bd9Sstevel@tonic-gate dispq_t *dq; 12967c478bd9Sstevel@tonic-gate cpu_t *cp; 12977c478bd9Sstevel@tonic-gate pri_t tpri; 12987c478bd9Sstevel@tonic-gate int bound; 12997c478bd9Sstevel@tonic-gate 13007c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 13017c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & TS_ALLSTART) == 0); 13027c478bd9Sstevel@tonic-gate 13037c478bd9Sstevel@tonic-gate if (tp->t_waitrq == 0) { 13047c478bd9Sstevel@tonic-gate hrtime_t curtime; 13057c478bd9Sstevel@tonic-gate 13067c478bd9Sstevel@tonic-gate curtime = gethrtime_unscaled(); 13077c478bd9Sstevel@tonic-gate (void) cpu_update_pct(tp, curtime); 13087c478bd9Sstevel@tonic-gate tp->t_waitrq = curtime; 13097c478bd9Sstevel@tonic-gate } else { 13107c478bd9Sstevel@tonic-gate (void) cpu_update_pct(tp, gethrtime_unscaled()); 13117c478bd9Sstevel@tonic-gate } 13127c478bd9Sstevel@tonic-gate 13137c478bd9Sstevel@tonic-gate 
ASSERT(!thread_on_queue(tp)); /* make sure tp isn't on a runq */ 13147c478bd9Sstevel@tonic-gate 13157c478bd9Sstevel@tonic-gate /* 13167c478bd9Sstevel@tonic-gate * If thread is "swapped" or on the swap queue don't 13177c478bd9Sstevel@tonic-gate * queue it, but wake sched. 13187c478bd9Sstevel@tonic-gate */ 13197c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) { 13207c478bd9Sstevel@tonic-gate disp_swapped_setrun(tp); 13217c478bd9Sstevel@tonic-gate return; 13227c478bd9Sstevel@tonic-gate } 13237c478bd9Sstevel@tonic-gate 13247c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 13257c478bd9Sstevel@tonic-gate if (ncpus == 1) 13267c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 13277c478bd9Sstevel@tonic-gate else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) { 13287c478bd9Sstevel@tonic-gate if (tpri >= kpqpri) { 13297c478bd9Sstevel@tonic-gate setkpdq(tp, SETKP_FRONT); 13307c478bd9Sstevel@tonic-gate return; 13317c478bd9Sstevel@tonic-gate } 13327c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 13337c478bd9Sstevel@tonic-gate if (tp->t_cpupart == cp->cpu_part) { 13347c478bd9Sstevel@tonic-gate /* 13357c478bd9Sstevel@tonic-gate * If we are of higher or equal priority than 13367c478bd9Sstevel@tonic-gate * the highest priority runnable thread of 13377c478bd9Sstevel@tonic-gate * the current CPU, just pick this CPU. Otherwise 13387c478bd9Sstevel@tonic-gate * Let cpu_choose() select the CPU. If this cpu 13397c478bd9Sstevel@tonic-gate * is the target of an offline request then do not 13407c478bd9Sstevel@tonic-gate * pick it - a thread_nomigrate() on the in motion 13417c478bd9Sstevel@tonic-gate * cpu relies on this when it forces a preempt. 
13427c478bd9Sstevel@tonic-gate */ 13437c478bd9Sstevel@tonic-gate if (tpri < cp->cpu_disp->disp_maxrunpri || 13447c478bd9Sstevel@tonic-gate cp == cpu_inmotion) 13457c478bd9Sstevel@tonic-gate cp = cpu_choose(tp, tpri); 13467c478bd9Sstevel@tonic-gate } else { 13477c478bd9Sstevel@tonic-gate /* 13487c478bd9Sstevel@tonic-gate * Migrate to a cpu in the new partition. 13497c478bd9Sstevel@tonic-gate */ 13507c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist, 13517c478bd9Sstevel@tonic-gate tp->t_lpl, tp->t_pri, NULL); 13527c478bd9Sstevel@tonic-gate } 13537c478bd9Sstevel@tonic-gate bound = 0; 13547c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 13557c478bd9Sstevel@tonic-gate } else { 13567c478bd9Sstevel@tonic-gate /* 13577c478bd9Sstevel@tonic-gate * It is possible that t_weakbound_cpu != t_bound_cpu (for 13587c478bd9Sstevel@tonic-gate * a short time until weak binding that existed when the 13597c478bd9Sstevel@tonic-gate * strong binding was established has dropped) so we must 13607c478bd9Sstevel@tonic-gate * favour weak binding over strong. 13617c478bd9Sstevel@tonic-gate */ 13627c478bd9Sstevel@tonic-gate cp = tp->t_weakbound_cpu ? 
13637c478bd9Sstevel@tonic-gate tp->t_weakbound_cpu : tp->t_bound_cpu; 13647c478bd9Sstevel@tonic-gate bound = 1; 13657c478bd9Sstevel@tonic-gate } 13667c478bd9Sstevel@tonic-gate dp = cp->cpu_disp; 13677c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 13687c478bd9Sstevel@tonic-gate 13697c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 13707c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1); 13717c478bd9Sstevel@tonic-gate 13727c478bd9Sstevel@tonic-gate #ifndef NPROBE 13737c478bd9Sstevel@tonic-gate /* Kernel probe */ 13747c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 13757c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 13767c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 13777c478bd9Sstevel@tonic-gate 13787c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 13797c478bd9Sstevel@tonic-gate 13807c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set TS_RUN state and lock */ 13817c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 13827c478bd9Sstevel@tonic-gate 13837c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 13847c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 13857c478bd9Sstevel@tonic-gate membar_enter(); 13867c478bd9Sstevel@tonic-gate 13877c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 13887c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL); 13897c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first; 13907c478bd9Sstevel@tonic-gate dq->dq_first = tp; 13917c478bd9Sstevel@tonic-gate } else { 13927c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 13937c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 13947c478bd9Sstevel@tonic-gate tp->t_link = NULL; 13957c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 13967c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 13977c478bd9Sstevel@tonic-gate if (tpri > dp->disp_maxrunpri) { 13987c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 
13997c478bd9Sstevel@tonic-gate membar_enter(); 14007c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 14017c478bd9Sstevel@tonic-gate } 14027c478bd9Sstevel@tonic-gate } 14037c478bd9Sstevel@tonic-gate 14047c478bd9Sstevel@tonic-gate if (!bound && tpri > dp->disp_max_unbound_pri) { 14057c478bd9Sstevel@tonic-gate if (tp == curthread && dp->disp_max_unbound_pri == -1 && 14067c478bd9Sstevel@tonic-gate cp == CPU) { 14077c478bd9Sstevel@tonic-gate /* 14087c478bd9Sstevel@tonic-gate * If there are no other unbound threads on the 14097c478bd9Sstevel@tonic-gate * run queue, don't allow other CPUs to steal 14107c478bd9Sstevel@tonic-gate * this thread while we are in the middle of a 14117c478bd9Sstevel@tonic-gate * context switch. We may just switch to it 14127c478bd9Sstevel@tonic-gate * again right away. CPU_DISP_DONTSTEAL is cleared 14137c478bd9Sstevel@tonic-gate * in swtch and swtch_to. 14147c478bd9Sstevel@tonic-gate */ 14157c478bd9Sstevel@tonic-gate cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL; 14167c478bd9Sstevel@tonic-gate } 14177c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 14187c478bd9Sstevel@tonic-gate } 14197c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, bound); 14207c478bd9Sstevel@tonic-gate } 14217c478bd9Sstevel@tonic-gate 14227c478bd9Sstevel@tonic-gate /* 14237c478bd9Sstevel@tonic-gate * Put a high-priority unbound thread on the kp queue 14247c478bd9Sstevel@tonic-gate */ 14257c478bd9Sstevel@tonic-gate static void 14267c478bd9Sstevel@tonic-gate setkpdq(kthread_t *tp, int borf) 14277c478bd9Sstevel@tonic-gate { 14287c478bd9Sstevel@tonic-gate dispq_t *dq; 14297c478bd9Sstevel@tonic-gate disp_t *dp; 14307c478bd9Sstevel@tonic-gate cpu_t *cp; 14317c478bd9Sstevel@tonic-gate pri_t tpri; 14327c478bd9Sstevel@tonic-gate 14337c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 14347c478bd9Sstevel@tonic-gate 14357c478bd9Sstevel@tonic-gate dp = &tp->t_cpupart->cp_kp_queue; 14367c478bd9Sstevel@tonic-gate disp_lock_enter_high(&dp->disp_lock); 14377c478bd9Sstevel@tonic-gate 
14387c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp); 14397c478bd9Sstevel@tonic-gate 14407c478bd9Sstevel@tonic-gate ASSERT(tpri >= 0 && tpri < dp->disp_npri); 14417c478bd9Sstevel@tonic-gate DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf); 14427c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &dp->disp_lock); /* set t_state to TS_RUN */ 14437c478bd9Sstevel@tonic-gate tp->t_disp_queue = dp; 14447c478bd9Sstevel@tonic-gate dp->disp_nrunnable++; 14457c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 14467c478bd9Sstevel@tonic-gate 14477c478bd9Sstevel@tonic-gate if (dq->dq_sruncnt++ != 0) { 14487c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) { 14497c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first != NULL); 14507c478bd9Sstevel@tonic-gate tp->t_link = NULL; 14517c478bd9Sstevel@tonic-gate dq->dq_last->t_link = tp; 14527c478bd9Sstevel@tonic-gate dq->dq_last = tp; 14537c478bd9Sstevel@tonic-gate } else { 14547c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last != NULL); 14557c478bd9Sstevel@tonic-gate tp->t_link = dq->dq_first; 14567c478bd9Sstevel@tonic-gate dq->dq_first = tp; 14577c478bd9Sstevel@tonic-gate } 14587c478bd9Sstevel@tonic-gate } else { 14597c478bd9Sstevel@tonic-gate if (borf == SETKP_BACK) { 14607c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 14617c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 14627c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 14637c478bd9Sstevel@tonic-gate } else { 14647c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL); 14657c478bd9Sstevel@tonic-gate ASSERT(dq->dq_first == NULL); 14667c478bd9Sstevel@tonic-gate tp->t_link = NULL; 14677c478bd9Sstevel@tonic-gate dq->dq_first = dq->dq_last = tp; 14687c478bd9Sstevel@tonic-gate } 14697c478bd9Sstevel@tonic-gate BT_SET(dp->disp_qactmap, tpri); 14707c478bd9Sstevel@tonic-gate if (tpri > dp->disp_max_unbound_pri) 14717c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = tpri; 14727c478bd9Sstevel@tonic-gate if (tpri > 
dp->disp_maxrunpri) { 14737c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = tpri; 14747c478bd9Sstevel@tonic-gate membar_enter(); 14757c478bd9Sstevel@tonic-gate } 14767c478bd9Sstevel@tonic-gate } 14777c478bd9Sstevel@tonic-gate 14787c478bd9Sstevel@tonic-gate cp = tp->t_cpu; 14797c478bd9Sstevel@tonic-gate if (tp->t_cpupart != cp->cpu_part) { 14807c478bd9Sstevel@tonic-gate /* migrate to a cpu in the new partition */ 14817c478bd9Sstevel@tonic-gate cp = tp->t_cpupart->cp_cpulist; 14827c478bd9Sstevel@tonic-gate } 14837c478bd9Sstevel@tonic-gate cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL); 14847c478bd9Sstevel@tonic-gate disp_lock_enter_high(&cp->cpu_disp->disp_lock); 14857c478bd9Sstevel@tonic-gate ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0); 14867c478bd9Sstevel@tonic-gate 14877c478bd9Sstevel@tonic-gate #ifndef NPROBE 14887c478bd9Sstevel@tonic-gate /* Kernel probe */ 14897c478bd9Sstevel@tonic-gate if (tnf_tracing_active) 14907c478bd9Sstevel@tonic-gate tnf_thread_queue(tp, cp, tpri); 14917c478bd9Sstevel@tonic-gate #endif /* NPROBE */ 14927c478bd9Sstevel@tonic-gate 14937c478bd9Sstevel@tonic-gate if (cp->cpu_chosen_level < tpri) 14947c478bd9Sstevel@tonic-gate cp->cpu_chosen_level = tpri; 14957c478bd9Sstevel@tonic-gate cpu_resched(cp, tpri); 14967c478bd9Sstevel@tonic-gate disp_lock_exit_high(&cp->cpu_disp->disp_lock); 14977c478bd9Sstevel@tonic-gate (*disp_enq_thread)(cp, 0); 14987c478bd9Sstevel@tonic-gate } 14997c478bd9Sstevel@tonic-gate 15007c478bd9Sstevel@tonic-gate /* 15017c478bd9Sstevel@tonic-gate * Remove a thread from the dispatcher queue if it is on it. 15027c478bd9Sstevel@tonic-gate * It is not an error if it is not found but we return whether 15037c478bd9Sstevel@tonic-gate * or not it was found in case the caller wants to check. 
15047c478bd9Sstevel@tonic-gate */ 15057c478bd9Sstevel@tonic-gate int 15067c478bd9Sstevel@tonic-gate dispdeq(kthread_t *tp) 15077c478bd9Sstevel@tonic-gate { 15087c478bd9Sstevel@tonic-gate disp_t *dp; 15097c478bd9Sstevel@tonic-gate dispq_t *dq; 15107c478bd9Sstevel@tonic-gate kthread_t *rp; 15117c478bd9Sstevel@tonic-gate kthread_t *trp; 15127c478bd9Sstevel@tonic-gate kthread_t **ptp; 15137c478bd9Sstevel@tonic-gate int tpri; 15147c478bd9Sstevel@tonic-gate 15157c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 15167c478bd9Sstevel@tonic-gate 15177c478bd9Sstevel@tonic-gate if (tp->t_state != TS_RUN) 15187c478bd9Sstevel@tonic-gate return (0); 15197c478bd9Sstevel@tonic-gate 15207c478bd9Sstevel@tonic-gate /* 15217c478bd9Sstevel@tonic-gate * The thread is "swapped" or is on the swap queue and 15227c478bd9Sstevel@tonic-gate * hence no longer on the run queue, so return true. 15237c478bd9Sstevel@tonic-gate */ 15247c478bd9Sstevel@tonic-gate if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) 15257c478bd9Sstevel@tonic-gate return (1); 15267c478bd9Sstevel@tonic-gate 15277c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 15287c478bd9Sstevel@tonic-gate dp = tp->t_disp_queue; 15297c478bd9Sstevel@tonic-gate ASSERT(tpri < dp->disp_npri); 15307c478bd9Sstevel@tonic-gate dq = &dp->disp_q[tpri]; 15317c478bd9Sstevel@tonic-gate ptp = &dq->dq_first; 15327c478bd9Sstevel@tonic-gate rp = *ptp; 15337c478bd9Sstevel@tonic-gate trp = NULL; 15347c478bd9Sstevel@tonic-gate 15357c478bd9Sstevel@tonic-gate ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL); 15367c478bd9Sstevel@tonic-gate 15377c478bd9Sstevel@tonic-gate /* 15387c478bd9Sstevel@tonic-gate * Search for thread in queue. 15397c478bd9Sstevel@tonic-gate * Double links would simplify this at the expense of disp/setrun. 
15407c478bd9Sstevel@tonic-gate */ 15417c478bd9Sstevel@tonic-gate while (rp != tp && rp != NULL) { 15427c478bd9Sstevel@tonic-gate trp = rp; 15437c478bd9Sstevel@tonic-gate ptp = &trp->t_link; 15447c478bd9Sstevel@tonic-gate rp = trp->t_link; 15457c478bd9Sstevel@tonic-gate } 15467c478bd9Sstevel@tonic-gate 15477c478bd9Sstevel@tonic-gate if (rp == NULL) { 15487c478bd9Sstevel@tonic-gate panic("dispdeq: thread not on queue"); 15497c478bd9Sstevel@tonic-gate } 15507c478bd9Sstevel@tonic-gate 15517c478bd9Sstevel@tonic-gate DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp); 15527c478bd9Sstevel@tonic-gate 15537c478bd9Sstevel@tonic-gate /* 15547c478bd9Sstevel@tonic-gate * Found it so remove it from queue. 15557c478bd9Sstevel@tonic-gate */ 15567c478bd9Sstevel@tonic-gate if ((*ptp = rp->t_link) == NULL) 15577c478bd9Sstevel@tonic-gate dq->dq_last = trp; 15587c478bd9Sstevel@tonic-gate 15597c478bd9Sstevel@tonic-gate dp->disp_nrunnable--; 15607c478bd9Sstevel@tonic-gate if (--dq->dq_sruncnt == 0) { 15617c478bd9Sstevel@tonic-gate dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri); 15627c478bd9Sstevel@tonic-gate if (dp->disp_nrunnable == 0) { 15637c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = -1; 15647c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = -1; 15657c478bd9Sstevel@tonic-gate } else if (tpri == dp->disp_maxrunpri) { 15667c478bd9Sstevel@tonic-gate int ipri; 15677c478bd9Sstevel@tonic-gate 15687c478bd9Sstevel@tonic-gate ipri = bt_gethighbit(dp->disp_qactmap, 15697c478bd9Sstevel@tonic-gate dp->disp_maxrunpri >> BT_ULSHIFT); 15707c478bd9Sstevel@tonic-gate if (ipri < dp->disp_max_unbound_pri) 15717c478bd9Sstevel@tonic-gate dp->disp_max_unbound_pri = ipri; 15727c478bd9Sstevel@tonic-gate dp->disp_maxrunpri = ipri; 15737c478bd9Sstevel@tonic-gate } 15747c478bd9Sstevel@tonic-gate } 15757c478bd9Sstevel@tonic-gate tp->t_link = NULL; 15767c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); /* put in intermediate state */ 15777c478bd9Sstevel@tonic-gate return (1); 
15787c478bd9Sstevel@tonic-gate } 15797c478bd9Sstevel@tonic-gate 15807c478bd9Sstevel@tonic-gate 15817c478bd9Sstevel@tonic-gate /* 15827c478bd9Sstevel@tonic-gate * dq_sruninc and dq_srundec are public functions for 15837c478bd9Sstevel@tonic-gate * incrementing/decrementing the sruncnts when a thread on 15847c478bd9Sstevel@tonic-gate * a dispatcher queue is made schedulable/unschedulable by 15857c478bd9Sstevel@tonic-gate * resetting the TS_LOAD flag. 15867c478bd9Sstevel@tonic-gate * 15877c478bd9Sstevel@tonic-gate * The caller MUST have the thread lock and therefore the dispatcher 15887c478bd9Sstevel@tonic-gate * queue lock so that the operation which changes 15897c478bd9Sstevel@tonic-gate * the flag, the operation that checks the status of the thread to 15907c478bd9Sstevel@tonic-gate * determine if it's on a disp queue AND the call to this function 15917c478bd9Sstevel@tonic-gate * are one atomic operation with respect to interrupts. 15927c478bd9Sstevel@tonic-gate */ 15937c478bd9Sstevel@tonic-gate 15947c478bd9Sstevel@tonic-gate /* 15957c478bd9Sstevel@tonic-gate * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread. 15967c478bd9Sstevel@tonic-gate */ 15977c478bd9Sstevel@tonic-gate void 15987c478bd9Sstevel@tonic-gate dq_sruninc(kthread_t *t) 15997c478bd9Sstevel@tonic-gate { 16007c478bd9Sstevel@tonic-gate ASSERT(t->t_state == TS_RUN); 16017c478bd9Sstevel@tonic-gate ASSERT(t->t_schedflag & TS_LOAD); 16027c478bd9Sstevel@tonic-gate 16037c478bd9Sstevel@tonic-gate THREAD_TRANSITION(t); 16047c478bd9Sstevel@tonic-gate setfrontdq(t); 16057c478bd9Sstevel@tonic-gate } 16067c478bd9Sstevel@tonic-gate 16077c478bd9Sstevel@tonic-gate /* 16087c478bd9Sstevel@tonic-gate * See comment on calling conventions above. 16097c478bd9Sstevel@tonic-gate * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread. 
16107c478bd9Sstevel@tonic-gate */ 16117c478bd9Sstevel@tonic-gate void 16127c478bd9Sstevel@tonic-gate dq_srundec(kthread_t *t) 16137c478bd9Sstevel@tonic-gate { 16147c478bd9Sstevel@tonic-gate ASSERT(t->t_schedflag & TS_LOAD); 16157c478bd9Sstevel@tonic-gate 16167c478bd9Sstevel@tonic-gate (void) dispdeq(t); 16177c478bd9Sstevel@tonic-gate disp_swapped_enq(t); 16187c478bd9Sstevel@tonic-gate } 16197c478bd9Sstevel@tonic-gate 16207c478bd9Sstevel@tonic-gate /* 16217c478bd9Sstevel@tonic-gate * Change the dispatcher lock of thread to the "swapped_lock" 16227c478bd9Sstevel@tonic-gate * and return with thread lock still held. 16237c478bd9Sstevel@tonic-gate * 16247c478bd9Sstevel@tonic-gate * Called with thread_lock held, in transition state, and at high spl. 16257c478bd9Sstevel@tonic-gate */ 16267c478bd9Sstevel@tonic-gate void 16277c478bd9Sstevel@tonic-gate disp_swapped_enq(kthread_t *tp) 16287c478bd9Sstevel@tonic-gate { 16297c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 16307c478bd9Sstevel@tonic-gate ASSERT(tp->t_schedflag & TS_LOAD); 16317c478bd9Sstevel@tonic-gate 16327c478bd9Sstevel@tonic-gate switch (tp->t_state) { 16337c478bd9Sstevel@tonic-gate case TS_RUN: 16347c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock); 16357c478bd9Sstevel@tonic-gate THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 16367c478bd9Sstevel@tonic-gate break; 16377c478bd9Sstevel@tonic-gate case TS_ONPROC: 16387c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock); 16397c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); 16407c478bd9Sstevel@tonic-gate wake_sched_sec = 1; /* tell clock to wake sched */ 16417c478bd9Sstevel@tonic-gate THREAD_SWAP(tp, &swapped_lock); /* set TS_RUN state and lock */ 16427c478bd9Sstevel@tonic-gate break; 16437c478bd9Sstevel@tonic-gate default: 16447c478bd9Sstevel@tonic-gate panic("disp_swapped: tp: %p bad t_state", (void *)tp); 16457c478bd9Sstevel@tonic-gate } 16467c478bd9Sstevel@tonic-gate } 16477c478bd9Sstevel@tonic-gate 
16487c478bd9Sstevel@tonic-gate /* 16497c478bd9Sstevel@tonic-gate * This routine is called by setbackdq/setfrontdq if the thread is 16507c478bd9Sstevel@tonic-gate * not loaded or loaded and on the swap queue. 16517c478bd9Sstevel@tonic-gate * 16527c478bd9Sstevel@tonic-gate * Thread state TS_SLEEP implies that a swapped thread 16537c478bd9Sstevel@tonic-gate * has been woken up and needs to be swapped in by the swapper. 16547c478bd9Sstevel@tonic-gate * 16557c478bd9Sstevel@tonic-gate * Thread state TS_RUN, it implies that the priority of a swapped 16567c478bd9Sstevel@tonic-gate * thread is being increased by scheduling class (e.g. ts_update). 16577c478bd9Sstevel@tonic-gate */ 16587c478bd9Sstevel@tonic-gate static void 16597c478bd9Sstevel@tonic-gate disp_swapped_setrun(kthread_t *tp) 16607c478bd9Sstevel@tonic-gate { 16617c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 16627c478bd9Sstevel@tonic-gate ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD); 16637c478bd9Sstevel@tonic-gate 16647c478bd9Sstevel@tonic-gate switch (tp->t_state) { 16657c478bd9Sstevel@tonic-gate case TS_SLEEP: 16667c478bd9Sstevel@tonic-gate disp_lock_enter_high(&swapped_lock); 16677c478bd9Sstevel@tonic-gate /* 16687c478bd9Sstevel@tonic-gate * Wakeup sched immediately (i.e., next tick) if the 16697c478bd9Sstevel@tonic-gate * thread priority is above maxclsyspri. 
16707c478bd9Sstevel@tonic-gate */ 16717c478bd9Sstevel@tonic-gate if (DISP_PRIO(tp) > maxclsyspri) 16727c478bd9Sstevel@tonic-gate wake_sched = 1; 16737c478bd9Sstevel@tonic-gate else 16747c478bd9Sstevel@tonic-gate wake_sched_sec = 1; 16757c478bd9Sstevel@tonic-gate THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */ 16767c478bd9Sstevel@tonic-gate break; 16777c478bd9Sstevel@tonic-gate case TS_RUN: /* called from ts_update */ 16787c478bd9Sstevel@tonic-gate break; 16797c478bd9Sstevel@tonic-gate default: 16807c478bd9Sstevel@tonic-gate panic("disp_swapped_setrun: tp: %p bad t_state", tp); 16817c478bd9Sstevel@tonic-gate } 16827c478bd9Sstevel@tonic-gate } 16837c478bd9Sstevel@tonic-gate 16847c478bd9Sstevel@tonic-gate 16857c478bd9Sstevel@tonic-gate /* 16867c478bd9Sstevel@tonic-gate * Make a thread give up its processor. Find the processor on 16877c478bd9Sstevel@tonic-gate * which this thread is executing, and have that processor 16887c478bd9Sstevel@tonic-gate * preempt. 16897c478bd9Sstevel@tonic-gate */ 16907c478bd9Sstevel@tonic-gate void 16917c478bd9Sstevel@tonic-gate cpu_surrender(kthread_t *tp) 16927c478bd9Sstevel@tonic-gate { 16937c478bd9Sstevel@tonic-gate cpu_t *cpup; 16947c478bd9Sstevel@tonic-gate int max_pri; 16957c478bd9Sstevel@tonic-gate int max_run_pri; 16967c478bd9Sstevel@tonic-gate klwp_t *lwp; 16977c478bd9Sstevel@tonic-gate 16987c478bd9Sstevel@tonic-gate ASSERT(THREAD_LOCK_HELD(tp)); 16997c478bd9Sstevel@tonic-gate 17007c478bd9Sstevel@tonic-gate if (tp->t_state != TS_ONPROC) 17017c478bd9Sstevel@tonic-gate return; 17027c478bd9Sstevel@tonic-gate cpup = tp->t_disp_queue->disp_cpu; /* CPU thread dispatched to */ 17037c478bd9Sstevel@tonic-gate max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */ 17047c478bd9Sstevel@tonic-gate max_run_pri = CP_MAXRUNPRI(cpup->cpu_part); 17057c478bd9Sstevel@tonic-gate if (max_pri < max_run_pri) 17067c478bd9Sstevel@tonic-gate max_pri = max_run_pri; 17077c478bd9Sstevel@tonic-gate 17087c478bd9Sstevel@tonic-gate 
cpup->cpu_runrun = 1; 17097c478bd9Sstevel@tonic-gate if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) { 17107c478bd9Sstevel@tonic-gate cpup->cpu_kprunrun = 1; 17117c478bd9Sstevel@tonic-gate } 17127c478bd9Sstevel@tonic-gate 17137c478bd9Sstevel@tonic-gate /* 17147c478bd9Sstevel@tonic-gate * Propagate cpu_runrun, and cpu_kprunrun to global visibility. 17157c478bd9Sstevel@tonic-gate */ 17167c478bd9Sstevel@tonic-gate membar_enter(); 17177c478bd9Sstevel@tonic-gate 17187c478bd9Sstevel@tonic-gate DTRACE_SCHED1(surrender, kthread_t *, tp); 17197c478bd9Sstevel@tonic-gate 17207c478bd9Sstevel@tonic-gate /* 17217c478bd9Sstevel@tonic-gate * Make the target thread take an excursion through trap() 17227c478bd9Sstevel@tonic-gate * to do preempt() (unless we're already in trap or post_syscall, 17237c478bd9Sstevel@tonic-gate * calling cpu_surrender via CL_TRAPRET). 17247c478bd9Sstevel@tonic-gate */ 17257c478bd9Sstevel@tonic-gate if (tp != curthread || (lwp = tp->t_lwp) == NULL || 17267c478bd9Sstevel@tonic-gate lwp->lwp_state != LWP_USER) { 17277c478bd9Sstevel@tonic-gate aston(tp); 17287c478bd9Sstevel@tonic-gate if (cpup != CPU) 17297c478bd9Sstevel@tonic-gate poke_cpu(cpup->cpu_id); 17307c478bd9Sstevel@tonic-gate } 17317c478bd9Sstevel@tonic-gate TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER, 17327c478bd9Sstevel@tonic-gate "cpu_surrender:tid %p cpu %p", tp, cpup); 17337c478bd9Sstevel@tonic-gate } 17347c478bd9Sstevel@tonic-gate 17357c478bd9Sstevel@tonic-gate 17367c478bd9Sstevel@tonic-gate /* 17377c478bd9Sstevel@tonic-gate * Commit to and ratify a scheduling decision 17387c478bd9Sstevel@tonic-gate */ 17397c478bd9Sstevel@tonic-gate /*ARGSUSED*/ 17407c478bd9Sstevel@tonic-gate static kthread_t * 17417c478bd9Sstevel@tonic-gate disp_ratify(kthread_t *tp, disp_t *kpq) 17427c478bd9Sstevel@tonic-gate { 17437c478bd9Sstevel@tonic-gate pri_t tpri, maxpri; 17447c478bd9Sstevel@tonic-gate pri_t maxkpri; 17457c478bd9Sstevel@tonic-gate cpu_t *cpup; 17467c478bd9Sstevel@tonic-gate 
17477c478bd9Sstevel@tonic-gate ASSERT(tp != NULL); 17487c478bd9Sstevel@tonic-gate /* 17497c478bd9Sstevel@tonic-gate * Commit to, then ratify scheduling decision 17507c478bd9Sstevel@tonic-gate */ 17517c478bd9Sstevel@tonic-gate cpup = CPU; 17527c478bd9Sstevel@tonic-gate if (cpup->cpu_runrun != 0) 17537c478bd9Sstevel@tonic-gate cpup->cpu_runrun = 0; 17547c478bd9Sstevel@tonic-gate if (cpup->cpu_kprunrun != 0) 17557c478bd9Sstevel@tonic-gate cpup->cpu_kprunrun = 0; 17567c478bd9Sstevel@tonic-gate if (cpup->cpu_chosen_level != -1) 17577c478bd9Sstevel@tonic-gate cpup->cpu_chosen_level = -1; 17587c478bd9Sstevel@tonic-gate membar_enter(); 17597c478bd9Sstevel@tonic-gate tpri = DISP_PRIO(tp); 17607c478bd9Sstevel@tonic-gate maxpri = cpup->cpu_disp->disp_maxrunpri; 17617c478bd9Sstevel@tonic-gate maxkpri = kpq->disp_maxrunpri; 17627c478bd9Sstevel@tonic-gate if (maxpri < maxkpri) 17637c478bd9Sstevel@tonic-gate maxpri = maxkpri; 17647c478bd9Sstevel@tonic-gate if (tpri < maxpri) { 17657c478bd9Sstevel@tonic-gate /* 17667c478bd9Sstevel@tonic-gate * should have done better 17677c478bd9Sstevel@tonic-gate * put this one back and indicate to try again 17687c478bd9Sstevel@tonic-gate */ 17697c478bd9Sstevel@tonic-gate cpup->cpu_dispthread = curthread; /* fixup dispthread */ 17707c478bd9Sstevel@tonic-gate cpup->cpu_dispatch_pri = DISP_PRIO(curthread); 17717c478bd9Sstevel@tonic-gate thread_lock_high(tp); 17727c478bd9Sstevel@tonic-gate THREAD_TRANSITION(tp); 17737c478bd9Sstevel@tonic-gate setfrontdq(tp); 17747c478bd9Sstevel@tonic-gate thread_unlock_nopreempt(tp); 17757c478bd9Sstevel@tonic-gate 17767c478bd9Sstevel@tonic-gate tp = NULL; 17777c478bd9Sstevel@tonic-gate } 17787c478bd9Sstevel@tonic-gate return (tp); 17797c478bd9Sstevel@tonic-gate } 17807c478bd9Sstevel@tonic-gate 17817c478bd9Sstevel@tonic-gate /* 17827c478bd9Sstevel@tonic-gate * See if there is any work on the dispatcher queue for other CPUs. 17837c478bd9Sstevel@tonic-gate * If there is, dequeue the best thread and return. 
 */
static kthread_t *
disp_getwork(cpu_t *cp)
{
	cpu_t		*ocp;		/* other CPU */
	cpu_t		*ocp_start;
	cpu_t		*tcp;		/* target local CPU */
	kthread_t	*tp;
	pri_t		maxpri;
	int		s;
	disp_t		*kpq;		/* kp queue for this partition */
	lpl_t		*lpl, *lpl_leaf;
	int		hint, leafidx;

	maxpri = -1;
	tcp = NULL;			/* no steal target found yet */

	/*
	 * First try the partition-wide kernel preemption queue; any
	 * thread taken from it is ratified before being returned.
	 */
	kpq = &cp->cpu_part->cp_kp_queue;
	while (kpq->disp_maxrunpri >= 0) {
		/*
		 * Try to take a thread from the kp_queue.
		 */
		tp = (disp_getbest(kpq));
		if (tp)
			return (disp_ratify(tp, kpq));
	}

	s = splhigh();		/* protect the cpu_active list */

	/*
	 * Try to find something to do on another CPU's run queue.
	 * Loop through all other CPUs looking for the one with the highest
	 * priority unbound thread.
	 *
	 * On NUMA machines, the partition's CPUs are consulted in order of
	 * distance from the current CPU.  This way, the first available
	 * work found is also the closest, and will suffer the least
	 * from being migrated.
	 */
	lpl = lpl_leaf = cp->cpu_lpl;
	hint = leafidx = 0;

	/*
	 * This loop traverses the lpl hierarchy.  Higher level lpls represent
	 * broader levels of locality
	 */
	do {
		/* This loop iterates over the lpl's leaves */
		do {
			if (lpl_leaf != cp->cpu_lpl)
				ocp = lpl_leaf->lpl_cpus;
			else
				ocp = cp->cpu_next_lpl;

			/* This loop iterates over the CPUs in the leaf */
			ocp_start = ocp;
			do {
				pri_t pri;

				ASSERT(CPU_ACTIVE(ocp));

				/*
				 * End our stroll around the partition if:
				 *
				 * - Something became runnable on the local
				 *   queue
				 *
				 * - We're at the broadest level of locality and
				 *   we happen across another idle CPU.  At the
				 *   highest level of locality, all CPUs will
				 *   walk the partition's CPUs in the same
				 *   order, so we can end our stroll taking
				 *   comfort in knowing the other idle CPU is
				 *   already covering the next portion of the
				 *   list.
				 */
				if (cp->cpu_disp->disp_nrunnable != 0)
					break;
				if (ocp->cpu_dispatch_pri == -1) {
					if (ocp->cpu_disp_flags &
					    CPU_DISP_HALTED)
						continue;
					else if (lpl->lpl_parent == NULL)
						break;
				}

				/*
				 * If there's only one thread and the CPU
				 * is in the middle of a context switch,
				 * or it's currently running the idle thread,
				 * don't steal it.
				 */
				if ((ocp->cpu_disp_flags &
					CPU_DISP_DONTSTEAL) &&
				    ocp->cpu_disp->disp_nrunnable == 1)
					continue;

				/* remember the CPU with the best unbound pri */
				pri = ocp->cpu_disp->disp_max_unbound_pri;
				if (pri > maxpri) {
					maxpri = pri;
					tcp = ocp;
				}
			} while ((ocp = ocp->cpu_next_lpl) != ocp_start);

			/* advance to the next leaf, wrapping at end of rset */
			if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) {
				leafidx = 0;
				lpl_leaf = lpl->lpl_rset[leafidx];
			}
		} while (leafidx != hint);

		/* move up one level of locality; resume at that lpl's hint */
		hint = leafidx = lpl->lpl_hint;
		if ((lpl = lpl->lpl_parent) != NULL)
			lpl_leaf = lpl->lpl_rset[hint];
	} while (!tcp && lpl);

	splx(s);

	/*
	 * If another queue looks good, and there is still nothing on
	 * the local queue, try to transfer one or more threads
	 * from it to our queue.
	 */
	if (tcp && cp->cpu_disp->disp_nrunnable == 0) {
		tp = (disp_getbest(tcp->cpu_disp));
		if (tp)
			return (disp_ratify(tp, kpq));
	}
	return (NULL);
}

/*
 * disp_fix_unbound_pri()
 *	Determines the maximum priority of unbound threads on the queue.
 *	The priority is kept for the queue, but is only increased, never
 *	reduced unless some CPU is looking for something on that queue.
 *
 *	The priority argument is the known upper limit.
 *
 *	Perhaps this should be kept accurately, but that probably means
 *	separate bitmaps for bound and unbound threads.  Since only idled
 *	CPUs will have to do this recalculation, it seems better this way.
 */
static void
disp_fix_unbound_pri(disp_t *dp, pri_t pri)
{
	kthread_t	*tp;
	dispq_t		*dq;
	ulong_t		*dqactmap = dp->disp_qactmap;
	ulong_t		mapword;
	int		wx;

	ASSERT(DISP_LOCK_HELD(&dp->disp_lock));

	ASSERT(pri >= 0);			/* checked by caller */

	/*
	 * Start the search at the next lowest priority below the supplied
	 * priority.  This depends on the bitmap implementation.
	 */
	do {
		wx = pri >> BT_ULSHIFT;		/* index of word in map */

		/*
		 * Form mask for all lower priorities in the word.
		 */
		mapword = dqactmap[wx] & (BT_BIW(pri) - 1);

		/*
		 * Get next lower active priority.
		 */
		if (mapword != 0) {
			pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1;
		} else if (wx > 0) {
			pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */
			if (pri < 0)
				break;
		} else {
			pri = -1;
			break;
		}

		/*
		 * Search the queue for unbound, runnable threads.
		 */
		dq = &dp->disp_q[pri];
		tp = dq->dq_first;

		while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
			tp = tp->t_link;
		}

		/*
		 * If a thread was found, set the priority and return.
		 */
	} while (tp == NULL);

	/*
	 * pri holds the maximum unbound thread priority or -1.
	 */
	if (dp->disp_max_unbound_pri != pri)
		dp->disp_max_unbound_pri = pri;
}

/*
 * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
 *	check if the CPU to which it was previously bound should have
 *	its disp_max_unbound_pri increased.
 */
void
disp_adjust_unbound_pri(kthread_t *tp)
{
	disp_t *dp;
	pri_t tpri;

	ASSERT(THREAD_LOCK_HELD(tp));

	/*
	 * Don't do anything if the thread is not bound, or
	 * currently not runnable or swapped out.
	 */
	if (tp->t_bound_cpu == NULL ||
	    tp->t_state != TS_RUN ||
	    tp->t_schedflag & TS_ON_SWAPQ)
		return;

	/* raise (never lower) the bound CPU's cached unbound priority */
	tpri = DISP_PRIO(tp);
	dp = tp->t_bound_cpu->cpu_disp;
	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
	if (tpri > dp->disp_max_unbound_pri)
		dp->disp_max_unbound_pri = tpri;
}

/*
 * disp_getbest() - de-queue the highest priority unbound runnable thread.
 *	returns with the thread unlocked and onproc
 *	but at splhigh (like disp()).
 *	returns NULL if nothing found.
 *
 * Passed a pointer to a dispatch queue not associated with this CPU.
 */
static kthread_t *
disp_getbest(disp_t *dp)
{
	kthread_t	*tp;
	dispq_t		*dq;
	pri_t		pri;
	cpu_t		*cp;

	disp_lock_enter(&dp->disp_lock);

	/*
	 * If there is nothing to run, or the CPU is in the middle of a
	 * context switch of the only thread, return NULL.
	 */
	pri = dp->disp_max_unbound_pri;
	if (pri == -1 ||
	    (dp->disp_cpu != NULL &&
	    (dp->disp_cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
	    dp->disp_cpu->cpu_disp->disp_nrunnable == 1)) {
		disp_lock_exit_nopreempt(&dp->disp_lock);
		return (NULL);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	/*
	 * Skip over bound threads.
	 * Bound threads can be here even though disp_max_unbound_pri
	 * indicated this level.  Besides, it is not always accurate because
	 * it isn't reduced until another CPU looks for work.
	 * Note that tp could be NULL right away due to this.
	 */
	while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
		tp = tp->t_link;
	}

	/*
	 * If there were no unbound threads on this queue, find the queue
	 * where they are and then return NULL so that other CPUs will be
	 * considered.
	 */
	if (tp == NULL) {
		disp_fix_unbound_pri(dp, pri);
		disp_lock_exit_nopreempt(&dp->disp_lock);
		return (NULL);
	}

	/*
	 * Found a runnable, unbound thread, so remove it from queue.
	 * dispdeq() requires that we have the thread locked, and we do,
	 * by virtue of holding the dispatch queue lock.  dispdeq() will
	 * put the thread in transition state, thereby dropping the dispq
	 * lock.
	 */
#ifdef DEBUG
	{
		int	thread_was_on_queue;

		thread_was_on_queue = dispdeq(tp);	/* drops disp_lock */
		ASSERT(thread_was_on_queue);
	}
#else /* DEBUG */
	(void) dispdeq(tp);			/* drops disp_lock */
#endif /* DEBUG */

	/* keep the thread in memory while it is headed for a CPU */
	tp->t_schedflag |= TS_DONT_SWAP;

	/*
	 * Setup thread to run on the current CPU.
	 */
	cp = CPU;

	tp->t_disp_queue = cp->cpu_disp;

	cp->cpu_dispthread = tp;		/* protected by spl only */
	cp->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));

	thread_onproc(tp, cp);			/* set t_state to TS_ONPROC */

	/*
	 * Return with spl high so that swtch() won't need to raise it.
	 * The disp_lock was dropped by dispdeq().
	 */

	return (tp);
}

/*
 * disp_bound_common() - common routine for higher level functions
 *	that check for bound threads under certain conditions.
 *	If 'threadlistsafe' is set then there is no need to acquire
 *	pidlock to stop the thread list from changing (eg, if
 *	disp_bound_* is called with cpus paused).
 */
static int
disp_bound_common(cpu_t *cp, int threadlistsafe, int flag)
{
	int		found = 0;
	kthread_t	*tp;

	ASSERT(flag);

	if (!threadlistsafe)
		mutex_enter(&pidlock);
	/* walk the circular t_next list, starting from the current thread */
	tp = curthread;		/* faster than allthreads */
	do {
		if (tp->t_state != TS_FREE) {
			/*
			 * If an interrupt thread is busy, but the
			 * caller doesn't care (i.e. BOUND_INTR is off),
			 * then just ignore it and continue through.
			 */
			if ((tp->t_flag & T_INTR_THREAD) &&
			    !(flag & BOUND_INTR))
				continue;

			/*
			 * Skip the idle thread for the CPU
			 * we're about to set offline.
			 */
			if (tp == cp->cpu_idle_thread)
				continue;

			/*
			 * Skip the pause thread for the CPU
			 * we're about to set offline.
			 */
			if (tp == cp->cpu_pause_thread)
				continue;

			if ((flag & BOUND_CPU) &&
			    (tp->t_bound_cpu == cp ||
			    tp->t_bind_cpu == cp->cpu_id ||
			    tp->t_weakbound_cpu == cp)) {
				found = 1;
				break;
			}

			if ((flag & BOUND_PARTITION) &&
			    (tp->t_cpupart == cp->cpu_part)) {
				found = 1;
				break;
			}
		}
	} while ((tp = tp->t_next) != curthread && found == 0);
	if (!threadlistsafe)
		mutex_exit(&pidlock);
	return (found);
}

/*
 * disp_bound_threads - return nonzero if threads are bound to the processor.
 *	Called infrequently.  Keep this simple.
 *	Includes threads that are asleep or stopped but not onproc.
 */
int
disp_bound_threads(cpu_t *cp, int threadlistsafe)
{
	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU));
}

/*
 * disp_bound_anythreads - return nonzero if _any_ threads are bound
 * to the given processor, including interrupt threads.
21917c478bd9Sstevel@tonic-gate */ 21927c478bd9Sstevel@tonic-gate int 21937c478bd9Sstevel@tonic-gate disp_bound_anythreads(cpu_t *cp, int threadlistsafe) 21947c478bd9Sstevel@tonic-gate { 21957c478bd9Sstevel@tonic-gate return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR)); 21967c478bd9Sstevel@tonic-gate } 21977c478bd9Sstevel@tonic-gate 21987c478bd9Sstevel@tonic-gate /* 21997c478bd9Sstevel@tonic-gate * disp_bound_partition - return nonzero if threads are bound to the same 22007c478bd9Sstevel@tonic-gate * partition as the processor. 22017c478bd9Sstevel@tonic-gate * Called infrequently. Keep this simple. 22027c478bd9Sstevel@tonic-gate * Includes threads that are asleep or stopped but not onproc. 22037c478bd9Sstevel@tonic-gate */ 22047c478bd9Sstevel@tonic-gate int 22057c478bd9Sstevel@tonic-gate disp_bound_partition(cpu_t *cp, int threadlistsafe) 22067c478bd9Sstevel@tonic-gate { 22077c478bd9Sstevel@tonic-gate return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION)); 22087c478bd9Sstevel@tonic-gate } 22097c478bd9Sstevel@tonic-gate 22107c478bd9Sstevel@tonic-gate /* 22117c478bd9Sstevel@tonic-gate * disp_cpu_inactive - make a CPU inactive by moving all of its unbound 22127c478bd9Sstevel@tonic-gate * threads to other CPUs. 
 */
void
disp_cpu_inactive(cpu_t *cp)
{
	kthread_t	*tp;
	disp_t		*dp = cp->cpu_disp;
	dispq_t		*dq;
	pri_t		pri;
	int		wasonq;

	disp_lock_enter(&dp->disp_lock);
	/*
	 * Repeat until no unbound work remains on this CPU's queues.
	 * Note that dispdeq() drops disp_lock, so it is reacquired at
	 * the bottom of each iteration before the next pass.
	 */
	while ((pri = dp->disp_max_unbound_pri) != -1) {
		dq = &dp->disp_q[pri];
		tp = dq->dq_first;

		/*
		 * Skip over bound threads.
		 */
		while (tp != NULL && tp->t_bound_cpu != NULL) {
			tp = tp->t_link;
		}

		if (tp == NULL) {
			/* disp_max_unbound_pri must be inaccurate, so fix it */
			disp_fix_unbound_pri(dp, pri);
			continue;
		}

		wasonq = dispdeq(tp);		/* drops disp_lock */
		ASSERT(wasonq);
		ASSERT(tp->t_weakbound_cpu == NULL);

		/* requeue the thread; it will be placed on another CPU */
		setbackdq(tp);
		/*
		 * Called from cpu_offline:
		 *
		 * cp has already been removed from the list of active cpus
		 * and tp->t_cpu has been changed so there is no risk of
		 * tp ending up back on cp.
		 *
		 * Called from cpupart_move_cpu:
		 *
		 * The cpu has moved to a new cpupart.  Any threads that
		 * were on its dispatch queues before the move remain
		 * in the old partition and can't run in the new partition.
		 */
		ASSERT(tp->t_cpu != cp);
		thread_unlock(tp);

		disp_lock_enter(&dp->disp_lock);
	}
	disp_lock_exit(&dp->disp_lock);
}

/*
 * disp_lowpri_cpu - find CPU running the lowest priority thread.
 *	The hint passed in is used as a starting point so we don't favor
 *	CPU 0 or any other CPU.  The caller should pass in the most recently
 *	used CPU for the thread.
 *
 *	The lgroup and priority are used to determine the best CPU to run on
 *	in a NUMA machine.  The lgroup specifies which CPUs are closest while
 *	the thread priority will indicate whether the thread will actually run
 *	there.  To pick the best CPU, the CPUs inside and outside of the given
 *	lgroup which are running the lowest priority threads are found.  The
 *	remote CPU is chosen only if the thread will not run locally on a CPU
 *	within the lgroup, but will run on the remote CPU.
 *	If the thread
 *	cannot immediately run on any CPU, the best local CPU will be chosen.
 *
 *	The lpl specified also identifies the cpu partition from which
 *	disp_lowpri_cpu should select a CPU.
 *
 *	curcpu is used to indicate that disp_lowpri_cpu is being called on
 *	behalf of the current thread. (curthread is looking for a new cpu)
 *	In this case, cpu_dispatch_pri for this thread's cpu should be
 *	ignored.
 *
 *	If a cpu is the target of an offline request then try to avoid it.
 *
 *	This function must be called at either high SPL, or with preemption
 *	disabled, so that the "hint" CPU cannot be removed from the online
 *	CPU list while we are traversing it.
 */
cpu_t *
disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
{
	cpu_t	*bestcpu;
	cpu_t	*besthomecpu;
	cpu_t	*cp, *cpstart;

	pri_t	bestpri;
	pri_t	cpupri;

	klgrpset_t	done;
	klgrpset_t	cur_set;

	lpl_t		*lpl_iter, *lpl_leaf;
	int		i;

	/*
	 * Scan for a CPU currently running the lowest priority thread.
	 * Cannot get cpu_lock here because it is adaptive.
	 * We do not require lock on CPU list.
	 */
	ASSERT(hint != NULL);
	ASSERT(lpl != NULL);
	ASSERT(lpl->lpl_ncpu > 0);

	/*
	 * First examine local CPUs.  Note that it's possible the hint CPU
	 * passed in is remote to the specified home lgroup.  If our priority
	 * isn't sufficiently high such that we can run immediately at home,
	 * then examine CPUs remote to our home lgroup.
	 * We would like to give preference to CPUs closest to "home".
	 * If we can't find a CPU where we'll run at a given level
	 * of locality, we expand our search to include the next level.
	 */
	bestcpu = besthomecpu = NULL;
	klgrpset_clear(done);
	/* start with lpl we were passed */

	lpl_iter = lpl;

	/*
	 * Each iteration considers one level of the lpl hierarchy; the
	 * "done" set prevents re-examining lgroups seen at closer levels.
	 */
	do {

		bestpri = SHRT_MAX;
		klgrpset_clear(cur_set);

		for (i = 0; i < lpl_iter->lpl_nrset; i++) {
			lpl_leaf = lpl_iter->lpl_rset[i];
			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
				continue;

			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);

			if (hint->cpu_lpl == lpl_leaf)
				cp = cpstart = hint;
			else
				cp = cpstart = lpl_leaf->lpl_cpus;

			do {

				/*
				 * Effective priority of this CPU is the
				 * highest of what it is running, what is
				 * queued on it, and any level it has been
				 * "chosen" at; ignore our own CPU's
				 * dispatch pri when called on behalf of
				 * curthread.
				 */
				if (cp == curcpu)
					cpupri = -1;
				else if (cp == cpu_inmotion)
					cpupri = SHRT_MAX;
				else
					cpupri = cp->cpu_dispatch_pri;

				if (cp->cpu_disp->disp_maxrunpri > cpupri)
					cpupri = cp->cpu_disp->disp_maxrunpri;
				if (cp->cpu_chosen_level > cpupri)
					cpupri = cp->cpu_chosen_level;
				if (cpupri < bestpri) {
					if (CPU_IDLING(cpupri)) {
						ASSERT((cp->cpu_flags &
						    CPU_QUIESCED) == 0);
						return (cp);
					}
					bestcpu = cp;
					bestpri = cpupri;
				}
			} while ((cp = cp->cpu_next_lpl) != cpstart);
		}

		if (bestcpu && (tpri > bestpri)) {
			ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
			return (bestcpu);
		}
		if (besthomecpu == NULL)
			besthomecpu = bestcpu;
		/*
		 * Add the lgrps we just considered to the "done" set
		 */
		klgrpset_or(done, cur_set);

	} while ((lpl_iter = lpl_iter->lpl_parent) != NULL);

	/*
	 * The specified priority isn't high enough to run immediately
	 * anywhere, so just return the best CPU from the home lgroup.
	 */
	ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
	return (besthomecpu);
}

/*
 * This routine provides the generic idle cpu function for all processors.
 * If a processor has some specific code to execute when idle (say, to stop
 * the pipeline and save power) then that routine should be defined in the
 * processors specific code (module_xx.c) and the global variable idle_cpu
 * set to that function.
 */
static void
generic_idle_cpu(void)
{
}

/*ARGSUSED*/
static void
generic_enq_thread(cpu_t *cpu, int bound)
{
}

/*
 * Select a CPU for this thread to run on.  Choose t->t_cpu unless:
 *	- t->t_cpu is not in this thread's assigned lgrp
 *	- the time since the thread last came off t->t_cpu exceeds the
 *	  rechoose time for this cpu (ignore this if t is curthread in
 *	  which case it's on CPU and t->t_disp_time is inaccurate)
 *	- t->t_cpu is presently the target of an offline or partition move
 *	  request
 */
static cpu_t *
cpu_choose(kthread_t *t, pri_t tpri)
{
	/* caller guarantees tpri is below the kp queue priority */
	ASSERT(tpri < kpqpri);

	/*
	 * Rechoose if the thread has been off-CPU longer than the CPU's
	 * rechoose interval, or its CPU is being offlined/moved.
	 */
	if ((((lbolt - t->t_disp_time) > t->t_cpu->cpu_rechoose) &&
	    t != curthread) || t->t_cpu == cpu_inmotion) {
		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri, NULL));
	}

	/*
	 * Take a trip through disp_lowpri_cpu() if the thread was
	 * running outside its home lgroup
	 */
	if (!klgrpset_ismember(t->t_lpl->lpl_lgrp->lgrp_set[LGRP_RSRC_CPU],
	    t->t_cpu->cpu_lpl->lpl_lgrpid)) {
		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri,
		    (t == curthread) ? t->t_cpu : NULL));
	}
	return (t->t_cpu);
}