xref: /illumos-gate/usr/src/uts/common/disp/disp.c (revision eda89462804e5700afce98b28174fa96082df280)
17c478bd9Sstevel@tonic-gate /*
27c478bd9Sstevel@tonic-gate  * CDDL HEADER START
37c478bd9Sstevel@tonic-gate  *
47c478bd9Sstevel@tonic-gate  * The contents of this file are subject to the terms of the
57c478bd9Sstevel@tonic-gate  * Common Development and Distribution License, Version 1.0 only
67c478bd9Sstevel@tonic-gate  * (the "License").  You may not use this file except in compliance
77c478bd9Sstevel@tonic-gate  * with the License.
87c478bd9Sstevel@tonic-gate  *
97c478bd9Sstevel@tonic-gate  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
107c478bd9Sstevel@tonic-gate  * or http://www.opensolaris.org/os/licensing.
117c478bd9Sstevel@tonic-gate  * See the License for the specific language governing permissions
127c478bd9Sstevel@tonic-gate  * and limitations under the License.
137c478bd9Sstevel@tonic-gate  *
147c478bd9Sstevel@tonic-gate  * When distributing Covered Code, include this CDDL HEADER in each
157c478bd9Sstevel@tonic-gate  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
167c478bd9Sstevel@tonic-gate  * If applicable, add the following below this CDDL HEADER, with the
177c478bd9Sstevel@tonic-gate  * fields enclosed by brackets "[]" replaced with your own identifying
187c478bd9Sstevel@tonic-gate  * information: Portions Copyright [yyyy] [name of copyright owner]
197c478bd9Sstevel@tonic-gate  *
207c478bd9Sstevel@tonic-gate  * CDDL HEADER END
217c478bd9Sstevel@tonic-gate  */
227c478bd9Sstevel@tonic-gate /*
237c478bd9Sstevel@tonic-gate  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
247c478bd9Sstevel@tonic-gate  * Use is subject to license terms.
257c478bd9Sstevel@tonic-gate  */
267c478bd9Sstevel@tonic-gate 
277c478bd9Sstevel@tonic-gate /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
287c478bd9Sstevel@tonic-gate /*	  All Rights Reserved  	*/
297c478bd9Sstevel@tonic-gate 
307c478bd9Sstevel@tonic-gate 
317c478bd9Sstevel@tonic-gate #pragma ident	"%Z%%M%	%I%	%E% SMI"	/* from SVr4.0 1.30 */
327c478bd9Sstevel@tonic-gate 
337c478bd9Sstevel@tonic-gate #include <sys/types.h>
347c478bd9Sstevel@tonic-gate #include <sys/param.h>
357c478bd9Sstevel@tonic-gate #include <sys/sysmacros.h>
367c478bd9Sstevel@tonic-gate #include <sys/signal.h>
377c478bd9Sstevel@tonic-gate #include <sys/user.h>
387c478bd9Sstevel@tonic-gate #include <sys/systm.h>
397c478bd9Sstevel@tonic-gate #include <sys/sysinfo.h>
407c478bd9Sstevel@tonic-gate #include <sys/var.h>
417c478bd9Sstevel@tonic-gate #include <sys/errno.h>
427c478bd9Sstevel@tonic-gate #include <sys/cmn_err.h>
437c478bd9Sstevel@tonic-gate #include <sys/debug.h>
447c478bd9Sstevel@tonic-gate #include <sys/inline.h>
457c478bd9Sstevel@tonic-gate #include <sys/disp.h>
467c478bd9Sstevel@tonic-gate #include <sys/class.h>
477c478bd9Sstevel@tonic-gate #include <sys/bitmap.h>
487c478bd9Sstevel@tonic-gate #include <sys/kmem.h>
497c478bd9Sstevel@tonic-gate #include <sys/cpuvar.h>
507c478bd9Sstevel@tonic-gate #include <sys/vtrace.h>
517c478bd9Sstevel@tonic-gate #include <sys/tnf.h>
527c478bd9Sstevel@tonic-gate #include <sys/cpupart.h>
537c478bd9Sstevel@tonic-gate #include <sys/lgrp.h>
547c478bd9Sstevel@tonic-gate #include <sys/chip.h>
557c478bd9Sstevel@tonic-gate #include <sys/schedctl.h>
567c478bd9Sstevel@tonic-gate #include <sys/atomic.h>
577c478bd9Sstevel@tonic-gate #include <sys/dtrace.h>
587c478bd9Sstevel@tonic-gate #include <sys/sdt.h>
597c478bd9Sstevel@tonic-gate 
607c478bd9Sstevel@tonic-gate #include <vm/as.h>
617c478bd9Sstevel@tonic-gate 
627c478bd9Sstevel@tonic-gate #define	BOUND_CPU	0x1
637c478bd9Sstevel@tonic-gate #define	BOUND_PARTITION	0x2
647c478bd9Sstevel@tonic-gate #define	BOUND_INTR	0x4
657c478bd9Sstevel@tonic-gate 
/* Dispatch queue allocation structure and functions */
struct disp_queue_info {
	disp_t	*dp;		/* dispatch queue being resized */
	dispq_t *olddispq;	/* previous queue array, freed after swap */
	dispq_t *newdispq;	/* replacement (larger) queue array */
	ulong_t	*olddqactmap;	/* previous active-queue bitmap */
	ulong_t	*newdqactmap;	/* replacement active-queue bitmap */
	int	oldnglobpris;	/* number of priorities in the old queue */
};
static void	disp_dq_alloc(struct disp_queue_info *dptr, int numpris,
    disp_t *dp);
static void	disp_dq_assign(struct disp_queue_info *dptr, int numpris);
static void	disp_dq_free(struct disp_queue_info *dptr);

/* platform-specific routine to call when processor is idle */
static void	generic_idle_cpu();
void		(*idle_cpu)() = generic_idle_cpu;

/* routines invoked when a CPU enters/exits the idle loop */
static void	idle_enter();
static void	idle_exit();

/* platform-specific routine to call when thread is enqueued */
static void	generic_enq_thread(cpu_t *, int);
void		(*disp_enq_thread)(cpu_t *, int) = generic_enq_thread;

pri_t	kpreemptpri;	/* priority where kernel preemption applies */
pri_t	upreemptpri = 0; /* priority where normal preemption applies */
pri_t	intr_pri;	/* interrupt thread priority base level */

#define	KPQPRI	-1 /* priority where cpu affinity is dropped for kp queue */
pri_t	kpqpri = KPQPRI; /* can be set in /etc/system */
disp_t	cpu0_disp;	/* boot CPU's dispatch queue */
disp_lock_t	swapped_lock;	/* lock swapped threads and swap queue */
int	nswapped;	/* total number of swapped threads */
void	disp_swapped_enq(kthread_t *tp);
static void	disp_swapped_setrun(kthread_t *tp);
static void	cpu_resched(cpu_t *cp, pri_t tpri);

/*
 * If this is set, only interrupt threads will cause kernel preemptions.
 * This is done by changing the value of kpreemptpri.  kpreemptpri
 * will either be the max sysclass pri + 1 or the min interrupt pri.
 */
int	only_intr_kpreempt;

extern void set_idle_cpu(int cpun);
extern void unset_idle_cpu(int cpun);
static void setkpdq(kthread_t *tp, int borf);
#define	SETKP_BACK	0	/* enqueue at back of kp queue */
#define	SETKP_FRONT	1	/* enqueue at front of kp queue */
/*
 * Parameter that determines how recently a thread must have run
 * on the CPU to be considered loosely-bound to that CPU to reduce
 * cold cache effects.  The interval is in hertz.
 *
 * The platform may define a per physical processor adjustment of
 * this parameter. For efficiency, the effective rechoose interval
 * (rechoose_interval + per chip adjustment) is maintained in the
 * cpu structures. See cpu_choose()
 */
int	rechoose_interval = RECHOOSE_INTERVAL;

static cpu_t	*cpu_choose(kthread_t *, pri_t);

id_t	defaultcid;	/* system "default" class; see dispadmin(1M) */

disp_lock_t	transition_lock;	/* lock on transitioning threads */
disp_lock_t	stop_lock;		/* lock on stopped threads */

static void		cpu_dispqalloc(int numpris);

/* work-stealing helpers used by the idle loop and swtch() */
static kthread_t	*disp_getwork(cpu_t *to);
static kthread_t	*disp_getbest(disp_t *from);
static kthread_t	*disp_ratify(kthread_t *tp, disp_t *kpq);

void	swtch_to(kthread_t *);
1437c478bd9Sstevel@tonic-gate 
1447c478bd9Sstevel@tonic-gate /*
1457c478bd9Sstevel@tonic-gate  * dispatcher and scheduler initialization
1467c478bd9Sstevel@tonic-gate  */
1477c478bd9Sstevel@tonic-gate 
/*
 * disp_setup - Common code to calculate and allocate dispatcher
 *		variables and structures based on the maximum priority.
 *
 * Caller must hold cpu_lock.  The dispatch structures only ever grow:
 * nothing is done unless the new priority count exceeds the old one.
 */
static void
disp_setup(pri_t maxglobpri, pri_t oldnglobpris)
{
	pri_t	newnglobpris;

	ASSERT(MUTEX_HELD(&cpu_lock));

	/* Global priorities cover the class range plus interrupt levels. */
	newnglobpris = maxglobpri + 1 + LOCK_LEVEL;

	if (newnglobpris > oldnglobpris) {
		/*
		 * Allocate new kp queues for each CPU partition.
		 */
		cpupart_kpqalloc(newnglobpris);

		/*
		 * Allocate new dispatch queues for each CPU.
		 */
		cpu_dispqalloc(newnglobpris);

		/*
		 * compute new interrupt thread base priority
		 */
		intr_pri = maxglobpri;
		if (only_intr_kpreempt) {
			/* restrict kernel preemption to interrupt threads */
			kpreemptpri = intr_pri + 1;
			if (kpqpri == KPQPRI)
				kpqpri = kpreemptpri;
		}
		/* publish the new size only after all queues exist */
		v.v_nglobpris = newnglobpris;
	}
}
1847c478bd9Sstevel@tonic-gate 
/*
 * dispinit - Called to initialize all loaded classes and the
 *	      dispatcher framework.
 */
void
dispinit(void)
{
	id_t	cid;
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	maxglobpri = -1;

	/*
	 * Initialize transition lock, which will always be set.
	 * (It is entered here and deliberately never exited; threads in
	 * the transition state spin on it.)
	 */
	DISP_LOCK_INIT(&transition_lock);
	disp_lock_enter_high(&transition_lock);
	DISP_LOCK_INIT(&stop_lock);

	mutex_enter(&cpu_lock);
	/* boot CPU's queues start out empty */
	CPU->cpu_disp->disp_maxrunpri = -1;
	CPU->cpu_disp->disp_max_unbound_pri = -1;
	/*
	 * Initialize the default CPU partition.
	 */
	cpupart_initialize_default();
	/*
	 * Call the class specific initialization functions for
	 * all pre-installed schedulers.
	 *
	 * We pass the size of a class specific parameter
	 * buffer to each of the initialization functions
	 * to try to catch problems with backward compatibility
	 * of class modules.
	 *
	 * For example a new class module running on an old system
	 * which didn't provide sufficiently large parameter buffers
	 * would be bad news. Class initialization modules can check for
	 * this and take action if they detect a problem.
	 */

	for (cid = 0; cid < nclass; cid++) {
		sclass_t	*sc;

		sc = &sclass[cid];
		if (SCHED_INSTALLED(sc)) {
			cl_maxglobpri = sc->cl_init(cid, PC_CLPARMSZ,
			    &sc->cl_funcs);
			/* track the highest priority any class uses */
			if (cl_maxglobpri > maxglobpri)
				maxglobpri = cl_maxglobpri;
		}
	}
	/* kernel preemption applies above the highest system priority */
	kpreemptpri = (pri_t)v.v_maxsyspri + 1;
	if (kpqpri == KPQPRI)
		kpqpri = kpreemptpri;

	ASSERT(maxglobpri >= 0);
	disp_setup(maxglobpri, 0);

	mutex_exit(&cpu_lock);

	/*
	 * Get the default class ID; this may be later modified via
	 * dispadmin(1M).  This will load the class (normally TS) and that will
	 * call disp_add(), which is why we had to drop cpu_lock first.
	 */
	if (getcid(defaultclass, &defaultcid) != 0) {
		cmn_err(CE_PANIC, "Couldn't load default scheduling class '%s'",
		    defaultclass);
	}
}
2577c478bd9Sstevel@tonic-gate 
/*
 * disp_add - Called with class pointer to initialize the dispatcher
 *	      for a newly loaded class.
 */
void
disp_add(sclass_t *clp)
{
	pri_t	maxglobpri;
	pri_t	cl_maxglobpri;

	mutex_enter(&cpu_lock);
	/*
	 * Initialize the scheduler class.
	 */
	/* recover the current class-level max from the global count */
	maxglobpri = (pri_t)(v.v_nglobpris - LOCK_LEVEL - 1);
	cl_maxglobpri = clp->cl_init(clp - sclass, PC_CLPARMSZ, &clp->cl_funcs);
	if (cl_maxglobpri > maxglobpri)
		maxglobpri = cl_maxglobpri;

	/*
	 * Save old queue information.  Since we're initializing a
	 * new scheduling class which has just been loaded, then
	 * the size of the dispq may have changed.  We need to handle
	 * that here.
	 */
	disp_setup(maxglobpri, v.v_nglobpris);

	mutex_exit(&cpu_lock);
}
2877c478bd9Sstevel@tonic-gate 
2887c478bd9Sstevel@tonic-gate 
/*
 * For each CPU, allocate new dispatch queues
 * with the stated number of priorities.
 *
 * Caller must hold cpu_lock.  The swap of old queues for new happens
 * with all CPUs paused so no dispatcher state can change mid-swap.
 */
static void
cpu_dispqalloc(int numpris)
{
	cpu_t	*cpup;
	struct disp_queue_info	*disp_mem;
	int i, num;

	ASSERT(MUTEX_HELD(&cpu_lock));

	disp_mem = kmem_zalloc(NCPU *
	    sizeof (struct disp_queue_info), KM_SLEEP);

	/*
	 * This routine must allocate all of the memory before stopping
	 * the cpus because it must not sleep in kmem_alloc while the
	 * CPUs are stopped.  Locks they hold will not be freed until they
	 * are restarted.
	 */
	i = 0;
	/* cpu_list is a circular list; walk it exactly once */
	cpup = cpu_list;
	do {
		disp_dq_alloc(&disp_mem[i], numpris, cpup->cpu_disp);
		i++;
		cpup = cpup->cpu_next;
	} while (cpup != cpu_list);
	num = i;

	pause_cpus(NULL);
	for (i = 0; i < num; i++)
		disp_dq_assign(&disp_mem[i], numpris);
	start_cpus();

	/*
	 * I must free all of the memory after starting the cpus because
	 * I can not risk sleeping in kmem_free while the cpus are stopped.
	 */
	for (i = 0; i < num; i++)
		disp_dq_free(&disp_mem[i]);

	kmem_free(disp_mem, NCPU * sizeof (struct disp_queue_info));
}
3347c478bd9Sstevel@tonic-gate 
3357c478bd9Sstevel@tonic-gate static void
3367c478bd9Sstevel@tonic-gate disp_dq_alloc(struct disp_queue_info *dptr, int numpris, disp_t	*dp)
3377c478bd9Sstevel@tonic-gate {
3387c478bd9Sstevel@tonic-gate 	dptr->newdispq = kmem_zalloc(numpris * sizeof (dispq_t), KM_SLEEP);
3397c478bd9Sstevel@tonic-gate 	dptr->newdqactmap = kmem_zalloc(((numpris / BT_NBIPUL) + 1) *
3407c478bd9Sstevel@tonic-gate 	    sizeof (long), KM_SLEEP);
3417c478bd9Sstevel@tonic-gate 	dptr->dp = dp;
3427c478bd9Sstevel@tonic-gate }
3437c478bd9Sstevel@tonic-gate 
/*
 * Swap the pre-allocated (larger) queue array and bitmap into dp,
 * copying over the existing contents.  Runs while the CPUs are paused,
 * so it must not block.  The displaced structures are saved in *dptr
 * for disp_dq_free() to release after the CPUs restart.
 */
static void
disp_dq_assign(struct disp_queue_info *dptr, int numpris)
{
	disp_t	*dp;

	dp = dptr->dp;
	/* stash the old structures; freed later by disp_dq_free() */
	dptr->olddispq = dp->disp_q;
	dptr->olddqactmap = dp->disp_qactmap;
	dptr->oldnglobpris = dp->disp_npri;

	ASSERT(dptr->oldnglobpris < numpris);

	if (dptr->olddispq != NULL) {
		/*
		 * Use kcopy because bcopy is platform-specific
		 * and could block while we might have paused the cpus.
		 */
		(void) kcopy(dptr->olddispq, dptr->newdispq,
		    dptr->oldnglobpris * sizeof (dispq_t));
		(void) kcopy(dptr->olddqactmap, dptr->newdqactmap,
		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) *
		    sizeof (long));
	}
	dp->disp_q = dptr->newdispq;
	dp->disp_qactmap = dptr->newdqactmap;
	dp->disp_q_limit = &dptr->newdispq[numpris];
	dp->disp_npri = numpris;
}
3727c478bd9Sstevel@tonic-gate 
3737c478bd9Sstevel@tonic-gate static void
3747c478bd9Sstevel@tonic-gate disp_dq_free(struct disp_queue_info *dptr)
3757c478bd9Sstevel@tonic-gate {
3767c478bd9Sstevel@tonic-gate 	if (dptr->olddispq != NULL)
3777c478bd9Sstevel@tonic-gate 		kmem_free(dptr->olddispq,
3787c478bd9Sstevel@tonic-gate 		    dptr->oldnglobpris * sizeof (dispq_t));
3797c478bd9Sstevel@tonic-gate 	if (dptr->olddqactmap != NULL)
3807c478bd9Sstevel@tonic-gate 		kmem_free(dptr->olddqactmap,
3817c478bd9Sstevel@tonic-gate 		    ((dptr->oldnglobpris / BT_NBIPUL) + 1) * sizeof (long));
3827c478bd9Sstevel@tonic-gate }
3837c478bd9Sstevel@tonic-gate 
/*
 * For a newly created CPU, initialize the dispatch queue.
 * This is called before the CPU is known through cpu[] or on any lists.
 */
void
disp_cpu_init(cpu_t *cp)
{
	disp_t	*dp;
	dispq_t	*newdispq;
	ulong_t	*newdqactmap;

	ASSERT(MUTEX_HELD(&cpu_lock));	/* protect dispatcher queue sizes */

	/* the boot CPU uses the statically allocated cpu0_disp */
	if (cp == cpu0_disp.disp_cpu)
		dp = &cpu0_disp;
	else
		dp = kmem_alloc(sizeof (disp_t), KM_SLEEP);
	bzero(dp, sizeof (disp_t));
	cp->cpu_disp = dp;
	dp->disp_cpu = cp;
	dp->disp_maxrunpri = -1;	/* no runnable threads yet */
	dp->disp_max_unbound_pri = -1;
	DISP_LOCK_INIT(&cp->cpu_thread_lock);
	/*
	 * Allocate memory for the dispatcher queue headers
	 * and the active queue bitmap.
	 */
	newdispq = kmem_zalloc(v.v_nglobpris * sizeof (dispq_t), KM_SLEEP);
	newdqactmap = kmem_zalloc(((v.v_nglobpris / BT_NBIPUL) + 1) *
	    sizeof (long), KM_SLEEP);
	dp->disp_q = newdispq;
	dp->disp_qactmap = newdqactmap;
	dp->disp_q_limit = &newdispq[v.v_nglobpris];
	dp->disp_npri = v.v_nglobpris;
}
4197c478bd9Sstevel@tonic-gate 
4207c478bd9Sstevel@tonic-gate void
4217c478bd9Sstevel@tonic-gate disp_cpu_fini(cpu_t *cp)
4227c478bd9Sstevel@tonic-gate {
4237c478bd9Sstevel@tonic-gate 	ASSERT(MUTEX_HELD(&cpu_lock));
4247c478bd9Sstevel@tonic-gate 
4257c478bd9Sstevel@tonic-gate 	disp_kp_free(cp->cpu_disp);
4267c478bd9Sstevel@tonic-gate 	if (cp->cpu_disp != &cpu0_disp)
4277c478bd9Sstevel@tonic-gate 		kmem_free(cp->cpu_disp, sizeof (disp_t));
4287c478bd9Sstevel@tonic-gate }
4297c478bd9Sstevel@tonic-gate 
4307c478bd9Sstevel@tonic-gate /*
4317c478bd9Sstevel@tonic-gate  * Allocate new, larger kpreempt dispatch queue to replace the old one.
4327c478bd9Sstevel@tonic-gate  */
4337c478bd9Sstevel@tonic-gate void
4347c478bd9Sstevel@tonic-gate disp_kp_alloc(disp_t *dq, pri_t npri)
4357c478bd9Sstevel@tonic-gate {
4367c478bd9Sstevel@tonic-gate 	struct disp_queue_info	mem_info;
4377c478bd9Sstevel@tonic-gate 
4387c478bd9Sstevel@tonic-gate 	if (npri > dq->disp_npri) {
4397c478bd9Sstevel@tonic-gate 		/*
4407c478bd9Sstevel@tonic-gate 		 * Allocate memory for the new array.
4417c478bd9Sstevel@tonic-gate 		 */
4427c478bd9Sstevel@tonic-gate 		disp_dq_alloc(&mem_info, npri, dq);
4437c478bd9Sstevel@tonic-gate 
4447c478bd9Sstevel@tonic-gate 		/*
4457c478bd9Sstevel@tonic-gate 		 * We need to copy the old structures to the new
4467c478bd9Sstevel@tonic-gate 		 * and free the old.
4477c478bd9Sstevel@tonic-gate 		 */
4487c478bd9Sstevel@tonic-gate 		disp_dq_assign(&mem_info, npri);
4497c478bd9Sstevel@tonic-gate 		disp_dq_free(&mem_info);
4507c478bd9Sstevel@tonic-gate 	}
4517c478bd9Sstevel@tonic-gate }
4527c478bd9Sstevel@tonic-gate 
/*
 * Free dispatch queue.
 * Used for the kpreempt queues for a removed CPU partition and
 * for the per-CPU queues of deleted CPUs.
 */
void
disp_kp_free(disp_t *dq)
{
	struct disp_queue_info	mem_info;

	/* present the queue's current storage as "old" so disp_dq_free */
	/* can release it; the new* fields are not read on this path */
	mem_info.olddispq = dq->disp_q;
	mem_info.olddqactmap = dq->disp_qactmap;
	mem_info.oldnglobpris = dq->disp_npri;
	disp_dq_free(&mem_info);
}
4687c478bd9Sstevel@tonic-gate 
4697c478bd9Sstevel@tonic-gate /*
4707c478bd9Sstevel@tonic-gate  * End dispatcher and scheduler initialization.
4717c478bd9Sstevel@tonic-gate  */
4727c478bd9Sstevel@tonic-gate 
/*
 * See if there's anything to do other than remain idle.
 * Return non-zero if there is.
 *
 * This function must be called with high spl, or with
 * kernel preemption disabled to prevent the partition's
 * active cpu list from changing while being traversed.
 *
 */
int
disp_anywork(void)
{
	cpu_t   *cp = CPU;
	cpu_t   *ocp;

	/* fast path: our own dispatch queue has runnable threads */
	if (cp->cpu_disp->disp_nrunnable != 0)
		return (1);

	if (!(cp->cpu_flags & CPU_OFFLINE)) {
		/* the partition-wide kpreempt queue has work */
		if (CP_MAXRUNPRI(cp->cpu_part) >= 0)
			return (1);

		/*
		 * Work can be taken from another CPU if:
		 *	- There is unbound work on the run queue
		 *	- That work isn't a thread undergoing a
		 *	  context switch on an otherwise empty queue.
		 *	- The CPU isn't running the idle loop.
		 */
		for (ocp = cp->cpu_next_part; ocp != cp;
		    ocp = ocp->cpu_next_part) {
			ASSERT(CPU_ACTIVE(ocp));

			if (ocp->cpu_disp->disp_max_unbound_pri != -1 &&
			    !((ocp->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
			    ocp->cpu_disp->disp_nrunnable == 1) &&
			    ocp->cpu_dispatch_pri != -1)
				return (1);
		}
	}
	return (0);
}
5157c478bd9Sstevel@tonic-gate 
/*
 * Called when CPU enters the idle loop
 */
static void
idle_enter()
{
	cpu_t		*cp = CPU;

	/* account the CPU's time as idle from this point */
	new_cpu_mstate(CMS_IDLE, gethrtime_unscaled());
	CPU_STATS_ADDQ(cp, sys, idlethread, 1);
	set_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}
5287c478bd9Sstevel@tonic-gate 
/*
 * Called when CPU exits the idle loop
 */
static void
idle_exit()
{
	cpu_t		*cp = CPU;

	/* switch the CPU's time accounting back to system time */
	new_cpu_mstate(CMS_SYSTEM, gethrtime_unscaled());
	unset_idle_cpu(cp->cpu_id);	/* arch-dependent hook */
}
5407c478bd9Sstevel@tonic-gate 
/*
 * Idle loop.
 *
 * Runs as each CPU's idle thread.  Spins calling the platform idle hook
 * until runnable work appears, then switches to it; never returns.
 */
void
idle()
{
	struct cpu	*cp = CPU;		/* pointer to this CPU */
	kthread_t	*t;			/* taken thread */

	idle_enter();

	/*
	 * Uniprocessor version of idle loop.
	 * Do this until notified that we're on an actual multiprocessor.
	 */
	while (ncpus == 1) {
		if (cp->cpu_disp->disp_nrunnable == 0) {
			(*idle_cpu)();
			continue;
		}
		idle_exit();
		swtch();

		idle_enter(); /* returned from swtch */
	}

	/*
	 * Multiprocessor idle loop.
	 */
	for (;;) {
		/*
		 * If CPU is completely quiesced by p_online(2), just wait
		 * here with minimal bus traffic until put online.
		 */
		while (cp->cpu_flags & CPU_QUIESCED)
			(*idle_cpu)();

		if (cp->cpu_disp->disp_nrunnable != 0) {
			/* local work: let swtch() pick the best thread */
			idle_exit();
			swtch();
		} else {
			/* offline CPUs may not steal work from others */
			if (cp->cpu_flags & CPU_OFFLINE)
				continue;
			if ((t = disp_getwork(cp)) == NULL) {
				if (cp->cpu_chosen_level != -1) {
					disp_t *dp = cp->cpu_disp;
					disp_t *kpq;

					disp_lock_enter(&dp->disp_lock);
					/*
					 * Set kpq under lock to prevent
					 * migration between partitions.
					 */
					kpq = &cp->cpu_part->cp_kp_queue;
					if (kpq->disp_maxrunpri == -1)
						cp->cpu_chosen_level = -1;
					disp_lock_exit(&dp->disp_lock);
				}
				(*idle_cpu)();
				continue;
			}
			/* stole a thread from another CPU; run it */
			idle_exit();
			restore_mstate(t);
			swtch_to(t);
		}
		idle_enter(); /* returned from swtch/swtch_to */
	}
}
6097c478bd9Sstevel@tonic-gate 
6107c478bd9Sstevel@tonic-gate 
6117c478bd9Sstevel@tonic-gate /*
6127c478bd9Sstevel@tonic-gate  * Preempt the currently running thread in favor of the highest
6137c478bd9Sstevel@tonic-gate  * priority thread.  The class of the current thread controls
6147c478bd9Sstevel@tonic-gate  * where it goes on the dispatcher queues. If panicking, turn
6157c478bd9Sstevel@tonic-gate  * preemption off.
6167c478bd9Sstevel@tonic-gate  */
void
preempt()
{
	kthread_t 	*t = curthread;
	klwp_t 		*lwp = ttolwp(curthread);

	/* When panicking, preemption is disabled entirely. */
	if (panicstr)
		return;

	TRACE_0(TR_FAC_DISP, TR_PREEMPT_START, "preempt_start");

	thread_lock(t);

	if (t->t_state != TS_ONPROC || t->t_disp_queue != CPU->cpu_disp) {
		/*
		 * this thread has already been chosen to be run on
		 * another CPU. Clear kprunrun on this CPU since we're
		 * already headed for swtch().
		 */
		CPU->cpu_kprunrun = 0;
		thread_unlock_nopreempt(t);
		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");
	} else {
		/* Charge an involuntary context switch to the lwp. */
		if (lwp != NULL)
			lwp->lwp_ru.nivcsw++;
		CPU_STATS_ADDQ(CPU, sys, inv_swtch, 1);
		/*
		 * Mark the thread in transition and let its scheduling
		 * class decide where it goes on the dispatch queues.
		 */
		THREAD_TRANSITION(t);
		CL_PREEMPT(t);
		DTRACE_SCHED(preempt);
		thread_unlock_nopreempt(t);

		TRACE_0(TR_FAC_DISP, TR_PREEMPT_END, "preempt_end");

		swtch();		/* clears CPU->cpu_runrun via disp() */
	}
}
6537c478bd9Sstevel@tonic-gate 
6547c478bd9Sstevel@tonic-gate extern kthread_t *thread_unpin();
6557c478bd9Sstevel@tonic-gate 
6567c478bd9Sstevel@tonic-gate /*
6577c478bd9Sstevel@tonic-gate  * disp() - find the highest priority thread for this processor to run, and
6587c478bd9Sstevel@tonic-gate  * set it in TS_ONPROC state so that resume() can be called to run it.
6597c478bd9Sstevel@tonic-gate  */
static kthread_t *
disp()
{
	cpu_t		*cpup;
	disp_t		*dp;
	kthread_t	*tp;
	dispq_t		*dq;
	int		maxrunword;
	pri_t		pri;
	disp_t		*kpq;

	TRACE_0(TR_FAC_DISP, TR_DISP_START, "disp_start");

	cpup = CPU;
	/*
	 * Find the highest priority loaded, runnable thread.
	 */
	dp = cpup->cpu_disp;

reschedule:
	/*
	 * If there is more important work on the global queue with a better
	 * priority than the maximum on this CPU, take it now.
	 */
	kpq = &cpup->cpu_part->cp_kp_queue;
	while ((pri = kpq->disp_maxrunpri) >= 0 &&
	    pri >= dp->disp_maxrunpri &&
	    (cpup->cpu_flags & CPU_OFFLINE) == 0 &&
	    (tp = disp_getbest(kpq)) != NULL) {
		/*
		 * Return tp only if disp_ratify() confirms the choice;
		 * otherwise loop and pick again.
		 */
		if (disp_ratify(tp, kpq) != NULL) {
			TRACE_1(TR_FAC_DISP, TR_DISP_END,
			    "disp_end:tid %p", tp);
			restore_mstate(tp);
			return (tp);
		}
	}

	disp_lock_enter(&dp->disp_lock);
	pri = dp->disp_maxrunpri;

	/*
	 * If there is nothing to run, look at what's runnable on other queues.
	 * Choose the idle thread if the CPU is quiesced.
	 * Note that CPUs that have the CPU_OFFLINE flag set can still run
	 * interrupt threads, which will be the only threads on the CPU's own
	 * queue, but cannot run threads from other queues.
	 */
	if (pri == -1) {
		if (!(cpup->cpu_flags & CPU_OFFLINE)) {
			disp_lock_exit(&dp->disp_lock);
			if ((tp = disp_getwork(cpup)) == NULL) {
				/*
				 * No work anywhere: dispatch the idle
				 * thread.  Raise spl first, since the
				 * queue lock was dropped above.
				 */
				tp = cpup->cpu_idle_thread;
				(void) splhigh();
				THREAD_ONPROC(tp, cpup);
				cpup->cpu_dispthread = tp;
				cpup->cpu_dispatch_pri = -1;
				cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
				cpup->cpu_chosen_level = -1;
			}
		} else {
			/*
			 * Offline CPU: never steal work; go idle.  The
			 * "_high" exit keeps spl raised, so no separate
			 * splhigh() is needed on this path.
			 */
			disp_lock_exit_high(&dp->disp_lock);
			tp = cpup->cpu_idle_thread;
			THREAD_ONPROC(tp, cpup);
			cpup->cpu_dispthread = tp;
			cpup->cpu_dispatch_pri = -1;
			cpup->cpu_runrun = cpup->cpu_kprunrun = 0;
			cpup->cpu_chosen_level = -1;
		}
		TRACE_1(TR_FAC_DISP, TR_DISP_END,
			"disp_end:tid %p", tp);
		restore_mstate(tp);
		return (tp);
	}

	dq = &dp->disp_q[pri];
	tp = dq->dq_first;

	ASSERT(tp != NULL);
	ASSERT(tp->t_schedflag & TS_LOAD);	/* thread must be swapped in */

	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);

	/*
	 * Found it so remove it from queue.
	 */
	dp->disp_nrunnable--;
	dq->dq_sruncnt--;
	if ((dq->dq_first = tp->t_link) == NULL) {
		ulong_t	*dqactmap = dp->disp_qactmap;

		ASSERT(dq->dq_sruncnt == 0);
		dq->dq_last = NULL;

		/*
		 * The queue is empty, so the corresponding bit needs to be
		 * turned off in dqactmap.   If nrunnable != 0 just took the
		 * last runnable thread off the
		 * highest queue, so recompute disp_maxrunpri.
		 */
		maxrunword = pri >> BT_ULSHIFT;
		dqactmap[maxrunword] &= ~BT_BIW(pri);

		if (dp->disp_nrunnable == 0) {
			dp->disp_max_unbound_pri = -1;
			dp->disp_maxrunpri = -1;
		} else {
			int ipri;

			/* Highest remaining set bit is the new maxrunpri. */
			ipri = bt_gethighbit(dqactmap, maxrunword);
			dp->disp_maxrunpri = ipri;
			if (ipri < dp->disp_max_unbound_pri)
				dp->disp_max_unbound_pri = ipri;
		}
	} else {
		tp->t_link = NULL;
	}

	/*
	 * Set TS_DONT_SWAP flag to prevent another processor from swapping
	 * out this thread before we have a chance to run it.
	 * While running, it is protected against swapping by t_lock.
	 */
	tp->t_schedflag |= TS_DONT_SWAP;
	cpup->cpu_dispthread = tp;		/* protected by spl only */
	cpup->cpu_dispatch_pri = pri;
	ASSERT(pri == DISP_PRIO(tp));
	thread_onproc(tp, cpup);  		/* set t_state to TS_ONPROC */
	disp_lock_exit_high(&dp->disp_lock);	/* drop run queue lock */

	ASSERT(tp != NULL);
	TRACE_1(TR_FAC_DISP, TR_DISP_END,
		"disp_end:tid %p", tp);

	/*
	 * If the choice is not ratified against the kp queue, start the
	 * whole selection over.
	 */
	if (disp_ratify(tp, kpq) == NULL)
		goto reschedule;

	restore_mstate(tp);
	return (tp);
}
7997c478bd9Sstevel@tonic-gate 
8007c478bd9Sstevel@tonic-gate /*
8017c478bd9Sstevel@tonic-gate  * swtch()
8027c478bd9Sstevel@tonic-gate  *	Find best runnable thread and run it.
8037c478bd9Sstevel@tonic-gate  *	Called with the current thread already switched to a new state,
8047c478bd9Sstevel@tonic-gate  *	on a sleep queue, run queue, stopped, and not zombied.
8057c478bd9Sstevel@tonic-gate  *	May be called at any spl level less than or equal to LOCK_LEVEL.
8067c478bd9Sstevel@tonic-gate  *	Always drops spl to the base level (spl0()).
8077c478bd9Sstevel@tonic-gate  */
void
swtch()
{
	kthread_t	*t = curthread;
	kthread_t	*next;
	cpu_t		*cp;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	if (t->t_flag & T_INTR_THREAD)
		cpu_intr_swtch_enter(t);

	if (t->t_intr != NULL) {
		/*
		 * We are an interrupt thread.  Setup and return
		 * the interrupted thread to be resumed.
		 */
		(void) splhigh();	/* block other scheduler action */
		cp = CPU;		/* now protected against migration */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */
		CPU_STATS_ADDQ(cp, sys, pswitch, 1);
		CPU_STATS_ADDQ(cp, sys, intrblk, 1);
		next = thread_unpin();
		TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
		resume_from_intr(next);
	} else {
#ifdef	DEBUG
		/*
		 * NOTE(review): sanity check -- under the thread lock the
		 * caller must no longer look like a preemptible ONPROC
		 * thread on this CPU (it should have transitioned before
		 * calling swtch()).
		 */
		if (t->t_state == TS_ONPROC &&
		    t->t_disp_queue->disp_cpu == CPU &&
		    t->t_preempt == 0) {
			thread_lock(t);
			ASSERT(t->t_state != TS_ONPROC ||
			    t->t_disp_queue->disp_cpu != CPU ||
			    t->t_preempt != 0);	/* cannot migrate */
			thread_unlock_nopreempt(t);
		}
#endif	/* DEBUG */
		cp = CPU;
		next = disp();		/* returns with spl high */
		ASSERT(CPU_ON_INTR(cp) == 0);	/* not called with PIL > 10 */

		/* OK to steal anything left on run queue */
		cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;

		if (next != t) {
			/*
			 * Keep the chip's running-thread count current
			 * when switching to or from the idle thread.
			 */
			if (t == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, 1);
			} else if (next == cp->cpu_idle_thread) {
				CHIP_NRUNNING(cp->cpu_chip, -1);
			}

			CPU_STATS_ADDQ(cp, sys, pswitch, 1);
			cp->cpu_last_swtch = t->t_disp_time = lbolt;
			TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

			if (dtrace_vtime_active)
				dtrace_vtime_switch(next);

			resume(next);
			/*
			 * The TR_RESUME_END and TR_SWTCH_END trace points
			 * appear at the end of resume(), because we may not
			 * return here
			 */
		} else {
			/* disp() picked us again: remain on this CPU. */
			if (t->t_flag & T_INTR_THREAD)
				cpu_intr_swtch_exit(t);

			DTRACE_SCHED(remain__cpu);
			TRACE_0(TR_FAC_DISP, TR_SWTCH_END, "swtch_end");
			(void) spl0();
		}
	}
}
8827c478bd9Sstevel@tonic-gate 
8837c478bd9Sstevel@tonic-gate /*
8847c478bd9Sstevel@tonic-gate  * swtch_from_zombie()
8857c478bd9Sstevel@tonic-gate  *	Special case of swtch(), which allows checks for TS_ZOMB to be
8867c478bd9Sstevel@tonic-gate  *	eliminated from normal resume.
8877c478bd9Sstevel@tonic-gate  *	Find best runnable thread and run it.
8887c478bd9Sstevel@tonic-gate  *	Called with the current thread zombied.
8897c478bd9Sstevel@tonic-gate  *	Zombies cannot migrate, so CPU references are safe.
8907c478bd9Sstevel@tonic-gate  */
void
swtch_from_zombie()
{
	kthread_t	*next;
	cpu_t		*cpu = CPU;

	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");

	ASSERT(curthread->t_state == TS_ZOMB);

	next = disp();			/* returns with spl high */
	ASSERT(CPU_ON_INTR(CPU) == 0);	/* not called with PIL > 10 */
	CPU_STATS_ADDQ(CPU, sys, pswitch, 1);
	ASSERT(next != curthread);
	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");

	/* Switching from a zombie to idle: one fewer running thread. */
	if (next == cpu->cpu_idle_thread)
		CHIP_NRUNNING(cpu->cpu_chip, -1);

	if (dtrace_vtime_active)
		dtrace_vtime_switch(next);

	resume_from_zombie(next);
	/*
	 * The TR_RESUME_END and TR_SWTCH_END trace points
	 * appear at the end of resume(), because we certainly will not
	 * return here
	 */
}
9207c478bd9Sstevel@tonic-gate 
9217c478bd9Sstevel@tonic-gate #if defined(DEBUG) && (defined(DISP_DEBUG) || defined(lint))
9227c478bd9Sstevel@tonic-gate static int
9237c478bd9Sstevel@tonic-gate thread_on_queue(kthread_t *tp)
9247c478bd9Sstevel@tonic-gate {
9257c478bd9Sstevel@tonic-gate 	cpu_t	*cp;
9267c478bd9Sstevel@tonic-gate 	cpu_t	*self;
9277c478bd9Sstevel@tonic-gate 	disp_t	*dp;
9287c478bd9Sstevel@tonic-gate 
9297c478bd9Sstevel@tonic-gate 	self = CPU;
9307c478bd9Sstevel@tonic-gate 	cp = self->cpu_next_onln;
9317c478bd9Sstevel@tonic-gate 	dp = cp->cpu_disp;
9327c478bd9Sstevel@tonic-gate 	for (;;) {
9337c478bd9Sstevel@tonic-gate 		dispq_t		*dq;
9347c478bd9Sstevel@tonic-gate 		dispq_t		*eq;
9357c478bd9Sstevel@tonic-gate 
9367c478bd9Sstevel@tonic-gate 		disp_lock_enter_high(&dp->disp_lock);
9377c478bd9Sstevel@tonic-gate 		for (dq = dp->disp_q, eq = dp->disp_q_limit; dq < eq; ++dq) {
9387c478bd9Sstevel@tonic-gate 			kthread_t	*rp;
9397c478bd9Sstevel@tonic-gate 
9407c478bd9Sstevel@tonic-gate 			ASSERT(dq->dq_last == NULL ||
9417c478bd9Sstevel@tonic-gate 				dq->dq_last->t_link == NULL);
9427c478bd9Sstevel@tonic-gate 			for (rp = dq->dq_first; rp; rp = rp->t_link)
9437c478bd9Sstevel@tonic-gate 				if (tp == rp) {
9447c478bd9Sstevel@tonic-gate 					disp_lock_exit_high(&dp->disp_lock);
9457c478bd9Sstevel@tonic-gate 					return (1);
9467c478bd9Sstevel@tonic-gate 				}
9477c478bd9Sstevel@tonic-gate 		}
9487c478bd9Sstevel@tonic-gate 		disp_lock_exit_high(&dp->disp_lock);
9497c478bd9Sstevel@tonic-gate 		if (cp == NULL)
9507c478bd9Sstevel@tonic-gate 			break;
9517c478bd9Sstevel@tonic-gate 		if (cp == self) {
9527c478bd9Sstevel@tonic-gate 			cp = NULL;
9537c478bd9Sstevel@tonic-gate 			dp = &cp->cpu_part->cp_kp_queue;
9547c478bd9Sstevel@tonic-gate 		} else {
9557c478bd9Sstevel@tonic-gate 			cp = cp->cpu_next_onln;
9567c478bd9Sstevel@tonic-gate 			dp = cp->cpu_disp;
9577c478bd9Sstevel@tonic-gate 		}
9587c478bd9Sstevel@tonic-gate 	}
9597c478bd9Sstevel@tonic-gate 	return (0);
9607c478bd9Sstevel@tonic-gate }	/* end of thread_on_queue */
9617c478bd9Sstevel@tonic-gate #else
9627c478bd9Sstevel@tonic-gate 
9637c478bd9Sstevel@tonic-gate #define	thread_on_queue(tp)	0	/* ASSERT must be !thread_on_queue */
9647c478bd9Sstevel@tonic-gate 
9657c478bd9Sstevel@tonic-gate #endif  /* DEBUG */
9667c478bd9Sstevel@tonic-gate 
9677c478bd9Sstevel@tonic-gate /*
9687c478bd9Sstevel@tonic-gate  * like swtch(), but switch to a specified thread taken from another CPU.
9697c478bd9Sstevel@tonic-gate  *	called with spl high..
9707c478bd9Sstevel@tonic-gate  */
9717c478bd9Sstevel@tonic-gate void
9727c478bd9Sstevel@tonic-gate swtch_to(kthread_t *next)
9737c478bd9Sstevel@tonic-gate {
9747c478bd9Sstevel@tonic-gate 	cpu_t			*cp = CPU;
9757c478bd9Sstevel@tonic-gate 
9767c478bd9Sstevel@tonic-gate 	TRACE_0(TR_FAC_DISP, TR_SWTCH_START, "swtch_start");
9777c478bd9Sstevel@tonic-gate 
9787c478bd9Sstevel@tonic-gate 	/*
9797c478bd9Sstevel@tonic-gate 	 * Update context switch statistics.
9807c478bd9Sstevel@tonic-gate 	 */
9817c478bd9Sstevel@tonic-gate 	CPU_STATS_ADDQ(cp, sys, pswitch, 1);
9827c478bd9Sstevel@tonic-gate 
9837c478bd9Sstevel@tonic-gate 	TRACE_0(TR_FAC_DISP, TR_RESUME_START, "resume_start");
9847c478bd9Sstevel@tonic-gate 
9857c478bd9Sstevel@tonic-gate 	if (curthread == cp->cpu_idle_thread)
9867c478bd9Sstevel@tonic-gate 		CHIP_NRUNNING(cp->cpu_chip, 1);
9877c478bd9Sstevel@tonic-gate 
9887c478bd9Sstevel@tonic-gate 	/* OK to steal anything left on run queue */
9897c478bd9Sstevel@tonic-gate 	cp->cpu_disp_flags &= ~CPU_DISP_DONTSTEAL;
9907c478bd9Sstevel@tonic-gate 
9917c478bd9Sstevel@tonic-gate 	/* record last execution time */
9927c478bd9Sstevel@tonic-gate 	cp->cpu_last_swtch = curthread->t_disp_time = lbolt;
9937c478bd9Sstevel@tonic-gate 
9947c478bd9Sstevel@tonic-gate 	if (dtrace_vtime_active)
9957c478bd9Sstevel@tonic-gate 		dtrace_vtime_switch(next);
9967c478bd9Sstevel@tonic-gate 
9977c478bd9Sstevel@tonic-gate 	resume(next);
9987c478bd9Sstevel@tonic-gate 	/*
9997c478bd9Sstevel@tonic-gate 	 * The TR_RESUME_END and TR_SWTCH_END trace points
10007c478bd9Sstevel@tonic-gate 	 * appear at the end of resume(), because we may not
10017c478bd9Sstevel@tonic-gate 	 * return here
10027c478bd9Sstevel@tonic-gate 	 */
10037c478bd9Sstevel@tonic-gate }
10047c478bd9Sstevel@tonic-gate 
10057c478bd9Sstevel@tonic-gate 
10067c478bd9Sstevel@tonic-gate 
10077c478bd9Sstevel@tonic-gate #define	CPU_IDLING(pri)	((pri) == -1)
10087c478bd9Sstevel@tonic-gate 
10097c478bd9Sstevel@tonic-gate static void
10107c478bd9Sstevel@tonic-gate cpu_resched(cpu_t *cp, pri_t tpri)
10117c478bd9Sstevel@tonic-gate {
10127c478bd9Sstevel@tonic-gate 	int	call_poke_cpu = 0;
10137c478bd9Sstevel@tonic-gate 	pri_t   cpupri = cp->cpu_dispatch_pri;
10147c478bd9Sstevel@tonic-gate 
10157c478bd9Sstevel@tonic-gate 	if (!CPU_IDLING(cpupri) && (cpupri < tpri)) {
10167c478bd9Sstevel@tonic-gate 		TRACE_2(TR_FAC_DISP, TR_CPU_RESCHED,
10177c478bd9Sstevel@tonic-gate 		    "CPU_RESCHED:Tpri %d Cpupri %d", tpri, cpupri);
10187c478bd9Sstevel@tonic-gate 		if (tpri >= upreemptpri && cp->cpu_runrun == 0) {
10197c478bd9Sstevel@tonic-gate 			cp->cpu_runrun = 1;
10207c478bd9Sstevel@tonic-gate 			aston(cp->cpu_dispthread);
10217c478bd9Sstevel@tonic-gate 			if (tpri < kpreemptpri && cp != CPU)
10227c478bd9Sstevel@tonic-gate 				call_poke_cpu = 1;
10237c478bd9Sstevel@tonic-gate 		}
10247c478bd9Sstevel@tonic-gate 		if (tpri >= kpreemptpri && cp->cpu_kprunrun == 0) {
10257c478bd9Sstevel@tonic-gate 			cp->cpu_kprunrun = 1;
10267c478bd9Sstevel@tonic-gate 			if (cp != CPU)
10277c478bd9Sstevel@tonic-gate 				call_poke_cpu = 1;
10287c478bd9Sstevel@tonic-gate 		}
10297c478bd9Sstevel@tonic-gate 	}
10307c478bd9Sstevel@tonic-gate 
10317c478bd9Sstevel@tonic-gate 	/*
10327c478bd9Sstevel@tonic-gate 	 * Propagate cpu_runrun, and cpu_kprunrun to global visibility.
10337c478bd9Sstevel@tonic-gate 	 */
10347c478bd9Sstevel@tonic-gate 	membar_enter();
10357c478bd9Sstevel@tonic-gate 
10367c478bd9Sstevel@tonic-gate 	if (call_poke_cpu)
10377c478bd9Sstevel@tonic-gate 		poke_cpu(cp->cpu_id);
10387c478bd9Sstevel@tonic-gate }
10397c478bd9Sstevel@tonic-gate 
10407c478bd9Sstevel@tonic-gate /*
10417c478bd9Sstevel@tonic-gate  * Routine used by setbackdq() to balance load across the physical
10427c478bd9Sstevel@tonic-gate  * processors. Returns a CPU of a lesser loaded chip in the lgroup
10437c478bd9Sstevel@tonic-gate  * if balancing is necessary, or the "hint" CPU if it's not.
10447c478bd9Sstevel@tonic-gate  *
10457c478bd9Sstevel@tonic-gate  * - tp is the thread being enqueued
10467c478bd9Sstevel@tonic-gate  * - cp is a hint CPU (chosen by cpu_choose()).
10477c478bd9Sstevel@tonic-gate  * - curchip (if not NULL) is the chip on which the current thread
10487c478bd9Sstevel@tonic-gate  *   is running.
10497c478bd9Sstevel@tonic-gate  *
10507c478bd9Sstevel@tonic-gate  * The thread lock for "tp" must be held while calling this routine.
10517c478bd9Sstevel@tonic-gate  */
static cpu_t *
chip_balance(kthread_t *tp, cpu_t *cp, chip_t *curchip)
{
	int	chp_nrun, ochp_nrun;
	chip_t	*chp, *nchp;

	chp = cp->cpu_chip;
	chp_nrun = chp->chip_nrunning;

	if (chp == curchip)
		chp_nrun--;	/* Ignore curthread */

	/*
	 * If this chip isn't at all idle, then let
	 * run queue balancing do the work.
	 */
	if (chp_nrun == chp->chip_ncpu)
		return (cp);

	/*
	 * Walk the lgroup's ring of chips starting at the balance
	 * rotor, looking for a less-loaded alternative.
	 */
	nchp = chp->chip_balance;
	do {
		/* Skip our own chip and chips outside tp's partition. */
		if (nchp == chp ||
		    !CHIP_IN_CPUPART(nchp, tp->t_cpupart))
			continue;

		ochp_nrun = nchp->chip_nrunning;

		/*
		 * If the other chip is running less threads,
		 * or if it's running the same number of threads, but
		 * has more online logical CPUs, then choose to balance.
		 */
		if (chp_nrun > ochp_nrun ||
		    (chp_nrun == ochp_nrun &&
		    nchp->chip_ncpu > chp->chip_ncpu)) {
			/*
			 * Rotate the chosen chip's CPU list so that
			 * successive picks spread across its CPUs.
			 */
			cp = nchp->chip_cpus;
			nchp->chip_cpus = cp->cpu_next_chip;

			/*
			 * Find a CPU on the chip in the correct
			 * partition. We know at least one exists
			 * because of the CHIP_IN_CPUPART() check above.
			 */
			while (cp->cpu_part != tp->t_cpupart)
				cp = cp->cpu_next_chip;
		}
		/* Advance the rotor so the next call starts elsewhere. */
		chp->chip_balance = nchp->chip_next_lgrp;
		break;
	} while ((nchp = nchp->chip_next_lgrp) != chp->chip_balance);

	ASSERT(CHIP_IN_CPUPART(cp->cpu_chip, tp->t_cpupart));
	return (cp);
}
11057c478bd9Sstevel@tonic-gate 
11067c478bd9Sstevel@tonic-gate /*
11077c478bd9Sstevel@tonic-gate  * setbackdq() keeps runqs balanced such that the difference in length
11087c478bd9Sstevel@tonic-gate  * between the chosen runq and the next one is no more than RUNQ_MAX_DIFF.
11097c478bd9Sstevel@tonic-gate  * For threads with priorities below RUNQ_MATCH_PRI levels, the runq's lengths
11107c478bd9Sstevel@tonic-gate  * must match.  When per-thread TS_RUNQMATCH flag is set, setbackdq() will
11117c478bd9Sstevel@tonic-gate  * try to keep runqs perfectly balanced regardless of the thread priority.
11127c478bd9Sstevel@tonic-gate  */
11137c478bd9Sstevel@tonic-gate #define	RUNQ_MATCH_PRI	16	/* pri below which queue lengths must match */
11147c478bd9Sstevel@tonic-gate #define	RUNQ_MAX_DIFF	2	/* maximum runq length difference */
11157c478bd9Sstevel@tonic-gate #define	RUNQ_LEN(cp, pri)	((cp)->cpu_disp->disp_q[pri].dq_sruncnt)
11167c478bd9Sstevel@tonic-gate 
11177c478bd9Sstevel@tonic-gate /*
11187c478bd9Sstevel@tonic-gate  * Put the specified thread on the back of the dispatcher
11197c478bd9Sstevel@tonic-gate  * queue corresponding to its current priority.
11207c478bd9Sstevel@tonic-gate  *
11217c478bd9Sstevel@tonic-gate  * Called with the thread in transition, onproc or stopped state
11227c478bd9Sstevel@tonic-gate  * and locked (transition implies locked) and at high spl.
11237c478bd9Sstevel@tonic-gate  * Returns with the thread in TS_RUN state and still locked.
11247c478bd9Sstevel@tonic-gate  */
11257c478bd9Sstevel@tonic-gate void
11267c478bd9Sstevel@tonic-gate setbackdq(kthread_t *tp)
11277c478bd9Sstevel@tonic-gate {
11287c478bd9Sstevel@tonic-gate 	dispq_t	*dq;
11297c478bd9Sstevel@tonic-gate 	disp_t		*dp;
11307c478bd9Sstevel@tonic-gate 	chip_t		*curchip = NULL;
11317c478bd9Sstevel@tonic-gate 	cpu_t		*cp;
11327c478bd9Sstevel@tonic-gate 	pri_t		tpri;
11337c478bd9Sstevel@tonic-gate 	int		bound;
11347c478bd9Sstevel@tonic-gate 
11357c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
11367c478bd9Sstevel@tonic-gate 	ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
11377c478bd9Sstevel@tonic-gate 
11387c478bd9Sstevel@tonic-gate 	if (tp->t_waitrq == 0) {
11397c478bd9Sstevel@tonic-gate 		hrtime_t curtime;
11407c478bd9Sstevel@tonic-gate 
11417c478bd9Sstevel@tonic-gate 		curtime = gethrtime_unscaled();
11427c478bd9Sstevel@tonic-gate 		(void) cpu_update_pct(tp, curtime);
11437c478bd9Sstevel@tonic-gate 		tp->t_waitrq = curtime;
11447c478bd9Sstevel@tonic-gate 	} else {
11457c478bd9Sstevel@tonic-gate 		(void) cpu_update_pct(tp, gethrtime_unscaled());
11467c478bd9Sstevel@tonic-gate 	}
11477c478bd9Sstevel@tonic-gate 
11487c478bd9Sstevel@tonic-gate 	ASSERT(!thread_on_queue(tp));	/* make sure tp isn't on a runq */
11497c478bd9Sstevel@tonic-gate 
11507c478bd9Sstevel@tonic-gate 	/*
11517c478bd9Sstevel@tonic-gate 	 * If thread is "swapped" or on the swap queue don't
11527c478bd9Sstevel@tonic-gate 	 * queue it, but wake sched.
11537c478bd9Sstevel@tonic-gate 	 */
11547c478bd9Sstevel@tonic-gate 	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
11557c478bd9Sstevel@tonic-gate 		disp_swapped_setrun(tp);
11567c478bd9Sstevel@tonic-gate 		return;
11577c478bd9Sstevel@tonic-gate 	}
11587c478bd9Sstevel@tonic-gate 
11597c478bd9Sstevel@tonic-gate 	tpri = DISP_PRIO(tp);
11607c478bd9Sstevel@tonic-gate 	if (tp == curthread) {
11617c478bd9Sstevel@tonic-gate 		curchip = CPU->cpu_chip;
11627c478bd9Sstevel@tonic-gate 	}
11637c478bd9Sstevel@tonic-gate 
11647c478bd9Sstevel@tonic-gate 	if (ncpus == 1)
11657c478bd9Sstevel@tonic-gate 		cp = tp->t_cpu;
11667c478bd9Sstevel@tonic-gate 	else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) {
11677c478bd9Sstevel@tonic-gate 		if (tpri >= kpqpri) {
11687c478bd9Sstevel@tonic-gate 			setkpdq(tp, SETKP_BACK);
11697c478bd9Sstevel@tonic-gate 			return;
11707c478bd9Sstevel@tonic-gate 		}
11717c478bd9Sstevel@tonic-gate 		/*
11727c478bd9Sstevel@tonic-gate 		 * Let cpu_choose suggest a CPU.
11737c478bd9Sstevel@tonic-gate 		 */
11747c478bd9Sstevel@tonic-gate 		cp = cpu_choose(tp, tpri);
11757c478bd9Sstevel@tonic-gate 
11767c478bd9Sstevel@tonic-gate 		if (tp->t_cpupart == cp->cpu_part) {
11777c478bd9Sstevel@tonic-gate 			int	qlen;
11787c478bd9Sstevel@tonic-gate 
11797c478bd9Sstevel@tonic-gate 			/*
11807c478bd9Sstevel@tonic-gate 			 * Select another CPU if we need
11817c478bd9Sstevel@tonic-gate 			 * to do some load balancing across the
11827c478bd9Sstevel@tonic-gate 			 * physical processors.
11837c478bd9Sstevel@tonic-gate 			 */
11847c478bd9Sstevel@tonic-gate 			if (CHIP_SHOULD_BALANCE(cp->cpu_chip))
11857c478bd9Sstevel@tonic-gate 				cp = chip_balance(tp, cp, curchip);
11867c478bd9Sstevel@tonic-gate 
11877c478bd9Sstevel@tonic-gate 			/*
11887c478bd9Sstevel@tonic-gate 			 * Balance across the run queues
11897c478bd9Sstevel@tonic-gate 			 */
11907c478bd9Sstevel@tonic-gate 			qlen = RUNQ_LEN(cp, tpri);
11917c478bd9Sstevel@tonic-gate 			if (tpri >= RUNQ_MATCH_PRI &&
11927c478bd9Sstevel@tonic-gate 			    !(tp->t_schedflag & TS_RUNQMATCH))
11937c478bd9Sstevel@tonic-gate 				qlen -= RUNQ_MAX_DIFF;
11947c478bd9Sstevel@tonic-gate 			if (qlen > 0) {
11957c478bd9Sstevel@tonic-gate 				cpu_t	*np;
11967c478bd9Sstevel@tonic-gate 
11977c478bd9Sstevel@tonic-gate 				if (tp->t_lpl->lpl_lgrpid == LGRP_ROOTID)
11987c478bd9Sstevel@tonic-gate 					np = cp->cpu_next_part;
11997c478bd9Sstevel@tonic-gate 				else {
12007c478bd9Sstevel@tonic-gate 					if ((np = cp->cpu_next_lpl) == cp)
12017c478bd9Sstevel@tonic-gate 						np = cp->cpu_next_part;
12027c478bd9Sstevel@tonic-gate 				}
12037c478bd9Sstevel@tonic-gate 				if (RUNQ_LEN(np, tpri) < qlen)
12047c478bd9Sstevel@tonic-gate 					cp = np;
12057c478bd9Sstevel@tonic-gate 			}
12067c478bd9Sstevel@tonic-gate 		} else {
12077c478bd9Sstevel@tonic-gate 			/*
12087c478bd9Sstevel@tonic-gate 			 * Migrate to a cpu in the new partition.
12097c478bd9Sstevel@tonic-gate 			 */
12107c478bd9Sstevel@tonic-gate 			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
12117c478bd9Sstevel@tonic-gate 			    tp->t_lpl, tp->t_pri, NULL);
12127c478bd9Sstevel@tonic-gate 		}
12137c478bd9Sstevel@tonic-gate 		bound = 0;
12147c478bd9Sstevel@tonic-gate 		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
12157c478bd9Sstevel@tonic-gate 	} else {
12167c478bd9Sstevel@tonic-gate 		/*
12177c478bd9Sstevel@tonic-gate 		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
12187c478bd9Sstevel@tonic-gate 		 * a short time until weak binding that existed when the
12197c478bd9Sstevel@tonic-gate 		 * strong binding was established has dropped) so we must
12207c478bd9Sstevel@tonic-gate 		 * favour weak binding over strong.
12217c478bd9Sstevel@tonic-gate 		 */
12227c478bd9Sstevel@tonic-gate 		cp = tp->t_weakbound_cpu ?
12237c478bd9Sstevel@tonic-gate 		    tp->t_weakbound_cpu : tp->t_bound_cpu;
12247c478bd9Sstevel@tonic-gate 		bound = 1;
12257c478bd9Sstevel@tonic-gate 	}
12267c478bd9Sstevel@tonic-gate 	dp = cp->cpu_disp;
12277c478bd9Sstevel@tonic-gate 	disp_lock_enter_high(&dp->disp_lock);
12287c478bd9Sstevel@tonic-gate 
12297c478bd9Sstevel@tonic-gate 	DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 0);
12307c478bd9Sstevel@tonic-gate 	TRACE_3(TR_FAC_DISP, TR_BACKQ, "setbackdq:pri %d cpu %p tid %p",
12317c478bd9Sstevel@tonic-gate 		tpri, cp, tp);
12327c478bd9Sstevel@tonic-gate 
12337c478bd9Sstevel@tonic-gate #ifndef NPROBE
12347c478bd9Sstevel@tonic-gate 	/* Kernel probe */
12357c478bd9Sstevel@tonic-gate 	if (tnf_tracing_active)
12367c478bd9Sstevel@tonic-gate 		tnf_thread_queue(tp, cp, tpri);
12377c478bd9Sstevel@tonic-gate #endif /* NPROBE */
12387c478bd9Sstevel@tonic-gate 
12397c478bd9Sstevel@tonic-gate 	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
12407c478bd9Sstevel@tonic-gate 
12417c478bd9Sstevel@tonic-gate 	THREAD_RUN(tp, &dp->disp_lock);		/* set t_state to TS_RUN */
12427c478bd9Sstevel@tonic-gate 	tp->t_disp_queue = dp;
12437c478bd9Sstevel@tonic-gate 	tp->t_link = NULL;
12447c478bd9Sstevel@tonic-gate 
12457c478bd9Sstevel@tonic-gate 	dq = &dp->disp_q[tpri];
12467c478bd9Sstevel@tonic-gate 	dp->disp_nrunnable++;
12477c478bd9Sstevel@tonic-gate 	membar_enter();
12487c478bd9Sstevel@tonic-gate 
12497c478bd9Sstevel@tonic-gate 	if (dq->dq_sruncnt++ != 0) {
12507c478bd9Sstevel@tonic-gate 		ASSERT(dq->dq_first != NULL);
12517c478bd9Sstevel@tonic-gate 		dq->dq_last->t_link = tp;
12527c478bd9Sstevel@tonic-gate 		dq->dq_last = tp;
12537c478bd9Sstevel@tonic-gate 	} else {
12547c478bd9Sstevel@tonic-gate 		ASSERT(dq->dq_first == NULL);
12557c478bd9Sstevel@tonic-gate 		ASSERT(dq->dq_last == NULL);
12567c478bd9Sstevel@tonic-gate 		dq->dq_first = dq->dq_last = tp;
12577c478bd9Sstevel@tonic-gate 		BT_SET(dp->disp_qactmap, tpri);
12587c478bd9Sstevel@tonic-gate 		if (tpri > dp->disp_maxrunpri) {
12597c478bd9Sstevel@tonic-gate 			dp->disp_maxrunpri = tpri;
12607c478bd9Sstevel@tonic-gate 			membar_enter();
12617c478bd9Sstevel@tonic-gate 			cpu_resched(cp, tpri);
12627c478bd9Sstevel@tonic-gate 		}
12637c478bd9Sstevel@tonic-gate 	}
12647c478bd9Sstevel@tonic-gate 
12657c478bd9Sstevel@tonic-gate 	if (!bound && tpri > dp->disp_max_unbound_pri) {
12667c478bd9Sstevel@tonic-gate 		if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
12677c478bd9Sstevel@tonic-gate 		    cp == CPU) {
12687c478bd9Sstevel@tonic-gate 			/*
12697c478bd9Sstevel@tonic-gate 			 * If there are no other unbound threads on the
12707c478bd9Sstevel@tonic-gate 			 * run queue, don't allow other CPUs to steal
12717c478bd9Sstevel@tonic-gate 			 * this thread while we are in the middle of a
12727c478bd9Sstevel@tonic-gate 			 * context switch. We may just switch to it
12737c478bd9Sstevel@tonic-gate 			 * again right away. CPU_DISP_DONTSTEAL is cleared
12747c478bd9Sstevel@tonic-gate 			 * in swtch and swtch_to.
12757c478bd9Sstevel@tonic-gate 			 */
12767c478bd9Sstevel@tonic-gate 			cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
12777c478bd9Sstevel@tonic-gate 		}
12787c478bd9Sstevel@tonic-gate 		dp->disp_max_unbound_pri = tpri;
12797c478bd9Sstevel@tonic-gate 	}
12807c478bd9Sstevel@tonic-gate 	(*disp_enq_thread)(cp, bound);
12817c478bd9Sstevel@tonic-gate }
12827c478bd9Sstevel@tonic-gate 
12837c478bd9Sstevel@tonic-gate /*
12847c478bd9Sstevel@tonic-gate  * Put the specified thread on the front of the dispatcher
12857c478bd9Sstevel@tonic-gate  * queue corresponding to its current priority.
12867c478bd9Sstevel@tonic-gate  *
12877c478bd9Sstevel@tonic-gate  * Called with the thread in transition, onproc or stopped state
12887c478bd9Sstevel@tonic-gate  * and locked (transition implies locked) and at high spl.
12897c478bd9Sstevel@tonic-gate  * Returns with the thread in TS_RUN state and still locked.
12907c478bd9Sstevel@tonic-gate  */
12917c478bd9Sstevel@tonic-gate void
12927c478bd9Sstevel@tonic-gate setfrontdq(kthread_t *tp)
12937c478bd9Sstevel@tonic-gate {
12947c478bd9Sstevel@tonic-gate 	disp_t		*dp;
12957c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
12967c478bd9Sstevel@tonic-gate 	cpu_t		*cp;
12977c478bd9Sstevel@tonic-gate 	pri_t		tpri;
12987c478bd9Sstevel@tonic-gate 	int		bound;
12997c478bd9Sstevel@tonic-gate 
13007c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
13017c478bd9Sstevel@tonic-gate 	ASSERT((tp->t_schedflag & TS_ALLSTART) == 0);
13027c478bd9Sstevel@tonic-gate 
13037c478bd9Sstevel@tonic-gate 	if (tp->t_waitrq == 0) {
13047c478bd9Sstevel@tonic-gate 		hrtime_t curtime;
13057c478bd9Sstevel@tonic-gate 
13067c478bd9Sstevel@tonic-gate 		curtime = gethrtime_unscaled();
13077c478bd9Sstevel@tonic-gate 		(void) cpu_update_pct(tp, curtime);
13087c478bd9Sstevel@tonic-gate 		tp->t_waitrq = curtime;
13097c478bd9Sstevel@tonic-gate 	} else {
13107c478bd9Sstevel@tonic-gate 		(void) cpu_update_pct(tp, gethrtime_unscaled());
13117c478bd9Sstevel@tonic-gate 	}
13127c478bd9Sstevel@tonic-gate 
13137c478bd9Sstevel@tonic-gate 	ASSERT(!thread_on_queue(tp));	/* make sure tp isn't on a runq */
13147c478bd9Sstevel@tonic-gate 
13157c478bd9Sstevel@tonic-gate 	/*
13167c478bd9Sstevel@tonic-gate 	 * If thread is "swapped" or on the swap queue don't
13177c478bd9Sstevel@tonic-gate 	 * queue it, but wake sched.
13187c478bd9Sstevel@tonic-gate 	 */
13197c478bd9Sstevel@tonic-gate 	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD) {
13207c478bd9Sstevel@tonic-gate 		disp_swapped_setrun(tp);
13217c478bd9Sstevel@tonic-gate 		return;
13227c478bd9Sstevel@tonic-gate 	}
13237c478bd9Sstevel@tonic-gate 
13247c478bd9Sstevel@tonic-gate 	tpri = DISP_PRIO(tp);
13257c478bd9Sstevel@tonic-gate 	if (ncpus == 1)
13267c478bd9Sstevel@tonic-gate 		cp = tp->t_cpu;
13277c478bd9Sstevel@tonic-gate 	else if (!tp->t_bound_cpu && !tp->t_weakbound_cpu) {
13287c478bd9Sstevel@tonic-gate 		if (tpri >= kpqpri) {
13297c478bd9Sstevel@tonic-gate 			setkpdq(tp, SETKP_FRONT);
13307c478bd9Sstevel@tonic-gate 			return;
13317c478bd9Sstevel@tonic-gate 		}
13327c478bd9Sstevel@tonic-gate 		cp = tp->t_cpu;
13337c478bd9Sstevel@tonic-gate 		if (tp->t_cpupart == cp->cpu_part) {
13347c478bd9Sstevel@tonic-gate 			/*
13357c478bd9Sstevel@tonic-gate 			 * If we are of higher or equal priority than
13367c478bd9Sstevel@tonic-gate 			 * the highest priority runnable thread of
13377c478bd9Sstevel@tonic-gate 			 * the current CPU, just pick this CPU.  Otherwise
13387c478bd9Sstevel@tonic-gate 			 * Let cpu_choose() select the CPU.  If this cpu
13397c478bd9Sstevel@tonic-gate 			 * is the target of an offline request then do not
13407c478bd9Sstevel@tonic-gate 			 * pick it - a thread_nomigrate() on the in motion
13417c478bd9Sstevel@tonic-gate 			 * cpu relies on this when it forces a preempt.
13427c478bd9Sstevel@tonic-gate 			 */
13437c478bd9Sstevel@tonic-gate 			if (tpri < cp->cpu_disp->disp_maxrunpri ||
13447c478bd9Sstevel@tonic-gate 			    cp == cpu_inmotion)
13457c478bd9Sstevel@tonic-gate 				cp = cpu_choose(tp, tpri);
13467c478bd9Sstevel@tonic-gate 		} else {
13477c478bd9Sstevel@tonic-gate 			/*
13487c478bd9Sstevel@tonic-gate 			 * Migrate to a cpu in the new partition.
13497c478bd9Sstevel@tonic-gate 			 */
13507c478bd9Sstevel@tonic-gate 			cp = disp_lowpri_cpu(tp->t_cpupart->cp_cpulist,
13517c478bd9Sstevel@tonic-gate 			    tp->t_lpl, tp->t_pri, NULL);
13527c478bd9Sstevel@tonic-gate 		}
13537c478bd9Sstevel@tonic-gate 		bound = 0;
13547c478bd9Sstevel@tonic-gate 		ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
13557c478bd9Sstevel@tonic-gate 	} else {
13567c478bd9Sstevel@tonic-gate 		/*
13577c478bd9Sstevel@tonic-gate 		 * It is possible that t_weakbound_cpu != t_bound_cpu (for
13587c478bd9Sstevel@tonic-gate 		 * a short time until weak binding that existed when the
13597c478bd9Sstevel@tonic-gate 		 * strong binding was established has dropped) so we must
13607c478bd9Sstevel@tonic-gate 		 * favour weak binding over strong.
13617c478bd9Sstevel@tonic-gate 		 */
13627c478bd9Sstevel@tonic-gate 		cp = tp->t_weakbound_cpu ?
13637c478bd9Sstevel@tonic-gate 		    tp->t_weakbound_cpu : tp->t_bound_cpu;
13647c478bd9Sstevel@tonic-gate 		bound = 1;
13657c478bd9Sstevel@tonic-gate 	}
13667c478bd9Sstevel@tonic-gate 	dp = cp->cpu_disp;
13677c478bd9Sstevel@tonic-gate 	disp_lock_enter_high(&dp->disp_lock);
13687c478bd9Sstevel@tonic-gate 
13697c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
13707c478bd9Sstevel@tonic-gate 	DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, 1);
13717c478bd9Sstevel@tonic-gate 
13727c478bd9Sstevel@tonic-gate #ifndef NPROBE
13737c478bd9Sstevel@tonic-gate 	/* Kernel probe */
13747c478bd9Sstevel@tonic-gate 	if (tnf_tracing_active)
13757c478bd9Sstevel@tonic-gate 		tnf_thread_queue(tp, cp, tpri);
13767c478bd9Sstevel@tonic-gate #endif /* NPROBE */
13777c478bd9Sstevel@tonic-gate 
13787c478bd9Sstevel@tonic-gate 	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
13797c478bd9Sstevel@tonic-gate 
13807c478bd9Sstevel@tonic-gate 	THREAD_RUN(tp, &dp->disp_lock);		/* set TS_RUN state and lock */
13817c478bd9Sstevel@tonic-gate 	tp->t_disp_queue = dp;
13827c478bd9Sstevel@tonic-gate 
13837c478bd9Sstevel@tonic-gate 	dq = &dp->disp_q[tpri];
13847c478bd9Sstevel@tonic-gate 	dp->disp_nrunnable++;
13857c478bd9Sstevel@tonic-gate 	membar_enter();
13867c478bd9Sstevel@tonic-gate 
13877c478bd9Sstevel@tonic-gate 	if (dq->dq_sruncnt++ != 0) {
13887c478bd9Sstevel@tonic-gate 		ASSERT(dq->dq_last != NULL);
13897c478bd9Sstevel@tonic-gate 		tp->t_link = dq->dq_first;
13907c478bd9Sstevel@tonic-gate 		dq->dq_first = tp;
13917c478bd9Sstevel@tonic-gate 	} else {
13927c478bd9Sstevel@tonic-gate 		ASSERT(dq->dq_last == NULL);
13937c478bd9Sstevel@tonic-gate 		ASSERT(dq->dq_first == NULL);
13947c478bd9Sstevel@tonic-gate 		tp->t_link = NULL;
13957c478bd9Sstevel@tonic-gate 		dq->dq_first = dq->dq_last = tp;
13967c478bd9Sstevel@tonic-gate 		BT_SET(dp->disp_qactmap, tpri);
13977c478bd9Sstevel@tonic-gate 		if (tpri > dp->disp_maxrunpri) {
13987c478bd9Sstevel@tonic-gate 			dp->disp_maxrunpri = tpri;
13997c478bd9Sstevel@tonic-gate 			membar_enter();
14007c478bd9Sstevel@tonic-gate 			cpu_resched(cp, tpri);
14017c478bd9Sstevel@tonic-gate 		}
14027c478bd9Sstevel@tonic-gate 	}
14037c478bd9Sstevel@tonic-gate 
14047c478bd9Sstevel@tonic-gate 	if (!bound && tpri > dp->disp_max_unbound_pri) {
14057c478bd9Sstevel@tonic-gate 		if (tp == curthread && dp->disp_max_unbound_pri == -1 &&
14067c478bd9Sstevel@tonic-gate 		    cp == CPU) {
14077c478bd9Sstevel@tonic-gate 			/*
14087c478bd9Sstevel@tonic-gate 			 * If there are no other unbound threads on the
14097c478bd9Sstevel@tonic-gate 			 * run queue, don't allow other CPUs to steal
14107c478bd9Sstevel@tonic-gate 			 * this thread while we are in the middle of a
14117c478bd9Sstevel@tonic-gate 			 * context switch. We may just switch to it
14127c478bd9Sstevel@tonic-gate 			 * again right away. CPU_DISP_DONTSTEAL is cleared
14137c478bd9Sstevel@tonic-gate 			 * in swtch and swtch_to.
14147c478bd9Sstevel@tonic-gate 			 */
14157c478bd9Sstevel@tonic-gate 			cp->cpu_disp_flags |= CPU_DISP_DONTSTEAL;
14167c478bd9Sstevel@tonic-gate 		}
14177c478bd9Sstevel@tonic-gate 		dp->disp_max_unbound_pri = tpri;
14187c478bd9Sstevel@tonic-gate 	}
14197c478bd9Sstevel@tonic-gate 	(*disp_enq_thread)(cp, bound);
14207c478bd9Sstevel@tonic-gate }
14217c478bd9Sstevel@tonic-gate 
14227c478bd9Sstevel@tonic-gate /*
14237c478bd9Sstevel@tonic-gate  * Put a high-priority unbound thread on the kp queue
14247c478bd9Sstevel@tonic-gate  */
14257c478bd9Sstevel@tonic-gate static void
14267c478bd9Sstevel@tonic-gate setkpdq(kthread_t *tp, int borf)
14277c478bd9Sstevel@tonic-gate {
14287c478bd9Sstevel@tonic-gate 	dispq_t	*dq;
14297c478bd9Sstevel@tonic-gate 	disp_t	*dp;
14307c478bd9Sstevel@tonic-gate 	cpu_t	*cp;
14317c478bd9Sstevel@tonic-gate 	pri_t	tpri;
14327c478bd9Sstevel@tonic-gate 
14337c478bd9Sstevel@tonic-gate 	tpri = DISP_PRIO(tp);
14347c478bd9Sstevel@tonic-gate 
14357c478bd9Sstevel@tonic-gate 	dp = &tp->t_cpupart->cp_kp_queue;
14367c478bd9Sstevel@tonic-gate 	disp_lock_enter_high(&dp->disp_lock);
14377c478bd9Sstevel@tonic-gate 
14387c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_DISP, TR_FRONTQ, "frontq:pri %d tid %p", tpri, tp);
14397c478bd9Sstevel@tonic-gate 
14407c478bd9Sstevel@tonic-gate 	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
14417c478bd9Sstevel@tonic-gate 	DTRACE_SCHED3(enqueue, kthread_t *, tp, disp_t *, dp, int, borf);
14427c478bd9Sstevel@tonic-gate 	THREAD_RUN(tp, &dp->disp_lock);		/* set t_state to TS_RUN */
14437c478bd9Sstevel@tonic-gate 	tp->t_disp_queue = dp;
14447c478bd9Sstevel@tonic-gate 	dp->disp_nrunnable++;
14457c478bd9Sstevel@tonic-gate 	dq = &dp->disp_q[tpri];
14467c478bd9Sstevel@tonic-gate 
14477c478bd9Sstevel@tonic-gate 	if (dq->dq_sruncnt++ != 0) {
14487c478bd9Sstevel@tonic-gate 		if (borf == SETKP_BACK) {
14497c478bd9Sstevel@tonic-gate 			ASSERT(dq->dq_first != NULL);
14507c478bd9Sstevel@tonic-gate 			tp->t_link = NULL;
14517c478bd9Sstevel@tonic-gate 			dq->dq_last->t_link = tp;
14527c478bd9Sstevel@tonic-gate 			dq->dq_last = tp;
14537c478bd9Sstevel@tonic-gate 		} else {
14547c478bd9Sstevel@tonic-gate 			ASSERT(dq->dq_last != NULL);
14557c478bd9Sstevel@tonic-gate 			tp->t_link = dq->dq_first;
14567c478bd9Sstevel@tonic-gate 			dq->dq_first = tp;
14577c478bd9Sstevel@tonic-gate 		}
14587c478bd9Sstevel@tonic-gate 	} else {
14597c478bd9Sstevel@tonic-gate 		if (borf == SETKP_BACK) {
14607c478bd9Sstevel@tonic-gate 			ASSERT(dq->dq_first == NULL);
14617c478bd9Sstevel@tonic-gate 			ASSERT(dq->dq_last == NULL);
14627c478bd9Sstevel@tonic-gate 			dq->dq_first = dq->dq_last = tp;
14637c478bd9Sstevel@tonic-gate 		} else {
14647c478bd9Sstevel@tonic-gate 			ASSERT(dq->dq_last == NULL);
14657c478bd9Sstevel@tonic-gate 			ASSERT(dq->dq_first == NULL);
14667c478bd9Sstevel@tonic-gate 			tp->t_link = NULL;
14677c478bd9Sstevel@tonic-gate 			dq->dq_first = dq->dq_last = tp;
14687c478bd9Sstevel@tonic-gate 		}
14697c478bd9Sstevel@tonic-gate 		BT_SET(dp->disp_qactmap, tpri);
14707c478bd9Sstevel@tonic-gate 		if (tpri > dp->disp_max_unbound_pri)
14717c478bd9Sstevel@tonic-gate 			dp->disp_max_unbound_pri = tpri;
14727c478bd9Sstevel@tonic-gate 		if (tpri > dp->disp_maxrunpri) {
14737c478bd9Sstevel@tonic-gate 			dp->disp_maxrunpri = tpri;
14747c478bd9Sstevel@tonic-gate 			membar_enter();
14757c478bd9Sstevel@tonic-gate 		}
14767c478bd9Sstevel@tonic-gate 	}
14777c478bd9Sstevel@tonic-gate 
14787c478bd9Sstevel@tonic-gate 	cp = tp->t_cpu;
14797c478bd9Sstevel@tonic-gate 	if (tp->t_cpupart != cp->cpu_part) {
14807c478bd9Sstevel@tonic-gate 		/* migrate to a cpu in the new partition */
14817c478bd9Sstevel@tonic-gate 		cp = tp->t_cpupart->cp_cpulist;
14827c478bd9Sstevel@tonic-gate 	}
14837c478bd9Sstevel@tonic-gate 	cp = disp_lowpri_cpu(cp, tp->t_lpl, tp->t_pri, NULL);
14847c478bd9Sstevel@tonic-gate 	disp_lock_enter_high(&cp->cpu_disp->disp_lock);
14857c478bd9Sstevel@tonic-gate 	ASSERT((cp->cpu_flags & CPU_QUIESCED) == 0);
14867c478bd9Sstevel@tonic-gate 
14877c478bd9Sstevel@tonic-gate #ifndef NPROBE
14887c478bd9Sstevel@tonic-gate 	/* Kernel probe */
14897c478bd9Sstevel@tonic-gate 	if (tnf_tracing_active)
14907c478bd9Sstevel@tonic-gate 		tnf_thread_queue(tp, cp, tpri);
14917c478bd9Sstevel@tonic-gate #endif /* NPROBE */
14927c478bd9Sstevel@tonic-gate 
14937c478bd9Sstevel@tonic-gate 	if (cp->cpu_chosen_level < tpri)
14947c478bd9Sstevel@tonic-gate 		cp->cpu_chosen_level = tpri;
14957c478bd9Sstevel@tonic-gate 	cpu_resched(cp, tpri);
14967c478bd9Sstevel@tonic-gate 	disp_lock_exit_high(&cp->cpu_disp->disp_lock);
14977c478bd9Sstevel@tonic-gate 	(*disp_enq_thread)(cp, 0);
14987c478bd9Sstevel@tonic-gate }
14997c478bd9Sstevel@tonic-gate 
15007c478bd9Sstevel@tonic-gate /*
15017c478bd9Sstevel@tonic-gate  * Remove a thread from the dispatcher queue if it is on it.
15027c478bd9Sstevel@tonic-gate  * It is not an error if it is not found but we return whether
15037c478bd9Sstevel@tonic-gate  * or not it was found in case the caller wants to check.
15047c478bd9Sstevel@tonic-gate  */
15057c478bd9Sstevel@tonic-gate int
15067c478bd9Sstevel@tonic-gate dispdeq(kthread_t *tp)
15077c478bd9Sstevel@tonic-gate {
15087c478bd9Sstevel@tonic-gate 	disp_t		*dp;
15097c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
15107c478bd9Sstevel@tonic-gate 	kthread_t	*rp;
15117c478bd9Sstevel@tonic-gate 	kthread_t	*trp;
15127c478bd9Sstevel@tonic-gate 	kthread_t	**ptp;
15137c478bd9Sstevel@tonic-gate 	int		tpri;
15147c478bd9Sstevel@tonic-gate 
15157c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
15167c478bd9Sstevel@tonic-gate 
15177c478bd9Sstevel@tonic-gate 	if (tp->t_state != TS_RUN)
15187c478bd9Sstevel@tonic-gate 		return (0);
15197c478bd9Sstevel@tonic-gate 
15207c478bd9Sstevel@tonic-gate 	/*
15217c478bd9Sstevel@tonic-gate 	 * The thread is "swapped" or is on the swap queue and
15227c478bd9Sstevel@tonic-gate 	 * hence no longer on the run queue, so return true.
15237c478bd9Sstevel@tonic-gate 	 */
15247c478bd9Sstevel@tonic-gate 	if ((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD)
15257c478bd9Sstevel@tonic-gate 		return (1);
15267c478bd9Sstevel@tonic-gate 
15277c478bd9Sstevel@tonic-gate 	tpri = DISP_PRIO(tp);
15287c478bd9Sstevel@tonic-gate 	dp = tp->t_disp_queue;
15297c478bd9Sstevel@tonic-gate 	ASSERT(tpri < dp->disp_npri);
15307c478bd9Sstevel@tonic-gate 	dq = &dp->disp_q[tpri];
15317c478bd9Sstevel@tonic-gate 	ptp = &dq->dq_first;
15327c478bd9Sstevel@tonic-gate 	rp = *ptp;
15337c478bd9Sstevel@tonic-gate 	trp = NULL;
15347c478bd9Sstevel@tonic-gate 
15357c478bd9Sstevel@tonic-gate 	ASSERT(dq->dq_last == NULL || dq->dq_last->t_link == NULL);
15367c478bd9Sstevel@tonic-gate 
15377c478bd9Sstevel@tonic-gate 	/*
15387c478bd9Sstevel@tonic-gate 	 * Search for thread in queue.
15397c478bd9Sstevel@tonic-gate 	 * Double links would simplify this at the expense of disp/setrun.
15407c478bd9Sstevel@tonic-gate 	 */
15417c478bd9Sstevel@tonic-gate 	while (rp != tp && rp != NULL) {
15427c478bd9Sstevel@tonic-gate 		trp = rp;
15437c478bd9Sstevel@tonic-gate 		ptp = &trp->t_link;
15447c478bd9Sstevel@tonic-gate 		rp = trp->t_link;
15457c478bd9Sstevel@tonic-gate 	}
15467c478bd9Sstevel@tonic-gate 
15477c478bd9Sstevel@tonic-gate 	if (rp == NULL) {
15487c478bd9Sstevel@tonic-gate 		panic("dispdeq: thread not on queue");
15497c478bd9Sstevel@tonic-gate 	}
15507c478bd9Sstevel@tonic-gate 
15517c478bd9Sstevel@tonic-gate 	DTRACE_SCHED2(dequeue, kthread_t *, tp, disp_t *, dp);
15527c478bd9Sstevel@tonic-gate 
15537c478bd9Sstevel@tonic-gate 	/*
15547c478bd9Sstevel@tonic-gate 	 * Found it so remove it from queue.
15557c478bd9Sstevel@tonic-gate 	 */
15567c478bd9Sstevel@tonic-gate 	if ((*ptp = rp->t_link) == NULL)
15577c478bd9Sstevel@tonic-gate 		dq->dq_last = trp;
15587c478bd9Sstevel@tonic-gate 
15597c478bd9Sstevel@tonic-gate 	dp->disp_nrunnable--;
15607c478bd9Sstevel@tonic-gate 	if (--dq->dq_sruncnt == 0) {
15617c478bd9Sstevel@tonic-gate 		dp->disp_qactmap[tpri >> BT_ULSHIFT] &= ~BT_BIW(tpri);
15627c478bd9Sstevel@tonic-gate 		if (dp->disp_nrunnable == 0) {
15637c478bd9Sstevel@tonic-gate 			dp->disp_max_unbound_pri = -1;
15647c478bd9Sstevel@tonic-gate 			dp->disp_maxrunpri = -1;
15657c478bd9Sstevel@tonic-gate 		} else if (tpri == dp->disp_maxrunpri) {
15667c478bd9Sstevel@tonic-gate 			int ipri;
15677c478bd9Sstevel@tonic-gate 
15687c478bd9Sstevel@tonic-gate 			ipri = bt_gethighbit(dp->disp_qactmap,
15697c478bd9Sstevel@tonic-gate 			    dp->disp_maxrunpri >> BT_ULSHIFT);
15707c478bd9Sstevel@tonic-gate 			if (ipri < dp->disp_max_unbound_pri)
15717c478bd9Sstevel@tonic-gate 				dp->disp_max_unbound_pri = ipri;
15727c478bd9Sstevel@tonic-gate 			dp->disp_maxrunpri = ipri;
15737c478bd9Sstevel@tonic-gate 		}
15747c478bd9Sstevel@tonic-gate 	}
15757c478bd9Sstevel@tonic-gate 	tp->t_link = NULL;
15767c478bd9Sstevel@tonic-gate 	THREAD_TRANSITION(tp);		/* put in intermediate state */
15777c478bd9Sstevel@tonic-gate 	return (1);
15787c478bd9Sstevel@tonic-gate }
15797c478bd9Sstevel@tonic-gate 
15807c478bd9Sstevel@tonic-gate 
15817c478bd9Sstevel@tonic-gate /*
15827c478bd9Sstevel@tonic-gate  * dq_sruninc and dq_srundec are public functions for
15837c478bd9Sstevel@tonic-gate  * incrementing/decrementing the sruncnts when a thread on
15847c478bd9Sstevel@tonic-gate  * a dispatcher queue is made schedulable/unschedulable by
15857c478bd9Sstevel@tonic-gate  * resetting the TS_LOAD flag.
15867c478bd9Sstevel@tonic-gate  *
15877c478bd9Sstevel@tonic-gate  * The caller MUST have the thread lock and therefore the dispatcher
15887c478bd9Sstevel@tonic-gate  * queue lock so that the operation which changes
15897c478bd9Sstevel@tonic-gate  * the flag, the operation that checks the status of the thread to
15907c478bd9Sstevel@tonic-gate  * determine if it's on a disp queue AND the call to this function
15917c478bd9Sstevel@tonic-gate  * are one atomic operation with respect to interrupts.
15927c478bd9Sstevel@tonic-gate  */
15937c478bd9Sstevel@tonic-gate 
15947c478bd9Sstevel@tonic-gate /*
15957c478bd9Sstevel@tonic-gate  * Called by sched AFTER TS_LOAD flag is set on a swapped, runnable thread.
15967c478bd9Sstevel@tonic-gate  */
15977c478bd9Sstevel@tonic-gate void
15987c478bd9Sstevel@tonic-gate dq_sruninc(kthread_t *t)
15997c478bd9Sstevel@tonic-gate {
16007c478bd9Sstevel@tonic-gate 	ASSERT(t->t_state == TS_RUN);
16017c478bd9Sstevel@tonic-gate 	ASSERT(t->t_schedflag & TS_LOAD);
16027c478bd9Sstevel@tonic-gate 
16037c478bd9Sstevel@tonic-gate 	THREAD_TRANSITION(t);
16047c478bd9Sstevel@tonic-gate 	setfrontdq(t);
16057c478bd9Sstevel@tonic-gate }
16067c478bd9Sstevel@tonic-gate 
16077c478bd9Sstevel@tonic-gate /*
16087c478bd9Sstevel@tonic-gate  * See comment on calling conventions above.
16097c478bd9Sstevel@tonic-gate  * Called by sched BEFORE TS_LOAD flag is cleared on a runnable thread.
16107c478bd9Sstevel@tonic-gate  */
16117c478bd9Sstevel@tonic-gate void
16127c478bd9Sstevel@tonic-gate dq_srundec(kthread_t *t)
16137c478bd9Sstevel@tonic-gate {
16147c478bd9Sstevel@tonic-gate 	ASSERT(t->t_schedflag & TS_LOAD);
16157c478bd9Sstevel@tonic-gate 
16167c478bd9Sstevel@tonic-gate 	(void) dispdeq(t);
16177c478bd9Sstevel@tonic-gate 	disp_swapped_enq(t);
16187c478bd9Sstevel@tonic-gate }
16197c478bd9Sstevel@tonic-gate 
16207c478bd9Sstevel@tonic-gate /*
16217c478bd9Sstevel@tonic-gate  * Change the dispatcher lock of thread to the "swapped_lock"
16227c478bd9Sstevel@tonic-gate  * and return with thread lock still held.
16237c478bd9Sstevel@tonic-gate  *
16247c478bd9Sstevel@tonic-gate  * Called with thread_lock held, in transition state, and at high spl.
16257c478bd9Sstevel@tonic-gate  */
16267c478bd9Sstevel@tonic-gate void
16277c478bd9Sstevel@tonic-gate disp_swapped_enq(kthread_t *tp)
16287c478bd9Sstevel@tonic-gate {
16297c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
16307c478bd9Sstevel@tonic-gate 	ASSERT(tp->t_schedflag & TS_LOAD);
16317c478bd9Sstevel@tonic-gate 
16327c478bd9Sstevel@tonic-gate 	switch (tp->t_state) {
16337c478bd9Sstevel@tonic-gate 	case TS_RUN:
16347c478bd9Sstevel@tonic-gate 		disp_lock_enter_high(&swapped_lock);
16357c478bd9Sstevel@tonic-gate 		THREAD_SWAP(tp, &swapped_lock);	/* set TS_RUN state and lock */
16367c478bd9Sstevel@tonic-gate 		break;
16377c478bd9Sstevel@tonic-gate 	case TS_ONPROC:
16387c478bd9Sstevel@tonic-gate 		disp_lock_enter_high(&swapped_lock);
16397c478bd9Sstevel@tonic-gate 		THREAD_TRANSITION(tp);
16407c478bd9Sstevel@tonic-gate 		wake_sched_sec = 1;		/* tell clock to wake sched */
16417c478bd9Sstevel@tonic-gate 		THREAD_SWAP(tp, &swapped_lock);	/* set TS_RUN state and lock */
16427c478bd9Sstevel@tonic-gate 		break;
16437c478bd9Sstevel@tonic-gate 	default:
16447c478bd9Sstevel@tonic-gate 		panic("disp_swapped: tp: %p bad t_state", (void *)tp);
16457c478bd9Sstevel@tonic-gate 	}
16467c478bd9Sstevel@tonic-gate }
16477c478bd9Sstevel@tonic-gate 
16487c478bd9Sstevel@tonic-gate /*
16497c478bd9Sstevel@tonic-gate  * This routine is called by setbackdq/setfrontdq if the thread is
16507c478bd9Sstevel@tonic-gate  * not loaded or loaded and on the swap queue.
16517c478bd9Sstevel@tonic-gate  *
16527c478bd9Sstevel@tonic-gate  * Thread state TS_SLEEP implies that a swapped thread
16537c478bd9Sstevel@tonic-gate  * has been woken up and needs to be swapped in by the swapper.
16547c478bd9Sstevel@tonic-gate  *
16557c478bd9Sstevel@tonic-gate  * Thread state TS_RUN, it implies that the priority of a swapped
16567c478bd9Sstevel@tonic-gate  * thread is being increased by scheduling class (e.g. ts_update).
16577c478bd9Sstevel@tonic-gate  */
16587c478bd9Sstevel@tonic-gate static void
16597c478bd9Sstevel@tonic-gate disp_swapped_setrun(kthread_t *tp)
16607c478bd9Sstevel@tonic-gate {
16617c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
16627c478bd9Sstevel@tonic-gate 	ASSERT((tp->t_schedflag & (TS_LOAD | TS_ON_SWAPQ)) != TS_LOAD);
16637c478bd9Sstevel@tonic-gate 
16647c478bd9Sstevel@tonic-gate 	switch (tp->t_state) {
16657c478bd9Sstevel@tonic-gate 	case TS_SLEEP:
16667c478bd9Sstevel@tonic-gate 		disp_lock_enter_high(&swapped_lock);
16677c478bd9Sstevel@tonic-gate 		/*
16687c478bd9Sstevel@tonic-gate 		 * Wakeup sched immediately (i.e., next tick) if the
16697c478bd9Sstevel@tonic-gate 		 * thread priority is above maxclsyspri.
16707c478bd9Sstevel@tonic-gate 		 */
16717c478bd9Sstevel@tonic-gate 		if (DISP_PRIO(tp) > maxclsyspri)
16727c478bd9Sstevel@tonic-gate 			wake_sched = 1;
16737c478bd9Sstevel@tonic-gate 		else
16747c478bd9Sstevel@tonic-gate 			wake_sched_sec = 1;
16757c478bd9Sstevel@tonic-gate 		THREAD_RUN(tp, &swapped_lock); /* set TS_RUN state and lock */
16767c478bd9Sstevel@tonic-gate 		break;
16777c478bd9Sstevel@tonic-gate 	case TS_RUN:				/* called from ts_update */
16787c478bd9Sstevel@tonic-gate 		break;
16797c478bd9Sstevel@tonic-gate 	default:
16807c478bd9Sstevel@tonic-gate 		panic("disp_swapped_setrun: tp: %p bad t_state", tp);
16817c478bd9Sstevel@tonic-gate 	}
16827c478bd9Sstevel@tonic-gate }
16837c478bd9Sstevel@tonic-gate 
16847c478bd9Sstevel@tonic-gate 
16857c478bd9Sstevel@tonic-gate /*
16867c478bd9Sstevel@tonic-gate  *	Make a thread give up its processor.  Find the processor on
16877c478bd9Sstevel@tonic-gate  *	which this thread is executing, and have that processor
16887c478bd9Sstevel@tonic-gate  *	preempt.
16897c478bd9Sstevel@tonic-gate  */
16907c478bd9Sstevel@tonic-gate void
16917c478bd9Sstevel@tonic-gate cpu_surrender(kthread_t *tp)
16927c478bd9Sstevel@tonic-gate {
16937c478bd9Sstevel@tonic-gate 	cpu_t	*cpup;
16947c478bd9Sstevel@tonic-gate 	int	max_pri;
16957c478bd9Sstevel@tonic-gate 	int	max_run_pri;
16967c478bd9Sstevel@tonic-gate 	klwp_t	*lwp;
16977c478bd9Sstevel@tonic-gate 
16987c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
16997c478bd9Sstevel@tonic-gate 
17007c478bd9Sstevel@tonic-gate 	if (tp->t_state != TS_ONPROC)
17017c478bd9Sstevel@tonic-gate 		return;
17027c478bd9Sstevel@tonic-gate 	cpup = tp->t_disp_queue->disp_cpu;	/* CPU thread dispatched to */
17037c478bd9Sstevel@tonic-gate 	max_pri = cpup->cpu_disp->disp_maxrunpri; /* best pri of that CPU */
17047c478bd9Sstevel@tonic-gate 	max_run_pri = CP_MAXRUNPRI(cpup->cpu_part);
17057c478bd9Sstevel@tonic-gate 	if (max_pri < max_run_pri)
17067c478bd9Sstevel@tonic-gate 		max_pri = max_run_pri;
17077c478bd9Sstevel@tonic-gate 
17087c478bd9Sstevel@tonic-gate 	cpup->cpu_runrun = 1;
17097c478bd9Sstevel@tonic-gate 	if (max_pri >= kpreemptpri && cpup->cpu_kprunrun == 0) {
17107c478bd9Sstevel@tonic-gate 		cpup->cpu_kprunrun = 1;
17117c478bd9Sstevel@tonic-gate 	}
17127c478bd9Sstevel@tonic-gate 
17137c478bd9Sstevel@tonic-gate 	/*
17147c478bd9Sstevel@tonic-gate 	 * Propagate cpu_runrun, and cpu_kprunrun to global visibility.
17157c478bd9Sstevel@tonic-gate 	 */
17167c478bd9Sstevel@tonic-gate 	membar_enter();
17177c478bd9Sstevel@tonic-gate 
17187c478bd9Sstevel@tonic-gate 	DTRACE_SCHED1(surrender, kthread_t *, tp);
17197c478bd9Sstevel@tonic-gate 
17207c478bd9Sstevel@tonic-gate 	/*
17217c478bd9Sstevel@tonic-gate 	 * Make the target thread take an excursion through trap()
17227c478bd9Sstevel@tonic-gate 	 * to do preempt() (unless we're already in trap or post_syscall,
17237c478bd9Sstevel@tonic-gate 	 * calling cpu_surrender via CL_TRAPRET).
17247c478bd9Sstevel@tonic-gate 	 */
17257c478bd9Sstevel@tonic-gate 	if (tp != curthread || (lwp = tp->t_lwp) == NULL ||
17267c478bd9Sstevel@tonic-gate 	    lwp->lwp_state != LWP_USER) {
17277c478bd9Sstevel@tonic-gate 		aston(tp);
17287c478bd9Sstevel@tonic-gate 		if (cpup != CPU)
17297c478bd9Sstevel@tonic-gate 			poke_cpu(cpup->cpu_id);
17307c478bd9Sstevel@tonic-gate 	}
17317c478bd9Sstevel@tonic-gate 	TRACE_2(TR_FAC_DISP, TR_CPU_SURRENDER,
17327c478bd9Sstevel@tonic-gate 	    "cpu_surrender:tid %p cpu %p", tp, cpup);
17337c478bd9Sstevel@tonic-gate }
17347c478bd9Sstevel@tonic-gate 
17357c478bd9Sstevel@tonic-gate 
17367c478bd9Sstevel@tonic-gate /*
17377c478bd9Sstevel@tonic-gate  * Commit to and ratify a scheduling decision
17387c478bd9Sstevel@tonic-gate  */
17397c478bd9Sstevel@tonic-gate /*ARGSUSED*/
17407c478bd9Sstevel@tonic-gate static kthread_t *
17417c478bd9Sstevel@tonic-gate disp_ratify(kthread_t *tp, disp_t *kpq)
17427c478bd9Sstevel@tonic-gate {
17437c478bd9Sstevel@tonic-gate 	pri_t	tpri, maxpri;
17447c478bd9Sstevel@tonic-gate 	pri_t	maxkpri;
17457c478bd9Sstevel@tonic-gate 	cpu_t	*cpup;
17467c478bd9Sstevel@tonic-gate 
17477c478bd9Sstevel@tonic-gate 	ASSERT(tp != NULL);
17487c478bd9Sstevel@tonic-gate 	/*
17497c478bd9Sstevel@tonic-gate 	 * Commit to, then ratify scheduling decision
17507c478bd9Sstevel@tonic-gate 	 */
17517c478bd9Sstevel@tonic-gate 	cpup = CPU;
17527c478bd9Sstevel@tonic-gate 	if (cpup->cpu_runrun != 0)
17537c478bd9Sstevel@tonic-gate 		cpup->cpu_runrun = 0;
17547c478bd9Sstevel@tonic-gate 	if (cpup->cpu_kprunrun != 0)
17557c478bd9Sstevel@tonic-gate 		cpup->cpu_kprunrun = 0;
17567c478bd9Sstevel@tonic-gate 	if (cpup->cpu_chosen_level != -1)
17577c478bd9Sstevel@tonic-gate 		cpup->cpu_chosen_level = -1;
17587c478bd9Sstevel@tonic-gate 	membar_enter();
17597c478bd9Sstevel@tonic-gate 	tpri = DISP_PRIO(tp);
17607c478bd9Sstevel@tonic-gate 	maxpri = cpup->cpu_disp->disp_maxrunpri;
17617c478bd9Sstevel@tonic-gate 	maxkpri = kpq->disp_maxrunpri;
17627c478bd9Sstevel@tonic-gate 	if (maxpri < maxkpri)
17637c478bd9Sstevel@tonic-gate 		maxpri = maxkpri;
17647c478bd9Sstevel@tonic-gate 	if (tpri < maxpri) {
17657c478bd9Sstevel@tonic-gate 		/*
17667c478bd9Sstevel@tonic-gate 		 * should have done better
17677c478bd9Sstevel@tonic-gate 		 * put this one back and indicate to try again
17687c478bd9Sstevel@tonic-gate 		 */
17697c478bd9Sstevel@tonic-gate 		cpup->cpu_dispthread = curthread;	/* fixup dispthread */
17707c478bd9Sstevel@tonic-gate 		cpup->cpu_dispatch_pri = DISP_PRIO(curthread);
17717c478bd9Sstevel@tonic-gate 		thread_lock_high(tp);
17727c478bd9Sstevel@tonic-gate 		THREAD_TRANSITION(tp);
17737c478bd9Sstevel@tonic-gate 		setfrontdq(tp);
17747c478bd9Sstevel@tonic-gate 		thread_unlock_nopreempt(tp);
17757c478bd9Sstevel@tonic-gate 
17767c478bd9Sstevel@tonic-gate 		tp = NULL;
17777c478bd9Sstevel@tonic-gate 	}
17787c478bd9Sstevel@tonic-gate 	return (tp);
17797c478bd9Sstevel@tonic-gate }
17807c478bd9Sstevel@tonic-gate 
17817c478bd9Sstevel@tonic-gate /*
17827c478bd9Sstevel@tonic-gate  * See if there is any work on the dispatcher queue for other CPUs.
17837c478bd9Sstevel@tonic-gate  * If there is, dequeue the best thread and return.
17847c478bd9Sstevel@tonic-gate  */
17857c478bd9Sstevel@tonic-gate static kthread_t *
17867c478bd9Sstevel@tonic-gate disp_getwork(cpu_t *cp)
17877c478bd9Sstevel@tonic-gate {
17887c478bd9Sstevel@tonic-gate 	cpu_t		*ocp;		/* other CPU */
17897c478bd9Sstevel@tonic-gate 	cpu_t		*ocp_start;
17907c478bd9Sstevel@tonic-gate 	cpu_t		*tcp;		/* target local CPU */
17917c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
17927c478bd9Sstevel@tonic-gate 	pri_t		maxpri;
17937c478bd9Sstevel@tonic-gate 	int		s;
17947c478bd9Sstevel@tonic-gate 	disp_t		*kpq;		/* kp queue for this partition */
17957c478bd9Sstevel@tonic-gate 	lpl_t		*lpl, *lpl_leaf;
17967c478bd9Sstevel@tonic-gate 	int		hint, leafidx;
17977c478bd9Sstevel@tonic-gate 
17987c478bd9Sstevel@tonic-gate 	maxpri = -1;
17997c478bd9Sstevel@tonic-gate 	tcp = NULL;
18007c478bd9Sstevel@tonic-gate 
18017c478bd9Sstevel@tonic-gate 	kpq = &cp->cpu_part->cp_kp_queue;
18027c478bd9Sstevel@tonic-gate 	while (kpq->disp_maxrunpri >= 0) {
18037c478bd9Sstevel@tonic-gate 		/*
18047c478bd9Sstevel@tonic-gate 		 * Try to take a thread from the kp_queue.
18057c478bd9Sstevel@tonic-gate 		 */
18067c478bd9Sstevel@tonic-gate 		tp = (disp_getbest(kpq));
18077c478bd9Sstevel@tonic-gate 		if (tp)
18087c478bd9Sstevel@tonic-gate 			return (disp_ratify(tp, kpq));
18097c478bd9Sstevel@tonic-gate 	}
18107c478bd9Sstevel@tonic-gate 
18117c478bd9Sstevel@tonic-gate 	s = splhigh();		/* protect the cpu_active list */
18127c478bd9Sstevel@tonic-gate 
18137c478bd9Sstevel@tonic-gate 	/*
18147c478bd9Sstevel@tonic-gate 	 * Try to find something to do on another CPU's run queue.
18157c478bd9Sstevel@tonic-gate 	 * Loop through all other CPUs looking for the one with the highest
18167c478bd9Sstevel@tonic-gate 	 * priority unbound thread.
18177c478bd9Sstevel@tonic-gate 	 *
18187c478bd9Sstevel@tonic-gate 	 * On NUMA machines, the partition's CPUs are consulted in order of
18197c478bd9Sstevel@tonic-gate 	 * distance from the current CPU. This way, the first available
18207c478bd9Sstevel@tonic-gate 	 * work found is also the closest, and will suffer the least
18217c478bd9Sstevel@tonic-gate 	 * from being migrated.
18227c478bd9Sstevel@tonic-gate 	 */
18237c478bd9Sstevel@tonic-gate 	lpl = lpl_leaf = cp->cpu_lpl;
18247c478bd9Sstevel@tonic-gate 	hint = leafidx = 0;
18257c478bd9Sstevel@tonic-gate 
18267c478bd9Sstevel@tonic-gate 	/*
18277c478bd9Sstevel@tonic-gate 	 * This loop traverses the lpl hierarchy. Higher level lpls represent
18287c478bd9Sstevel@tonic-gate 	 * broader levels of locality
18297c478bd9Sstevel@tonic-gate 	 */
18307c478bd9Sstevel@tonic-gate 	do {
18317c478bd9Sstevel@tonic-gate 		/* This loop iterates over the lpl's leaves */
18327c478bd9Sstevel@tonic-gate 		do {
18337c478bd9Sstevel@tonic-gate 			if (lpl_leaf != cp->cpu_lpl)
18347c478bd9Sstevel@tonic-gate 				ocp = lpl_leaf->lpl_cpus;
18357c478bd9Sstevel@tonic-gate 			else
18367c478bd9Sstevel@tonic-gate 				ocp = cp->cpu_next_lpl;
18377c478bd9Sstevel@tonic-gate 
18387c478bd9Sstevel@tonic-gate 			/* This loop iterates over the CPUs in the leaf */
18397c478bd9Sstevel@tonic-gate 			ocp_start = ocp;
18407c478bd9Sstevel@tonic-gate 			do {
18417c478bd9Sstevel@tonic-gate 				pri_t pri;
18427c478bd9Sstevel@tonic-gate 
18437c478bd9Sstevel@tonic-gate 				ASSERT(CPU_ACTIVE(ocp));
18447c478bd9Sstevel@tonic-gate 
18457c478bd9Sstevel@tonic-gate 				/*
18467c478bd9Sstevel@tonic-gate 				 * End our stroll around the partition if:
18477c478bd9Sstevel@tonic-gate 				 *
18487c478bd9Sstevel@tonic-gate 				 * - Something became runnable on the local
18497c478bd9Sstevel@tonic-gate 				 *	queue
18507c478bd9Sstevel@tonic-gate 				 *
18517c478bd9Sstevel@tonic-gate 				 * - We're at the broadest level of locality and
18527c478bd9Sstevel@tonic-gate 				 *   we happen across another idle CPU. At the
18537c478bd9Sstevel@tonic-gate 				 *   highest level of locality, all CPUs will
18547c478bd9Sstevel@tonic-gate 				 *   walk the partition's CPUs in the same
18557c478bd9Sstevel@tonic-gate 				 *   order, so we can end our stroll taking
18567c478bd9Sstevel@tonic-gate 				 *   comfort in knowing the other idle CPU is
18577c478bd9Sstevel@tonic-gate 				 *   already covering the next portion of the
18587c478bd9Sstevel@tonic-gate 				 *   list.
18597c478bd9Sstevel@tonic-gate 				 */
18607c478bd9Sstevel@tonic-gate 				if (cp->cpu_disp->disp_nrunnable != 0)
18617c478bd9Sstevel@tonic-gate 					break;
18627c478bd9Sstevel@tonic-gate 				if (ocp->cpu_dispatch_pri == -1) {
18637c478bd9Sstevel@tonic-gate 					if (ocp->cpu_disp_flags &
18647c478bd9Sstevel@tonic-gate 					    CPU_DISP_HALTED)
18657c478bd9Sstevel@tonic-gate 						continue;
18667c478bd9Sstevel@tonic-gate 					else if (lpl->lpl_parent == NULL)
18677c478bd9Sstevel@tonic-gate 						break;
18687c478bd9Sstevel@tonic-gate 				}
18697c478bd9Sstevel@tonic-gate 
18707c478bd9Sstevel@tonic-gate 				/*
18717c478bd9Sstevel@tonic-gate 				 * If there's only one thread and the CPU
18727c478bd9Sstevel@tonic-gate 				 * is in the middle of a context switch,
18737c478bd9Sstevel@tonic-gate 				 * or it's currently running the idle thread,
18747c478bd9Sstevel@tonic-gate 				 * don't steal it.
18757c478bd9Sstevel@tonic-gate 				 */
18767c478bd9Sstevel@tonic-gate 				if ((ocp->cpu_disp_flags &
18777c478bd9Sstevel@tonic-gate 					CPU_DISP_DONTSTEAL) &&
18787c478bd9Sstevel@tonic-gate 				    ocp->cpu_disp->disp_nrunnable == 1)
18797c478bd9Sstevel@tonic-gate 					continue;
18807c478bd9Sstevel@tonic-gate 
18817c478bd9Sstevel@tonic-gate 				pri = ocp->cpu_disp->disp_max_unbound_pri;
18827c478bd9Sstevel@tonic-gate 				if (pri > maxpri) {
18837c478bd9Sstevel@tonic-gate 					maxpri = pri;
18847c478bd9Sstevel@tonic-gate 					tcp = ocp;
18857c478bd9Sstevel@tonic-gate 				}
18867c478bd9Sstevel@tonic-gate 			} while ((ocp = ocp->cpu_next_lpl) != ocp_start);
18877c478bd9Sstevel@tonic-gate 
18887c478bd9Sstevel@tonic-gate 			if ((lpl_leaf = lpl->lpl_rset[++leafidx]) == NULL) {
18897c478bd9Sstevel@tonic-gate 				leafidx = 0;
18907c478bd9Sstevel@tonic-gate 				lpl_leaf = lpl->lpl_rset[leafidx];
18917c478bd9Sstevel@tonic-gate 			}
18927c478bd9Sstevel@tonic-gate 		} while (leafidx != hint);
18937c478bd9Sstevel@tonic-gate 
18947c478bd9Sstevel@tonic-gate 		hint = leafidx = lpl->lpl_hint;
18957c478bd9Sstevel@tonic-gate 		if ((lpl = lpl->lpl_parent) != NULL)
18967c478bd9Sstevel@tonic-gate 			lpl_leaf = lpl->lpl_rset[hint];
18977c478bd9Sstevel@tonic-gate 	} while (!tcp && lpl);
18987c478bd9Sstevel@tonic-gate 
18997c478bd9Sstevel@tonic-gate 	splx(s);
19007c478bd9Sstevel@tonic-gate 
19017c478bd9Sstevel@tonic-gate 	/*
19027c478bd9Sstevel@tonic-gate 	 * If another queue looks good, and there is still nothing on
19037c478bd9Sstevel@tonic-gate 	 * the local queue, try to transfer one or more threads
19047c478bd9Sstevel@tonic-gate 	 * from it to our queue.
19057c478bd9Sstevel@tonic-gate 	 */
19067c478bd9Sstevel@tonic-gate 	if (tcp && cp->cpu_disp->disp_nrunnable == 0) {
19077c478bd9Sstevel@tonic-gate 		tp = (disp_getbest(tcp->cpu_disp));
19087c478bd9Sstevel@tonic-gate 		if (tp)
19097c478bd9Sstevel@tonic-gate 			return (disp_ratify(tp, kpq));
19107c478bd9Sstevel@tonic-gate 	}
19117c478bd9Sstevel@tonic-gate 	return (NULL);
19127c478bd9Sstevel@tonic-gate }
19137c478bd9Sstevel@tonic-gate 
19147c478bd9Sstevel@tonic-gate 
19157c478bd9Sstevel@tonic-gate /*
19167c478bd9Sstevel@tonic-gate  * disp_fix_unbound_pri()
19177c478bd9Sstevel@tonic-gate  *	Determines the maximum priority of unbound threads on the queue.
19187c478bd9Sstevel@tonic-gate  *	The priority is kept for the queue, but is only increased, never
19197c478bd9Sstevel@tonic-gate  *	reduced unless some CPU is looking for something on that queue.
19207c478bd9Sstevel@tonic-gate  *
19217c478bd9Sstevel@tonic-gate  *	The priority argument is the known upper limit.
19227c478bd9Sstevel@tonic-gate  *
19237c478bd9Sstevel@tonic-gate  *	Perhaps this should be kept accurately, but that probably means
19247c478bd9Sstevel@tonic-gate  *	separate bitmaps for bound and unbound threads.  Since only idled
19257c478bd9Sstevel@tonic-gate  *	CPUs will have to do this recalculation, it seems better this way.
19267c478bd9Sstevel@tonic-gate  */
19277c478bd9Sstevel@tonic-gate static void
19287c478bd9Sstevel@tonic-gate disp_fix_unbound_pri(disp_t *dp, pri_t pri)
19297c478bd9Sstevel@tonic-gate {
19307c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
19317c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
19327c478bd9Sstevel@tonic-gate 	ulong_t		*dqactmap = dp->disp_qactmap;
19337c478bd9Sstevel@tonic-gate 	ulong_t		mapword;
19347c478bd9Sstevel@tonic-gate 	int		wx;
19357c478bd9Sstevel@tonic-gate 
19367c478bd9Sstevel@tonic-gate 	ASSERT(DISP_LOCK_HELD(&dp->disp_lock));
19377c478bd9Sstevel@tonic-gate 
19387c478bd9Sstevel@tonic-gate 	ASSERT(pri >= 0);			/* checked by caller */
19397c478bd9Sstevel@tonic-gate 
19407c478bd9Sstevel@tonic-gate 	/*
19417c478bd9Sstevel@tonic-gate 	 * Start the search at the next lowest priority below the supplied
19427c478bd9Sstevel@tonic-gate 	 * priority.  This depends on the bitmap implementation.
19437c478bd9Sstevel@tonic-gate 	 */
19447c478bd9Sstevel@tonic-gate 	do {
19457c478bd9Sstevel@tonic-gate 		wx = pri >> BT_ULSHIFT;		/* index of word in map */
19467c478bd9Sstevel@tonic-gate 
19477c478bd9Sstevel@tonic-gate 		/*
19487c478bd9Sstevel@tonic-gate 		 * Form mask for all lower priorities in the word.
19497c478bd9Sstevel@tonic-gate 		 */
19507c478bd9Sstevel@tonic-gate 		mapword = dqactmap[wx] & (BT_BIW(pri) - 1);
19517c478bd9Sstevel@tonic-gate 
19527c478bd9Sstevel@tonic-gate 		/*
19537c478bd9Sstevel@tonic-gate 		 * Get next lower active priority.
19547c478bd9Sstevel@tonic-gate 		 */
19557c478bd9Sstevel@tonic-gate 		if (mapword != 0) {
19567c478bd9Sstevel@tonic-gate 			pri = (wx << BT_ULSHIFT) + highbit(mapword) - 1;
19577c478bd9Sstevel@tonic-gate 		} else if (wx > 0) {
19587c478bd9Sstevel@tonic-gate 			pri = bt_gethighbit(dqactmap, wx - 1); /* sign extend */
19597c478bd9Sstevel@tonic-gate 			if (pri < 0)
19607c478bd9Sstevel@tonic-gate 				break;
19617c478bd9Sstevel@tonic-gate 		} else {
19627c478bd9Sstevel@tonic-gate 			pri = -1;
19637c478bd9Sstevel@tonic-gate 			break;
19647c478bd9Sstevel@tonic-gate 		}
19657c478bd9Sstevel@tonic-gate 
19667c478bd9Sstevel@tonic-gate 		/*
19677c478bd9Sstevel@tonic-gate 		 * Search the queue for unbound, runnable threads.
19687c478bd9Sstevel@tonic-gate 		 */
19697c478bd9Sstevel@tonic-gate 		dq = &dp->disp_q[pri];
19707c478bd9Sstevel@tonic-gate 		tp = dq->dq_first;
19717c478bd9Sstevel@tonic-gate 
19727c478bd9Sstevel@tonic-gate 		while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
19737c478bd9Sstevel@tonic-gate 			tp = tp->t_link;
19747c478bd9Sstevel@tonic-gate 		}
19757c478bd9Sstevel@tonic-gate 
19767c478bd9Sstevel@tonic-gate 		/*
19777c478bd9Sstevel@tonic-gate 		 * If a thread was found, set the priority and return.
19787c478bd9Sstevel@tonic-gate 		 */
19797c478bd9Sstevel@tonic-gate 	} while (tp == NULL);
19807c478bd9Sstevel@tonic-gate 
19817c478bd9Sstevel@tonic-gate 	/*
19827c478bd9Sstevel@tonic-gate 	 * pri holds the maximum unbound thread priority or -1.
19837c478bd9Sstevel@tonic-gate 	 */
19847c478bd9Sstevel@tonic-gate 	if (dp->disp_max_unbound_pri != pri)
19857c478bd9Sstevel@tonic-gate 		dp->disp_max_unbound_pri = pri;
19867c478bd9Sstevel@tonic-gate }
19877c478bd9Sstevel@tonic-gate 
19887c478bd9Sstevel@tonic-gate /*
19897c478bd9Sstevel@tonic-gate  * disp_adjust_unbound_pri() - thread is becoming unbound, so we should
19907c478bd9Sstevel@tonic-gate  * 	check if the CPU to which is was previously bound should have
19917c478bd9Sstevel@tonic-gate  * 	its disp_max_unbound_pri increased.
19927c478bd9Sstevel@tonic-gate  */
19937c478bd9Sstevel@tonic-gate void
19947c478bd9Sstevel@tonic-gate disp_adjust_unbound_pri(kthread_t *tp)
19957c478bd9Sstevel@tonic-gate {
19967c478bd9Sstevel@tonic-gate 	disp_t *dp;
19977c478bd9Sstevel@tonic-gate 	pri_t tpri;
19987c478bd9Sstevel@tonic-gate 
19997c478bd9Sstevel@tonic-gate 	ASSERT(THREAD_LOCK_HELD(tp));
20007c478bd9Sstevel@tonic-gate 
20017c478bd9Sstevel@tonic-gate 	/*
20027c478bd9Sstevel@tonic-gate 	 * Don't do anything if the thread is not bound, or
20037c478bd9Sstevel@tonic-gate 	 * currently not runnable or swapped out.
20047c478bd9Sstevel@tonic-gate 	 */
20057c478bd9Sstevel@tonic-gate 	if (tp->t_bound_cpu == NULL ||
20067c478bd9Sstevel@tonic-gate 	    tp->t_state != TS_RUN ||
20077c478bd9Sstevel@tonic-gate 	    tp->t_schedflag & TS_ON_SWAPQ)
20087c478bd9Sstevel@tonic-gate 		return;
20097c478bd9Sstevel@tonic-gate 
20107c478bd9Sstevel@tonic-gate 	tpri = DISP_PRIO(tp);
20117c478bd9Sstevel@tonic-gate 	dp = tp->t_bound_cpu->cpu_disp;
20127c478bd9Sstevel@tonic-gate 	ASSERT(tpri >= 0 && tpri < dp->disp_npri);
20137c478bd9Sstevel@tonic-gate 	if (tpri > dp->disp_max_unbound_pri)
20147c478bd9Sstevel@tonic-gate 		dp->disp_max_unbound_pri = tpri;
20157c478bd9Sstevel@tonic-gate }
20167c478bd9Sstevel@tonic-gate 
20177c478bd9Sstevel@tonic-gate /*
20187c478bd9Sstevel@tonic-gate  * disp_getbest() - de-queue the highest priority unbound runnable thread.
20197c478bd9Sstevel@tonic-gate  *	returns with the thread unlocked and onproc
20207c478bd9Sstevel@tonic-gate  *	but at splhigh (like disp()).
20217c478bd9Sstevel@tonic-gate  *	returns NULL if nothing found.
20227c478bd9Sstevel@tonic-gate  *
20237c478bd9Sstevel@tonic-gate  *	Passed a pointer to a dispatch queue not associated with this CPU.
20247c478bd9Sstevel@tonic-gate  */
20257c478bd9Sstevel@tonic-gate static kthread_t *
20267c478bd9Sstevel@tonic-gate disp_getbest(disp_t *dp)
20277c478bd9Sstevel@tonic-gate {
20287c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
20297c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
20307c478bd9Sstevel@tonic-gate 	pri_t		pri;
20317c478bd9Sstevel@tonic-gate 	cpu_t		*cp;
20327c478bd9Sstevel@tonic-gate 
20337c478bd9Sstevel@tonic-gate 	disp_lock_enter(&dp->disp_lock);
20347c478bd9Sstevel@tonic-gate 
20357c478bd9Sstevel@tonic-gate 	/*
20367c478bd9Sstevel@tonic-gate 	 * If there is nothing to run, or the CPU is in the middle of a
20377c478bd9Sstevel@tonic-gate 	 * context switch of the only thread, return NULL.
20387c478bd9Sstevel@tonic-gate 	 */
20397c478bd9Sstevel@tonic-gate 	pri = dp->disp_max_unbound_pri;
20407c478bd9Sstevel@tonic-gate 	if (pri == -1 ||
20417c478bd9Sstevel@tonic-gate 		(dp->disp_cpu != NULL &&
20427c478bd9Sstevel@tonic-gate 		    (dp->disp_cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL) &&
20437c478bd9Sstevel@tonic-gate 		dp->disp_cpu->cpu_disp->disp_nrunnable == 1)) {
20447c478bd9Sstevel@tonic-gate 		disp_lock_exit_nopreempt(&dp->disp_lock);
20457c478bd9Sstevel@tonic-gate 		return (NULL);
20467c478bd9Sstevel@tonic-gate 	}
20477c478bd9Sstevel@tonic-gate 
20487c478bd9Sstevel@tonic-gate 	dq = &dp->disp_q[pri];
20497c478bd9Sstevel@tonic-gate 	tp = dq->dq_first;
20507c478bd9Sstevel@tonic-gate 
20517c478bd9Sstevel@tonic-gate 	/*
20527c478bd9Sstevel@tonic-gate 	 * Skip over bound threads.
20537c478bd9Sstevel@tonic-gate 	 * Bound threads can be here even though disp_max_unbound_pri
20547c478bd9Sstevel@tonic-gate 	 * indicated this level.  Besides, it not always accurate because it
20557c478bd9Sstevel@tonic-gate 	 * isn't reduced until another CPU looks for work.
20567c478bd9Sstevel@tonic-gate 	 * Note that tp could be NULL right away due to this.
20577c478bd9Sstevel@tonic-gate 	 */
20587c478bd9Sstevel@tonic-gate 	while (tp && (tp->t_bound_cpu || tp->t_weakbound_cpu)) {
20597c478bd9Sstevel@tonic-gate 		tp = tp->t_link;
20607c478bd9Sstevel@tonic-gate 	}
20617c478bd9Sstevel@tonic-gate 
20627c478bd9Sstevel@tonic-gate 	/*
20637c478bd9Sstevel@tonic-gate 	 * If there were no unbound threads on this queue, find the queue
20647c478bd9Sstevel@tonic-gate 	 * where they are and then return NULL so that other CPUs will be
20657c478bd9Sstevel@tonic-gate 	 * considered.
20667c478bd9Sstevel@tonic-gate 	 */
20677c478bd9Sstevel@tonic-gate 	if (tp == NULL) {
20687c478bd9Sstevel@tonic-gate 		disp_fix_unbound_pri(dp, pri);
20697c478bd9Sstevel@tonic-gate 		disp_lock_exit_nopreempt(&dp->disp_lock);
20707c478bd9Sstevel@tonic-gate 		return (NULL);
20717c478bd9Sstevel@tonic-gate 	}
20727c478bd9Sstevel@tonic-gate 
20737c478bd9Sstevel@tonic-gate 	/*
20747c478bd9Sstevel@tonic-gate 	 * Found a runnable, unbound thread, so remove it from queue.
20757c478bd9Sstevel@tonic-gate 	 * dispdeq() requires that we have the thread locked, and we do,
20767c478bd9Sstevel@tonic-gate 	 * by virtue of holding the dispatch queue lock.  dispdeq() will
20777c478bd9Sstevel@tonic-gate 	 * put the thread in transition state, thereby dropping the dispq
20787c478bd9Sstevel@tonic-gate 	 * lock.
20797c478bd9Sstevel@tonic-gate 	 */
20807c478bd9Sstevel@tonic-gate #ifdef DEBUG
20817c478bd9Sstevel@tonic-gate 	{
20827c478bd9Sstevel@tonic-gate 		int	thread_was_on_queue;
20837c478bd9Sstevel@tonic-gate 
20847c478bd9Sstevel@tonic-gate 		thread_was_on_queue = dispdeq(tp);	/* drops disp_lock */
20857c478bd9Sstevel@tonic-gate 		ASSERT(thread_was_on_queue);
20867c478bd9Sstevel@tonic-gate 	}
20877c478bd9Sstevel@tonic-gate #else /* DEBUG */
20887c478bd9Sstevel@tonic-gate 	(void) dispdeq(tp);			/* drops disp_lock */
20897c478bd9Sstevel@tonic-gate #endif /* DEBUG */
20907c478bd9Sstevel@tonic-gate 
20917c478bd9Sstevel@tonic-gate 	tp->t_schedflag |= TS_DONT_SWAP;
20927c478bd9Sstevel@tonic-gate 
20937c478bd9Sstevel@tonic-gate 	/*
20947c478bd9Sstevel@tonic-gate 	 * Setup thread to run on the current CPU.
20957c478bd9Sstevel@tonic-gate 	 */
20967c478bd9Sstevel@tonic-gate 	cp = CPU;
20977c478bd9Sstevel@tonic-gate 
20987c478bd9Sstevel@tonic-gate 	tp->t_disp_queue = cp->cpu_disp;
20997c478bd9Sstevel@tonic-gate 
21007c478bd9Sstevel@tonic-gate 	cp->cpu_dispthread = tp;		/* protected by spl only */
21017c478bd9Sstevel@tonic-gate 	cp->cpu_dispatch_pri = pri;
21027c478bd9Sstevel@tonic-gate 	ASSERT(pri == DISP_PRIO(tp));
21037c478bd9Sstevel@tonic-gate 
21047c478bd9Sstevel@tonic-gate 	thread_onproc(tp, cp);			/* set t_state to TS_ONPROC */
21057c478bd9Sstevel@tonic-gate 
21067c478bd9Sstevel@tonic-gate 	/*
21077c478bd9Sstevel@tonic-gate 	 * Return with spl high so that swtch() won't need to raise it.
21087c478bd9Sstevel@tonic-gate 	 * The disp_lock was dropped by dispdeq().
21097c478bd9Sstevel@tonic-gate 	 */
21107c478bd9Sstevel@tonic-gate 
21117c478bd9Sstevel@tonic-gate 	return (tp);
21127c478bd9Sstevel@tonic-gate }
21137c478bd9Sstevel@tonic-gate 
21147c478bd9Sstevel@tonic-gate /*
21157c478bd9Sstevel@tonic-gate  * disp_bound_common() - common routine for higher level functions
21167c478bd9Sstevel@tonic-gate  *	that check for bound threads under certain conditions.
21177c478bd9Sstevel@tonic-gate  *	If 'threadlistsafe' is set then there is no need to acquire
21187c478bd9Sstevel@tonic-gate  *	pidlock to stop the thread list from changing (eg, if
21197c478bd9Sstevel@tonic-gate  *	disp_bound_* is called with cpus paused).
21207c478bd9Sstevel@tonic-gate  */
21217c478bd9Sstevel@tonic-gate static int
21227c478bd9Sstevel@tonic-gate disp_bound_common(cpu_t *cp, int threadlistsafe, int flag)
21237c478bd9Sstevel@tonic-gate {
21247c478bd9Sstevel@tonic-gate 	int		found = 0;
21257c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
21267c478bd9Sstevel@tonic-gate 
21277c478bd9Sstevel@tonic-gate 	ASSERT(flag);
21287c478bd9Sstevel@tonic-gate 
21297c478bd9Sstevel@tonic-gate 	if (!threadlistsafe)
21307c478bd9Sstevel@tonic-gate 		mutex_enter(&pidlock);
21317c478bd9Sstevel@tonic-gate 	tp = curthread;		/* faster than allthreads */
21327c478bd9Sstevel@tonic-gate 	do {
21337c478bd9Sstevel@tonic-gate 		if (tp->t_state != TS_FREE) {
21347c478bd9Sstevel@tonic-gate 			/*
21357c478bd9Sstevel@tonic-gate 			 * If an interrupt thread is busy, but the
21367c478bd9Sstevel@tonic-gate 			 * caller doesn't care (i.e. BOUND_INTR is off),
21377c478bd9Sstevel@tonic-gate 			 * then just ignore it and continue through.
21387c478bd9Sstevel@tonic-gate 			 */
21397c478bd9Sstevel@tonic-gate 			if ((tp->t_flag & T_INTR_THREAD) &&
21407c478bd9Sstevel@tonic-gate 			    !(flag & BOUND_INTR))
21417c478bd9Sstevel@tonic-gate 				continue;
21427c478bd9Sstevel@tonic-gate 
21437c478bd9Sstevel@tonic-gate 			/*
21447c478bd9Sstevel@tonic-gate 			 * Skip the idle thread for the CPU
21457c478bd9Sstevel@tonic-gate 			 * we're about to set offline.
21467c478bd9Sstevel@tonic-gate 			 */
21477c478bd9Sstevel@tonic-gate 			if (tp == cp->cpu_idle_thread)
21487c478bd9Sstevel@tonic-gate 				continue;
21497c478bd9Sstevel@tonic-gate 
21507c478bd9Sstevel@tonic-gate 			/*
21517c478bd9Sstevel@tonic-gate 			 * Skip the pause thread for the CPU
21527c478bd9Sstevel@tonic-gate 			 * we're about to set offline.
21537c478bd9Sstevel@tonic-gate 			 */
21547c478bd9Sstevel@tonic-gate 			if (tp == cp->cpu_pause_thread)
21557c478bd9Sstevel@tonic-gate 				continue;
21567c478bd9Sstevel@tonic-gate 
21577c478bd9Sstevel@tonic-gate 			if ((flag & BOUND_CPU) &&
21587c478bd9Sstevel@tonic-gate 			    (tp->t_bound_cpu == cp ||
21597c478bd9Sstevel@tonic-gate 			    tp->t_bind_cpu == cp->cpu_id ||
21607c478bd9Sstevel@tonic-gate 			    tp->t_weakbound_cpu == cp)) {
21617c478bd9Sstevel@tonic-gate 				found = 1;
21627c478bd9Sstevel@tonic-gate 				break;
21637c478bd9Sstevel@tonic-gate 			}
21647c478bd9Sstevel@tonic-gate 
21657c478bd9Sstevel@tonic-gate 			if ((flag & BOUND_PARTITION) &&
21667c478bd9Sstevel@tonic-gate 			    (tp->t_cpupart == cp->cpu_part)) {
21677c478bd9Sstevel@tonic-gate 				found = 1;
21687c478bd9Sstevel@tonic-gate 				break;
21697c478bd9Sstevel@tonic-gate 			}
21707c478bd9Sstevel@tonic-gate 		}
21717c478bd9Sstevel@tonic-gate 	} while ((tp = tp->t_next) != curthread && found == 0);
21727c478bd9Sstevel@tonic-gate 	if (!threadlistsafe)
21737c478bd9Sstevel@tonic-gate 		mutex_exit(&pidlock);
21747c478bd9Sstevel@tonic-gate 	return (found);
21757c478bd9Sstevel@tonic-gate }
21767c478bd9Sstevel@tonic-gate 
21777c478bd9Sstevel@tonic-gate /*
21787c478bd9Sstevel@tonic-gate  * disp_bound_threads - return nonzero if threads are bound to the processor.
21797c478bd9Sstevel@tonic-gate  *	Called infrequently.  Keep this simple.
21807c478bd9Sstevel@tonic-gate  *	Includes threads that are asleep or stopped but not onproc.
21817c478bd9Sstevel@tonic-gate  */
21827c478bd9Sstevel@tonic-gate int
21837c478bd9Sstevel@tonic-gate disp_bound_threads(cpu_t *cp, int threadlistsafe)
21847c478bd9Sstevel@tonic-gate {
21857c478bd9Sstevel@tonic-gate 	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU));
21867c478bd9Sstevel@tonic-gate }
21877c478bd9Sstevel@tonic-gate 
21887c478bd9Sstevel@tonic-gate /*
21897c478bd9Sstevel@tonic-gate  * disp_bound_anythreads - return nonzero if _any_ threads are bound
21907c478bd9Sstevel@tonic-gate  * to the given processor, including interrupt threads.
21917c478bd9Sstevel@tonic-gate  */
21927c478bd9Sstevel@tonic-gate int
21937c478bd9Sstevel@tonic-gate disp_bound_anythreads(cpu_t *cp, int threadlistsafe)
21947c478bd9Sstevel@tonic-gate {
21957c478bd9Sstevel@tonic-gate 	return (disp_bound_common(cp, threadlistsafe, BOUND_CPU | BOUND_INTR));
21967c478bd9Sstevel@tonic-gate }
21977c478bd9Sstevel@tonic-gate 
21987c478bd9Sstevel@tonic-gate /*
21997c478bd9Sstevel@tonic-gate  * disp_bound_partition - return nonzero if threads are bound to the same
22007c478bd9Sstevel@tonic-gate  * partition as the processor.
22017c478bd9Sstevel@tonic-gate  *	Called infrequently.  Keep this simple.
22027c478bd9Sstevel@tonic-gate  *	Includes threads that are asleep or stopped but not onproc.
22037c478bd9Sstevel@tonic-gate  */
22047c478bd9Sstevel@tonic-gate int
22057c478bd9Sstevel@tonic-gate disp_bound_partition(cpu_t *cp, int threadlistsafe)
22067c478bd9Sstevel@tonic-gate {
22077c478bd9Sstevel@tonic-gate 	return (disp_bound_common(cp, threadlistsafe, BOUND_PARTITION));
22087c478bd9Sstevel@tonic-gate }
22097c478bd9Sstevel@tonic-gate 
22107c478bd9Sstevel@tonic-gate /*
22117c478bd9Sstevel@tonic-gate  * disp_cpu_inactive - make a CPU inactive by moving all of its unbound
22127c478bd9Sstevel@tonic-gate  * threads to other CPUs.
22137c478bd9Sstevel@tonic-gate  */
22147c478bd9Sstevel@tonic-gate void
22157c478bd9Sstevel@tonic-gate disp_cpu_inactive(cpu_t *cp)
22167c478bd9Sstevel@tonic-gate {
22177c478bd9Sstevel@tonic-gate 	kthread_t	*tp;
22187c478bd9Sstevel@tonic-gate 	disp_t		*dp = cp->cpu_disp;
22197c478bd9Sstevel@tonic-gate 	dispq_t		*dq;
22207c478bd9Sstevel@tonic-gate 	pri_t		pri;
22217c478bd9Sstevel@tonic-gate 	int		wasonq;
22227c478bd9Sstevel@tonic-gate 
22237c478bd9Sstevel@tonic-gate 	disp_lock_enter(&dp->disp_lock);
22247c478bd9Sstevel@tonic-gate 	while ((pri = dp->disp_max_unbound_pri) != -1) {
22257c478bd9Sstevel@tonic-gate 		dq = &dp->disp_q[pri];
22267c478bd9Sstevel@tonic-gate 		tp = dq->dq_first;
22277c478bd9Sstevel@tonic-gate 
22287c478bd9Sstevel@tonic-gate 		/*
22297c478bd9Sstevel@tonic-gate 		 * Skip over bound threads.
22307c478bd9Sstevel@tonic-gate 		 */
22317c478bd9Sstevel@tonic-gate 		while (tp != NULL && tp->t_bound_cpu != NULL) {
22327c478bd9Sstevel@tonic-gate 			tp = tp->t_link;
22337c478bd9Sstevel@tonic-gate 		}
22347c478bd9Sstevel@tonic-gate 
22357c478bd9Sstevel@tonic-gate 		if (tp == NULL) {
22367c478bd9Sstevel@tonic-gate 			/* disp_max_unbound_pri must be inaccurate, so fix it */
22377c478bd9Sstevel@tonic-gate 			disp_fix_unbound_pri(dp, pri);
22387c478bd9Sstevel@tonic-gate 			continue;
22397c478bd9Sstevel@tonic-gate 		}
22407c478bd9Sstevel@tonic-gate 
22417c478bd9Sstevel@tonic-gate 		wasonq = dispdeq(tp);		/* drops disp_lock */
22427c478bd9Sstevel@tonic-gate 		ASSERT(wasonq);
22437c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_weakbound_cpu == NULL);
22447c478bd9Sstevel@tonic-gate 
22457c478bd9Sstevel@tonic-gate 		setbackdq(tp);
22467c478bd9Sstevel@tonic-gate 		/*
22477c478bd9Sstevel@tonic-gate 		 * Called from cpu_offline:
22487c478bd9Sstevel@tonic-gate 		 *
22497c478bd9Sstevel@tonic-gate 		 * cp has already been removed from the list of active cpus
22507c478bd9Sstevel@tonic-gate 		 * and tp->t_cpu has been changed so there is no risk of
22517c478bd9Sstevel@tonic-gate 		 * tp ending up back on cp.
22527c478bd9Sstevel@tonic-gate 		 *
22537c478bd9Sstevel@tonic-gate 		 * Called from cpupart_move_cpu:
22547c478bd9Sstevel@tonic-gate 		 *
22557c478bd9Sstevel@tonic-gate 		 * The cpu has moved to a new cpupart.  Any threads that
22567c478bd9Sstevel@tonic-gate 		 * were on it's dispatch queues before the move remain
22577c478bd9Sstevel@tonic-gate 		 * in the old partition and can't run in the new partition.
22587c478bd9Sstevel@tonic-gate 		 */
22597c478bd9Sstevel@tonic-gate 		ASSERT(tp->t_cpu != cp);
22607c478bd9Sstevel@tonic-gate 		thread_unlock(tp);
22617c478bd9Sstevel@tonic-gate 
22627c478bd9Sstevel@tonic-gate 		disp_lock_enter(&dp->disp_lock);
22637c478bd9Sstevel@tonic-gate 	}
22647c478bd9Sstevel@tonic-gate 	disp_lock_exit(&dp->disp_lock);
22657c478bd9Sstevel@tonic-gate }
22667c478bd9Sstevel@tonic-gate 
22677c478bd9Sstevel@tonic-gate /*
22687c478bd9Sstevel@tonic-gate  * disp_lowpri_cpu - find CPU running the lowest priority thread.
22697c478bd9Sstevel@tonic-gate  *	The hint passed in is used as a starting point so we don't favor
22707c478bd9Sstevel@tonic-gate  *	CPU 0 or any other CPU.  The caller should pass in the most recently
22717c478bd9Sstevel@tonic-gate  *	used CPU for the thread.
22727c478bd9Sstevel@tonic-gate  *
22737c478bd9Sstevel@tonic-gate  *	The lgroup and priority are used to determine the best CPU to run on
22747c478bd9Sstevel@tonic-gate  *	in a NUMA machine.  The lgroup specifies which CPUs are closest while
22757c478bd9Sstevel@tonic-gate  *	the thread priority will indicate whether the thread will actually run
22767c478bd9Sstevel@tonic-gate  *	there.  To pick the best CPU, the CPUs inside and outside of the given
22777c478bd9Sstevel@tonic-gate  *	lgroup which are running the lowest priority threads are found.  The
22787c478bd9Sstevel@tonic-gate  *	remote CPU is chosen only if the thread will not run locally on a CPU
22797c478bd9Sstevel@tonic-gate  *	within the lgroup, but will run on the remote CPU. If the thread
22807c478bd9Sstevel@tonic-gate  *	cannot immediately run on any CPU, the best local CPU will be chosen.
22817c478bd9Sstevel@tonic-gate  *
22827c478bd9Sstevel@tonic-gate  *	The lpl specified also identifies the cpu partition from which
22837c478bd9Sstevel@tonic-gate  *	disp_lowpri_cpu should select a CPU.
22847c478bd9Sstevel@tonic-gate  *
22857c478bd9Sstevel@tonic-gate  *	curcpu is used to indicate that disp_lowpri_cpu is being called on
22867c478bd9Sstevel@tonic-gate  *      behalf of the current thread. (curthread is looking for a new cpu)
22877c478bd9Sstevel@tonic-gate  *      In this case, cpu_dispatch_pri for this thread's cpu should be
22887c478bd9Sstevel@tonic-gate  *      ignored.
22897c478bd9Sstevel@tonic-gate  *
22907c478bd9Sstevel@tonic-gate  *      If a cpu is the target of an offline request then try to avoid it.
22917c478bd9Sstevel@tonic-gate  *
22927c478bd9Sstevel@tonic-gate  *	This function must be called at either high SPL, or with preemption
22937c478bd9Sstevel@tonic-gate  *	disabled, so that the "hint" CPU cannot be removed from the online
22947c478bd9Sstevel@tonic-gate  *	CPU list while we are traversing it.
22957c478bd9Sstevel@tonic-gate  */
22967c478bd9Sstevel@tonic-gate cpu_t *
22977c478bd9Sstevel@tonic-gate disp_lowpri_cpu(cpu_t *hint, lpl_t *lpl, pri_t tpri, cpu_t *curcpu)
22987c478bd9Sstevel@tonic-gate {
22997c478bd9Sstevel@tonic-gate 	cpu_t	*bestcpu;
23007c478bd9Sstevel@tonic-gate 	cpu_t	*besthomecpu;
23017c478bd9Sstevel@tonic-gate 	cpu_t   *cp, *cpstart;
23027c478bd9Sstevel@tonic-gate 
23037c478bd9Sstevel@tonic-gate 	pri_t   bestpri;
23047c478bd9Sstevel@tonic-gate 	pri_t   cpupri;
23057c478bd9Sstevel@tonic-gate 
23067c478bd9Sstevel@tonic-gate 	klgrpset_t	done;
23077c478bd9Sstevel@tonic-gate 	klgrpset_t	cur_set;
23087c478bd9Sstevel@tonic-gate 
23097c478bd9Sstevel@tonic-gate 	lpl_t		*lpl_iter, *lpl_leaf;
23107c478bd9Sstevel@tonic-gate 	int		i;
23117c478bd9Sstevel@tonic-gate 
23127c478bd9Sstevel@tonic-gate 	/*
23137c478bd9Sstevel@tonic-gate 	 * Scan for a CPU currently running the lowest priority thread.
23147c478bd9Sstevel@tonic-gate 	 * Cannot get cpu_lock here because it is adaptive.
23157c478bd9Sstevel@tonic-gate 	 * We do not require lock on CPU list.
23167c478bd9Sstevel@tonic-gate 	 */
23177c478bd9Sstevel@tonic-gate 	ASSERT(hint != NULL);
23187c478bd9Sstevel@tonic-gate 	ASSERT(lpl != NULL);
23197c478bd9Sstevel@tonic-gate 	ASSERT(lpl->lpl_ncpu > 0);
23207c478bd9Sstevel@tonic-gate 
23217c478bd9Sstevel@tonic-gate 	/*
23227c478bd9Sstevel@tonic-gate 	 * First examine local CPUs. Note that it's possible the hint CPU
23237c478bd9Sstevel@tonic-gate 	 * passed in in remote to the specified home lgroup. If our priority
23247c478bd9Sstevel@tonic-gate 	 * isn't sufficient enough such that we can run immediately at home,
23257c478bd9Sstevel@tonic-gate 	 * then examine CPUs remote to our home lgroup.
23267c478bd9Sstevel@tonic-gate 	 * We would like to give preference to CPUs closest to "home".
23277c478bd9Sstevel@tonic-gate 	 * If we can't find a CPU where we'll run at a given level
23287c478bd9Sstevel@tonic-gate 	 * of locality, we expand our search to include the next level.
23297c478bd9Sstevel@tonic-gate 	 */
23307c478bd9Sstevel@tonic-gate 	bestcpu = besthomecpu = NULL;
23317c478bd9Sstevel@tonic-gate 	klgrpset_clear(done);
23327c478bd9Sstevel@tonic-gate 	/* start with lpl we were passed */
23337c478bd9Sstevel@tonic-gate 
23347c478bd9Sstevel@tonic-gate 	lpl_iter = lpl;
23357c478bd9Sstevel@tonic-gate 
23367c478bd9Sstevel@tonic-gate 	do {
23377c478bd9Sstevel@tonic-gate 
23387c478bd9Sstevel@tonic-gate 		bestpri = SHRT_MAX;
23397c478bd9Sstevel@tonic-gate 		klgrpset_clear(cur_set);
23407c478bd9Sstevel@tonic-gate 
23417c478bd9Sstevel@tonic-gate 		for (i = 0; i < lpl_iter->lpl_nrset; i++) {
23427c478bd9Sstevel@tonic-gate 			lpl_leaf = lpl_iter->lpl_rset[i];
23437c478bd9Sstevel@tonic-gate 			if (klgrpset_ismember(done, lpl_leaf->lpl_lgrpid))
23447c478bd9Sstevel@tonic-gate 				continue;
23457c478bd9Sstevel@tonic-gate 
23467c478bd9Sstevel@tonic-gate 			klgrpset_add(cur_set, lpl_leaf->lpl_lgrpid);
23477c478bd9Sstevel@tonic-gate 
23487c478bd9Sstevel@tonic-gate 			if (hint->cpu_lpl == lpl_leaf)
23497c478bd9Sstevel@tonic-gate 				cp = cpstart = hint;
23507c478bd9Sstevel@tonic-gate 			else
23517c478bd9Sstevel@tonic-gate 				cp = cpstart = lpl_leaf->lpl_cpus;
23527c478bd9Sstevel@tonic-gate 
23537c478bd9Sstevel@tonic-gate 			do {
23547c478bd9Sstevel@tonic-gate 
23557c478bd9Sstevel@tonic-gate 				if (cp == curcpu)
23567c478bd9Sstevel@tonic-gate 					cpupri = -1;
23577c478bd9Sstevel@tonic-gate 				else if (cp == cpu_inmotion)
23587c478bd9Sstevel@tonic-gate 					cpupri = SHRT_MAX;
23597c478bd9Sstevel@tonic-gate 				else
23607c478bd9Sstevel@tonic-gate 					cpupri = cp->cpu_dispatch_pri;
23617c478bd9Sstevel@tonic-gate 
23627c478bd9Sstevel@tonic-gate 				if (cp->cpu_disp->disp_maxrunpri > cpupri)
23637c478bd9Sstevel@tonic-gate 					cpupri = cp->cpu_disp->disp_maxrunpri;
23647c478bd9Sstevel@tonic-gate 				if (cp->cpu_chosen_level > cpupri)
23657c478bd9Sstevel@tonic-gate 					cpupri = cp->cpu_chosen_level;
23667c478bd9Sstevel@tonic-gate 				if (cpupri < bestpri) {
23677c478bd9Sstevel@tonic-gate 					if (CPU_IDLING(cpupri)) {
23687c478bd9Sstevel@tonic-gate 						ASSERT((cp->cpu_flags &
23697c478bd9Sstevel@tonic-gate 						    CPU_QUIESCED) == 0);
23707c478bd9Sstevel@tonic-gate 						return (cp);
23717c478bd9Sstevel@tonic-gate 					}
23727c478bd9Sstevel@tonic-gate 					bestcpu = cp;
23737c478bd9Sstevel@tonic-gate 					bestpri = cpupri;
23747c478bd9Sstevel@tonic-gate 				}
23757c478bd9Sstevel@tonic-gate 			} while ((cp = cp->cpu_next_lpl) != cpstart);
23767c478bd9Sstevel@tonic-gate 		}
23777c478bd9Sstevel@tonic-gate 
23787c478bd9Sstevel@tonic-gate 		if (bestcpu && (tpri > bestpri)) {
23797c478bd9Sstevel@tonic-gate 			ASSERT((bestcpu->cpu_flags & CPU_QUIESCED) == 0);
23807c478bd9Sstevel@tonic-gate 			return (bestcpu);
23817c478bd9Sstevel@tonic-gate 		}
23827c478bd9Sstevel@tonic-gate 		if (besthomecpu == NULL)
23837c478bd9Sstevel@tonic-gate 			besthomecpu = bestcpu;
23847c478bd9Sstevel@tonic-gate 		/*
23857c478bd9Sstevel@tonic-gate 		 * Add the lgrps we just considered to the "done" set
23867c478bd9Sstevel@tonic-gate 		 */
23877c478bd9Sstevel@tonic-gate 		klgrpset_or(done, cur_set);
23887c478bd9Sstevel@tonic-gate 
23897c478bd9Sstevel@tonic-gate 	} while ((lpl_iter = lpl_iter->lpl_parent) != NULL);
23907c478bd9Sstevel@tonic-gate 
23917c478bd9Sstevel@tonic-gate 	/*
23927c478bd9Sstevel@tonic-gate 	 * The specified priority isn't high enough to run immediately
23937c478bd9Sstevel@tonic-gate 	 * anywhere, so just return the best CPU from the home lgroup.
23947c478bd9Sstevel@tonic-gate 	 */
23957c478bd9Sstevel@tonic-gate 	ASSERT((besthomecpu->cpu_flags & CPU_QUIESCED) == 0);
23967c478bd9Sstevel@tonic-gate 	return (besthomecpu);
23977c478bd9Sstevel@tonic-gate }
23987c478bd9Sstevel@tonic-gate 
23997c478bd9Sstevel@tonic-gate /*
24007c478bd9Sstevel@tonic-gate  * This routine provides the generic idle cpu function for all processors.
24017c478bd9Sstevel@tonic-gate  * If a processor has some specific code to execute when idle (say, to stop
24027c478bd9Sstevel@tonic-gate  * the pipeline and save power) then that routine should be defined in the
24037c478bd9Sstevel@tonic-gate  * processors specific code (module_xx.c) and the global variable idle_cpu
24047c478bd9Sstevel@tonic-gate  * set to that function.
24057c478bd9Sstevel@tonic-gate  */
24067c478bd9Sstevel@tonic-gate static void
24077c478bd9Sstevel@tonic-gate generic_idle_cpu(void)
24087c478bd9Sstevel@tonic-gate {
24097c478bd9Sstevel@tonic-gate }
24107c478bd9Sstevel@tonic-gate 
24117c478bd9Sstevel@tonic-gate /*ARGSUSED*/
24127c478bd9Sstevel@tonic-gate static void
24137c478bd9Sstevel@tonic-gate generic_enq_thread(cpu_t *cpu, int bound)
24147c478bd9Sstevel@tonic-gate {
24157c478bd9Sstevel@tonic-gate }
24167c478bd9Sstevel@tonic-gate 
24177c478bd9Sstevel@tonic-gate /*
24187c478bd9Sstevel@tonic-gate  * Select a CPU for this thread to run on.  Choose t->t_cpu unless:
24197c478bd9Sstevel@tonic-gate  *	- t->t_cpu is not in this thread's assigned lgrp
24207c478bd9Sstevel@tonic-gate  *	- the time since the thread last came off t->t_cpu exceeds the
24217c478bd9Sstevel@tonic-gate  *	  rechoose time for this cpu (ignore this if t is curthread in
24227c478bd9Sstevel@tonic-gate  *	  which case it's on CPU and t->t_disp_time is inaccurate)
24237c478bd9Sstevel@tonic-gate  *	- t->t_cpu is presently the target of an offline or partition move
24247c478bd9Sstevel@tonic-gate  *	  request
24257c478bd9Sstevel@tonic-gate  */
24267c478bd9Sstevel@tonic-gate static cpu_t *
24277c478bd9Sstevel@tonic-gate cpu_choose(kthread_t *t, pri_t tpri)
24287c478bd9Sstevel@tonic-gate {
24297c478bd9Sstevel@tonic-gate 	ASSERT(tpri < kpqpri);
24307c478bd9Sstevel@tonic-gate 
24317c478bd9Sstevel@tonic-gate 	if ((((lbolt - t->t_disp_time) > t->t_cpu->cpu_rechoose) &&
24327c478bd9Sstevel@tonic-gate 	    t != curthread) || t->t_cpu == cpu_inmotion) {
24337c478bd9Sstevel@tonic-gate 		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri, NULL));
24347c478bd9Sstevel@tonic-gate 	}
24357c478bd9Sstevel@tonic-gate 
24367c478bd9Sstevel@tonic-gate 	/*
24377c478bd9Sstevel@tonic-gate 	 * Take a trip through disp_lowpri_cpu() if the thread was
24387c478bd9Sstevel@tonic-gate 	 * running outside it's home lgroup
24397c478bd9Sstevel@tonic-gate 	 */
24407c478bd9Sstevel@tonic-gate 	if (!klgrpset_ismember(t->t_lpl->lpl_lgrp->lgrp_set[LGRP_RSRC_CPU],
24417c478bd9Sstevel@tonic-gate 	    t->t_cpu->cpu_lpl->lpl_lgrpid)) {
24427c478bd9Sstevel@tonic-gate 		return (disp_lowpri_cpu(t->t_cpu, t->t_lpl, tpri,
24437c478bd9Sstevel@tonic-gate 		    (t == curthread) ? t->t_cpu : NULL));
24447c478bd9Sstevel@tonic-gate 	}
24457c478bd9Sstevel@tonic-gate 	return (t->t_cpu);
24467c478bd9Sstevel@tonic-gate }
2447