10e751525SEric Saxe /*
20e751525SEric Saxe  * CDDL HEADER START
30e751525SEric Saxe  *
40e751525SEric Saxe  * The contents of this file are subject to the terms of the
50e751525SEric Saxe  * Common Development and Distribution License (the "License").
60e751525SEric Saxe  * You may not use this file except in compliance with the License.
70e751525SEric Saxe  *
80e751525SEric Saxe  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90e751525SEric Saxe  * or http://www.opensolaris.org/os/licensing.
100e751525SEric Saxe  * See the License for the specific language governing permissions
110e751525SEric Saxe  * and limitations under the License.
120e751525SEric Saxe  *
130e751525SEric Saxe  * When distributing Covered Code, include this CDDL HEADER in each
140e751525SEric Saxe  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150e751525SEric Saxe  * If applicable, add the following below this CDDL HEADER, with the
160e751525SEric Saxe  * fields enclosed by brackets "[]" replaced with your own identifying
170e751525SEric Saxe  * information: Portions Copyright [yyyy] [name of copyright owner]
180e751525SEric Saxe  *
190e751525SEric Saxe  * CDDL HEADER END
200e751525SEric Saxe  */
210e751525SEric Saxe /*
220e751525SEric Saxe  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
230e751525SEric Saxe  * Use is subject to license terms.
240e751525SEric Saxe  */
25cef70d2cSBill Holler /*
26cef70d2cSBill Holler  * Copyright (c) 2009, Intel Corporation.
27cef70d2cSBill Holler  * All rights reserved.
28cef70d2cSBill Holler  */
290e751525SEric Saxe 
300e751525SEric Saxe #include <sys/cpu_pm.h>
310e751525SEric Saxe #include <sys/x86_archext.h>
320e751525SEric Saxe #include <sys/sdt.h>
330e751525SEric Saxe #include <sys/spl.h>
340e751525SEric Saxe #include <sys/machsystm.h>
35444f66e7SMark Haywood #include <sys/archsystm.h>
360e751525SEric Saxe #include <sys/hpet.h>
3778d5422cSMark Haywood #include <sys/acpi/acpi.h>
3878d5422cSMark Haywood #include <sys/acpica.h>
390e751525SEric Saxe #include <sys/cpupm.h>
400e751525SEric Saxe #include <sys/cpu_idle.h>
410e751525SEric Saxe #include <sys/cpu_acpi.h>
420e751525SEric Saxe #include <sys/cpupm_throttle.h>
439aa01d98SBill Holler #include <sys/dtrace.h>
44444f66e7SMark Haywood #include <sys/note.h>
450e751525SEric Saxe 
460e751525SEric Saxe /*
470e751525SEric Saxe  * This callback is used to build the PPM CPU domains once
48444f66e7SMark Haywood  * a CPU device has been started. The callback is initialized
49444f66e7SMark Haywood  * by the PPM driver to point to a routine that will build the
50444f66e7SMark Haywood  * domains.
510e751525SEric Saxe  */
52444f66e7SMark Haywood void (*cpupm_ppm_alloc_pstate_domains)(cpu_t *);
530e751525SEric Saxe 
540e751525SEric Saxe /*
55444f66e7SMark Haywood  * This callback is used to remove CPU from the PPM CPU domains
56444f66e7SMark Haywood  * when the cpu driver is detached. The callback is initialized
57444f66e7SMark Haywood  * by the PPM driver to point to a routine that will remove CPU
58444f66e7SMark Haywood  * from the domains.
590e751525SEric Saxe  */
60444f66e7SMark Haywood void (*cpupm_ppm_free_pstate_domains)(cpu_t *);
610e751525SEric Saxe 
620e751525SEric Saxe /*
630e751525SEric Saxe  * This callback is used to redefine the topspeed for a CPU device.
640e751525SEric Saxe  * Since all CPUs in a domain should have identical properties, this
650e751525SEric Saxe  * callback is initialized by the PPM driver to point to a routine
660e751525SEric Saxe  * that will redefine the topspeed for all devices in a CPU domain.
670e751525SEric Saxe  * This callback is exercised whenever an ACPI _PPC change notification
680e751525SEric Saxe  * is received by the CPU driver.
690e751525SEric Saxe  */
700e751525SEric Saxe void (*cpupm_redefine_topspeed)(void *);
710e751525SEric Saxe 
720e751525SEric Saxe /*
730e751525SEric Saxe  * This callback is used by the PPM driver to call into the CPU driver
740e751525SEric Saxe  * to set a new topspeed for a CPU.
750e751525SEric Saxe  * (NOTE(review): the original comments on this pair were swapped; the
750e751525SEric Saxe  * (void *, int) signature identifies this one as the setter.)
750e751525SEric Saxe  */
760e751525SEric Saxe void (*cpupm_set_topspeed_callb)(void *, int);
770e751525SEric Saxe 
780e751525SEric Saxe /*
790e751525SEric Saxe  * This callback is used by the PPM driver to call into the CPU driver
800e751525SEric Saxe  * to find a CPU's current topspeed (i.e., its current ACPI _PPC value).
800e751525SEric Saxe  * The int return value identifies this one as the getter.
810e751525SEric Saxe  */
820e751525SEric Saxe int (*cpupm_get_topspeed_callb)(void *);
830e751525SEric Saxe 
840e751525SEric Saxe static void cpupm_event_notify_handler(ACPI_HANDLE, UINT32, void *);
850e751525SEric Saxe static void cpupm_free_notify_handlers(cpu_t *);
8641333a9eSMark Haywood static void cpupm_power_manage_notifications(void *);
870e751525SEric Saxe 
880e751525SEric Saxe /*
890e751525SEric Saxe  * Until proven otherwise, all power states are manageable.
900e751525SEric Saxe  */
910e751525SEric Saxe static uint32_t cpupm_enabled = CPUPM_ALL_STATES;
920e751525SEric Saxe 
930e751525SEric Saxe cpupm_state_domains_t *cpupm_pstate_domains = NULL;
940e751525SEric Saxe cpupm_state_domains_t *cpupm_tstate_domains = NULL;
950e751525SEric Saxe cpupm_state_domains_t *cpupm_cstate_domains = NULL;
960e751525SEric Saxe 
970e751525SEric Saxe /*
980e751525SEric Saxe  * c-state tunables
990e751525SEric Saxe  *
1000fc6188aSaubrey.li@intel.com  * cpupm_cs_sample_interval is the length of time we wait before
1010fc6188aSaubrey.li@intel.com  * recalculating c-state statistics.  When a CPU goes idle it checks
1020fc6188aSaubrey.li@intel.com  * to see if it has been longer than cpupm_cs_sample_interval since it last
1030fc6188aSaubrey.li@intel.com  * calculated which C-state to go to.
1040fc6188aSaubrey.li@intel.com  *
1050e751525SEric Saxe  * cpupm_cs_idle_cost_tunable is the ratio of time CPU spends executing + idle
1060e751525SEric Saxe  * divided by time spent in the idle state transitions.
1070e751525SEric Saxe  * A value of 10 means the CPU will not spend more than 1/10 of its time
1080e751525SEric Saxe  * in idle latency.  The worst case performance will be 90% of non Deep C-state
1090e751525SEric Saxe  * kernel.
1100e751525SEric Saxe  *
1110e751525SEric Saxe  * cpupm_cs_idle_save_tunable is how long we must stay in a deeper C-state
1120e751525SEric Saxe  * before it is worth going there.  Expressed as a multiple of latency.
1130e751525SEric Saxe  */
1140fc6188aSaubrey.li@intel.com uint32_t cpupm_cs_sample_interval = 100*1000*1000;	/* 100 milliseconds */
1150e751525SEric Saxe uint32_t cpupm_cs_idle_cost_tunable = 10;	/* work time / latency cost */
1160e751525SEric Saxe uint32_t cpupm_cs_idle_save_tunable = 2;	/* idle power savings */
1170e751525SEric Saxe uint16_t cpupm_C2_idle_pct_tunable = 70;
1180e751525SEric Saxe uint16_t cpupm_C3_idle_pct_tunable = 80;
1190e751525SEric Saxe 
1200e751525SEric Saxe #ifndef __xpv
1210e751525SEric Saxe extern boolean_t cpupm_intel_init(cpu_t *);
1220e751525SEric Saxe extern boolean_t cpupm_amd_init(cpu_t *);
1230e751525SEric Saxe 
1240e751525SEric Saxe typedef struct cpupm_vendor {
1250e751525SEric Saxe 	boolean_t	(*cpuv_init)(cpu_t *);
1260e751525SEric Saxe } cpupm_vendor_t;
1270e751525SEric Saxe 
1280e751525SEric Saxe /*
1290e751525SEric Saxe  * Table of supported vendors.
1290e751525SEric Saxe  * Probed in order by cpupm_init(); the trailing NULL entry terminates
1290e751525SEric Saxe  * the list.
1300e751525SEric Saxe  */
1310e751525SEric Saxe static cpupm_vendor_t cpupm_vendors[] = {
1320e751525SEric Saxe 	cpupm_intel_init,
1330e751525SEric Saxe 	cpupm_amd_init,
1340e751525SEric Saxe 	NULL
1350e751525SEric Saxe };
1360e751525SEric Saxe #endif
1370e751525SEric Saxe 
1380e751525SEric Saxe /*
1390e751525SEric Saxe  * Initialize the machine.
1400e751525SEric Saxe  * See if a module exists for managing power for this CPU.
1410e751525SEric Saxe  */
1420e751525SEric Saxe /*ARGSUSED*/
1430e751525SEric Saxe void
cpupm_init(cpu_t * cp)1440e751525SEric Saxe cpupm_init(cpu_t *cp)
1450e751525SEric Saxe {
1460e751525SEric Saxe #ifndef __xpv
1470e751525SEric Saxe 	cpupm_vendor_t *vendors;
1480e751525SEric Saxe 	cpupm_mach_state_t *mach_state;
1490e751525SEric Saxe 	struct machcpu *mcpu = &(cp->cpu_m);
15078d5422cSMark Haywood 	static boolean_t first = B_TRUE;
1510e751525SEric Saxe 	int *speeds;
1520e751525SEric Saxe 	uint_t nspeeds;
1530e751525SEric Saxe 	int ret;
1540e751525SEric Saxe 
1550e751525SEric Saxe 	mach_state = cp->cpu_m.mcpu_pm_mach_state =
1560e751525SEric Saxe 	    kmem_zalloc(sizeof (cpupm_mach_state_t), KM_SLEEP);
1570e751525SEric Saxe 	mach_state->ms_caps = CPUPM_NO_STATES;
1580e751525SEric Saxe 	mutex_init(&mach_state->ms_lock, NULL, MUTEX_DRIVER, NULL);
1590e751525SEric Saxe 
1600e751525SEric Saxe 	mach_state->ms_acpi_handle = cpu_acpi_init(cp);
1610e751525SEric Saxe 	if (mach_state->ms_acpi_handle == NULL) {
162444f66e7SMark Haywood 		cpupm_fini(cp);
1630e751525SEric Saxe 		cmn_err(CE_WARN, "!cpupm_init: processor %d: "
1640e751525SEric Saxe 		    "unable to get ACPI handle", cp->cpu_id);
1650e751525SEric Saxe 		cmn_err(CE_NOTE, "!CPU power management will not function.");
1660e751525SEric Saxe 		CPUPM_DISABLE();
16778d5422cSMark Haywood 		first = B_FALSE;
1680e751525SEric Saxe 		return;
1690e751525SEric Saxe 	}
1700e751525SEric Saxe 
1710e751525SEric Saxe 	/*
1720e751525SEric Saxe 	 * Loop through the CPU management module table and see if
1730e751525SEric Saxe 	 * any of the modules implement CPU power management
1740e751525SEric Saxe 	 * for this CPU.
1750e751525SEric Saxe 	 */
1760e751525SEric Saxe 	for (vendors = cpupm_vendors; vendors->cpuv_init != NULL; vendors++) {
1770e751525SEric Saxe 		if (vendors->cpuv_init(cp))
1780e751525SEric Saxe 			break;
1790e751525SEric Saxe 	}
1800e751525SEric Saxe 
1810e751525SEric Saxe 	/*
1820e751525SEric Saxe 	 * Nope, we can't power manage this CPU.
1830e751525SEric Saxe 	 */
1840e751525SEric Saxe 	if (vendors == NULL) {
185444f66e7SMark Haywood 		cpupm_fini(cp);
1860e751525SEric Saxe 		CPUPM_DISABLE();
18778d5422cSMark Haywood 		first = B_FALSE;
1880e751525SEric Saxe 		return;
1890e751525SEric Saxe 	}
1900e751525SEric Saxe 
1910e751525SEric Saxe 	/*
1920e751525SEric Saxe 	 * If P-state support exists for this system, then initialize it.
1930e751525SEric Saxe 	 */
1940e751525SEric Saxe 	if (mach_state->ms_pstate.cma_ops != NULL) {
1950e751525SEric Saxe 		ret = mach_state->ms_pstate.cma_ops->cpus_init(cp);
1960e751525SEric Saxe 		if (ret != 0) {
1970e751525SEric Saxe 			mach_state->ms_pstate.cma_ops = NULL;
1980e751525SEric Saxe 			cpupm_disable(CPUPM_P_STATES);
1990e751525SEric Saxe 		} else {
2000e751525SEric Saxe 			nspeeds = cpupm_get_speeds(cp, &speeds);
2010e751525SEric Saxe 			if (nspeeds == 0) {
20200f97612SMark Haywood 				cmn_err(CE_NOTE, "!cpupm_init: processor %d:"
2030e751525SEric Saxe 				    " no speeds to manage", cp->cpu_id);
2040e751525SEric Saxe 			} else {
2050e751525SEric Saxe 				cpupm_set_supp_freqs(cp, speeds, nspeeds);
2060e751525SEric Saxe 				cpupm_free_speeds(speeds, nspeeds);
2070e751525SEric Saxe 				mach_state->ms_caps |= CPUPM_P_STATES;
2080e751525SEric Saxe 			}
2090e751525SEric Saxe 		}
21029091f17SAnup Pemmaiah 	} else {
21129091f17SAnup Pemmaiah 		cpupm_disable(CPUPM_P_STATES);
2120e751525SEric Saxe 	}
2130e751525SEric Saxe 
2140e751525SEric Saxe 	if (mach_state->ms_tstate.cma_ops != NULL) {
2150e751525SEric Saxe 		ret = mach_state->ms_tstate.cma_ops->cpus_init(cp);
2160e751525SEric Saxe 		if (ret != 0) {
2170e751525SEric Saxe 			mach_state->ms_tstate.cma_ops = NULL;
2180e751525SEric Saxe 			cpupm_disable(CPUPM_T_STATES);
2190e751525SEric Saxe 		} else {
2200e751525SEric Saxe 			mach_state->ms_caps |= CPUPM_T_STATES;
2210e751525SEric Saxe 		}
22229091f17SAnup Pemmaiah 	} else {
22329091f17SAnup Pemmaiah 		cpupm_disable(CPUPM_T_STATES);
2240e751525SEric Saxe 	}
2250e751525SEric Saxe 
2260e751525SEric Saxe 	/*
2270e751525SEric Saxe 	 * If C-states support exists for this system, then initialize it.
2280e751525SEric Saxe 	 */
2290e751525SEric Saxe 	if (mach_state->ms_cstate.cma_ops != NULL) {
2300e751525SEric Saxe 		ret = mach_state->ms_cstate.cma_ops->cpus_init(cp);
2310e751525SEric Saxe 		if (ret != 0) {
2320e751525SEric Saxe 			mach_state->ms_cstate.cma_ops = NULL;
2330e751525SEric Saxe 			mcpu->max_cstates = CPU_ACPI_C1;
2340e751525SEric Saxe 			cpupm_disable(CPUPM_C_STATES);
2350e751525SEric Saxe 			idle_cpu = non_deep_idle_cpu;
2360e751525SEric Saxe 			disp_enq_thread = non_deep_idle_disp_enq_thread;
2370e751525SEric Saxe 		} else if (cpu_deep_cstates_supported()) {
2380e751525SEric Saxe 			mcpu->max_cstates = cpu_acpi_get_max_cstates(
2390e751525SEric Saxe 			    mach_state->ms_acpi_handle);
2400e751525SEric Saxe 			if (mcpu->max_cstates > CPU_ACPI_C1) {
241cef70d2cSBill Holler 				(void) cstate_timer_callback(
242cef70d2cSBill Holler 				    CST_EVENT_MULTIPLE_CSTATES);
243a3114836SGerry Liu 				cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
2440e751525SEric Saxe 				mcpu->mcpu_idle_type = CPU_ACPI_C1;
2450e751525SEric Saxe 				disp_enq_thread = cstate_wakeup;
2460e751525SEric Saxe 			} else {
247cef70d2cSBill Holler 				(void) cstate_timer_callback(
248cef70d2cSBill Holler 				    CST_EVENT_ONE_CSTATE);
2490e751525SEric Saxe 			}
2500e751525SEric Saxe 			mach_state->ms_caps |= CPUPM_C_STATES;
2510e751525SEric Saxe 		} else {
2520e751525SEric Saxe 			mcpu->max_cstates = CPU_ACPI_C1;
2530e751525SEric Saxe 			idle_cpu = non_deep_idle_cpu;
2540e751525SEric Saxe 			disp_enq_thread = non_deep_idle_disp_enq_thread;
2550e751525SEric Saxe 		}
25629091f17SAnup Pemmaiah 	} else {
25729091f17SAnup Pemmaiah 		cpupm_disable(CPUPM_C_STATES);
2580e751525SEric Saxe 	}
2590e751525SEric Saxe 
2600e751525SEric Saxe 
2610e751525SEric Saxe 	if (mach_state->ms_caps == CPUPM_NO_STATES) {
262444f66e7SMark Haywood 		cpupm_fini(cp);
2630e751525SEric Saxe 		CPUPM_DISABLE();
26478d5422cSMark Haywood 		first = B_FALSE;
2650e751525SEric Saxe 		return;
2660e751525SEric Saxe 	}
2670e751525SEric Saxe 
2680e751525SEric Saxe 	if ((mach_state->ms_caps & CPUPM_T_STATES) ||
2690e751525SEric Saxe 	    (mach_state->ms_caps & CPUPM_P_STATES) ||
27078d5422cSMark Haywood 	    (mach_state->ms_caps & CPUPM_C_STATES)) {
27178d5422cSMark Haywood 		if (first) {
27278d5422cSMark Haywood 			acpica_write_cpupm_capabilities(
27378d5422cSMark Haywood 			    mach_state->ms_caps & CPUPM_P_STATES,
27478d5422cSMark Haywood 			    mach_state->ms_caps & CPUPM_C_STATES);
27578d5422cSMark Haywood 		}
27629091f17SAnup Pemmaiah 		if (mach_state->ms_caps & CPUPM_T_STATES) {
27729091f17SAnup Pemmaiah 			cpupm_throttle_manage_notification(cp);
27829091f17SAnup Pemmaiah 		}
27929091f17SAnup Pemmaiah 		if (mach_state->ms_caps & CPUPM_C_STATES) {
28029091f17SAnup Pemmaiah 			cpuidle_manage_cstates(cp);
28129091f17SAnup Pemmaiah 		}
28229091f17SAnup Pemmaiah 		if (mach_state->ms_caps & CPUPM_P_STATES) {
28329091f17SAnup Pemmaiah 			cpupm_power_manage_notifications(cp);
28429091f17SAnup Pemmaiah 		}
28541333a9eSMark Haywood 		cpupm_add_notify_handler(cp, cpupm_event_notify_handler, cp);
28678d5422cSMark Haywood 	}
28778d5422cSMark Haywood 	first = B_FALSE;
2880e751525SEric Saxe #endif
2890e751525SEric Saxe }
2900e751525SEric Saxe 
2910e751525SEric Saxe /*
292444f66e7SMark Haywood  * Free any resources allocated during cpupm initialization or cpupm start.
2930e751525SEric Saxe  */
2940e751525SEric Saxe /*ARGSUSED*/
2950e751525SEric Saxe void
cpupm_free(cpu_t * cp,boolean_t cpupm_stop)296444f66e7SMark Haywood cpupm_free(cpu_t *cp, boolean_t cpupm_stop)
2970e751525SEric Saxe {
2980e751525SEric Saxe #ifndef __xpv
2990e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
3000e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
3010e751525SEric Saxe 
3020e751525SEric Saxe 	if (mach_state == NULL)
3030e751525SEric Saxe 		return;
304444f66e7SMark Haywood 
3050e751525SEric Saxe 	if (mach_state->ms_pstate.cma_ops != NULL) {
306444f66e7SMark Haywood 		if (cpupm_stop)
307444f66e7SMark Haywood 			mach_state->ms_pstate.cma_ops->cpus_stop(cp);
308444f66e7SMark Haywood 		else
309444f66e7SMark Haywood 			mach_state->ms_pstate.cma_ops->cpus_fini(cp);
3100e751525SEric Saxe 		mach_state->ms_pstate.cma_ops = NULL;
3110e751525SEric Saxe 	}
3120e751525SEric Saxe 
3130e751525SEric Saxe 	if (mach_state->ms_tstate.cma_ops != NULL) {
314444f66e7SMark Haywood 		if (cpupm_stop)
315444f66e7SMark Haywood 			mach_state->ms_tstate.cma_ops->cpus_stop(cp);
316444f66e7SMark Haywood 		else
317444f66e7SMark Haywood 			mach_state->ms_tstate.cma_ops->cpus_fini(cp);
3180e751525SEric Saxe 		mach_state->ms_tstate.cma_ops = NULL;
3190e751525SEric Saxe 	}
3200e751525SEric Saxe 
3210e751525SEric Saxe 	if (mach_state->ms_cstate.cma_ops != NULL) {
322444f66e7SMark Haywood 		if (cpupm_stop)
323444f66e7SMark Haywood 			mach_state->ms_cstate.cma_ops->cpus_stop(cp);
324444f66e7SMark Haywood 		else
325444f66e7SMark Haywood 			mach_state->ms_cstate.cma_ops->cpus_fini(cp);
326444f66e7SMark Haywood 
3270e751525SEric Saxe 		mach_state->ms_cstate.cma_ops = NULL;
3280e751525SEric Saxe 	}
3290e751525SEric Saxe 
3300e751525SEric Saxe 	cpupm_free_notify_handlers(cp);
3310e751525SEric Saxe 
3320e751525SEric Saxe 	if (mach_state->ms_acpi_handle != NULL) {
3330e751525SEric Saxe 		cpu_acpi_fini(mach_state->ms_acpi_handle);
3340e751525SEric Saxe 		mach_state->ms_acpi_handle = NULL;
3350e751525SEric Saxe 	}
3360e751525SEric Saxe 
3370e751525SEric Saxe 	mutex_destroy(&mach_state->ms_lock);
3380e751525SEric Saxe 	kmem_free(mach_state, sizeof (cpupm_mach_state_t));
3390e751525SEric Saxe 	cp->cpu_m.mcpu_pm_mach_state = NULL;
3400e751525SEric Saxe #endif
3410e751525SEric Saxe }
3420e751525SEric Saxe 
343444f66e7SMark Haywood void
cpupm_fini(cpu_t * cp)344444f66e7SMark Haywood cpupm_fini(cpu_t *cp)
345444f66e7SMark Haywood {
346444f66e7SMark Haywood 	/*
347444f66e7SMark Haywood 	 * call (*cpus_fini)() ops to release the cpupm resource
348444f66e7SMark Haywood 	 * in the P/C/T-state driver
349444f66e7SMark Haywood 	 */
350444f66e7SMark Haywood 	cpupm_free(cp, B_FALSE);
351444f66e7SMark Haywood }
352444f66e7SMark Haywood 
353444f66e7SMark Haywood void
cpupm_start(cpu_t * cp)354444f66e7SMark Haywood cpupm_start(cpu_t *cp)
355444f66e7SMark Haywood {
356444f66e7SMark Haywood 	cpupm_init(cp);
357444f66e7SMark Haywood }
358444f66e7SMark Haywood 
359444f66e7SMark Haywood void
cpupm_stop(cpu_t * cp)360444f66e7SMark Haywood cpupm_stop(cpu_t *cp)
361444f66e7SMark Haywood {
362444f66e7SMark Haywood 	/*
363444f66e7SMark Haywood 	 * call (*cpus_stop)() ops to reclaim the cpupm resource
364444f66e7SMark Haywood 	 * in the P/C/T-state driver
365444f66e7SMark Haywood 	 */
366444f66e7SMark Haywood 	cpupm_free(cp, B_TRUE);
367444f66e7SMark Haywood }
368444f66e7SMark Haywood 
3690e751525SEric Saxe /*
370444f66e7SMark Haywood  * If A CPU has started and at least one power state is manageable,
371444f66e7SMark Haywood  * then the CPU is ready for power management.
3720e751525SEric Saxe  */
3730e751525SEric Saxe boolean_t
cpupm_is_ready(cpu_t * cp)374444f66e7SMark Haywood cpupm_is_ready(cpu_t *cp)
3750e751525SEric Saxe {
3760e751525SEric Saxe #ifndef __xpv
377444f66e7SMark Haywood 	cpupm_mach_state_t *mach_state =
378444f66e7SMark Haywood 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
379444f66e7SMark Haywood 	uint32_t cpupm_caps = mach_state->ms_caps;
380444f66e7SMark Haywood 
3810e751525SEric Saxe 	if (cpupm_enabled == CPUPM_NO_STATES)
3820e751525SEric Saxe 		return (B_FALSE);
383444f66e7SMark Haywood 
384444f66e7SMark Haywood 	if ((cpupm_caps & CPUPM_T_STATES) ||
385444f66e7SMark Haywood 	    (cpupm_caps & CPUPM_P_STATES) ||
386444f66e7SMark Haywood 	    (cpupm_caps & CPUPM_C_STATES))
387444f66e7SMark Haywood 
388444f66e7SMark Haywood 		return (B_TRUE);
389444f66e7SMark Haywood 	return (B_FALSE);
3900e751525SEric Saxe #else
391444f66e7SMark Haywood 	_NOTE(ARGUNUSED(cp));
3920e751525SEric Saxe 	return (B_FALSE);
3930e751525SEric Saxe #endif
3940e751525SEric Saxe }
3950e751525SEric Saxe 
3960e751525SEric Saxe boolean_t
cpupm_is_enabled(uint32_t state)3970e751525SEric Saxe cpupm_is_enabled(uint32_t state)
3980e751525SEric Saxe {
3990e751525SEric Saxe 	return ((cpupm_enabled & state) == state);
4000e751525SEric Saxe }
4010e751525SEric Saxe 
4020e751525SEric Saxe /*
4030e751525SEric Saxe  * By default, all states are enabled.
4040e751525SEric Saxe  */
4050e751525SEric Saxe void
cpupm_disable(uint32_t state)4060e751525SEric Saxe cpupm_disable(uint32_t state)
4070e751525SEric Saxe {
4080e751525SEric Saxe 
4090e751525SEric Saxe 	if (state & CPUPM_P_STATES) {
4100e751525SEric Saxe 		cpupm_free_domains(&cpupm_pstate_domains);
4110e751525SEric Saxe 	}
4120e751525SEric Saxe 	if (state & CPUPM_T_STATES) {
4130e751525SEric Saxe 		cpupm_free_domains(&cpupm_tstate_domains);
4140e751525SEric Saxe 	}
4150e751525SEric Saxe 	if (state & CPUPM_C_STATES) {
4160e751525SEric Saxe 		cpupm_free_domains(&cpupm_cstate_domains);
4170e751525SEric Saxe 	}
4180e751525SEric Saxe 	cpupm_enabled &= ~state;
4190e751525SEric Saxe }
4200e751525SEric Saxe 
4210e751525SEric Saxe /*
4220e751525SEric Saxe  * Allocate power domains for C,P and T States
4230e751525SEric Saxe  */
4240e751525SEric Saxe void
cpupm_alloc_domains(cpu_t * cp,int state)4250e751525SEric Saxe cpupm_alloc_domains(cpu_t *cp, int state)
4260e751525SEric Saxe {
4270e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
4280e751525SEric Saxe 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
4290e751525SEric Saxe 	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
4300e751525SEric Saxe 	cpupm_state_domains_t **dom_ptr;
4310e751525SEric Saxe 	cpupm_state_domains_t *dptr;
4320e751525SEric Saxe 	cpupm_state_domains_t **mach_dom_state_ptr;
4330e751525SEric Saxe 	uint32_t domain;
4340e751525SEric Saxe 	uint32_t type;
4350e751525SEric Saxe 
4360e751525SEric Saxe 	switch (state) {
4370e751525SEric Saxe 	case CPUPM_P_STATES:
4380e751525SEric Saxe 		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_PSD_CACHED)) {
4390e751525SEric Saxe 			domain = CPU_ACPI_PSD(handle).sd_domain;
4400e751525SEric Saxe 			type = CPU_ACPI_PSD(handle).sd_type;
4410e751525SEric Saxe 		} else {
442a3114836SGerry Liu 			if (MUTEX_HELD(&cpu_lock)) {
443a3114836SGerry Liu 				domain = cpuid_get_chipid(cp);
444a3114836SGerry Liu 			} else {
445a3114836SGerry Liu 				mutex_enter(&cpu_lock);
446a3114836SGerry Liu 				domain = cpuid_get_chipid(cp);
447a3114836SGerry Liu 				mutex_exit(&cpu_lock);
448a3114836SGerry Liu 			}
4490e751525SEric Saxe 			type = CPU_ACPI_HW_ALL;
4500e751525SEric Saxe 		}
4510e751525SEric Saxe 		dom_ptr = &cpupm_pstate_domains;
4520e751525SEric Saxe 		mach_dom_state_ptr = &mach_state->ms_pstate.cma_domain;
4530e751525SEric Saxe 		break;
4540e751525SEric Saxe 	case CPUPM_T_STATES:
4550e751525SEric Saxe 		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_TSD_CACHED)) {
4560e751525SEric Saxe 			domain = CPU_ACPI_TSD(handle).sd_domain;
4570e751525SEric Saxe 			type = CPU_ACPI_TSD(handle).sd_type;
4580e751525SEric Saxe 		} else {
459a3114836SGerry Liu 			if (MUTEX_HELD(&cpu_lock)) {
460a3114836SGerry Liu 				domain = cpuid_get_chipid(cp);
461a3114836SGerry Liu 			} else {
462a3114836SGerry Liu 				mutex_enter(&cpu_lock);
463a3114836SGerry Liu 				domain = cpuid_get_chipid(cp);
464a3114836SGerry Liu 				mutex_exit(&cpu_lock);
465a3114836SGerry Liu 			}
4660e751525SEric Saxe 			type = CPU_ACPI_HW_ALL;
4670e751525SEric Saxe 		}
4680e751525SEric Saxe 		dom_ptr = &cpupm_tstate_domains;
4690e751525SEric Saxe 		mach_dom_state_ptr = &mach_state->ms_tstate.cma_domain;
4700e751525SEric Saxe 		break;
4710e751525SEric Saxe 	case CPUPM_C_STATES:
4720e751525SEric Saxe 		if (CPU_ACPI_IS_OBJ_CACHED(handle, CPU_ACPI_CSD_CACHED)) {
4730e751525SEric Saxe 			domain = CPU_ACPI_CSD(handle).sd_domain;
4740e751525SEric Saxe 			type = CPU_ACPI_CSD(handle).sd_type;
4750e751525SEric Saxe 		} else {
476a3114836SGerry Liu 			if (MUTEX_HELD(&cpu_lock)) {
477a3114836SGerry Liu 				domain = cpuid_get_coreid(cp);
478a3114836SGerry Liu 			} else {
479a3114836SGerry Liu 				mutex_enter(&cpu_lock);
480a3114836SGerry Liu 				domain = cpuid_get_coreid(cp);
481a3114836SGerry Liu 				mutex_exit(&cpu_lock);
482a3114836SGerry Liu 			}
4830e751525SEric Saxe 			type = CPU_ACPI_HW_ALL;
4840e751525SEric Saxe 		}
4850e751525SEric Saxe 		dom_ptr = &cpupm_cstate_domains;
4860e751525SEric Saxe 		mach_dom_state_ptr = &mach_state->ms_cstate.cma_domain;
4870e751525SEric Saxe 		break;
4880e751525SEric Saxe 	default:
4890e751525SEric Saxe 		return;
4900e751525SEric Saxe 	}
4910e751525SEric Saxe 
4920e751525SEric Saxe 	for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
4930e751525SEric Saxe 		if (dptr->pm_domain == domain)
4940e751525SEric Saxe 			break;
4950e751525SEric Saxe 	}
4960e751525SEric Saxe 
4970e751525SEric Saxe 	/* new domain is created and linked at the head */
4980e751525SEric Saxe 	if (dptr == NULL) {
4990e751525SEric Saxe 		dptr = kmem_zalloc(sizeof (cpupm_state_domains_t), KM_SLEEP);
5000e751525SEric Saxe 		dptr->pm_domain = domain;
5010e751525SEric Saxe 		dptr->pm_type = type;
5020e751525SEric Saxe 		dptr->pm_next = *dom_ptr;
5030e751525SEric Saxe 		mutex_init(&dptr->pm_lock, NULL, MUTEX_SPIN,
5040e751525SEric Saxe 		    (void *)ipltospl(DISP_LEVEL));
5050e751525SEric Saxe 		CPUSET_ZERO(dptr->pm_cpus);
5060e751525SEric Saxe 		*dom_ptr = dptr;
5070e751525SEric Saxe 	}
5080e751525SEric Saxe 	CPUSET_ADD(dptr->pm_cpus, cp->cpu_id);
5090e751525SEric Saxe 	*mach_dom_state_ptr = dptr;
5100e751525SEric Saxe }
5110e751525SEric Saxe 
5120e751525SEric Saxe /*
5130e751525SEric Saxe  * Free C, P or T state power domains
5140e751525SEric Saxe  */
5150e751525SEric Saxe void
cpupm_free_domains(cpupm_state_domains_t ** dom_ptr)5160e751525SEric Saxe cpupm_free_domains(cpupm_state_domains_t **dom_ptr)
5170e751525SEric Saxe {
5180e751525SEric Saxe 	cpupm_state_domains_t *this_domain, *next_domain;
5190e751525SEric Saxe 
5200e751525SEric Saxe 	this_domain = *dom_ptr;
5210e751525SEric Saxe 	while (this_domain != NULL) {
5220e751525SEric Saxe 		next_domain = this_domain->pm_next;
5230e751525SEric Saxe 		mutex_destroy(&this_domain->pm_lock);
5240e751525SEric Saxe 		kmem_free((void *)this_domain,
5250e751525SEric Saxe 		    sizeof (cpupm_state_domains_t));
5260e751525SEric Saxe 		this_domain = next_domain;
5270e751525SEric Saxe 	}
5280e751525SEric Saxe 	*dom_ptr = NULL;
5290e751525SEric Saxe }
5300e751525SEric Saxe 
531444f66e7SMark Haywood /*
532444f66e7SMark Haywood  * Remove CPU from C, P or T state power domains
533444f66e7SMark Haywood  */
534444f66e7SMark Haywood void
cpupm_remove_domains(cpu_t * cp,int state,cpupm_state_domains_t ** dom_ptr)535444f66e7SMark Haywood cpupm_remove_domains(cpu_t *cp, int state, cpupm_state_domains_t **dom_ptr)
536444f66e7SMark Haywood {
537444f66e7SMark Haywood 	cpupm_mach_state_t *mach_state =
538444f66e7SMark Haywood 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
539444f66e7SMark Haywood 	cpupm_state_domains_t *dptr;
540444f66e7SMark Haywood 	uint32_t pm_domain;
541444f66e7SMark Haywood 
542444f66e7SMark Haywood 	ASSERT(mach_state);
543444f66e7SMark Haywood 
544444f66e7SMark Haywood 	switch (state) {
545444f66e7SMark Haywood 	case CPUPM_P_STATES:
546444f66e7SMark Haywood 		pm_domain = mach_state->ms_pstate.cma_domain->pm_domain;
547444f66e7SMark Haywood 		break;
548444f66e7SMark Haywood 	case CPUPM_T_STATES:
549444f66e7SMark Haywood 		pm_domain = mach_state->ms_tstate.cma_domain->pm_domain;
550444f66e7SMark Haywood 		break;
551444f66e7SMark Haywood 	case CPUPM_C_STATES:
552444f66e7SMark Haywood 		pm_domain = mach_state->ms_cstate.cma_domain->pm_domain;
553444f66e7SMark Haywood 		break;
554444f66e7SMark Haywood 	default:
555444f66e7SMark Haywood 		return;
556444f66e7SMark Haywood 	}
557444f66e7SMark Haywood 
558444f66e7SMark Haywood 	/*
559444f66e7SMark Haywood 	 * Find the CPU C, P or T state power domain
560444f66e7SMark Haywood 	 */
561444f66e7SMark Haywood 	for (dptr = *dom_ptr; dptr != NULL; dptr = dptr->pm_next) {
562444f66e7SMark Haywood 		if (dptr->pm_domain == pm_domain)
563444f66e7SMark Haywood 			break;
564444f66e7SMark Haywood 	}
565444f66e7SMark Haywood 
566444f66e7SMark Haywood 	/*
567444f66e7SMark Haywood 	 * return if no matched domain found
568444f66e7SMark Haywood 	 */
569444f66e7SMark Haywood 	if (dptr == NULL)
570444f66e7SMark Haywood 		return;
571444f66e7SMark Haywood 
572444f66e7SMark Haywood 	/*
573444f66e7SMark Haywood 	 * We found one matched power domain, remove CPU from its cpuset.
5746af9d452Saubrey.li@intel.com 	 * pm_lock(spin lock) here to avoid the race conditions between
575444f66e7SMark Haywood 	 * event change notification and cpu remove.
576444f66e7SMark Haywood 	 */
577444f66e7SMark Haywood 	mutex_enter(&dptr->pm_lock);
578444f66e7SMark Haywood 	if (CPU_IN_SET(dptr->pm_cpus, cp->cpu_id))
579444f66e7SMark Haywood 		CPUSET_DEL(dptr->pm_cpus, cp->cpu_id);
580444f66e7SMark Haywood 	mutex_exit(&dptr->pm_lock);
581444f66e7SMark Haywood }
582444f66e7SMark Haywood 
5830e751525SEric Saxe void
cpupm_alloc_ms_cstate(cpu_t * cp)5840e751525SEric Saxe cpupm_alloc_ms_cstate(cpu_t *cp)
5850e751525SEric Saxe {
5860e751525SEric Saxe 	cpupm_mach_state_t *mach_state;
5870e751525SEric Saxe 	cpupm_mach_acpi_state_t *ms_cstate;
5880e751525SEric Saxe 
5890e751525SEric Saxe 	mach_state = (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
5900e751525SEric Saxe 	ms_cstate = &mach_state->ms_cstate;
5910e751525SEric Saxe 	ASSERT(ms_cstate->cma_state.cstate == NULL);
5920e751525SEric Saxe 	ms_cstate->cma_state.cstate = kmem_zalloc(sizeof (cma_c_state_t),
5930e751525SEric Saxe 	    KM_SLEEP);
5940e751525SEric Saxe 	ms_cstate->cma_state.cstate->cs_next_cstate = CPU_ACPI_C1;
5950e751525SEric Saxe }
5960e751525SEric Saxe 
5970e751525SEric Saxe void
cpupm_free_ms_cstate(cpu_t * cp)5980e751525SEric Saxe cpupm_free_ms_cstate(cpu_t *cp)
5990e751525SEric Saxe {
6000e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
6010e751525SEric Saxe 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
6020e751525SEric Saxe 	cpupm_mach_acpi_state_t *ms_cstate = &mach_state->ms_cstate;
6030e751525SEric Saxe 
6040e751525SEric Saxe 	if (ms_cstate->cma_state.cstate != NULL) {
6050e751525SEric Saxe 		kmem_free(ms_cstate->cma_state.cstate, sizeof (cma_c_state_t));
6060e751525SEric Saxe 		ms_cstate->cma_state.cstate = NULL;
6070e751525SEric Saxe 	}
6080e751525SEric Saxe }
6090e751525SEric Saxe 
6100e751525SEric Saxe void
cpupm_state_change(cpu_t * cp,int level,int state)6110e751525SEric Saxe cpupm_state_change(cpu_t *cp, int level, int state)
6120e751525SEric Saxe {
6130e751525SEric Saxe 	cpupm_mach_state_t	*mach_state =
6140e751525SEric Saxe 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
6150e751525SEric Saxe 	cpupm_state_ops_t	*state_ops;
616584b574aSToomas Soome 	cpupm_state_domains_t	*state_domain;
6170e751525SEric Saxe 	cpuset_t		set;
6180e751525SEric Saxe 
6190e751525SEric Saxe 	DTRACE_PROBE2(cpupm__state__change, cpu_t *, cp, int, level);
6200e751525SEric Saxe 
6210e751525SEric Saxe 	if (mach_state == NULL) {
6220e751525SEric Saxe 		return;
6230e751525SEric Saxe 	}
6240e751525SEric Saxe 
6250e751525SEric Saxe 	switch (state) {
6260e751525SEric Saxe 	case CPUPM_P_STATES:
6270e751525SEric Saxe 		state_ops = mach_state->ms_pstate.cma_ops;
6280e751525SEric Saxe 		state_domain = mach_state->ms_pstate.cma_domain;
6290e751525SEric Saxe 		break;
6300e751525SEric Saxe 	case CPUPM_T_STATES:
6310e751525SEric Saxe 		state_ops = mach_state->ms_tstate.cma_ops;
6320e751525SEric Saxe 		state_domain = mach_state->ms_tstate.cma_domain;
6330e751525SEric Saxe 		break;
6340e751525SEric Saxe 	default:
635584b574aSToomas Soome 		return;
6360e751525SEric Saxe 	}
6370e751525SEric Saxe 
6380e751525SEric Saxe 	switch (state_domain->pm_type) {
6390e751525SEric Saxe 	case CPU_ACPI_SW_ANY:
6400e751525SEric Saxe 		/*
6410e751525SEric Saxe 		 * A request on any CPU in the domain transitions the domain
6420e751525SEric Saxe 		 */
6430e751525SEric Saxe 		CPUSET_ONLY(set, cp->cpu_id);
6440e751525SEric Saxe 		state_ops->cpus_change(set, level);
6450e751525SEric Saxe 		break;
6460e751525SEric Saxe 	case CPU_ACPI_SW_ALL:
6470e751525SEric Saxe 		/*
6480e751525SEric Saxe 		 * All CPUs in the domain must request the transition
6490e751525SEric Saxe 		 */
6500e751525SEric Saxe 	case CPU_ACPI_HW_ALL:
6510e751525SEric Saxe 		/*
6520e751525SEric Saxe 		 * P/T-state transitions are coordinated by the hardware
6530e751525SEric Saxe 		 * For now, request the transition on all CPUs in the domain,
6540e751525SEric Saxe 		 * but looking ahead we can probably be smarter about this.
6550e751525SEric Saxe 		 */
6560e751525SEric Saxe 		mutex_enter(&state_domain->pm_lock);
6570e751525SEric Saxe 		state_ops->cpus_change(state_domain->pm_cpus, level);
6580e751525SEric Saxe 		mutex_exit(&state_domain->pm_lock);
6590e751525SEric Saxe 		break;
6600e751525SEric Saxe 	default:
66100f97612SMark Haywood 		cmn_err(CE_NOTE, "Unknown domain coordination type: %d",
6620e751525SEric Saxe 		    state_domain->pm_type);
6630e751525SEric Saxe 	}
6640e751525SEric Saxe }
6650e751525SEric Saxe 
6660e751525SEric Saxe /*
6670e751525SEric Saxe  * CPU PM interfaces exposed to the CPU power manager
6680e751525SEric Saxe  */
6690e751525SEric Saxe /*ARGSUSED*/
6700e751525SEric Saxe id_t
cpupm_plat_domain_id(cpu_t * cp,cpupm_dtype_t type)6710e751525SEric Saxe cpupm_plat_domain_id(cpu_t *cp, cpupm_dtype_t type)
6720e751525SEric Saxe {
6730e751525SEric Saxe 	cpupm_mach_state_t	*mach_state =
6740e751525SEric Saxe 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
6750e751525SEric Saxe 
6760e751525SEric Saxe 	if ((mach_state == NULL) || (!cpupm_is_enabled(CPUPM_P_STATES) &&
6770e751525SEric Saxe 	    !cpupm_is_enabled(CPUPM_C_STATES))) {
6780e751525SEric Saxe 		return (CPUPM_NO_DOMAIN);
6790e751525SEric Saxe 	}
6800e751525SEric Saxe 	if (type == CPUPM_DTYPE_ACTIVE) {
6810e751525SEric Saxe 		/*
6820e751525SEric Saxe 		 * Return P-State domain for the specified CPU
6830e751525SEric Saxe 		 */
6840e751525SEric Saxe 		if (mach_state->ms_pstate.cma_domain) {
6850e751525SEric Saxe 			return (mach_state->ms_pstate.cma_domain->pm_domain);
6860e751525SEric Saxe 		}
6870e751525SEric Saxe 	} else if (type == CPUPM_DTYPE_IDLE) {
6880e751525SEric Saxe 		/*
6890e751525SEric Saxe 		 * Return C-State domain for the specified CPU
6900e751525SEric Saxe 		 */
6910e751525SEric Saxe 		if (mach_state->ms_cstate.cma_domain) {
6920e751525SEric Saxe 			return (mach_state->ms_cstate.cma_domain->pm_domain);
6930e751525SEric Saxe 		}
6940e751525SEric Saxe 	}
6950e751525SEric Saxe 	return (CPUPM_NO_DOMAIN);
6960e751525SEric Saxe }
6970e751525SEric Saxe 
6980e751525SEric Saxe uint_t
cpupm_plat_state_enumerate(cpu_t * cp,cpupm_dtype_t type,cpupm_state_t * states)6990e751525SEric Saxe cpupm_plat_state_enumerate(cpu_t *cp, cpupm_dtype_t type,
7000e751525SEric Saxe     cpupm_state_t *states)
7010e751525SEric Saxe {
702*2a9992ecSToomas Soome 	int	*speeds = NULL;
7030e751525SEric Saxe 	uint_t	nspeeds, i;
7040e751525SEric Saxe 
7050e751525SEric Saxe 	/*
7060e751525SEric Saxe 	 * Idle domain support unimplemented
7070e751525SEric Saxe 	 */
7080e751525SEric Saxe 	if (type != CPUPM_DTYPE_ACTIVE) {
7090e751525SEric Saxe 		return (0);
7100e751525SEric Saxe 	}
7110e751525SEric Saxe 	nspeeds = cpupm_get_speeds(cp, &speeds);
7120e751525SEric Saxe 
7130e751525SEric Saxe 	/*
7140e751525SEric Saxe 	 * If the caller passes NULL for states, just return the
7150e751525SEric Saxe 	 * number of states.
7160e751525SEric Saxe 	 */
7170e751525SEric Saxe 	if (states != NULL) {
7180e751525SEric Saxe 		for (i = 0; i < nspeeds; i++) {
7190e751525SEric Saxe 			states[i].cps_speed = speeds[i];
7200e751525SEric Saxe 			states[i].cps_handle = (cpupm_handle_t)i;
7210e751525SEric Saxe 		}
7220e751525SEric Saxe 	}
7230e751525SEric Saxe 	cpupm_free_speeds(speeds, nspeeds);
7240e751525SEric Saxe 	return (nspeeds);
7250e751525SEric Saxe }
7260e751525SEric Saxe 
7270e751525SEric Saxe /*ARGSUSED*/
7280e751525SEric Saxe int
cpupm_plat_change_state(cpu_t * cp,cpupm_state_t * state)7290e751525SEric Saxe cpupm_plat_change_state(cpu_t *cp, cpupm_state_t *state)
7300e751525SEric Saxe {
731444f66e7SMark Haywood 	if (!cpupm_is_ready(cp))
7320e751525SEric Saxe 		return (-1);
7330e751525SEric Saxe 
7340e751525SEric Saxe 	cpupm_state_change(cp, (int)state->cps_handle, CPUPM_P_STATES);
7350e751525SEric Saxe 
7360e751525SEric Saxe 	return (0);
7370e751525SEric Saxe }
7380e751525SEric Saxe 
7390e751525SEric Saxe /*ARGSUSED*/
7400e751525SEric Saxe /*
7410e751525SEric Saxe  * Note: It is the responsibility of the users of
7420e751525SEric Saxe  * cpupm_get_speeds() to free the memory allocated
7430e751525SEric Saxe  * for speeds using cpupm_free_speeds()
7440e751525SEric Saxe  */
7450e751525SEric Saxe uint_t
cpupm_get_speeds(cpu_t * cp,int ** speeds)7460e751525SEric Saxe cpupm_get_speeds(cpu_t *cp, int **speeds)
7470e751525SEric Saxe {
7480e751525SEric Saxe #ifndef __xpv
7490e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
7500e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
7510e751525SEric Saxe 	return (cpu_acpi_get_speeds(mach_state->ms_acpi_handle, speeds));
7520e751525SEric Saxe #else
7530e751525SEric Saxe 	return (0);
7540e751525SEric Saxe #endif
7550e751525SEric Saxe }
7560e751525SEric Saxe 
/*ARGSUSED*/
/*
 * Free a speeds array returned by cpupm_get_speeds().  nspeeds must be
 * the element count that cpupm_get_speeds() returned for this array.
 */
void
cpupm_free_speeds(int *speeds, uint_t nspeeds)
{
#ifndef __xpv
	/* On bare metal, hand the buffer back to the ACPI layer. */
	cpu_acpi_free_speeds(speeds, nspeeds);
#endif
}
7650e751525SEric Saxe 
7660e751525SEric Saxe /*
7670e751525SEric Saxe  * All CPU instances have been initialized successfully.
7680e751525SEric Saxe  */
7690e751525SEric Saxe boolean_t
cpupm_power_ready(cpu_t * cp)770444f66e7SMark Haywood cpupm_power_ready(cpu_t *cp)
7710e751525SEric Saxe {
772444f66e7SMark Haywood 	return (cpupm_is_enabled(CPUPM_P_STATES) && cpupm_is_ready(cp));
7730e751525SEric Saxe }
7740e751525SEric Saxe 
7750e751525SEric Saxe /*
7760e751525SEric Saxe  * All CPU instances have been initialized successfully.
7770e751525SEric Saxe  */
7780e751525SEric Saxe boolean_t
cpupm_throttle_ready(cpu_t * cp)779444f66e7SMark Haywood cpupm_throttle_ready(cpu_t *cp)
7800e751525SEric Saxe {
781444f66e7SMark Haywood 	return (cpupm_is_enabled(CPUPM_T_STATES) && cpupm_is_ready(cp));
7820e751525SEric Saxe }
7830e751525SEric Saxe 
7840e751525SEric Saxe /*
7850e751525SEric Saxe  * All CPU instances have been initialized successfully.
7860e751525SEric Saxe  */
7870e751525SEric Saxe boolean_t
cpupm_cstate_ready(cpu_t * cp)788444f66e7SMark Haywood cpupm_cstate_ready(cpu_t *cp)
7890e751525SEric Saxe {
790444f66e7SMark Haywood 	return (cpupm_is_enabled(CPUPM_C_STATES) && cpupm_is_ready(cp));
7910e751525SEric Saxe }
7920e751525SEric Saxe 
7930e751525SEric Saxe void
cpupm_notify_handler(ACPI_HANDLE obj,UINT32 val,void * ctx)7940e751525SEric Saxe cpupm_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
7950e751525SEric Saxe {
7960e751525SEric Saxe 	cpu_t *cp = ctx;
7970e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
7980e751525SEric Saxe 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
7990e751525SEric Saxe 	cpupm_notification_t *entry;
8000e751525SEric Saxe 
8010e751525SEric Saxe 	mutex_enter(&mach_state->ms_lock);
8020e751525SEric Saxe 	for (entry =  mach_state->ms_handlers; entry != NULL;
8030e751525SEric Saxe 	    entry = entry->nq_next) {
8040e751525SEric Saxe 		entry->nq_handler(obj, val, entry->nq_ctx);
8050e751525SEric Saxe 	}
8060e751525SEric Saxe 	mutex_exit(&mach_state->ms_lock);
8070e751525SEric Saxe }
8080e751525SEric Saxe 
8090e751525SEric Saxe /*ARGSUSED*/
8100e751525SEric Saxe void
cpupm_add_notify_handler(cpu_t * cp,CPUPM_NOTIFY_HANDLER handler,void * ctx)8110e751525SEric Saxe cpupm_add_notify_handler(cpu_t *cp, CPUPM_NOTIFY_HANDLER handler, void *ctx)
8120e751525SEric Saxe {
8130e751525SEric Saxe #ifndef __xpv
8140e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
8150e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
8160e751525SEric Saxe 	cpupm_notification_t *entry;
8170e751525SEric Saxe 
8180e751525SEric Saxe 	entry = kmem_zalloc(sizeof (cpupm_notification_t), KM_SLEEP);
8190e751525SEric Saxe 	entry->nq_handler = handler;
8200e751525SEric Saxe 	entry->nq_ctx = ctx;
8210e751525SEric Saxe 	mutex_enter(&mach_state->ms_lock);
8220e751525SEric Saxe 	if (mach_state->ms_handlers == NULL) {
8230e751525SEric Saxe 		entry->nq_next = NULL;
8240e751525SEric Saxe 		mach_state->ms_handlers = entry;
8250e751525SEric Saxe 		cpu_acpi_install_notify_handler(mach_state->ms_acpi_handle,
8260e751525SEric Saxe 		    cpupm_notify_handler, cp);
8270e751525SEric Saxe 
8280e751525SEric Saxe 	} else {
8290e751525SEric Saxe 		entry->nq_next = mach_state->ms_handlers;
8300e751525SEric Saxe 		mach_state->ms_handlers = entry;
8310e751525SEric Saxe 	}
8320e751525SEric Saxe 	mutex_exit(&mach_state->ms_lock);
8330e751525SEric Saxe #endif
8340e751525SEric Saxe }
8350e751525SEric Saxe 
8360e751525SEric Saxe /*ARGSUSED*/
8370e751525SEric Saxe static void
cpupm_free_notify_handlers(cpu_t * cp)8380e751525SEric Saxe cpupm_free_notify_handlers(cpu_t *cp)
8390e751525SEric Saxe {
8400e751525SEric Saxe #ifndef __xpv
8410e751525SEric Saxe 	cpupm_mach_state_t *mach_state =
8420e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
8430e751525SEric Saxe 	cpupm_notification_t *entry;
8440e751525SEric Saxe 	cpupm_notification_t *next;
8450e751525SEric Saxe 
8460e751525SEric Saxe 	mutex_enter(&mach_state->ms_lock);
8470e751525SEric Saxe 	if (mach_state->ms_handlers == NULL) {
8480e751525SEric Saxe 		mutex_exit(&mach_state->ms_lock);
8490e751525SEric Saxe 		return;
8500e751525SEric Saxe 	}
8510e751525SEric Saxe 	if (mach_state->ms_acpi_handle != NULL) {
8520e751525SEric Saxe 		cpu_acpi_remove_notify_handler(mach_state->ms_acpi_handle,
8530e751525SEric Saxe 		    cpupm_notify_handler);
8540e751525SEric Saxe 	}
8550e751525SEric Saxe 	entry = mach_state->ms_handlers;
8560e751525SEric Saxe 	while (entry != NULL) {
8570e751525SEric Saxe 		next = entry->nq_next;
8580e751525SEric Saxe 		kmem_free(entry, sizeof (cpupm_notification_t));
8590e751525SEric Saxe 		entry = next;
8600e751525SEric Saxe 	}
8610e751525SEric Saxe 	mach_state->ms_handlers = NULL;
8620e751525SEric Saxe 	mutex_exit(&mach_state->ms_lock);
8630e751525SEric Saxe #endif
8640e751525SEric Saxe }
8650e751525SEric Saxe 
8660e751525SEric Saxe /*
8670e751525SEric Saxe  * Get the current max speed from the ACPI _PPC object
8680e751525SEric Saxe  */
8690e751525SEric Saxe /*ARGSUSED*/
8700e751525SEric Saxe int
cpupm_get_top_speed(cpu_t * cp)8710e751525SEric Saxe cpupm_get_top_speed(cpu_t *cp)
8720e751525SEric Saxe {
8730e751525SEric Saxe #ifndef __xpv
874584b574aSToomas Soome 	cpupm_mach_state_t	*mach_state;
875584b574aSToomas Soome 	cpu_acpi_handle_t	handle;
876584b574aSToomas Soome 	int			plat_level;
8770e751525SEric Saxe 	uint_t			nspeeds;
8780e751525SEric Saxe 	int			max_level;
8790e751525SEric Saxe 
8800e751525SEric Saxe 	mach_state =
8810e751525SEric Saxe 	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
8820e751525SEric Saxe 	handle = mach_state->ms_acpi_handle;
8830e751525SEric Saxe 
8840e751525SEric Saxe 	cpu_acpi_cache_ppc(handle);
8850e751525SEric Saxe 	plat_level = CPU_ACPI_PPC(handle);
8860e751525SEric Saxe 
8870e751525SEric Saxe 	nspeeds = CPU_ACPI_PSTATES_COUNT(handle);
8880e751525SEric Saxe 
8890e751525SEric Saxe 	max_level = nspeeds - 1;
8900e751525SEric Saxe 	if ((plat_level < 0) || (plat_level > max_level)) {
8910e751525SEric Saxe 		cmn_err(CE_NOTE, "!cpupm_get_top_speed: CPU %d: "
8920e751525SEric Saxe 		    "_PPC out of range %d", cp->cpu_id, plat_level);
8930e751525SEric Saxe 		plat_level = 0;
8940e751525SEric Saxe 	}
8950e751525SEric Saxe 
8960e751525SEric Saxe 	return (plat_level);
8970e751525SEric Saxe #else
8980e751525SEric Saxe 	return (0);
8990e751525SEric Saxe #endif
9000e751525SEric Saxe }
9010e751525SEric Saxe 
9020e751525SEric Saxe /*
9030e751525SEric Saxe  * This notification handler is called whenever the ACPI _PPC
9040e751525SEric Saxe  * object changes. The _PPC is a sort of governor on power levels.
9050e751525SEric Saxe  * It sets an upper threshold on which, _PSS defined, power levels
9060e751525SEric Saxe  * are usuable. The _PPC value is dynamic and may change as properties
9070e751525SEric Saxe  * (i.e., thermal or AC source) of the system change.
9080e751525SEric Saxe  */
9090e751525SEric Saxe 
9100e751525SEric Saxe static void
cpupm_power_manage_notifications(void * ctx)9110e751525SEric Saxe cpupm_power_manage_notifications(void *ctx)
9120e751525SEric Saxe {
9130e751525SEric Saxe 	cpu_t			*cp = ctx;
9140e751525SEric Saxe 	int			top_speed;
9150e751525SEric Saxe 
9160e751525SEric Saxe 	top_speed = cpupm_get_top_speed(cp);
9170e751525SEric Saxe 	cpupm_redefine_max_activepwr_state(cp, top_speed);
9180e751525SEric Saxe }
9190e751525SEric Saxe 
9200e751525SEric Saxe /* ARGSUSED */
9210e751525SEric Saxe static void
cpupm_event_notify_handler(ACPI_HANDLE obj,UINT32 val,void * ctx)9220e751525SEric Saxe cpupm_event_notify_handler(ACPI_HANDLE obj, UINT32 val, void *ctx)
9230e751525SEric Saxe {
9240e751525SEric Saxe #ifndef __xpv
925d218c8f0SMark Haywood 
926d218c8f0SMark Haywood 	cpu_t *cp = ctx;
927d218c8f0SMark Haywood 	cpupm_mach_state_t *mach_state =
928d218c8f0SMark Haywood 	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
929d218c8f0SMark Haywood 
930d218c8f0SMark Haywood 	if (mach_state == NULL)
931d218c8f0SMark Haywood 		return;
932d218c8f0SMark Haywood 
9330e751525SEric Saxe 	/*
9340e751525SEric Saxe 	 * Currently, we handle _TPC,_CST and _PPC change notifications.
9350e751525SEric Saxe 	 */
936d218c8f0SMark Haywood 	if (val == CPUPM_TPC_CHANGE_NOTIFICATION &&
937d218c8f0SMark Haywood 	    mach_state->ms_caps & CPUPM_T_STATES) {
9380e751525SEric Saxe 		cpupm_throttle_manage_notification(ctx);
939d218c8f0SMark Haywood 	} else if (val == CPUPM_CST_CHANGE_NOTIFICATION &&
940d218c8f0SMark Haywood 	    mach_state->ms_caps & CPUPM_C_STATES) {
9410e751525SEric Saxe 		cpuidle_manage_cstates(ctx);
942d218c8f0SMark Haywood 	} else if (val == CPUPM_PPC_CHANGE_NOTIFICATION &&
943d218c8f0SMark Haywood 	    mach_state->ms_caps & CPUPM_P_STATES) {
9440e751525SEric Saxe 		cpupm_power_manage_notifications(ctx);
9450e751525SEric Saxe 	}
9460e751525SEric Saxe #endif
9470e751525SEric Saxe }
9480e751525SEric Saxe 
/*
 * Update cpupm cstate data each time CPU exits idle.
 * Records the raw (unscaled) high-resolution time of idle exit; the idle
 * interval itself is accounted in cpupm_next_cstate() on the next idle
 * entry, where more work can be tolerated.
 */
void
cpupm_wakeup_cstate_data(cma_c_state_t *cs_data, hrtime_t end)
{
	cs_data->cs_idle_exit = end;
}
9570e751525SEric Saxe 
/*
 * Determine next cstate based on cpupm data.
 * Update cpupm cstate data each time CPU goes idle.
 * Do as much as possible in the idle state bookkeeping function because the
 * performance impact while idle is minimal compared to in the wakeup function
 * when there is real work to do.
 */
uint32_t
cpupm_next_cstate(cma_c_state_t *cs_data, cpu_acpi_cstate_t *cstates,
    uint32_t cs_count, hrtime_t start)
{
	hrtime_t duration;
	hrtime_t ave_interval;
	hrtime_t ave_idle_time;
	uint32_t i, smpl_cnt;

	/*
	 * Account the idle interval that just ended: cs_idle_enter was set
	 * on the previous idle entry, cs_idle_exit by
	 * cpupm_wakeup_cstate_data() on the wakeup in between.
	 */
	duration = cs_data->cs_idle_exit - cs_data->cs_idle_enter;
	scalehrtime(&duration);
	cs_data->cs_idle += duration;
	cs_data->cs_idle_enter = start;

	smpl_cnt = ++cs_data->cs_cnt;
	cs_data->cs_smpl_len = start - cs_data->cs_smpl_start;
	scalehrtime(&cs_data->cs_smpl_len);
	/* Re-evaluate the C-state choice only once per sampling interval. */
	if (cs_data->cs_smpl_len > cpupm_cs_sample_interval) {
		cs_data->cs_smpl_idle = cs_data->cs_idle;
		cs_data->cs_idle = 0;
		cs_data->cs_smpl_idle_pct = ((100 * cs_data->cs_smpl_idle) /
		    cs_data->cs_smpl_len);

		/* Start a fresh sampling window. */
		cs_data->cs_smpl_start = start;
		cs_data->cs_cnt = 0;

		/*
		 * Strand level C-state policy
		 * The cpu_acpi_cstate_t *cstates array is not required to
		 * have an entry for both CPU_ACPI_C2 and CPU_ACPI_C3.
		 * There are cs_count entries in the cstates array.
		 * cs_data->cs_next_cstate contains the index of the next
		 * C-state this CPU should enter.
		 */
		ASSERT(cstates[0].cs_type == CPU_ACPI_C1);

		/*
		 * Will CPU be idle long enough to save power?
		 * (The / 1000 converts the scaled hrtime to the units of
		 * cs_latency — presumably microseconds per ACPI _CST; verify
		 * against cpu_acpi.)  Each pass below only ever lowers
		 * cs_count, capping how deep a C-state may be chosen.
		 */
		ave_idle_time = (cs_data->cs_smpl_idle / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_idle_time < (cstates[i].cs_latency *
			    cpupm_cs_idle_save_tunable)) {
				/* Sleeps too short to amortize state i. */
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, i);
			}
		}

		/*
		 * Wakeup often (even when non-idle time is very short)?
		 * Some producer/consumer type loads fall into this category.
		 */
		ave_interval = (cs_data->cs_smpl_len / smpl_cnt) / 1000;
		for (i = 1; i < cs_count; ++i) {
			if (ave_interval <= (cstates[i].cs_latency *
			    cpupm_cs_idle_cost_tunable)) {
				/* Entry/exit latency would dominate. */
				cs_count = i;
				DTRACE_PROBE2(cpupm__next__cstate, cpu_t *,
				    CPU, int, (CPU_MAX_CSTATES + i));
			}
		}

		/*
		 * Idle percent
		 */
		for (i = 1; i < cs_count; ++i) {
			switch (cstates[i].cs_type) {
			case CPU_ACPI_C2:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C2_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;

			case CPU_ACPI_C3:
				if (cs_data->cs_smpl_idle_pct <
				    cpupm_C3_idle_pct_tunable) {
					cs_count = i;
					DTRACE_PROBE2(cpupm__next__cstate,
					    cpu_t *, CPU, int,
					    ((2 * CPU_MAX_CSTATES) + i));
				}
				break;
			}
		}

		/* Deepest C-state index that survived all three filters. */
		cs_data->cs_next_cstate = cs_count - 1;
	}

	return (cs_data->cs_next_cstate);
}
1060