10e751525SEric Saxe /*
20e751525SEric Saxe * CDDL HEADER START
30e751525SEric Saxe *
40e751525SEric Saxe * The contents of this file are subject to the terms of the
50e751525SEric Saxe * Common Development and Distribution License (the "License").
60e751525SEric Saxe * You may not use this file except in compliance with the License.
70e751525SEric Saxe *
80e751525SEric Saxe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90e751525SEric Saxe * or http://www.opensolaris.org/os/licensing.
100e751525SEric Saxe * See the License for the specific language governing permissions
110e751525SEric Saxe * and limitations under the License.
120e751525SEric Saxe *
130e751525SEric Saxe * When distributing Covered Code, include this CDDL HEADER in each
140e751525SEric Saxe * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150e751525SEric Saxe * If applicable, add the following below this CDDL HEADER, with the
160e751525SEric Saxe * fields enclosed by brackets "[]" replaced with your own identifying
170e751525SEric Saxe * information: Portions Copyright [yyyy] [name of copyright owner]
180e751525SEric Saxe *
190e751525SEric Saxe * CDDL HEADER END
200e751525SEric Saxe */
210e751525SEric Saxe /*
220e751525SEric Saxe * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
230e751525SEric Saxe * Use is subject to license terms.
240e751525SEric Saxe */
25cef70d2cSBill Holler /*
26a3114836SGerry Liu * Copyright (c) 2009-2010, Intel Corporation.
27cef70d2cSBill Holler * All rights reserved.
28cef70d2cSBill Holler */
29*a9cc46cfSRobert Mustacchi /*
30*a9cc46cfSRobert Mustacchi * Copyright 2019 Joyent, Inc.
31*a9cc46cfSRobert Mustacchi */
320e751525SEric Saxe
330e751525SEric Saxe #include <sys/x86_archext.h>
340e751525SEric Saxe #include <sys/machsystm.h>
350e751525SEric Saxe #include <sys/x_call.h>
360e751525SEric Saxe #include <sys/stat.h>
370e751525SEric Saxe #include <sys/acpi/acpi.h>
380e751525SEric Saxe #include <sys/acpica.h>
390e751525SEric Saxe #include <sys/cpu_acpi.h>
400e751525SEric Saxe #include <sys/cpu_idle.h>
410e751525SEric Saxe #include <sys/cpupm.h>
42fb2caebeSRandy Fishel #include <sys/cpu_event.h>
430e751525SEric Saxe #include <sys/hpet.h>
440e751525SEric Saxe #include <sys/archsystm.h>
450e751525SEric Saxe #include <vm/hat_i86.h>
460e751525SEric Saxe #include <sys/dtrace.h>
470e751525SEric Saxe #include <sys/sdt.h>
480e751525SEric Saxe #include <sys/callb.h>
490e751525SEric Saxe
50cef70d2cSBill Holler #define CSTATE_USING_HPET 1
51cef70d2cSBill Holler #define CSTATE_USING_LAT 2
52cef70d2cSBill Holler
53a3114836SGerry Liu #define CPU_IDLE_STOP_TIMEOUT 1000
54a3114836SGerry Liu
550e751525SEric Saxe extern void cpu_idle_adaptive(void);
569aa01d98SBill Holler extern uint32_t cpupm_next_cstate(cma_c_state_t *cs_data,
579aa01d98SBill Holler cpu_acpi_cstate_t *cstates, uint32_t cs_count, hrtime_t start);
580e751525SEric Saxe
590e751525SEric Saxe static int cpu_idle_init(cpu_t *);
600e751525SEric Saxe static void cpu_idle_fini(cpu_t *);
61444f66e7SMark Haywood static void cpu_idle_stop(cpu_t *);
620e751525SEric Saxe static boolean_t cpu_deep_idle_callb(void *arg, int code);
630e751525SEric Saxe static boolean_t cpu_idle_cpr_callb(void *arg, int code);
640e751525SEric Saxe static void acpi_cpu_cstate(cpu_acpi_cstate_t *cstate);
650e751525SEric Saxe
66cef70d2cSBill Holler static boolean_t cstate_use_timer(hrtime_t *lapic_expire, int timer);
67cef70d2cSBill Holler
68cef70d2cSBill Holler /*
69cef70d2cSBill Holler * the flag of always-running local APIC timer.
70cef70d2cSBill Holler * the flag of HPET Timer use in deep cstate.
71cef70d2cSBill Holler */
72cef70d2cSBill Holler static boolean_t cpu_cstate_arat = B_FALSE;
73cef70d2cSBill Holler static boolean_t cpu_cstate_hpet = B_FALSE;
74cef70d2cSBill Holler
750e751525SEric Saxe /*
760e751525SEric Saxe * Interfaces for modules implementing Intel's deep c-state.
770e751525SEric Saxe */
780e751525SEric Saxe cpupm_state_ops_t cpu_idle_ops = {
790e751525SEric Saxe "Generic ACPI C-state Support",
800e751525SEric Saxe cpu_idle_init,
810e751525SEric Saxe cpu_idle_fini,
82444f66e7SMark Haywood NULL,
83444f66e7SMark Haywood cpu_idle_stop
840e751525SEric Saxe };
850e751525SEric Saxe
860e751525SEric Saxe static kmutex_t cpu_idle_callb_mutex;
870e751525SEric Saxe static callb_id_t cpu_deep_idle_callb_id;
880e751525SEric Saxe static callb_id_t cpu_idle_cpr_callb_id;
890e751525SEric Saxe static uint_t cpu_idle_cfg_state;
900e751525SEric Saxe
910e751525SEric Saxe static kmutex_t cpu_idle_mutex;
920e751525SEric Saxe
930e751525SEric Saxe cpu_idle_kstat_t cpu_idle_kstat = {
940e751525SEric Saxe { "address_space_id", KSTAT_DATA_STRING },
950e751525SEric Saxe { "latency", KSTAT_DATA_UINT32 },
960e751525SEric Saxe { "power", KSTAT_DATA_UINT32 },
970e751525SEric Saxe };
980e751525SEric Saxe
990e751525SEric Saxe /*
1000e751525SEric Saxe * kstat update function of the c-state info
1010e751525SEric Saxe */
1020e751525SEric Saxe static int
cpu_idle_kstat_update(kstat_t * ksp,int flag)1030e751525SEric Saxe cpu_idle_kstat_update(kstat_t *ksp, int flag)
1040e751525SEric Saxe {
1050e751525SEric Saxe cpu_acpi_cstate_t *cstate = ksp->ks_private;
1060e751525SEric Saxe
1070e751525SEric Saxe if (flag == KSTAT_WRITE) {
1080e751525SEric Saxe return (EACCES);
1090e751525SEric Saxe }
1100e751525SEric Saxe
1110e751525SEric Saxe if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
1120e751525SEric Saxe kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
1130e751525SEric Saxe "FFixedHW");
1140e751525SEric Saxe } else if (cstate->cs_addrspace_id == ACPI_ADR_SPACE_SYSTEM_IO) {
1150e751525SEric Saxe kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
1160e751525SEric Saxe "SystemIO");
1170e751525SEric Saxe } else {
1180e751525SEric Saxe kstat_named_setstr(&cpu_idle_kstat.addr_space_id,
1190e751525SEric Saxe "Unsupported");
1200e751525SEric Saxe }
1210e751525SEric Saxe
1220e751525SEric Saxe cpu_idle_kstat.cs_latency.value.ui32 = cstate->cs_latency;
1230e751525SEric Saxe cpu_idle_kstat.cs_power.value.ui32 = cstate->cs_power;
1240e751525SEric Saxe
1250e751525SEric Saxe return (0);
1260e751525SEric Saxe }
1270e751525SEric Saxe
128cef70d2cSBill Holler /*
129cef70d2cSBill Holler * Used during configuration callbacks to manage implementation specific
130cef70d2cSBill Holler * details of the hardware timer used during Deep C-state.
131cef70d2cSBill Holler */
132cef70d2cSBill Holler boolean_t
cstate_timer_callback(int code)133cef70d2cSBill Holler cstate_timer_callback(int code)
134cef70d2cSBill Holler {
135cef70d2cSBill Holler if (cpu_cstate_arat) {
136cef70d2cSBill Holler return (B_TRUE);
137cef70d2cSBill Holler } else if (cpu_cstate_hpet) {
138cef70d2cSBill Holler return (hpet.callback(code));
139cef70d2cSBill Holler }
140cef70d2cSBill Holler return (B_FALSE);
141cef70d2cSBill Holler }
142cef70d2cSBill Holler
143cef70d2cSBill Holler /*
144cef70d2cSBill Holler * Some Local APIC Timers do not work during Deep C-states.
145cef70d2cSBill Holler * The Deep C-state idle function uses this function to ensure it is using a
146cef70d2cSBill Holler * hardware timer that works during Deep C-states. This function also
147cef70d2cSBill Holler * switches the timer back to the LACPI Timer after Deep C-state.
148cef70d2cSBill Holler */
149cef70d2cSBill Holler static boolean_t
cstate_use_timer(hrtime_t * lapic_expire,int timer)150cef70d2cSBill Holler cstate_use_timer(hrtime_t *lapic_expire, int timer)
151cef70d2cSBill Holler {
152cef70d2cSBill Holler if (cpu_cstate_arat)
153cef70d2cSBill Holler return (B_TRUE);
154cef70d2cSBill Holler
155cef70d2cSBill Holler /*
156cef70d2cSBill Holler * We have to return B_FALSE if no arat or hpet support
157cef70d2cSBill Holler */
158cef70d2cSBill Holler if (!cpu_cstate_hpet)
159cef70d2cSBill Holler return (B_FALSE);
160cef70d2cSBill Holler
161cef70d2cSBill Holler switch (timer) {
162cef70d2cSBill Holler case CSTATE_USING_HPET:
163cef70d2cSBill Holler return (hpet.use_hpet_timer(lapic_expire));
164cef70d2cSBill Holler case CSTATE_USING_LAT:
165cef70d2cSBill Holler hpet.use_lapic_timer(*lapic_expire);
166cef70d2cSBill Holler return (B_TRUE);
167cef70d2cSBill Holler default:
168cef70d2cSBill Holler return (B_FALSE);
169cef70d2cSBill Holler }
170cef70d2cSBill Holler }
171cef70d2cSBill Holler
/*
 * c-state wakeup function.
 * Similar to cpu_wakeup and cpu_wakeup_mwait except this function deals
 * with CPUs asleep in MWAIT, HLT, or ACPI Deep C-State.
 *
 * cp is the CPU onto whose dispatch queue a thread was just made
 * runnable; bound is non-zero if that thread is bound to cp, in which
 * case no alternate CPU is awakened.
 *
 * NOTE(review): the ordering here is load-bearing — the haltset bit must
 * be cleared *before* the MWAIT store/poke so it pairs correctly with the
 * check in acpi_cpu_cstate() (see the comment block there).
 */
void
cstate_wakeup(cpu_t *cp, int bound)
{
	struct machcpu	*mcpu = &(cp->cpu_m);
	volatile uint32_t *mcpu_mwait = mcpu->mcpu_mwait;
	cpupart_t	*cpu_part;
	uint_t		cpu_found;
	processorid_t	cpu_sid;

	cpu_part = cp->cpu_part;
	cpu_sid = cp->cpu_seqid;
	/*
	 * Clear the halted bit for that CPU since it will be woken up
	 * in a moment.
	 */
	if (bitset_in_set(&cpu_part->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cpu_part->cp_haltset, cpu_sid);

		/*
		 * We may find the current CPU present in the halted cpuset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_idle().
		 * Waking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cp != CPU) {
			/*
			 * Use correct wakeup mechanism: a store to the
			 * monitored line for a CPU parked in MWAIT, an IPI
			 * otherwise (HLT or deep C-state via I/O port).
			 */
			if ((mcpu_mwait != NULL) &&
			    (*mcpu_mwait == MWAIT_HALTED))
				MWAIT_WAKEUP(cp);
			else
				poke_cpu(cp->cpu_id);
		}
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cp->cpu_thread == cp->cpu_idle_thread ||
		    cp->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if the thread we just enqueued
	 * is bound.
	 */
	if (bound)
		return;


	/*
	 * See if there's any other halted CPUs. If there are, then
	 * select one, and awaken it.
	 * It's possible that after we find a CPU, somebody else
	 * will awaken it before we get the chance.
	 * In that case, look again.
	 */
	do {
		cpu_found = bitset_find(&cpu_part->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;

	} while (bitset_atomic_test_and_del(&cpu_part->cp_haltset,
	    cpu_found) < 0);

	/*
	 * Must use correct wakeup mechanism to avoid lost wakeup of
	 * alternate cpu.
	 */
	if (cpu_found != CPU->cpu_seqid) {
		mcpu_mwait = cpu_seq[cpu_found]->cpu_m.mcpu_mwait;
		if ((mcpu_mwait != NULL) && (*mcpu_mwait == MWAIT_HALTED))
			MWAIT_WAKEUP(cpu_seq[cpu_found]);
		else
			poke_cpu(cpu_seq[cpu_found]->cpu_id);
	}
}
2620e751525SEric Saxe
263fb2caebeSRandy Fishel /*
264fb2caebeSRandy Fishel * Function called by CPU idle notification framework to check whether CPU
265fb2caebeSRandy Fishel * has been awakened. It will be called with interrupt disabled.
266fb2caebeSRandy Fishel * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle
267fb2caebeSRandy Fishel * notification framework.
268fb2caebeSRandy Fishel */
269fb2caebeSRandy Fishel static void
acpi_cpu_mwait_check_wakeup(void * arg)270fb2caebeSRandy Fishel acpi_cpu_mwait_check_wakeup(void *arg)
271fb2caebeSRandy Fishel {
272fb2caebeSRandy Fishel volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
273fb2caebeSRandy Fishel
274fb2caebeSRandy Fishel ASSERT(arg != NULL);
275fb2caebeSRandy Fishel if (*mcpu_mwait != MWAIT_HALTED) {
276fb2caebeSRandy Fishel /*
277fb2caebeSRandy Fishel * CPU has been awakened, notify CPU idle notification system.
278fb2caebeSRandy Fishel */
279fb2caebeSRandy Fishel cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
280fb2caebeSRandy Fishel } else {
281fb2caebeSRandy Fishel /*
282fb2caebeSRandy Fishel * Toggle interrupt flag to detect pending interrupts.
283fb2caebeSRandy Fishel * If interrupt happened, do_interrupt() will notify CPU idle
284fb2caebeSRandy Fishel * notification framework so no need to call cpu_idle_exit()
285fb2caebeSRandy Fishel * here.
286fb2caebeSRandy Fishel */
287fb2caebeSRandy Fishel sti();
288fb2caebeSRandy Fishel SMT_PAUSE();
289fb2caebeSRandy Fishel cli();
290fb2caebeSRandy Fishel }
291fb2caebeSRandy Fishel }
292fb2caebeSRandy Fishel
293fb2caebeSRandy Fishel static void
acpi_cpu_mwait_ipi_check_wakeup(void * arg)294fb2caebeSRandy Fishel acpi_cpu_mwait_ipi_check_wakeup(void *arg)
295fb2caebeSRandy Fishel {
296fb2caebeSRandy Fishel volatile uint32_t *mcpu_mwait = (volatile uint32_t *)arg;
297fb2caebeSRandy Fishel
298fb2caebeSRandy Fishel ASSERT(arg != NULL);
299fb2caebeSRandy Fishel if (*mcpu_mwait != MWAIT_WAKEUP_IPI) {
300fb2caebeSRandy Fishel /*
301fb2caebeSRandy Fishel * CPU has been awakened, notify CPU idle notification system.
302fb2caebeSRandy Fishel */
303fb2caebeSRandy Fishel cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
304fb2caebeSRandy Fishel } else {
305fb2caebeSRandy Fishel /*
306fb2caebeSRandy Fishel * Toggle interrupt flag to detect pending interrupts.
307fb2caebeSRandy Fishel * If interrupt happened, do_interrupt() will notify CPU idle
308fb2caebeSRandy Fishel * notification framework so no need to call cpu_idle_exit()
309fb2caebeSRandy Fishel * here.
310fb2caebeSRandy Fishel */
311fb2caebeSRandy Fishel sti();
312fb2caebeSRandy Fishel SMT_PAUSE();
313fb2caebeSRandy Fishel cli();
314fb2caebeSRandy Fishel }
315fb2caebeSRandy Fishel }
316fb2caebeSRandy Fishel
/*
 * Fallback wakeup check used when no MWAIT line is available to inspect
 * (see acpi_cpu_cstate(): this is the default check_func).  Called with
 * interrupts disabled; arg is unused.
 */
/*ARGSUSED*/
static void
acpi_cpu_check_wakeup(void *arg)
{
	/*
	 * Toggle interrupt flag to detect pending interrupts.
	 * If interrupt happened, do_interrupt() will notify CPU idle
	 * notification framework so no need to call cpu_idle_exit() here.
	 */
	sti();
	SMT_PAUSE();
	cli();
}
330fb2caebeSRandy Fishel
/*
 * enter deep c-state handler
 *
 * Idle the calling CPU in the ACPI C-state described by cstate.  Entry is
 * either via MWAIT on the C-state address (FFixedHW) or via a read of the
 * C-state's I/O port (SystemIO).  Handles the lost-wakeup protocol with
 * cstate_wakeup() (haltset bit + monitored mcpu_mwait word) and arranges
 * a proxy timer (HPET) when the LAPIC timer stops in deep C-states.
 * Returns with interrupts enabled and this CPU removed from the haltset.
 */
static void
acpi_cpu_cstate(cpu_acpi_cstate_t *cstate)
{
	volatile uint32_t *mcpu_mwait = CPU->cpu_m.mcpu_mwait;
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	hrtime_t lapic_expire;
	uint8_t type = cstate->cs_addrspace_id;
	uint32_t cs_type = cstate->cs_type;
	int hset_update = 1;
	boolean_t using_timer;
	cpu_idle_check_wakeup_t check_func = &acpi_cpu_check_wakeup;

	/*
	 * Set our mcpu_mwait here, so we can tell if anyone tries to
	 * wake us between now and when we call mwait.  No other cpu will
	 * attempt to set our mcpu_mwait until we add ourself to the haltset.
	 *
	 * SystemIO C-states are exited by IPI, so park with the
	 * MWAIT_WAKEUP_IPI token; FFixedHW uses MWAIT_HALTED.  The matching
	 * check_func lets the idle-notification framework poll the same
	 * token.
	 */
	if (mcpu_mwait) {
		if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
			*mcpu_mwait = MWAIT_WAKEUP_IPI;
			check_func = &acpi_cpu_mwait_ipi_check_wakeup;
		} else {
			*mcpu_mwait = MWAIT_HALTED;
			check_func = &acpi_cpu_mwait_check_wakeup;
		}
	}

	/*
	 * If this CPU is online, and there are multiple CPUs
	 * in the system, then we should note our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitmap. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (cpup->cpu_flags & CPU_OFFLINE || ncpus == 1)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitmask
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpuset is checked to determine who
	 * (if anyone) should be awakened. We therefore need to first
	 * add ourselves to the halted cpuset, and then check if there
	 * is any work available.
	 *
	 * Note that memory barriers after updating the HALTED flag
	 * are not necessary since an atomic operation (updating the bitmap)
	 * immediately follows. On x86 the atomic operation acts as a
	 * memory barrier for the update of cpu_disp_flags.
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitmask, and a write to our mcpu_mwait.
	 *
	 * disp_anywork() checks disp_nrunnable, so we do not have to later.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted.
	 *
	 * The local APIC timer can stop in ACPI C2 and deeper c-states.
	 * Try to program the HPET hardware to substitute for this CPU's
	 * LAPIC timer.
	 * cstate_use_timer() could disable the LAPIC Timer.  Make sure
	 * to start the LAPIC Timer again before leaving this function.
	 *
	 * Disable interrupts here so we will awaken immediately after halting
	 * if someone tries to poke us between now and the time we actually
	 * halt.
	 */
	cli();
	using_timer = cstate_use_timer(&lapic_expire, CSTATE_USING_HPET);

	/*
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the cstate_wakeup() will pop us out of the halted
	 * state.
	 *
	 * This means that the ordering of the cstate_wakeup() and the clearing
	 * of the bit by cpu_wakeup is important.
	 * cpu_wakeup() must clear our mc_haltset bit, and then call
	 * cstate_wakeup().
	 * acpi_cpu_cstate() must disable interrupts, then check for the bit.
	 */
	if (hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid) == 0) {
		/* Someone already woke us: restore the LAPIC timer and bail. */
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		return;
	}

	/*
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 */
	if (cpup->cpu_disp->disp_nrunnable != 0) {
		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (using_timer == B_FALSE) {

		(void) cstate_use_timer(&lapic_expire,
		    CSTATE_USING_LAT);
		sti();

		/*
		 * We are currently unable to program the HPET to act as this
		 * CPU's proxy LAPIC timer. This CPU cannot enter C2 or deeper
		 * because no timer is set to wake it up while its LAPIC timer
		 * stalls in deep C-States.
		 * Enter C1 instead.
		 *
		 * cstate_wake_cpu() will wake this CPU with an IPI which
		 * works with MWAIT.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) == MWAIT_HALTED) {
			if (cpu_idle_enter(IDLE_STATE_C1, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				/* Re-check after MONITOR to avoid lost wakeup */
				if ((*mcpu_mwait & ~MWAIT_WAKEUP_IPI) ==
				    MWAIT_HALTED) {
					i86_mwait(0, 0);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}

		/*
		 * We're no longer halted
		 */
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	if (type == ACPI_ADR_SPACE_FIXED_HARDWARE) {
		/*
		 * We're on our way to being halted.
		 * To avoid a lost wakeup, arm the monitor before checking
		 * if another cpu wrote to mcpu_mwait to wake us up.
		 */
		i86_monitor(mcpu_mwait, 0, 0);
		if (*mcpu_mwait == MWAIT_HALTED) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_HALTED) {
					i86_mwait(cstate->cs_address, 1);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	} else if (type == ACPI_ADR_SPACE_SYSTEM_IO) {
		uint32_t value;
		ACPI_TABLE_FADT *gbl_FADT;

		if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
			if (cpu_idle_enter((uint_t)cs_type, 0,
			    check_func, (void *)mcpu_mwait) == 0) {
				if (*mcpu_mwait == MWAIT_WAKEUP_IPI) {
					/*
					 * The following calls will cause us to
					 * halt which will cause the store
					 * buffer to be repartitioned,
					 * potentially exposing us to the Intel
					 * CPU vulnerability MDS. As such, we
					 * need to explicitly call that here.
					 * The other idle methods in this
					 * function do this automatically as
					 * part of the implementation of
					 * i86_mwait().
					 */
					x86_md_clear();
					/* Entering the C-state via port read */
					(void) cpu_acpi_read_port(
					    cstate->cs_address, &value, 8);
					/*
					 * Reading the PM timer forces
					 * completion of the C-state entry
					 * (per ACPI C3 entry sequence).
					 */
					acpica_get_global_FADT(&gbl_FADT);
					(void) cpu_acpi_read_port(
					    gbl_FADT->XPmTimerBlock.Address,
					    &value, 32);
				}
				cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE);
			}
		}
	}

	/*
	 * The LAPIC timer may have stopped in deep c-state.
	 * Reprogram this CPU's LAPIC here before enabling interrupts.
	 */
	(void) cstate_use_timer(&lapic_expire, CSTATE_USING_LAT);
	sti();

	/*
	 * We're no longer halted
	 */
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}
5640e751525SEric Saxe
/*
 * Idle the present CPU, deep c-state is supported
 *
 * Installed as the idle routine when deep C-states are enabled: pick the
 * best C-state for the expected idle period via cpupm_next_cstate(),
 * enter it (C1 through the non-deep idle routine, C2/C3 through
 * acpi_cpu_cstate()), then record the residency statistics on wakeup.
 */
void
cpu_acpi_idle(void)
{
	cpu_t *cp = CPU;
	cpu_acpi_handle_t handle;
	cma_c_state_t *cs_data;
	cpu_acpi_cstate_t *cstates;
	hrtime_t start, end;
	int cpu_max_cstates;
	uint32_t cs_indx;
	uint16_t cs_type;

	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	handle = mach_state->ms_acpi_handle;
	ASSERT(CPU_ACPI_CSTATES(handle) != NULL);

	cs_data = mach_state->ms_cstate.cma_state.cstate;
	cstates = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	ASSERT(cstates != NULL);
	/* Clamp to the table size this module supports. */
	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);
	if (cpu_max_cstates > CPU_MAX_CSTATES)
		cpu_max_cstates = CPU_MAX_CSTATES;
	if (cpu_max_cstates == 1) {	/* no ACPI c-state data */
		(*non_deep_idle_cpu)();
		return;
	}

	start = gethrtime_unscaled();

	/* Select the target C-state based on recent idle behavior. */
	cs_indx = cpupm_next_cstate(cs_data, cstates, cpu_max_cstates, start);

	cs_type = cstates[cs_indx].cs_type;

	switch (cs_type) {
	default:
		/* FALLTHROUGH */
	case CPU_ACPI_C1:
		(*non_deep_idle_cpu)();
		break;

	case CPU_ACPI_C2:
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;

	case CPU_ACPI_C3:
		/*
		 * All supported Intel processors maintain cache coherency
		 * during C3.  Currently when entering C3 processors flush
		 * core caches to higher level shared cache. The shared cache
		 * maintains state and supports probes during C3.
		 * Consequently there is no need to handle cache coherency
		 * and Bus Master activity here with the cache flush, BM_RLD
		 * bit, BM_STS bit, nor PM2_CNT.ARB_DIS mechanisms described
		 * in section 8.1.4 of the ACPI Specification 4.0.
		 */
		acpi_cpu_cstate(&cstates[cs_indx]);
		break;
	}

	end = gethrtime_unscaled();

	/*
	 * Update statistics
	 */
	cpupm_wakeup_cstate_data(cs_data, end);
}
6350e751525SEric Saxe
6360e751525SEric Saxe boolean_t
cpu_deep_cstates_supported(void)6370e751525SEric Saxe cpu_deep_cstates_supported(void)
6380e751525SEric Saxe {
6390e751525SEric Saxe extern int idle_cpu_no_deep_c;
6400e751525SEric Saxe
6410e751525SEric Saxe if (idle_cpu_no_deep_c)
6420e751525SEric Saxe return (B_FALSE);
6430e751525SEric Saxe
6440e751525SEric Saxe if (!cpuid_deep_cstates_supported())
6450e751525SEric Saxe return (B_FALSE);
6460e751525SEric Saxe
647cef70d2cSBill Holler if (cpuid_arat_supported()) {
648cef70d2cSBill Holler cpu_cstate_arat = B_TRUE;
649cef70d2cSBill Holler return (B_TRUE);
650cef70d2cSBill Holler }
651cef70d2cSBill Holler
652cef70d2cSBill Holler if ((hpet.supported == HPET_FULL_SUPPORT) &&
653cef70d2cSBill Holler hpet.install_proxy()) {
654cef70d2cSBill Holler cpu_cstate_hpet = B_TRUE;
655cef70d2cSBill Holler return (B_TRUE);
656cef70d2cSBill Holler }
6570e751525SEric Saxe
658cef70d2cSBill Holler return (B_FALSE);
6590e751525SEric Saxe }
6600e751525SEric Saxe
/*
 * Validate that this processor supports deep cstate and if so,
 * get the c-state data from ACPI and cache it.
 */
static int
cpu_idle_init(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	char name[KSTAT_STRLEN];
	int cpu_max_cstates, i;
	int ret;

	/*
	 * Cache the C-state specific ACPI data.  A negative return means
	 * the BIOS-exported C-state objects could not be parsed (worth a
	 * console note); any nonzero return means deep C-states are
	 * unusable here, so undo partial setup and fail.
	 */
	if ((ret = cpu_acpi_cache_cstate_data(handle)) != 0) {
		if (ret < 0)
			cmn_err(CE_NOTE,
			    "!Support for CPU deep idle states is being "
			    "disabled due to errors parsing ACPI C-state "
			    "objects exported by BIOS.");
		cpu_idle_fini(cp);
		return (-1);
	}

	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);

	cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

	/*
	 * Create one "cstate" kstat per supported C-state, named after
	 * the state's ACPI type (c1, c2, ...).  kstat creation failure
	 * is non-fatal; that state simply loses observability.
	 */
	for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
		(void) snprintf(name, KSTAT_STRLEN - 1, "c%d", cstate->cs_type);
		/*
		 * Allocate, initialize and install cstate kstat
		 */
		cstate->cs_ksp = kstat_create("cstate", cp->cpu_id,
		    name, "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (cpu_idle_kstat) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (cstate->cs_ksp == NULL) {
			cmn_err(CE_NOTE, "kstat_create(c_state) fail");
		} else {
			/* Virtual kstat: point at the shared template. */
			cstate->cs_ksp->ks_data = &cpu_idle_kstat;
			cstate->cs_ksp->ks_lock = &cpu_idle_mutex;
			cstate->cs_ksp->ks_update = cpu_idle_kstat_update;
			cstate->cs_ksp->ks_data_size += MAXNAMELEN;
			cstate->cs_ksp->ks_private = cstate;
			kstat_install(cstate->cs_ksp);
		}
		cstate++;
	}

	cpupm_alloc_domains(cp, CPUPM_C_STATES);
	cpupm_alloc_ms_cstate(cp);

	if (cpu_deep_cstates_supported()) {
		uint32_t value;

		/*
		 * Register the deep-idle and CPR callbacks once system-wide;
		 * an id of 0 means no prior CPU has registered them yet.
		 */
		mutex_enter(&cpu_idle_callb_mutex);
		if (cpu_deep_idle_callb_id == (callb_id_t)0)
			cpu_deep_idle_callb_id = callb_add(&cpu_deep_idle_callb,
			    (void *)NULL, CB_CL_CPU_DEEP_IDLE, "cpu_deep_idle");
		if (cpu_idle_cpr_callb_id == (callb_id_t)0)
			cpu_idle_cpr_callb_id = callb_add(&cpu_idle_cpr_callb,
			    (void *)NULL, CB_CL_CPR_PM, "cpu_idle_cpr");
		mutex_exit(&cpu_idle_callb_mutex);


		/*
		 * All supported CPUs (Nehalem and later) will remain in C3
		 * during Bus Master activity.
		 * All CPUs set ACPI_BITREG_BUS_MASTER_RLD to 0 here if it
		 * is not already 0 before enabling Deeper C-states.
		 */
		cpu_acpi_get_register(ACPI_BITREG_BUS_MASTER_RLD, &value);
		if (value & 1)
			cpu_acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
	}

	return (0);
}
7460e751525SEric Saxe
/*
 * Free resources allocated by cpu_idle_init().
 */
static void
cpu_idle_fini(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i;

	/*
	 * idle cpu points back to the generic one
	 */
	idle_cpu = cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
	disp_enq_thread = non_deep_idle_disp_enq_thread;

	/* Delete the per-C-state kstats created by cpu_idle_init(). */
	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}

	/* Release per-CPU C-state state and the cached ACPI data. */
	cpupm_free_ms_cstate(cp);
	cpupm_free_domains(&cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);

	/*
	 * Unregister the deep-idle and CPR callbacks, resetting the ids
	 * to 0 so a subsequent cpu_idle_init() can re-register them.
	 */
	mutex_enter(&cpu_idle_callb_mutex);
	if (cpu_deep_idle_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_deep_idle_callb_id);
		cpu_deep_idle_callb_id = (callb_id_t)0;
	}
	if (cpu_idle_cpr_callb_id != (callb_id_t)0) {
		(void) callb_delete(cpu_idle_cpr_callb_id);
		cpu_idle_cpr_callb_id = (callb_id_t)0;
	}
	mutex_exit(&cpu_idle_callb_mutex);
}
7910e751525SEric Saxe
/*
 * This function is introduced here to solve a race condition
 * between the master and the slave to touch c-state data structure.
 * After the slave calls this idle function to switch to the non
 * deep idle function, the master can go on to reclaim the resource.
 *
 * Runs on the slave CPU itself: cpu_idle_stop() installs it as that
 * CPU's idle routine and then waits for the switch to take effect.
 */
static void
cpu_idle_stop_sync(void)
{
	/* switch to the non deep idle function */
	CPU->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
}
804a3114836SGerry Liu
/*
 * Detach deep C-state support from a single CPU: first coax the CPU
 * off the deep idle path (see cpu_idle_stop_sync()), then free its
 * C-state kstats, domain membership and cached ACPI data.
 */
static void
cpu_idle_stop(cpu_t *cp)
{
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)(cp->cpu_m.mcpu_pm_mach_state);
	cpu_acpi_handle_t handle = mach_state->ms_acpi_handle;
	cpu_acpi_cstate_t *cstate;
	uint_t cpu_max_cstates, i = 0;

	mutex_enter(&cpu_idle_callb_mutex);
	if (idle_cpu == cpu_idle_adaptive) {
		/*
		 * invoke the slave to call synchronous idle function.
		 */
		cp->cpu_m.mcpu_idle_cpu = cpu_idle_stop_sync;
		poke_cpu(cp->cpu_id);

		/*
		 * wait until the slave switchs to non deep idle function,
		 * so that the master is safe to go on to reclaim the resource.
		 */
		while (cp->cpu_m.mcpu_idle_cpu != non_deep_idle_cpu) {
			drv_usecwait(10);
			/* complain periodically but keep waiting */
			if ((++i % CPU_IDLE_STOP_TIMEOUT) == 0)
				cmn_err(CE_NOTE, "!cpu_idle_stop: the slave"
				    " idle stop timeout");
		}
	}
	mutex_exit(&cpu_idle_callb_mutex);

	/* Delete this CPU's per-C-state kstats, if any were created. */
	cstate = (cpu_acpi_cstate_t *)CPU_ACPI_CSTATES(handle);
	if (cstate) {
		cpu_max_cstates = cpu_acpi_get_max_cstates(handle);

		for (i = CPU_ACPI_C1; i <= cpu_max_cstates; i++) {
			if (cstate->cs_ksp != NULL)
				kstat_delete(cstate->cs_ksp);
			cstate++;
		}
	}
	cpupm_free_ms_cstate(cp);
	cpupm_remove_domains(cp, CPUPM_C_STATES, &cpupm_cstate_domains);
	cpu_acpi_free_cstate_data(handle);
}
849444f66e7SMark Haywood
/*
 * PM callback for enabling/disabling deep C-state usage.  Swaps the
 * system idle routine and dispatcher wakeup hook between the deep and
 * non-deep variants, tracking the current setting in the
 * CPU_IDLE_DEEP_CFG bit of cpu_idle_cfg_state (set == disabled).
 * Returns B_FALSE only when the cstate timer refuses to re-enable.
 */
/*ARGSUSED*/
static boolean_t
cpu_deep_idle_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case PM_DEFAULT_CPU_DEEP_IDLE:
		/*
		 * Default policy is same as enable
		 */
		/*FALLTHROUGH*/
	case PM_ENABLE_CPU_DEEP_IDLE:
		/* Already enabled (config bit clear) - nothing to do. */
		if ((cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG) == 0)
			break;

		if (cstate_timer_callback(PM_ENABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
			cpu_idle_cfg_state &= ~CPU_IDLE_DEEP_CFG;
		} else {
			rslt = B_FALSE;
		}
		break;

	case PM_DISABLE_CPU_DEEP_IDLE:
		/* Already disabled (config bit set) - nothing to do. */
		if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
			break;

		/*
		 * Fall back to the non-deep idle loop before notifying
		 * the cstate timer; the wakeup hook and config bit only
		 * change if the timer callback agrees.
		 */
		idle_cpu = non_deep_idle_cpu;
		if (cstate_timer_callback(PM_DISABLE_CPU_DEEP_IDLE)) {
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cpu_idle_cfg_state |= CPU_IDLE_DEEP_CFG;
		}
		break;

	default:
		cmn_err(CE_NOTE, "!cpu deep_idle_callb: invalid code %d\n",
		    code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}
8950e751525SEric Saxe
/*
 * CPR (suspend/resume) callback: switch to the non-deep idle loop at
 * checkpoint time, and restore the deep idle hooks on resume unless
 * the user has disabled them (CPU_IDLE_DEEP_CFG set).  Returns
 * B_FALSE if the cstate timer cannot be restarted on resume.
 */
/*ARGSUSED*/
static boolean_t
cpu_idle_cpr_callb(void *arg, int code)
{
	boolean_t rslt = B_TRUE;

	mutex_enter(&cpu_idle_callb_mutex);
	switch (code) {
	case CB_CODE_CPR_RESUME:
		if (cstate_timer_callback(CB_CODE_CPR_RESUME)) {
			/*
			 * Do not enable dispatcher hooks if disabled by user.
			 */
			if (cpu_idle_cfg_state & CPU_IDLE_DEEP_CFG)
				break;

			disp_enq_thread = cstate_wakeup;
			idle_cpu = cpu_idle_adaptive;
		} else {
			rslt = B_FALSE;
		}
		break;

	case CB_CODE_CPR_CHKPT:
		/* Revert to non-deep idle before the system checkpoints. */
		idle_cpu = non_deep_idle_cpu;
		disp_enq_thread = non_deep_idle_disp_enq_thread;
		(void) cstate_timer_callback(CB_CODE_CPR_CHKPT);
		break;

	default:
		cmn_err(CE_NOTE, "!cpudvr cpr_callb: invalid code %d\n", code);
		break;
	}
	mutex_exit(&cpu_idle_callb_mutex);
	return (rslt);
}
9320e751525SEric Saxe
/*
 * handle _CST notification
 *
 * Re-evaluates the ACPI C-state data for every CPU in the notified
 * CPU's C-state dependency domain, and points each CPU's idle routine
 * at the deep (ACPI) or non-deep variant depending on how many
 * C-states are now available.
 */
void
cpuidle_cstate_instance(cpu_t *cp)
{
#ifndef __xpv
	cpupm_mach_state_t *mach_state =
	    (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
	cpu_acpi_handle_t handle;
	struct machcpu *mcpu;
	cpuset_t dom_cpu_set;
	kmutex_t *pm_lock;
	int result = 0;
	processorid_t cpu_id;

	if (mach_state == NULL) {
		return;
	}

	ASSERT(mach_state->ms_cstate.cma_domain != NULL);
	/* Local copy: CPUs are removed from it as they are processed. */
	dom_cpu_set = mach_state->ms_cstate.cma_domain->pm_cpus;
	pm_lock = &mach_state->ms_cstate.cma_domain->pm_lock;

	/*
	 * Do for all the CPU's in the domain
	 */
	mutex_enter(pm_lock);
	do {
		CPUSET_FIND(dom_cpu_set, cpu_id);
		if (cpu_id == CPUSET_NOTINSET)
			break;

		ASSERT(cpu_id >= 0 && cpu_id < NCPU);
		cp = cpu[cpu_id];
		mach_state = (cpupm_mach_state_t *)
		    cp->cpu_m.mcpu_pm_mach_state;
		/* Bail out entirely if a CPU lacks C-state capability. */
		if (!(mach_state->ms_caps & CPUPM_C_STATES)) {
			mutex_exit(pm_lock);
			return;
		}
		handle = mach_state->ms_acpi_handle;
		ASSERT(handle != NULL);

		/*
		 * re-evaluate cstate object
		 */
		if (cpu_acpi_cache_cstate_data(handle) != 0) {
			cmn_err(CE_WARN, "Cannot re-evaluate the cpu c-state"
			    " object Instance: %d", cpu_id);
		}
		mcpu = &(cp->cpu_m);
		mcpu->max_cstates = cpu_acpi_get_max_cstates(handle);
		if (mcpu->max_cstates > CPU_ACPI_C1) {
			/* Multiple C-states: take the deep idle path. */
			(void) cstate_timer_callback(
			    CST_EVENT_MULTIPLE_CSTATES);
			disp_enq_thread = cstate_wakeup;
			cp->cpu_m.mcpu_idle_cpu = cpu_acpi_idle;
		} else if (mcpu->max_cstates == CPU_ACPI_C1) {
			/* Only C1 remains: revert to non-deep idle. */
			disp_enq_thread = non_deep_idle_disp_enq_thread;
			cp->cpu_m.mcpu_idle_cpu = non_deep_idle_cpu;
			(void) cstate_timer_callback(CST_EVENT_ONE_CSTATE);
		}

		/*
		 * Atomically drop this CPU from the local working set;
		 * a negative result means the delete raced, so retry.
		 */
		CPUSET_ATOMIC_XDEL(dom_cpu_set, cpu_id, result);
	} while (result < 0);
	mutex_exit(pm_lock);
#endif
}
10020e751525SEric Saxe
10030e751525SEric Saxe /*
10040e751525SEric Saxe * handle the number or the type of available processor power states change
10050e751525SEric Saxe */
10060e751525SEric Saxe void
cpuidle_manage_cstates(void * ctx)10070e751525SEric Saxe cpuidle_manage_cstates(void *ctx)
10080e751525SEric Saxe {
10090e751525SEric Saxe cpu_t *cp = ctx;
10100e751525SEric Saxe cpupm_mach_state_t *mach_state =
10110e751525SEric Saxe (cpupm_mach_state_t *)cp->cpu_m.mcpu_pm_mach_state;
10120e751525SEric Saxe boolean_t is_ready;
10130e751525SEric Saxe
10140e751525SEric Saxe if (mach_state == NULL) {
10150e751525SEric Saxe return;
10160e751525SEric Saxe }
10170e751525SEric Saxe
10180e751525SEric Saxe /*
10190e751525SEric Saxe * We currently refuse to power manage if the CPU is not ready to
10200e751525SEric Saxe * take cross calls (cross calls fail silently if CPU is not ready
10210e751525SEric Saxe * for it).
10220e751525SEric Saxe *
1023444f66e7SMark Haywood * Additionally, for x86 platforms we cannot power manage an instance,
1024444f66e7SMark Haywood * until it has been initialized.
10250e751525SEric Saxe */
1026444f66e7SMark Haywood is_ready = (cp->cpu_flags & CPU_READY) && cpupm_cstate_ready(cp);
10270e751525SEric Saxe if (!is_ready)
10280e751525SEric Saxe return;
10290e751525SEric Saxe
10300e751525SEric Saxe cpuidle_cstate_instance(cp);
10310e751525SEric Saxe }
1032