/* xref: /illumos-gate/usr/src/uts/i86pc/os/mp_pc.c (revision 86ef0a63) */
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Copyright (c) 2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * Welcome to the world of the "real mode platter".
 * See also startup.c, mpcore.s and apic.c for related routines.
 */

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/cpuvar.h>
#include <sys/cpu_module.h>
#include <sys/kmem.h>
#include <sys/archsystm.h>
#include <sys/machsystm.h>
#include <sys/controlregs.h>
#include <sys/x86_archext.h>
#include <sys/smp_impldefs.h>
#include <sys/sysmacros.h>
#include <sys/mach_mmu.h>
#include <sys/promif.h>
#include <sys/cpu.h>
#include <sys/cpu_event.h>
#include <sys/sunndi.h>
#include <sys/fs/dv_node.h>
#include <vm/hat_i86.h>
#include <vm/as.h>

extern cpuset_t cpu_ready_set;

extern int  mp_start_cpu_common(cpu_t *cp, boolean_t boot);
extern void real_mode_start_cpu(void);
extern void real_mode_start_cpu_end(void);
extern void real_mode_stop_cpu_stage1(void);
extern void real_mode_stop_cpu_stage1_end(void);
extern void real_mode_stop_cpu_stage2(void);
extern void real_mode_stop_cpu_stage2_end(void);

void rmp_gdt_init(rm_platter_t *);

/*
 * Fill up the real mode platter to make it easy for real mode code to
 * kick it off. This area should really be one passed from boot to the
 * kernel, guaranteed to be below 1MB and aligned to 16 bytes. It should
 * also have identical physical and virtual addresses in paged mode.
 */
static ushort_t *warm_reset_vector = NULL;

int
mach_cpucontext_init(void)
{
	ushort_t *vec;
	ulong_t addr;
	struct rm_platter *rm = (struct rm_platter *)rm_platter_va;

	if (!(vec = (ushort_t *)psm_map_phys(WARM_RESET_VECTOR,
	    sizeof (vec), PROT_READ | PROT_WRITE)))
		return (-1);

	/*
	 * Set up the secondary CPU BIOS boot-up vector: write the page
	 * offset to 0x467 and the page frame number to 0x469.
	 */
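	/*
	 * The warm-reset vector is a real-mode far pointer: vec[0] takes
	 * the offset of the startup code within its page and vec[1] the
	 * segment of the page base (the page-aligned physical address
	 * shifted right by 4).
	 */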
	addr = (ulong_t)((caddr_t)rm->rm_code - (caddr_t)rm) + rm_platter_pa;
	vec[0] = (ushort_t)(addr & PAGEOFFSET);
	vec[1] = (ushort_t)((addr & (0xfffff & PAGEMASK)) >> 4);
	warm_reset_vector = vec;

	/* Map the real mode platter into kas so the kernel can access it. */
	hat_devload(kas.a_hat,
	    (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    btop(rm_platter_pa), PROT_READ | PROT_WRITE | PROT_EXEC,
	    HAT_LOAD_NOCONSIST);

	/* Copy the CPU startup code to the rm_platter if we are still booting. */
	if (!plat_dr_enabled()) {
		ASSERT((size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu <= RM_PLATTER_CODE_SIZE);
		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
		    (size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu);
	}

	return (0);
}

void
mach_cpucontext_fini(void)
{
	if (warm_reset_vector)
		psm_unmap_phys((caddr_t)warm_reset_vector,
		    sizeof (warm_reset_vector));
	hat_unload(kas.a_hat, (caddr_t)(uintptr_t)rm_platter_pa, MMU_PAGESIZE,
	    HAT_UNLOAD);
}

extern void *long_mode_64(void);

/*ARGSUSED*/
void
rmp_gdt_init(rm_platter_t *rm)
{

	/* Use the kas address space for the CPU startup thread. */
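	/*
	 * The platter's rm_pdbr field holds a 32-bit %cr3 value, so the
	 * kernel's top-level page table must live below 4G for the
	 * starting CPU to be able to load it.
	 */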
	if (mmu_ptob(kas.a_hat->hat_htable->ht_pfn) > 0xffffffffUL) {
		panic("Cannot initialize CPUs; kernel's 64-bit page tables\n"
		    "located above 4G in physical memory (@ 0x%lx)",
		    mmu_ptob(kas.a_hat->hat_htable->ht_pfn));
	}

	/*
	 * Setup pseudo-descriptors for temporary GDT and IDT for use ONLY
	 * by code in real_mode_start_cpu():
	 *
	 * GDT[0]:  NULL selector
	 * GDT[1]:  64-bit CS: Long = 1, Present = 1, bits 12, 11 = 1
	 *
	 * Clear the IDT as interrupts will be off and a limit of 0 will cause
	 * the CPU to triple fault and reset on an NMI, seemingly as reasonable
	 * a course of action as any other, though it may cause the entire
	 * platform to reset in some cases...
	 */
	rm->rm_temp_gdt[0] = 0;
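	/*
	 * 0x20980000000000 encodes access byte 0x98 (present, DPL 0, code
	 * segment) and flags nibble 0x2 (L = 1, i.e. 64-bit); base and
	 * limit are ignored in long mode.
	 */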
	rm->rm_temp_gdt[TEMPGDT_KCODE64] = 0x20980000000000ULL;

	rm->rm_temp_gdt_lim = (ushort_t)(sizeof (rm->rm_temp_gdt) - 1);
	rm->rm_temp_gdt_base = rm_platter_pa +
	    (uint32_t)offsetof(rm_platter_t, rm_temp_gdt);
	rm->rm_temp_idt_lim = 0;
	rm->rm_temp_idt_base = 0;

	/*
	 * The CPU has to jump to long_mode_64 through an identity-mapped
	 * address, so calculate that address here.
	 */
	rm->rm_longmode64_addr = rm_platter_pa +
	    (uint32_t)((uintptr_t)long_mode_64 -
	    (uintptr_t)real_mode_start_cpu);
}

static void *
mach_cpucontext_alloc_tables(struct cpu *cp)
{
	tss_t *ntss;
	struct cpu_tables *ct;
	size_t ctsize;

	/*
	 * Allocate space for stack, tss, gdt and idt. We round the size
	 * allotted for cpu_tables up, so that the TSS is on a unique page.
	 * This is more efficient when running in virtual machines.
	 */
	ctsize = P2ROUNDUP(sizeof (*ct), PAGESIZE);
	ct = kmem_zalloc(ctsize, KM_SLEEP);
	if ((uintptr_t)ct & PAGEOFFSET)
		panic("mach_cpucontext_alloc_tables: cpu%d misaligned tables",
		    cp->cpu_id);

	ntss = cp->cpu_tss = &ct->ct_tss;

	uintptr_t va;
	size_t len;

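	/*
	 * Each IST entry below points one byte past the end of its stack,
	 * since the stack grows down from there.
	 */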
	/*
	 * #DF (double fault).
	 */
	ntss->tss_ist1 = (uintptr_t)&ct->ct_stack1[sizeof (ct->ct_stack1)];

	/*
	 * NMI (non-maskable interrupt)
	 */
	ntss->tss_ist2 = (uintptr_t)&ct->ct_stack2[sizeof (ct->ct_stack2)];

	/*
	 * #MC (machine check exception / hardware error)
	 */
	ntss->tss_ist3 = (uintptr_t)&ct->ct_stack3[sizeof (ct->ct_stack3)];

	/*
	 * #DB, #BP debug interrupts and KDI/kmdb
	 */
	ntss->tss_ist4 = (uintptr_t)&cp->cpu_m.mcpu_kpti_dbg.kf_tr_rsp;

	if (kpti_enable == 1) {
		/*
		 * #GP, #PF, #SS fault interrupts
		 */
		ntss->tss_ist5 = (uintptr_t)&cp->cpu_m.mcpu_kpti_flt.kf_tr_rsp;

		/*
		 * Used by all other interrupts
		 */
		ntss->tss_ist6 = (uint64_t)&cp->cpu_m.mcpu_kpti.kf_tr_rsp;

		/*
		 * On AMD64 we need to make sure that all of the pages of the
		 * struct cpu_tables are punched through into the user page
		 * tables for kpti.
		 *
		 * The final page will always be the TSS, so treat that
		 * separately.
		 */
		for (va = (uintptr_t)ct, len = ctsize - MMU_PAGESIZE;
		    len >= MMU_PAGESIZE;
		    len -= MMU_PAGESIZE, va += MMU_PAGESIZE) {
			/* The doublefault stack must be RW */
			hati_cpu_punchin(cp, va, PROT_READ | PROT_WRITE);
		}
		ASSERT3U((uintptr_t)ntss, ==, va);
		hati_cpu_punchin(cp, (uintptr_t)ntss, PROT_READ);
	}

	/*
	 * Set the I/O bit map offset equal to the TSS segment limit,
	 * which yields an empty I/O permission map. This causes all user
	 * I/O instructions to generate a #GP fault.
	 */
	ntss->tss_bitmapbase = sizeof (*ntss);

	/*
	 * Setup kernel tss.
	 */
	set_syssegd((system_desc_t *)&cp->cpu_gdt[GDT_KTSS], cp->cpu_tss,
	    sizeof (*cp->cpu_tss) - 1, SDT_SYSTSS, SEL_KPL);

	return (ct);
}

void *
mach_cpucontext_xalloc(struct cpu *cp, int optype)
{
	size_t len;
	struct cpu_tables *ct;
	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
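	/* Set once the stage 2 CPU halt code has been copied to the platter. */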
	static int cpu_halt_code_ready;

	if (optype == MACH_CPUCONTEXT_OP_STOP) {
		ASSERT(plat_dr_enabled());

		/*
		 * The WARM_RESET_VECTOR has a limitation that the physical
		 * address written to it must be page-aligned. To work around
		 * this limitation, the CPU stop code has been split into
		 * two stages.
		 * The stage 2 code, which implements the real logic to halt
		 * CPUs, is copied to the rm_cpu_halt_code field in the real
		 * mode platter. The stage 1 code, which simply jumps to the
		 * stage 2 code in the rm_cpu_halt_code field, is copied to
		 * the rm_code field in the real mode platter and may be
		 * overwritten after the CPU has been stopped.
		 */
		if (!cpu_halt_code_ready) {
			/*
			 * The rm_cpu_halt_code field in the real mode platter
			 * is used by the CPU stop code only. So only copy the
			 * CPU stop stage 2 code into the rm_cpu_halt_code
			 * field on the first call.
			 */
			len = (size_t)real_mode_stop_cpu_stage2_end -
			    (size_t)real_mode_stop_cpu_stage2;
			ASSERT(len <= RM_PLATTER_CPU_HALT_CODE_SIZE);
			bcopy((caddr_t)real_mode_stop_cpu_stage2,
			    (caddr_t)rm->rm_cpu_halt_code, len);
			cpu_halt_code_ready = 1;
		}

		/*
		 * The rm_code field in the real mode platter is shared by
		 * the CPU start, CPU stop, CPR and fast reboot code. So copy
		 * the CPU stop stage 1 code into the rm_code field every time.
		 */
		len = (size_t)real_mode_stop_cpu_stage1_end -
		    (size_t)real_mode_stop_cpu_stage1;
		ASSERT(len <= RM_PLATTER_CODE_SIZE);
		bcopy((caddr_t)real_mode_stop_cpu_stage1,
		    (caddr_t)rm->rm_code, len);
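		/*
		 * Reset the handshake word; the stage 2 halt code is
		 * expected to set it to 0xdead once the CPU is parked
		 * (mp_cpu_poweroff() polls for that value).
		 */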
		rm->rm_cpu_halted = 0;

		return (cp->cpu_m.mcpu_mach_ctx_ptr);
	} else if (optype != MACH_CPUCONTEXT_OP_START) {
		return (NULL);
	}

	/*
	 * Tables only need to be allocated when starting a CPU; tables
	 * allocated at start are reused when stopping the CPU.
	 */
	ct = mach_cpucontext_alloc_tables(cp);
	if (ct == NULL) {
		return (NULL);
	}

	/* Copy CPU startup code to rm_platter for CPU hot-add operations. */
	if (plat_dr_enabled()) {
		bcopy((caddr_t)real_mode_start_cpu, (caddr_t)rm->rm_code,
		    (size_t)real_mode_start_cpu_end -
		    (size_t)real_mode_start_cpu);
	}

	/*
	 * Now copy all that we've set up onto the real mode platter
	 * for the real mode code to digest as part of starting the cpu.
	 */
	rm->rm_idt_base = cp->cpu_idt;
	rm->rm_idt_lim = sizeof (*cp->cpu_idt) * NIDT - 1;
	rm->rm_gdt_base = cp->cpu_gdt;
	rm->rm_gdt_lim = sizeof (*cp->cpu_gdt) * NGDT - 1;

	/*
	 * CPU needs to access kernel address space after powering on.
	 */
	rm->rm_pdbr = MAKECR3(kas.a_hat->hat_htable->ht_pfn, PCID_NONE);
	rm->rm_cpu = cp->cpu_id;

	/*
	 * We need to mask off any bits set on our boot CPU that can't apply
	 * while the subject CPU is initializing.  If appropriate, they are
	 * enabled later on.
	 */
	rm->rm_cr4 = getcr4();
	rm->rm_cr4 &= ~(CR4_MCE | CR4_PCE | CR4_PCIDE);

	rmp_gdt_init(rm);

	return (ct);
}

3572df1fe9cSrandyf void
mach_cpucontext_xfree(struct cpu * cp,void * arg,int err,int optype)358a3114836SGerry Liu mach_cpucontext_xfree(struct cpu *cp, void *arg, int err, int optype)
3592df1fe9cSrandyf {
360a3114836SGerry Liu 	struct cpu_tables *ct = arg;
3612df1fe9cSrandyf 
362a3114836SGerry Liu 	ASSERT(&ct->ct_tss == cp->cpu_tss);
363a3114836SGerry Liu 	if (optype == MACH_CPUCONTEXT_OP_START) {
364a3114836SGerry Liu 		switch (err) {
365a3114836SGerry Liu 		case 0:
366a3114836SGerry Liu 			/*
367a3114836SGerry Liu 			 * Save pointer for reuse when stopping CPU.
368a3114836SGerry Liu 			 */
369a3114836SGerry Liu 			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
370a3114836SGerry Liu 			break;
371a3114836SGerry Liu 		case ETIMEDOUT:
372a3114836SGerry Liu 			/*
373a3114836SGerry Liu 			 * The processor was poked, but failed to start before
374a3114836SGerry Liu 			 * we gave up waiting for it.  In case it starts later,
375a3114836SGerry Liu 			 * don't free anything.
376a3114836SGerry Liu 			 */
377a3114836SGerry Liu 			cp->cpu_m.mcpu_mach_ctx_ptr = arg;
378a3114836SGerry Liu 			break;
379a3114836SGerry Liu 		default:
380a3114836SGerry Liu 			/*
381a3114836SGerry Liu 			 * Some other, passive, error occurred.
382a3114836SGerry Liu 			 */
383a3114836SGerry Liu 			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
384a3114836SGerry Liu 			cp->cpu_tss = NULL;
385a3114836SGerry Liu 			break;
386a3114836SGerry Liu 		}
387a3114836SGerry Liu 	} else if (optype == MACH_CPUCONTEXT_OP_STOP) {
388a3114836SGerry Liu 		switch (err) {
389a3114836SGerry Liu 		case 0:
390a3114836SGerry Liu 			/*
391a3114836SGerry Liu 			 * Free resources allocated when starting CPU.
392a3114836SGerry Liu 			 */
393a3114836SGerry Liu 			kmem_free(ct, P2ROUNDUP(sizeof (*ct), PAGESIZE));
394a3114836SGerry Liu 			cp->cpu_tss = NULL;
395a3114836SGerry Liu 			cp->cpu_m.mcpu_mach_ctx_ptr = NULL;
396a3114836SGerry Liu 			break;
397a3114836SGerry Liu 		default:
398a3114836SGerry Liu 			/*
399a3114836SGerry Liu 			 * Don't touch table pointer in case of failure.
400a3114836SGerry Liu 			 */
401a3114836SGerry Liu 			break;
402a3114836SGerry Liu 		}
403a3114836SGerry Liu 	} else {
404a3114836SGerry Liu 		ASSERT(0);
405a3114836SGerry Liu 	}
406a3114836SGerry Liu }
407ae115bc7Smrj 
void *
mach_cpucontext_alloc(struct cpu *cp)
{
	return (mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_START));
}

void
mach_cpucontext_free(struct cpu *cp, void *arg, int err)
{
	mach_cpucontext_xfree(cp, arg, err, MACH_CPUCONTEXT_OP_START);
}

/*
 * "Enter monitor."  Called via cross-call from stop_other_cpus().
 */
int
mach_cpu_halt(xc_arg_t arg1, xc_arg_t arg2 __unused, xc_arg_t arg3 __unused)
{
	char *msg = (char *)arg1;

	if (msg)
		prom_printf("%s\n", msg);

	/*CONSTANTCONDITION*/
	while (1)
		;
	return (0);
}

void
mach_cpu_idle(void)
{
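	/*
	 * x86_md_clear() is the microarchitectural-data-sampling (MDS)
	 * mitigation hook; it is invoked before halting so that buffer
	 * contents are not left to be sampled across the idle period.
	 */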
	x86_md_clear();
	i86_halt();
}

void
mach_cpu_pause(volatile char *safe)
{
	/*
	 * This cpu is now safe.
	 */
	*safe = PAUSE_WAIT;
	membar_enter(); /* make sure stores are flushed */

	/*
	 * Now we wait.  When we are allowed to continue, safe
	 * will be set to PAUSE_IDLE.
	 */
	while (*safe != PAUSE_IDLE)
		SMT_PAUSE();
}

/*
 * Power on the target CPU.
 */
int
mp_cpu_poweron(struct cpu *cp)
{
	int error;
	cpuset_t tempset;
	processorid_t cpuid;

	ASSERT(cp != NULL);
	cpuid = cp->cpu_id;
	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
		return (ENOTSUP);
	} else if (cpuid < 0 || cpuid >= max_ncpus) {
		return (EINVAL);
	}

	/*
	 * The current x86 implementation of mp_cpu_configure() and
	 * mp_cpu_poweron() has a limitation: mp_cpu_poweron() may only
	 * be called once after calling mp_cpu_configure() for a specific CPU.
	 * This is because mp_cpu_poweron() destroys data structures created
	 * by mp_cpu_configure(). So reject the request if the CPU has already
	 * been powered on once after calling mp_cpu_configure().
	 * This limitation only affects the p_online syscall; the DR driver
	 * won't be affected because it always invokes the public CPU
	 * management interfaces in the predefined order:
	 * cpu_configure()->cpu_poweron()...->cpu_poweroff()->cpu_unconfigure()
	 */
	if (cpuid_checkpass(cp, 4) || cp->cpu_thread == cp->cpu_idle_thread) {
		return (ENOTSUP);
	}

	/*
	 * Check that there's at least a megabyte of kmem available
	 * before attempting to start the cpu.
	 */
	if (kmem_avail() < 1024 * 1024) {
		/*
		 * Kick off a reap in case that helps us with
		 * later attempts ..
		 */
		kmem_reap();
		return (ENOMEM);
	}

	affinity_set(CPU->cpu_id);

	/*
	 * Start the target CPU. No need to call mach_cpucontext_fini()
	 * if mach_cpucontext_init() fails.
	 */
	if ((error = mach_cpucontext_init()) == 0) {
		error = mp_start_cpu_common(cp, B_FALSE);
		mach_cpucontext_fini();
	}
	if (error != 0) {
		affinity_clear();
		return (error);
	}

	/* Wait for the target cpu to reach READY state. */
	tempset = cpu_ready_set;
	while (!CPU_IN_SET(tempset, cpuid)) {
		delay(1);
		tempset = *((volatile cpuset_t *)&cpu_ready_set);
	}

	/* Mark the target CPU as available for mp operation. */
	CPUSET_ATOMIC_ADD(mp_cpus, cpuid);

	/* Free the space allocated to hold the microcode file */
	ucode_cleanup();

	affinity_clear();

	return (0);
}

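/*
 * How many times, and at what interval (in microseconds, via DELAY()),
 * to retry detaching the CPU's device node.
 */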
#define	MP_CPU_DETACH_MAX_TRIES		5
#define	MP_CPU_DETACH_DELAY		100

static int
mp_cpu_detach_driver(dev_info_t *dip)
{
	int i;
	int rv = EBUSY;
	dev_info_t *pdip;

	pdip = ddi_get_parent(dip);
	ASSERT(pdip != NULL);
	/*
	 * Check if the caller holds pdip busy - this can cause deadlocks in
	 * e_ddi_branch_unconfigure(), which calls devfs_clean().
	 */
	if (DEVI_BUSY_OWNED(pdip)) {
		return (EDEADLOCK);
	}

	for (i = 0; i < MP_CPU_DETACH_MAX_TRIES; i++) {
		if (e_ddi_branch_unconfigure(dip, NULL, 0) == 0) {
			rv = 0;
			break;
		}
		DELAY(MP_CPU_DETACH_DELAY);
	}

	return (rv);
}

/*
 * Power off the target CPU.
 * Note: cpu_lock will be released and then reacquired.
 */
int
mp_cpu_poweroff(struct cpu *cp)
{
	int rv = 0;
	void *ctx;
	dev_info_t *dip = NULL;
	rm_platter_t *rm = (rm_platter_t *)rm_platter_va;
	extern void cpupm_start(cpu_t *);
	extern void cpupm_stop(cpu_t *);

	ASSERT(cp != NULL);
	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);

	if (use_mp == 0 || plat_dr_support_cpu() == 0) {
		return (ENOTSUP);
	}
	/*
	 * There is no support for powering off cpu0 yet.
	 * There are many pieces of code which have a hard dependency on cpu0.
	 */
	if (cp->cpu_id == 0) {
		return (ENOTSUP);
	}

	if (mach_cpu_get_device_node(cp, &dip) != PSM_SUCCESS) {
		return (ENXIO);
	}
	ASSERT(dip != NULL);
	if (mp_cpu_detach_driver(dip) != 0) {
		rv = EBUSY;
		goto out_online;
	}

	/* Allocate CPU context for stopping */
	if (mach_cpucontext_init() != 0) {
		rv = ENXIO;
		goto out_online;
	}
	ctx = mach_cpucontext_xalloc(cp, MACH_CPUCONTEXT_OP_STOP);
	if (ctx == NULL) {
		rv = ENXIO;
		goto out_context_fini;
	}

	cpupm_stop(cp);
	cpu_event_fini_cpu(cp);

	if (cp->cpu_m.mcpu_cmi_hdl != NULL) {
		cmi_fini(cp->cpu_m.mcpu_cmi_hdl);
		cp->cpu_m.mcpu_cmi_hdl = NULL;
	}

	rv = mach_cpu_stop(cp, ctx);
	if (rv != 0) {
		goto out_enable_cmi;
	}

	/* Wait until the target CPU has been halted. */
	while (*(volatile ushort_t *)&(rm->rm_cpu_halted) != 0xdead) {
		delay(1);
	}
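	/* Overwrite the sentinel so a later stop doesn't see a stale 0xdead. */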
	rm->rm_cpu_halted = 0xffff;

	/* CPU_READY has been cleared by mach_cpu_stop. */
	ASSERT((cp->cpu_flags & CPU_READY) == 0);
	ASSERT((cp->cpu_flags & CPU_RUNNING) == 0);
	cp->cpu_flags = CPU_OFFLINE | CPU_QUIESCED | CPU_POWEROFF;
	CPUSET_ATOMIC_DEL(mp_cpus, cp->cpu_id);

	mach_cpucontext_xfree(cp, ctx, 0, MACH_CPUCONTEXT_OP_STOP);
	mach_cpucontext_fini();

	return (0);

out_enable_cmi:
	{
		cmi_hdl_t hdl;

		if ((hdl = cmi_init(CMI_HDL_NATIVE, cmi_ntv_hwchipid(cp),
		    cmi_ntv_hwcoreid(cp), cmi_ntv_hwstrandid(cp))) != NULL) {
			if (is_x86_feature(x86_featureset, X86FSET_MCA))
				cmi_mca_init(hdl);
			cp->cpu_m.mcpu_cmi_hdl = hdl;
		}
	}
	cpu_event_init_cpu(cp);
	cpupm_start(cp);
	mach_cpucontext_xfree(cp, ctx, rv, MACH_CPUCONTEXT_OP_STOP);

out_context_fini:
	mach_cpucontext_fini();

out_online:
	(void) e_ddi_branch_configure(dip, NULL, 0);

	if (rv != EAGAIN && rv != ETIME) {
		rv = ENXIO;
	}

	return (rv);
}

/*
 * Return vcpu state; since this could be a virtual environment that we
 * are unaware of, return "unknown".
 */
/* ARGSUSED */
int
vcpu_on_pcpu(processorid_t cpu)
{
	return (VCPU_STATE_UNKNOWN);
}