1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 /*
26  * Copyright 2021 Joyent, Inc.
27  * Copyright (c) 2016, 2017 by Delphix. All rights reserved.
28  * Copyright 2019 Joshua M. Clulow <josh@sysmgr.org>
29  */
30 
31 /*
32  * PSMI 1.1 extensions are supported only in 2.6 and later versions.
33  * PSMI 1.2 extensions are supported only in 2.7 and later versions.
34  * PSMI 1.3 and 1.4 extensions are supported in Solaris 10.
35  * PSMI 1.5 extensions are supported in Solaris Nevada.
36  * PSMI 1.6 extensions are supported in Solaris Nevada.
37  * PSMI 1.7 extensions are supported in Solaris Nevada.
38  */
39 #define	PSMI_1_7
40 
41 #include <sys/processor.h>
42 #include <sys/time.h>
43 #include <sys/psm.h>
44 #include <sys/smp_impldefs.h>
45 #include <sys/cram.h>
46 #include <sys/acpi/acpi.h>
47 #include <sys/acpica.h>
48 #include <sys/psm_common.h>
49 #include <sys/apic.h>
50 #include <sys/pit.h>
51 #include <sys/ddi.h>
52 #include <sys/sunddi.h>
53 #include <sys/ddi_impldefs.h>
54 #include <sys/pci.h>
55 #include <sys/promif.h>
56 #include <sys/x86_archext.h>
57 #include <sys/cpc_impl.h>
58 #include <sys/uadmin.h>
59 #include <sys/panic.h>
60 #include <sys/debug.h>
61 #include <sys/archsystm.h>
62 #include <sys/trap.h>
63 #include <sys/machsystm.h>
64 #include <sys/sysmacros.h>
65 #include <sys/cpuvar.h>
66 #include <sys/rm_platter.h>
67 #include <sys/privregs.h>
68 #include <sys/note.h>
69 #include <sys/pci_intr_lib.h>
70 #include <sys/spl.h>
71 #include <sys/clock.h>
72 #include <sys/dditypes.h>
73 #include <sys/sunddi.h>
74 #include <sys/x_call.h>
75 #include <sys/reboot.h>
76 #include <sys/hpet.h>
77 #include <sys/apic_common.h>
78 #include <sys/apic_timer.h>
79 #include <sys/tsc.h>
80 
81 static void	apic_record_ioapic_rdt(void *intrmap_private,
82 		    ioapic_rdt_t *irdt);
83 static void	apic_record_msi(void *intrmap_private, msi_regs_t *mregs);
84 
85 /*
86  * Common routines between pcplusmp & apix (taken from apic.c).
87  */
88 
89 int	apic_clkinit(int);
90 hrtime_t apic_gethrtime(void);
91 void	apic_send_ipi(int, int);
92 void	apic_set_idlecpu(processorid_t);
93 void	apic_unset_idlecpu(processorid_t);
94 void	apic_shutdown(int, int);
95 void	apic_preshutdown(int, int);
96 processorid_t	apic_get_next_processorid(processorid_t);
97 
98 hrtime_t apic_gettime();
99 
100 enum apic_ioapic_method_type apix_mul_ioapic_method = APIC_MUL_IOAPIC_PCPLUSMP;
101 
102 /* Now the ones for Dynamic Interrupt distribution */
103 int	apic_enable_dynamic_migration = 0;
104 
105 /* maximum loop count when sending Start IPIs. */
106 int apic_sipi_max_loop_count = 0x1000;
107 
108 /*
109  * These variables are frequently accessed in apic_intr_enter(),
110  * apic_intr_exit and apic_setspl, so group them together
111  */
112 volatile uint32_t *apicadr =  NULL;	/* virtual addr of local APIC	*/
113 int apic_setspl_delay = 1;		/* apic_setspl - delay enable	*/
114 int apic_clkvect;
115 
116 /* vector at which error interrupts come in */
117 int apic_errvect;
118 int apic_enable_error_intr = 1;
119 int apic_error_display_delay = 100;
120 
121 /* vector at which performance counter overflow interrupts come in */
122 int apic_cpcovf_vect;
123 int apic_enable_cpcovf_intr = 1;
124 
125 /* vector at which CMCI interrupts come in */
126 int apic_cmci_vect;
127 extern void cmi_cmci_trap(void);
128 
129 lock_t apic_mode_switch_lock;
130 
131 int apic_pir_vect;
132 
133 /*
134  * Patchable global variables.
135  */
136 int	apic_forceload = 0;
137 
138 int	apic_coarse_hrtime = 1;		/* 0 - use accurate slow gethrtime() */
139 
140 int	apic_flat_model = 0;		/* 0 - clustered. 1 - flat */
141 int	apic_panic_on_nmi = 0;
142 int	apic_panic_on_apic_error = 0;
143 
144 int	apic_verbose = 0;	/* 0x1ff */
145 
146 /* If set, force APIC calibration to use the PIT instead of the TSC */
147 int	apic_calibrate_use_pit = 0;
148 
/*
 * Empirically, 5 measurements are sufficient to give good accuracy. Most
 * spurious measurements are higher than the target value, so we eliminate
 * up to 2 of the 5 measurements as spurious.
 */
154 #define	APIC_CALIBRATE_MEASUREMENTS		5
155 
156 #define	APIC_CALIBRATE_PERCENT_OFF_WARNING	10
157 
158 extern int pit_is_broken; /* from tscc_pit.c */
159 
160 uint64_t apic_info_tsc[APIC_CALIBRATE_MEASUREMENTS];
161 uint64_t apic_info_pit[APIC_CALIBRATE_MEASUREMENTS];
162 
163 #ifdef DEBUG
164 int	apic_debug = 0;
165 int	apic_restrict_vector = 0;
166 
167 int	apic_debug_msgbuf[APIC_DEBUG_MSGBUFSIZE];
168 int	apic_debug_msgbufindex = 0;
169 
170 #endif /* DEBUG */
171 
uint_t apic_nticks = 0;
uint_t apic_skipped_redistribute = 0;

/*
 * State used by apic_gettime() and apic_gethrtime().
 */
/* last APIC counter value accepted by apic_gethrtime() */
uint_t last_count_read = 0;
/* held by apic_gethrtime() for the whole of its computation */
lock_t	apic_gethrtime_lock;
/* readers spin while the low bit is set and retry if the value changed */
volatile int	apic_hrtime_stamp = 0;
/* base time, in nanoseconds, that both readers start from */
volatile hrtime_t apic_nsec_since_boot = 0;

/* last value handed out by apic_gethrtime(); used to keep it monotonic */
static	hrtime_t	apic_last_hrtime = 0;
/* times apic_gethrtime() computed a value below apic_last_hrtime */
int		apic_hrtime_error = 0;
/* times the remote read of the counter came back invalid */
int		apic_remote_hrterr = 0;
/* NMIs counted by apic_nmi_intr() after it obtained apic_nmi_lock */
int		apic_num_nmis = 0;
/* OR of every error status value recorded by apic_error_intr() */
int		apic_apic_error = 0;
/* APIC errors recorded by apic_error_intr() */
int		apic_num_apic_errors = 0;
/* subset of the above that contained only APIC_CS_ERRORS bits */
int		apic_num_cksum_errors = 0;

/* bitmask of APIC_ERR_* conditions seen (APIC error, NMI) */
int	apic_error = 0;
189 
190 static	int	apic_cmos_ssb_set = 0;
191 
192 /* use to make sure only one cpu handles the nmi */
193 lock_t	apic_nmi_lock;
194 /* use to make sure only one cpu handles the error interrupt */
195 lock_t	apic_error_lock;
196 
197 static	struct {
198 	uchar_t	cntl;
199 	uchar_t	data;
200 } aspen_bmc[] = {
201 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
202 	{ CC_SMS_WR_NEXT,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
203 	{ CC_SMS_WR_NEXT,	0x84 },		/* DataByte 1: SMS/OS no log */
204 	{ CC_SMS_WR_NEXT,	0x2 },		/* DataByte 2: Power Down */
205 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 3: no pre-timeout */
206 	{ CC_SMS_WR_NEXT,	0x0 },		/* DataByte 4: timer expir. */
207 	{ CC_SMS_WR_NEXT,	0xa },		/* DataByte 5: init countdown */
208 	{ CC_SMS_WR_END,	0x0 },		/* DataByte 6: init countdown */
209 
210 	{ CC_SMS_WR_START,	0x18 },		/* NetFn/LUN */
211 	{ CC_SMS_WR_END,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
212 };
213 
214 static	struct {
215 	int	port;
216 	uchar_t	data;
217 } sitka_bmc[] = {
218 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
219 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
220 	{ SMS_DATA_REGISTER,	0x24 },		/* Cmd SET_WATCHDOG_TIMER */
221 	{ SMS_DATA_REGISTER,	0x84 },		/* DataByte 1: SMS/OS no log */
222 	{ SMS_DATA_REGISTER,	0x2 },		/* DataByte 2: Power Down */
223 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 3: no pre-timeout */
224 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 4: timer expir. */
225 	{ SMS_DATA_REGISTER,	0xa },		/* DataByte 5: init countdown */
226 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
227 	{ SMS_DATA_REGISTER,	0x0 },		/* DataByte 6: init countdown */
228 
229 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_START },
230 	{ SMS_DATA_REGISTER,	0x18 },		/* NetFn/LUN */
231 	{ SMS_COMMAND_REGISTER,	SMS_WRITE_END },
232 	{ SMS_DATA_REGISTER,	0x22 }		/* Cmd RESET_WATCHDOG_TIMER */
233 };
234 
235 /* Patchable global variables. */
236 int		apic_kmdb_on_nmi = 0;		/* 0 - no, 1 - yes enter kmdb */
237 uint32_t	apic_divide_reg_init = 0;	/* 0 - divide by 2 */
238 
/*
 * Default apic ops without interrupt remapping.
 *
 * The first five slots are filled with return_instr, cast to each slot's
 * function-pointer type.  Only the last two slots point at routines
 * declared in this file: apic_record_ioapic_rdt() and apic_record_msi().
 */
static apic_intrmap_ops_t apic_nointrmap_ops = {
	(int (*)(int))return_instr,
	(void (*)(int))return_instr,
	(void (*)(void **, dev_info_t *, uint16_t, int, uchar_t))return_instr,
	(void (*)(void *, void *, uint16_t, int))return_instr,
	(void (*)(void **))return_instr,
	apic_record_ioapic_rdt,
	apic_record_msi,
};
249 
250 apic_intrmap_ops_t *apic_vt_ops = &apic_nointrmap_ops;
251 apic_cpus_info_t	*apic_cpus = NULL;
252 cpuset_t	apic_cpumask;
253 uint_t		apic_picinit_called;
254 
255 /* Flag to indicate that we need to shut down all processors */
256 static uint_t	apic_shutdown_processors;
257 
258 /*
259  * Probe the ioapic method for apix module. Called in apic_probe_common()
260  */
261 int
apic_ioapic_method_probe()262 apic_ioapic_method_probe()
263 {
264 	if (apix_enable == 0)
265 		return (PSM_SUCCESS);
266 
267 	/*
268 	 * Set IOAPIC EOI handling method. The priority from low to high is:
269 	 *	1. IOxAPIC: with EOI register
270 	 *	2. IOMMU interrupt mapping
271 	 *	3. Mask-Before-EOI method for systems without boot
272 	 *	interrupt routing, such as systems with only one IOAPIC;
273 	 *	NVIDIA CK8-04/MCP55 systems; systems with bridge solution
274 	 *	which disables the boot interrupt routing already.
275 	 *	4. Directed EOI
276 	 */
277 	if (apic_io_ver[0] >= 0x20)
278 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_IOXAPIC;
279 	if ((apic_io_max == 1) || (apic_nvidia_io_max == apic_io_max))
280 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_MASK;
281 	if (apic_directed_EOI_supported())
282 		apix_mul_ioapic_method = APIC_MUL_IOAPIC_DEOI;
283 
284 	/* fall back to pcplusmp */
285 	if (apix_mul_ioapic_method == APIC_MUL_IOAPIC_PCPLUSMP) {
286 		/* make sure apix is after pcplusmp in /etc/mach */
287 		apix_enable = 0; /* go ahead with pcplusmp install next */
288 		return (PSM_FAILURE);
289 	}
290 
291 	return (PSM_SUCCESS);
292 }
293 
294 /*
295  * handler for APIC Error interrupt. Just print a warning and continue
296  */
297 int
apic_error_intr()298 apic_error_intr()
299 {
300 	uint_t	error0, error1, error;
301 	uint_t	i;
302 
303 	/*
304 	 * We need to write before read as per 7.4.17 of system prog manual.
305 	 * We do both and or the results to be safe
306 	 */
307 	error0 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
308 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
309 	error1 = apic_reg_ops->apic_read(APIC_ERROR_STATUS);
310 	error = error0 | error1;
311 
312 	/*
313 	 * Clear the APIC error status (do this on all cpus that enter here)
314 	 * (two writes are required due to the semantics of accessing the
315 	 * error status register.)
316 	 */
317 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
318 	apic_reg_ops->apic_write(APIC_ERROR_STATUS, 0);
319 
320 	/*
321 	 * Prevent more than 1 CPU from handling error interrupt causing
322 	 * double printing (interleave of characters from multiple
323 	 * CPU's when using prom_printf)
324 	 */
325 	if (lock_try(&apic_error_lock) == 0)
326 		return (error ? DDI_INTR_CLAIMED : DDI_INTR_UNCLAIMED);
327 	if (error) {
328 #if	DEBUG
329 		if (apic_debug)
330 			debug_enter("pcplusmp: APIC Error interrupt received");
331 #endif /* DEBUG */
332 		if (apic_panic_on_apic_error)
333 			cmn_err(CE_PANIC,
334 			    "APIC Error interrupt on CPU %d. Status = %x",
335 			    psm_get_cpu_id(), error);
336 		else {
337 			if ((error & ~APIC_CS_ERRORS) == 0) {
338 				/* cksum error only */
339 				apic_error |= APIC_ERR_APIC_ERROR;
340 				apic_apic_error |= error;
341 				apic_num_apic_errors++;
342 				apic_num_cksum_errors++;
343 			} else {
344 				/*
345 				 * prom_printf is the best shot we have of
346 				 * something which is problem free from
347 				 * high level/NMI type of interrupts
348 				 */
349 				prom_printf("APIC Error interrupt on CPU %d. "
350 				    "Status 0 = %x, Status 1 = %x\n",
351 				    psm_get_cpu_id(), error0, error1);
352 				apic_error |= APIC_ERR_APIC_ERROR;
353 				apic_apic_error |= error;
354 				apic_num_apic_errors++;
355 				for (i = 0; i < apic_error_display_delay; i++) {
356 					tenmicrosec();
357 				}
358 				/*
359 				 * provide more delay next time limited to
360 				 * roughly 1 clock tick time
361 				 */
362 				if (apic_error_display_delay < 500)
363 					apic_error_display_delay *= 2;
364 			}
365 		}
366 		lock_clear(&apic_error_lock);
367 		return (DDI_INTR_CLAIMED);
368 	} else {
369 		lock_clear(&apic_error_lock);
370 		return (DDI_INTR_UNCLAIMED);
371 	}
372 }
373 
374 /*
375  * Turn off the mask bit in the performance counter Local Vector Table entry.
376  */
377 void
apic_cpcovf_mask_clear(void)378 apic_cpcovf_mask_clear(void)
379 {
380 	apic_reg_ops->apic_write(APIC_PCINT_VECT,
381 	    (apic_reg_ops->apic_read(APIC_PCINT_VECT) & ~APIC_LVT_MASK));
382 }
383 
384 static int
apic_cmci_enable(xc_arg_t arg1 __unused,xc_arg_t arg2 __unused,xc_arg_t arg3 __unused)385 apic_cmci_enable(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused,
386     xc_arg_t arg3 __unused)
387 {
388 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect);
389 	return (0);
390 }
391 
392 static int
apic_cmci_disable(xc_arg_t arg1 __unused,xc_arg_t arg2 __unused,xc_arg_t arg3 __unused)393 apic_cmci_disable(xc_arg_t arg1 __unused, xc_arg_t arg2 __unused,
394     xc_arg_t arg3 __unused)
395 {
396 	apic_reg_ops->apic_write(APIC_CMCI_VECT, apic_cmci_vect | AV_MASK);
397 	return (0);
398 }
399 
400 void
apic_cmci_setup(processorid_t cpuid,boolean_t enable)401 apic_cmci_setup(processorid_t cpuid, boolean_t enable)
402 {
403 	cpuset_t	cpu_set;
404 
405 	CPUSET_ONLY(cpu_set, cpuid);
406 
407 	if (enable) {
408 		xc_call(0, 0, 0, CPUSET2BV(cpu_set),
409 		    (xc_func_t)apic_cmci_enable);
410 	} else {
411 		xc_call(0, 0, 0, CPUSET2BV(cpu_set),
412 		    (xc_func_t)apic_cmci_disable);
413 	}
414 }
415 
416 static void
apic_disable_local_apic(void)417 apic_disable_local_apic(void)
418 {
419 	apic_reg_ops->apic_write_task_reg(APIC_MASK_ALL);
420 	apic_reg_ops->apic_write(APIC_LOCAL_TIMER, AV_MASK);
421 
422 	/* local intr reg 0 */
423 	apic_reg_ops->apic_write(APIC_INT_VECT0, AV_MASK);
424 
425 	/* disable NMI */
426 	apic_reg_ops->apic_write(APIC_INT_VECT1, AV_MASK);
427 
428 	/* and error interrupt */
429 	apic_reg_ops->apic_write(APIC_ERR_VECT, AV_MASK);
430 
431 	/* and perf counter intr */
432 	apic_reg_ops->apic_write(APIC_PCINT_VECT, AV_MASK);
433 
434 	apic_reg_ops->apic_write(APIC_SPUR_INT_REG, APIC_SPUR_INTR);
435 }
436 
437 static void
apic_cpu_send_SIPI(processorid_t cpun,boolean_t start)438 apic_cpu_send_SIPI(processorid_t cpun, boolean_t start)
439 {
440 	int		loop_count;
441 	uint32_t	vector;
442 	uint_t		apicid;
443 	ulong_t		iflag;
444 
445 	apicid =  apic_cpus[cpun].aci_local_id;
446 
447 	/*
448 	 * Interrupts on current CPU will be disabled during the
449 	 * steps in order to avoid unwanted side effects from
450 	 * executing interrupt handlers on a problematic BIOS.
451 	 */
452 	iflag = intr_clear();
453 
454 	if (start) {
455 		outb(CMOS_ADDR, SSB);
456 		outb(CMOS_DATA, BIOS_SHUTDOWN);
457 	}
458 
459 	/*
460 	 * According to X2APIC specification in section '2.3.5.1' of
461 	 * Interrupt Command Register Semantics, the semantics of
462 	 * programming the Interrupt Command Register to dispatch an interrupt
463 	 * is simplified. A single MSR write to the 64-bit ICR is required
464 	 * for dispatching an interrupt. Specifically, with the 64-bit MSR
465 	 * interface to ICR, system software is not required to check the
466 	 * status of the delivery status bit prior to writing to the ICR
467 	 * to send an IPI. With the removal of the Delivery Status bit,
468 	 * system software no longer has a reason to read the ICR. It remains
469 	 * readable only to aid in debugging.
470 	 */
471 #ifdef	DEBUG
472 	APIC_AV_PENDING_SET();
473 #else
474 	if (apic_mode == LOCAL_APIC) {
475 		APIC_AV_PENDING_SET();
476 	}
477 #endif /* DEBUG */
478 
479 	/* for integrated - make sure there is one INIT IPI in buffer */
480 	/* for external - it will wake up the cpu */
481 	apic_reg_ops->apic_write_int_cmd(apicid, AV_ASSERT | AV_RESET);
482 
483 	/* If only 1 CPU is installed, PENDING bit will not go low */
484 	for (loop_count = apic_sipi_max_loop_count; loop_count; loop_count--) {
485 		if (apic_mode == LOCAL_APIC &&
486 		    apic_reg_ops->apic_read(APIC_INT_CMD1) & AV_PENDING)
487 			apic_ret();
488 		else
489 			break;
490 	}
491 
492 	apic_reg_ops->apic_write_int_cmd(apicid, AV_DEASSERT | AV_RESET);
493 	drv_usecwait(20000);		/* 20 milli sec */
494 
495 	if (apic_cpus[cpun].aci_local_ver >= APIC_INTEGRATED_VERS) {
496 		/* integrated apic */
497 
498 		vector = (rm_platter_pa >> MMU_PAGESHIFT) &
499 		    (APIC_VECTOR_MASK | APIC_IPL_MASK);
500 
501 		/* to offset the INIT IPI queue up in the buffer */
502 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
503 		drv_usecwait(200);		/* 20 micro sec */
504 
505 		/*
506 		 * send the second SIPI (Startup IPI) as recommended by Intel
507 		 * software development manual.
508 		 */
509 		apic_reg_ops->apic_write_int_cmd(apicid, vector | AV_STARTUP);
510 		drv_usecwait(200);	/* 20 micro sec */
511 	}
512 
513 	intr_restore(iflag);
514 }
515 
516 /*ARGSUSED1*/
517 int
apic_cpu_start(processorid_t cpun,caddr_t arg __unused)518 apic_cpu_start(processorid_t cpun, caddr_t arg __unused)
519 {
520 	ASSERT(MUTEX_HELD(&cpu_lock));
521 
522 	if (!apic_cpu_in_range(cpun)) {
523 		return (EINVAL);
524 	}
525 
526 	/*
527 	 * Switch to apic_common_send_ipi for safety during starting other CPUs.
528 	 */
529 	if (apic_mode == LOCAL_X2APIC) {
530 		apic_switch_ipi_callback(B_TRUE);
531 	}
532 
533 	apic_cmos_ssb_set = 1;
534 	apic_cpu_send_SIPI(cpun, B_TRUE);
535 
536 	return (0);
537 }
538 
539 /*
540  * Put CPU into halted state with interrupts disabled.
541  */
542 /*ARGSUSED1*/
543 int
apic_cpu_stop(processorid_t cpun,caddr_t arg __unused)544 apic_cpu_stop(processorid_t cpun, caddr_t arg __unused)
545 {
546 	int		rc;
547 	cpu_t		*cp;
548 	extern cpuset_t cpu_ready_set;
549 	extern void cpu_idle_intercept_cpu(cpu_t *cp);
550 
551 	ASSERT(MUTEX_HELD(&cpu_lock));
552 
553 	if (!apic_cpu_in_range(cpun)) {
554 		return (EINVAL);
555 	}
556 	if (apic_cpus[cpun].aci_local_ver < APIC_INTEGRATED_VERS) {
557 		return (ENOTSUP);
558 	}
559 
560 	cp = cpu_get(cpun);
561 	ASSERT(cp != NULL);
562 	ASSERT((cp->cpu_flags & CPU_OFFLINE) != 0);
563 	ASSERT((cp->cpu_flags & CPU_QUIESCED) != 0);
564 	ASSERT((cp->cpu_flags & CPU_ENABLE) == 0);
565 
566 	/* Clear CPU_READY flag to disable cross calls. */
567 	cp->cpu_flags &= ~CPU_READY;
568 	CPUSET_ATOMIC_DEL(cpu_ready_set, cpun);
569 	rc = xc_flush_cpu(cp);
570 	if (rc != 0) {
571 		CPUSET_ATOMIC_ADD(cpu_ready_set, cpun);
572 		cp->cpu_flags |= CPU_READY;
573 		return (rc);
574 	}
575 
576 	/* Intercept target CPU at a safe point before powering it off. */
577 	cpu_idle_intercept_cpu(cp);
578 
579 	apic_cpu_send_SIPI(cpun, B_FALSE);
580 	cp->cpu_flags &= ~CPU_RUNNING;
581 
582 	return (0);
583 }
584 
585 int
apic_cpu_ops(psm_cpu_request_t * reqp)586 apic_cpu_ops(psm_cpu_request_t *reqp)
587 {
588 	if (reqp == NULL) {
589 		return (EINVAL);
590 	}
591 
592 	switch (reqp->pcr_cmd) {
593 	case PSM_CPU_ADD:
594 		return (apic_cpu_add(reqp));
595 
596 	case PSM_CPU_REMOVE:
597 		return (apic_cpu_remove(reqp));
598 
599 	case PSM_CPU_STOP:
600 		return (apic_cpu_stop(reqp->req.cpu_stop.cpuid,
601 		    reqp->req.cpu_stop.ctx));
602 
603 	default:
604 		return (ENOTSUP);
605 	}
606 }
607 
608 #ifdef	DEBUG
609 int	apic_break_on_cpu = 9;
610 int	apic_stretch_interrupts = 0;
611 int	apic_stretch_ISR = 1 << 3;	/* IPL of 3 matches nothing now */
612 #endif /* DEBUG */
613 
614 /*
615  * generates an interprocessor interrupt to another CPU. Any changes made to
616  * this routine must be accompanied by similar changes to
617  * apic_common_send_ipi().
618  */
619 void
apic_send_ipi(int cpun,int ipl)620 apic_send_ipi(int cpun, int ipl)
621 {
622 	int vector;
623 	ulong_t flag;
624 
625 	vector = apic_resv_vector[ipl];
626 
627 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
628 
629 	flag = intr_clear();
630 
631 	APIC_AV_PENDING_SET();
632 
633 	apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
634 	    vector);
635 
636 	intr_restore(flag);
637 }
638 
639 void
apic_send_pir_ipi(processorid_t cpun)640 apic_send_pir_ipi(processorid_t cpun)
641 {
642 	const int vector = apic_pir_vect;
643 	ulong_t flag;
644 
645 	ASSERT((vector >= APIC_BASE_VECT) && (vector <= APIC_SPUR_INTR));
646 
647 	flag = intr_clear();
648 
649 	/* Self-IPI for inducing PIR makes no sense. */
650 	if ((cpun != psm_get_cpu_id())) {
651 		APIC_AV_PENDING_SET();
652 		apic_reg_ops->apic_write_int_cmd(apic_cpus[cpun].aci_local_id,
653 		    vector);
654 	}
655 
656 	intr_restore(flag);
657 }
658 
659 int
apic_get_pir_ipivect(void)660 apic_get_pir_ipivect(void)
661 {
662 	return (apic_pir_vect);
663 }
664 
665 void
apic_set_idlecpu(processorid_t cpun __unused)666 apic_set_idlecpu(processorid_t cpun __unused)
667 {
668 }
669 
670 void
apic_unset_idlecpu(processorid_t cpun __unused)671 apic_unset_idlecpu(processorid_t cpun __unused)
672 {
673 }
674 
675 
/*
 * Empty function, called from the bodies of the polling loops in this file.
 */
void
apic_ret()
{
	/* no-op */
}
680 
681 /*
682  * If apic_coarse_time == 1, then apic_gettime() is used instead of
683  * apic_gethrtime().  This is used for performance instead of accuracy.
684  */
685 
686 hrtime_t
apic_gettime()687 apic_gettime()
688 {
689 	int old_hrtime_stamp;
690 	hrtime_t temp;
691 
692 	/*
693 	 * In one-shot mode, we do not keep time, so if anyone
694 	 * calls psm_gettime() directly, we vector over to
695 	 * gethrtime().
696 	 * one-shot mode MUST NOT be enabled if this psm is the source of
697 	 * hrtime.
698 	 */
699 
700 	if (apic_oneshot)
701 		return (gethrtime());
702 
703 
704 gettime_again:
705 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
706 		apic_ret();
707 
708 	temp = apic_nsec_since_boot;
709 
710 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
711 		goto gettime_again;
712 	}
713 	return (temp);
714 }
715 
716 /*
717  * Here we return the number of nanoseconds since booting.  Note every
718  * clock interrupt increments apic_nsec_since_boot by the appropriate
719  * amount.
720  */
721 hrtime_t
apic_gethrtime(void)722 apic_gethrtime(void)
723 {
724 	int curr_timeval, countval, elapsed_ticks;
725 	int old_hrtime_stamp, status;
726 	hrtime_t temp;
727 	uint32_t cpun;
728 	ulong_t oflags;
729 
730 	/*
731 	 * In one-shot mode, we do not keep time, so if anyone
732 	 * calls psm_gethrtime() directly, we vector over to
733 	 * gethrtime().
734 	 * one-shot mode MUST NOT be enabled if this psm is the source of
735 	 * hrtime.
736 	 */
737 
738 	if (apic_oneshot)
739 		return (gethrtime());
740 
741 	oflags = intr_clear();	/* prevent migration */
742 
743 	cpun = apic_reg_ops->apic_read(APIC_LID_REG);
744 	if (apic_mode == LOCAL_APIC)
745 		cpun >>= APIC_ID_BIT_OFFSET;
746 
747 	lock_set(&apic_gethrtime_lock);
748 
749 gethrtime_again:
750 	while ((old_hrtime_stamp = apic_hrtime_stamp) & 1)
751 		apic_ret();
752 
753 	/*
754 	 * Check to see which CPU we are on.  Note the time is kept on
755 	 * the local APIC of CPU 0.  If on CPU 0, simply read the current
756 	 * counter.  If on another CPU, issue a remote read command to CPU 0.
757 	 */
758 	if (cpun == apic_cpus[0].aci_local_id) {
759 		countval = apic_reg_ops->apic_read(APIC_CURR_COUNT);
760 	} else {
761 #ifdef	DEBUG
762 		APIC_AV_PENDING_SET();
763 #else
764 		if (apic_mode == LOCAL_APIC)
765 			APIC_AV_PENDING_SET();
766 #endif /* DEBUG */
767 
768 		apic_reg_ops->apic_write_int_cmd(
769 		    apic_cpus[0].aci_local_id, APIC_CURR_ADD | AV_REMOTE);
770 
771 		while ((status = apic_reg_ops->apic_read(APIC_INT_CMD1))
772 		    & AV_READ_PENDING) {
773 			apic_ret();
774 		}
775 
776 		if (status & AV_REMOTE_STATUS)	/* 1 = valid */
777 			countval = apic_reg_ops->apic_read(APIC_REMOTE_READ);
778 		else {	/* 0 = invalid */
779 			apic_remote_hrterr++;
780 			/*
781 			 * return last hrtime right now, will need more
782 			 * testing if change to retry
783 			 */
784 			temp = apic_last_hrtime;
785 
786 			lock_clear(&apic_gethrtime_lock);
787 
788 			intr_restore(oflags);
789 
790 			return (temp);
791 		}
792 	}
793 	if (countval > last_count_read)
794 		countval = 0;
795 	else
796 		last_count_read = countval;
797 
798 	elapsed_ticks = apic_hertz_count - countval;
799 
800 	curr_timeval = APIC_TICKS_TO_NSECS(elapsed_ticks);
801 	temp = apic_nsec_since_boot + curr_timeval;
802 
803 	if (apic_hrtime_stamp != old_hrtime_stamp) {	/* got an interrupt */
804 		/* we might have clobbered last_count_read. Restore it */
805 		last_count_read = apic_hertz_count;
806 		goto gethrtime_again;
807 	}
808 
809 	if (temp < apic_last_hrtime) {
810 		/* return last hrtime if error occurs */
811 		apic_hrtime_error++;
812 		temp = apic_last_hrtime;
813 	}
814 	else
815 		apic_last_hrtime = temp;
816 
817 	lock_clear(&apic_gethrtime_lock);
818 	intr_restore(oflags);
819 
820 	return (temp);
821 }
822 
823 /* apic NMI handler */
824 uint_t
apic_nmi_intr(caddr_t arg __unused,caddr_t arg1 __unused)825 apic_nmi_intr(caddr_t arg __unused, caddr_t arg1 __unused)
826 {
827 	nmi_action_t action = nmi_action;
828 
829 	if (apic_shutdown_processors) {
830 		apic_disable_local_apic();
831 		return (DDI_INTR_CLAIMED);
832 	}
833 
834 	apic_error |= APIC_ERR_NMI;
835 
836 	if (!lock_try(&apic_nmi_lock))
837 		return (DDI_INTR_CLAIMED);
838 	apic_num_nmis++;
839 
840 	/*
841 	 * "nmi_action" always over-rides the older way of doing this, unless we
842 	 * can't actually drop into kmdb when requested.
843 	 */
844 	if (action == NMI_ACTION_KMDB && !psm_debugger())
845 		action = NMI_ACTION_UNSET;
846 
847 	if (action == NMI_ACTION_UNSET) {
848 		if (apic_kmdb_on_nmi && psm_debugger())
849 			action = NMI_ACTION_KMDB;
850 		else if (apic_panic_on_nmi)
851 			action = NMI_ACTION_PANIC;
852 		else
853 			action = NMI_ACTION_IGNORE;
854 	}
855 
856 	switch (action) {
857 	case NMI_ACTION_IGNORE:
858 		/*
859 		 * prom_printf is the best shot we have of something which is
860 		 * problem free from high level/NMI type of interrupts
861 		 */
862 		prom_printf("NMI received\n");
863 		break;
864 
865 	case NMI_ACTION_PANIC:
866 		/* Keep panic from entering kmdb. */
867 		nopanicdebug = 1;
868 		panic("NMI received\n");
869 		break;
870 
871 	case NMI_ACTION_KMDB:
872 	default:
873 		debug_enter("NMI received: entering kmdb\n");
874 		break;
875 	}
876 
877 	lock_clear(&apic_nmi_lock);
878 	return (DDI_INTR_CLAIMED);
879 }
880 
881 processorid_t
apic_get_next_processorid(processorid_t cpu_id)882 apic_get_next_processorid(processorid_t cpu_id)
883 {
884 
885 	int i;
886 
887 	if (cpu_id == -1)
888 		return ((processorid_t)0);
889 
890 	for (i = cpu_id + 1; i < NCPU; i++) {
891 		if (apic_cpu_in_range(i))
892 			return (i);
893 	}
894 
895 	return ((processorid_t)-1);
896 }
897 
898 int
apic_cpu_add(psm_cpu_request_t * reqp)899 apic_cpu_add(psm_cpu_request_t *reqp)
900 {
901 	int i, rv = 0;
902 	ulong_t iflag;
903 	boolean_t first = B_TRUE;
904 	uchar_t localver = 0;
905 	uint32_t localid, procid;
906 	processorid_t cpuid = (processorid_t)-1;
907 	mach_cpu_add_arg_t *ap;
908 
909 	ASSERT(reqp != NULL);
910 	reqp->req.cpu_add.cpuid = (processorid_t)-1;
911 
912 	/* Check whether CPU hotplug is supported. */
913 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
914 		return (ENOTSUP);
915 	}
916 
917 	ap = (mach_cpu_add_arg_t *)reqp->req.cpu_add.argp;
918 	switch (ap->type) {
919 	case MACH_CPU_ARG_LOCAL_APIC:
920 		localid = ap->arg.apic.apic_id;
921 		procid = ap->arg.apic.proc_id;
922 		if (localid >= 255 || procid > 255) {
923 			cmn_err(CE_WARN,
924 			    "!apic: apicid(%u) or procid(%u) is invalid.",
925 			    localid, procid);
926 			return (EINVAL);
927 		}
928 		break;
929 
930 	case MACH_CPU_ARG_LOCAL_X2APIC:
931 		localid = ap->arg.apic.apic_id;
932 		procid = ap->arg.apic.proc_id;
933 		if (localid >= UINT32_MAX) {
934 			cmn_err(CE_WARN,
935 			    "!apic: x2apicid(%u) is invalid.", localid);
936 			return (EINVAL);
937 		} else if (localid >= 255 && apic_mode == LOCAL_APIC) {
938 			cmn_err(CE_WARN, "!apic: system is in APIC mode, "
939 			    "can't support x2APIC processor.");
940 			return (ENOTSUP);
941 		}
942 		break;
943 
944 	default:
945 		cmn_err(CE_WARN,
946 		    "!apic: unknown argument type %d to apic_cpu_add().",
947 		    ap->type);
948 		return (EINVAL);
949 	}
950 
951 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
952 	iflag = intr_clear();
953 	lock_set(&apic_ioapic_lock);
954 
955 	/* Check whether local APIC id already exists. */
956 	for (i = 0; i < apic_nproc; i++) {
957 		if (!CPU_IN_SET(apic_cpumask, i))
958 			continue;
959 		if (apic_cpus[i].aci_local_id == localid) {
960 			lock_clear(&apic_ioapic_lock);
961 			intr_restore(iflag);
962 			cmn_err(CE_WARN,
963 			    "!apic: local apic id %u already exists.",
964 			    localid);
965 			return (EEXIST);
966 		} else if (apic_cpus[i].aci_processor_id == procid) {
967 			lock_clear(&apic_ioapic_lock);
968 			intr_restore(iflag);
969 			cmn_err(CE_WARN,
970 			    "!apic: processor id %u already exists.",
971 			    (int)procid);
972 			return (EEXIST);
973 		}
974 
975 		/*
976 		 * There's no local APIC version number available in MADT table,
977 		 * so assume that all CPUs are homogeneous and use local APIC
978 		 * version number of the first existing CPU.
979 		 */
980 		if (first) {
981 			first = B_FALSE;
982 			localver = apic_cpus[i].aci_local_ver;
983 		}
984 	}
985 	ASSERT(first == B_FALSE);
986 
987 	/*
988 	 * Try to assign the same cpuid if APIC id exists in the dirty cache.
989 	 */
990 	for (i = 0; i < apic_max_nproc; i++) {
991 		if (CPU_IN_SET(apic_cpumask, i)) {
992 			ASSERT((apic_cpus[i].aci_status & APIC_CPU_FREE) == 0);
993 			continue;
994 		}
995 		ASSERT(apic_cpus[i].aci_status & APIC_CPU_FREE);
996 		if ((apic_cpus[i].aci_status & APIC_CPU_DIRTY) &&
997 		    apic_cpus[i].aci_local_id == localid &&
998 		    apic_cpus[i].aci_processor_id == procid) {
999 			cpuid = i;
1000 			break;
1001 		}
1002 	}
1003 
1004 	/* Avoid the dirty cache and allocate fresh slot if possible. */
1005 	if (cpuid == (processorid_t)-1) {
1006 		for (i = 0; i < apic_max_nproc; i++) {
1007 			if ((apic_cpus[i].aci_status & APIC_CPU_FREE) &&
1008 			    (apic_cpus[i].aci_status & APIC_CPU_DIRTY) == 0) {
1009 				cpuid = i;
1010 				break;
1011 			}
1012 		}
1013 	}
1014 
1015 	/* Try to find any free slot as last resort. */
1016 	if (cpuid == (processorid_t)-1) {
1017 		for (i = 0; i < apic_max_nproc; i++) {
1018 			if (apic_cpus[i].aci_status & APIC_CPU_FREE) {
1019 				cpuid = i;
1020 				break;
1021 			}
1022 		}
1023 	}
1024 
1025 	if (cpuid == (processorid_t)-1) {
1026 		lock_clear(&apic_ioapic_lock);
1027 		intr_restore(iflag);
1028 		cmn_err(CE_NOTE,
1029 		    "!apic: failed to allocate cpu id for processor %u.",
1030 		    procid);
1031 		rv = EAGAIN;
1032 	} else if (ACPI_FAILURE(acpica_map_cpu(cpuid, procid))) {
1033 		lock_clear(&apic_ioapic_lock);
1034 		intr_restore(iflag);
1035 		cmn_err(CE_NOTE,
1036 		    "!apic: failed to build mapping for processor %u.",
1037 		    procid);
1038 		rv = EBUSY;
1039 	} else {
1040 		ASSERT(cpuid >= 0 && cpuid < NCPU);
1041 		ASSERT(cpuid < apic_max_nproc && cpuid < max_ncpus);
1042 		bzero(&apic_cpus[cpuid], sizeof (apic_cpus[0]));
1043 		apic_cpus[cpuid].aci_processor_id = procid;
1044 		apic_cpus[cpuid].aci_local_id = localid;
1045 		apic_cpus[cpuid].aci_local_ver = localver;
1046 		CPUSET_ATOMIC_ADD(apic_cpumask, cpuid);
1047 		if (cpuid >= apic_nproc) {
1048 			apic_nproc = cpuid + 1;
1049 		}
1050 		lock_clear(&apic_ioapic_lock);
1051 		intr_restore(iflag);
1052 		reqp->req.cpu_add.cpuid = cpuid;
1053 	}
1054 
1055 	return (rv);
1056 }
1057 
1058 int
apic_cpu_remove(psm_cpu_request_t * reqp)1059 apic_cpu_remove(psm_cpu_request_t *reqp)
1060 {
1061 	int i;
1062 	ulong_t iflag;
1063 	processorid_t cpuid;
1064 
1065 	/* Check whether CPU hotplug is supported. */
1066 	if (!plat_dr_support_cpu() || apic_max_nproc == -1) {
1067 		return (ENOTSUP);
1068 	}
1069 
1070 	cpuid = reqp->req.cpu_remove.cpuid;
1071 
1072 	/* Use apic_ioapic_lock to sync with apic_get_next_bind_cpu. */
1073 	iflag = intr_clear();
1074 	lock_set(&apic_ioapic_lock);
1075 
1076 	if (!apic_cpu_in_range(cpuid)) {
1077 		lock_clear(&apic_ioapic_lock);
1078 		intr_restore(iflag);
1079 		cmn_err(CE_WARN,
1080 		    "!apic: cpuid %d doesn't exist in apic_cpus array.",
1081 		    cpuid);
1082 		return (ENODEV);
1083 	}
1084 	ASSERT((apic_cpus[cpuid].aci_status & APIC_CPU_FREE) == 0);
1085 
1086 	if (ACPI_FAILURE(acpica_unmap_cpu(cpuid))) {
1087 		lock_clear(&apic_ioapic_lock);
1088 		intr_restore(iflag);
1089 		return (ENOENT);
1090 	}
1091 
1092 	if (cpuid == apic_nproc - 1) {
1093 		/*
1094 		 * We are removing the highest numbered cpuid so we need to
1095 		 * find the next highest cpuid as the new value for apic_nproc.
1096 		 */
1097 		for (i = apic_nproc; i > 0; i--) {
1098 			if (CPU_IN_SET(apic_cpumask, i - 1)) {
1099 				apic_nproc = i;
1100 				break;
1101 			}
1102 		}
1103 		/* at least one CPU left */
1104 		ASSERT(i > 0);
1105 	}
1106 	CPUSET_ATOMIC_DEL(apic_cpumask, cpuid);
1107 	/* mark slot as free and keep it in the dirty cache */
1108 	apic_cpus[cpuid].aci_status = APIC_CPU_FREE | APIC_CPU_DIRTY;
1109 
1110 	lock_clear(&apic_ioapic_lock);
1111 	intr_restore(iflag);
1112 
1113 	return (0);
1114 }
1115 
/*
 * Return the number of ticks the APIC decrements in SF nanoseconds.
 * The fixed-frequency PIT (aka 8254) is used for the measurement.
 *
 * Returns 0 immediately, before any register access, when pit_is_broken
 * is set.  Otherwise the APIC current count is sampled on either side of a
 * window of at least APIC_TIME_COUNT PIT ticks; all PIT polling is done
 * with interrupts disabled.
 */
static uint64_t
apic_calibrate_pit(void)
{
	uint8_t		pit_tick_lo;
	uint16_t	pit_tick, target_pit_tick, pit_ticks_adj;
	uint32_t	pit_ticks;
	uint32_t	start_apic_tick, end_apic_tick, apic_ticks;
	ulong_t		iflag;

	if (pit_is_broken)
		return (0);

	/* Program the APIC divider and load APIC_MAXVAL as initial count. */
	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);

	iflag = intr_clear();

	/*
	 * Put the PIT in mode 0, "Interrupt On Terminal Count":
	 */
	outb(PITCTL_PORT, PIT_C0 | PIT_LOADMODE | PIT_ENDSIGMODE);

	/*
	 * The PIT counts down and then the counter value wraps around.  Load
	 * the maximum counter value:
	 */
	outb(PITCTR0_PORT, 0xFF);
	outb(PITCTR0_PORT, 0xFF);

	/*
	 * Each counter sample is two port reads: the first yields the low
	 * byte, the second the high byte.  Spin until the 16-bit value is at
	 * least APIC_TIME_MIN and the low byte lies strictly between
	 * APIC_LB_MIN and APIC_LB_MAX.
	 */
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick < APIC_TIME_MIN ||
	    pit_tick_lo <= APIC_LB_MIN || pit_tick_lo >= APIC_LB_MAX);

	/*
	 * Wait for the PIT to decrement by 5 ticks to ensure
	 * we didn't start in the middle of a tick.
	 * Compare with 0x10 for the wrap around case.
	 */
	target_pit_tick = pit_tick - 5;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	/* Start of the measurement window. */
	start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);

	/*
	 * Wait for the PIT to decrement by APIC_TIME_COUNT ticks
	 */
	target_pit_tick = pit_tick - APIC_TIME_COUNT;
	do {
		pit_tick_lo = inb(PITCTR0_PORT);
		pit_tick = (inb(PITCTR0_PORT) << 8) | pit_tick_lo;
	} while (pit_tick > target_pit_tick || pit_tick_lo < 0x10);

	/* End of the measurement window. */
	end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);

	intr_restore(iflag);

	/* The APIC counter runs downwards, hence start minus end. */
	apic_ticks = start_apic_tick - end_apic_tick;

	/*
	 * The PIT might have decremented by more ticks than planned.  The
	 * loop above only exits once pit_tick <= target_pit_tick, so this
	 * difference is the overshoot past the target.
	 */
	pit_ticks_adj = target_pit_tick - pit_tick;
	/* total number of PIT ticks corresponding to apic_ticks */
	pit_ticks = APIC_TIME_COUNT + pit_ticks_adj;

	/*
	 * Determine the number of nanoseconds per APIC clock tick
	 * and then determine how many APIC ticks to interrupt at the
	 * desired frequency
	 * apic_ticks / (pitticks / PIT_HZ) = apic_ticks_per_s
	 * (apic_ticks * PIT_HZ) / pitticks = apic_ticks_per_s
	 * apic_ticks_per_ns = (apic_ticks * PIT_HZ) / (pitticks * 10^9)
	 * apic_ticks_per_SFns =
	 * (SF * apic_ticks * PIT_HZ) / (pitticks * 10^9)
	 */
	return ((SF * apic_ticks * PIT_HZ) / ((uint64_t)pit_ticks * NANOSEC));
}
1200 
1201 /*
1202  * Return the number of ticks the APIC decrements in SF nanoseconds.
1203  * The TSC is used for the measurement.
1204  */
1205 static uint64_t
apic_calibrate_tsc(void)1206 apic_calibrate_tsc(void)
1207 {
1208 	uint64_t	tsc_now, tsc_end, tsc_amt, tsc_hz;
1209 	uint64_t	apic_ticks;
1210 	uint32_t	start_apic_tick, end_apic_tick;
1211 	ulong_t		iflag;
1212 
1213 	tsc_hz = tsc_get_freq();
1214 
1215 	/*
1216 	 * APIC_TIME_COUNT is in i8254 PIT ticks, which have a period
1217 	 * slightly under 1us. We can just treat the value as the number of
1218 	 * microseconds for our sampling period -- that is we wait
1219 	 * APIC_TIME_COUNT microseconds (corresponding to 'tsc_amt' of TSC
1220 	 * ticks).
1221 	 */
1222 	tsc_amt = tsc_hz * APIC_TIME_COUNT / MICROSEC;
1223 
1224 	apic_reg_ops->apic_write(APIC_DIVIDE_REG, apic_divide_reg_init);
1225 	apic_reg_ops->apic_write(APIC_INIT_COUNT, APIC_MAXVAL);
1226 
1227 	iflag = intr_clear();
1228 
1229 	tsc_now = tsc_read();
1230 	tsc_end = tsc_now + tsc_amt;
1231 	start_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);
1232 
1233 	while (tsc_now < tsc_end)
1234 		tsc_now = tsc_read();
1235 
1236 	end_apic_tick = apic_reg_ops->apic_read(APIC_CURR_COUNT);
1237 
1238 	intr_restore(iflag);
1239 
1240 	apic_ticks = start_apic_tick - end_apic_tick;
1241 
1242 	/*
1243 	 * We likely did not wait exactly APIC_TIME_COUNT microseconds, but
1244 	 * slightly longer. Add the additional amount to tsc_amt.
1245 	 */
1246 	tsc_amt += tsc_now - tsc_end;
1247 
1248 	/*
1249 	 * This calculation is analogous to the one used with the PIT.
1250 	 * However, due to the typically _much_ higher precision of the
1251 	 * TSC compared to the PIT, we have to be careful we do not overflow.
1252 	 *
1253 	 * Since contemporary APIC timers have frequencies on the order of
1254 	 * tens of MHz (i.e. 66MHz), we calculate that first. Then we
1255 	 * scale the result by SF (because the caller wants it scaled by
1256 	 * that amount), then convert the result to scaled (SF) ticks per ns.
1257 	 *
1258 	 */
1259 	uint64_t apic_freq = apic_ticks * tsc_hz / tsc_amt;
1260 
1261 	return (apic_freq * SF / NANOSEC);
1262 }
1263 
1264 /*
1265  * Return the number of ticks the APIC decrements in SF nanoseconds.
1266  * Several measurements are taken to filter out outliers.
1267  */
1268 uint64_t
apic_calibrate()1269 apic_calibrate()
1270 {
1271 	uint64_t	measurements[APIC_CALIBRATE_MEASUREMENTS];
1272 	int		median_idx;
1273 	uint64_t	median;
1274 
1275 	/*
1276 	 * When running under a virtual machine, the emulated PIT and APIC
1277 	 * counters do not always return the right values and can roll over.
1278 	 * Those spurious measurements are relatively rare but could
1279 	 * significantly affect the calibration.
1280 	 * Therefore we take several measurements and then keep the median.
1281 	 * The median is preferred to the average here as we only want to
1282 	 * discard outliers.
1283 	 *
1284 	 * Traditionally, only the PIT was used to calibrate the APIC as the
1285 	 * the TSC was not calibrated at this point in the boot process (or
1286 	 * on even (much, much) older systems, possibly not present). On
1287 	 * newer systems, the PIT is not always present. We now default to
1288 	 * using the TSC (since it's now calibrated early enough in the boot
1289 	 * process to be usable), but for debugging purposes as we transition,
1290 	 * we still try to use the PIT and record those values. On systems
1291 	 * without a functioning PIT, the PIT measurements will always be 0.
1292 	 */
1293 	for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) {
1294 		apic_info_tsc[i] = apic_calibrate_tsc();
1295 		apic_info_pit[i] = apic_calibrate_pit();
1296 
1297 		if (apic_calibrate_use_pit) {
1298 			if (pit_is_broken) {
1299 				panic("Failed to calibrate APIC due to broken "
1300 				    "PIT");
1301 			}
1302 			measurements[i] = apic_info_pit[i];
1303 		} else {
1304 			measurements[i] = apic_info_tsc[i];
1305 		}
1306 	}
1307 
1308 	/*
1309 	 * sort results and retrieve median.
1310 	 */
1311 	for (int i = 0; i < APIC_CALIBRATE_MEASUREMENTS; i++) {
1312 		for (int j = i + 1; j < APIC_CALIBRATE_MEASUREMENTS; j++) {
1313 			if (measurements[j] < measurements[i]) {
1314 				uint64_t tmp = measurements[i];
1315 				measurements[i] = measurements[j];
1316 				measurements[j] = tmp;
1317 			}
1318 		}
1319 	}
1320 	median_idx = APIC_CALIBRATE_MEASUREMENTS / 2;
1321 	median = measurements[median_idx];
1322 
1323 #if (APIC_CALIBRATE_MEASUREMENTS >= 3)
1324 	/*
1325 	 * Check that measurements are consistent. Post a warning
1326 	 * if the three middle values are not close to each other.
1327 	 */
1328 	uint64_t delta_warn = median *
1329 	    APIC_CALIBRATE_PERCENT_OFF_WARNING / 100;
1330 	if ((median - measurements[median_idx - 1]) > delta_warn ||
1331 	    (measurements[median_idx + 1] - median) > delta_warn) {
1332 		cmn_err(CE_WARN, "apic_calibrate measurements lack "
1333 		    "precision: %llu, %llu, %llu.",
1334 		    (u_longlong_t)measurements[median_idx - 1],
1335 		    (u_longlong_t)median,
1336 		    (u_longlong_t)measurements[median_idx + 1]);
1337 	}
1338 #endif
1339 
1340 	return (median);
1341 }
1342 
1343 /*
1344  * Initialise the APIC timer on the local APIC of CPU 0 to the desired
1345  * frequency.  Note at this stage in the boot sequence, the boot processor
1346  * is the only active processor.
1347  * hertz value of 0 indicates a one-shot mode request.  In this case
1348  * the function returns the resolution (in nanoseconds) for the hardware
1349  * timer interrupt.  If one-shot mode capability is not available,
1350  * the return value will be 0. apic_enable_oneshot is a global switch
1351  * for disabling the functionality.
1352  * A non-zero positive value for hertz indicates a periodic mode request.
1353  * In this case the hardware will be programmed to generate clock interrupts
1354  * at hertz frequency and returns the resolution of interrupts in
1355  * nanosecond.
1356  */
1357 
1358 int
apic_clkinit(int hertz)1359 apic_clkinit(int hertz)
1360 {
1361 	int		ret;
1362 
1363 	apic_int_busy_mark = (apic_int_busy_mark *
1364 	    apic_sample_factor_redistribution) / 100;
1365 	apic_int_free_mark = (apic_int_free_mark *
1366 	    apic_sample_factor_redistribution) / 100;
1367 	apic_diff_for_redistribution = (apic_diff_for_redistribution *
1368 	    apic_sample_factor_redistribution) / 100;
1369 
1370 	ret = apic_timer_init(hertz);
1371 	return (ret);
1372 
1373 }
1374 
/*
 * apic_preshutdown:
 * Called early in shutdown whilst we can still access filesystems to do
 * things like loading modules which will be required to complete shutdown
 * after filesystems are all unmounted.
 *
 * The body here only emits a trace through APIC_VERBOSE_POWEROFF showing
 * cmd, fcn, apic_poweroff_method and apic_enable_acpi.
 * NOTE(review): cmd and fcn are tagged __unused although the macro
 * references them -- presumably the macro can expand to nothing; confirm.
 */
void
apic_preshutdown(int cmd __unused, int fcn __unused)
{
	APIC_VERBOSE_POWEROFF(("apic_preshutdown(%d,%d); m=%d a=%d\n",
	    cmd, fcn, apic_poweroff_method, apic_enable_acpi));
}
1387 
1388 void
apic_shutdown(int cmd,int fcn)1389 apic_shutdown(int cmd, int fcn)
1390 {
1391 	int restarts, attempts;
1392 	int i;
1393 	uchar_t	byte;
1394 	ulong_t iflag;
1395 
1396 	hpet_acpi_fini();
1397 
1398 	/* Send NMI to all CPUs except self to do per processor shutdown */
1399 	iflag = intr_clear();
1400 #ifdef	DEBUG
1401 	APIC_AV_PENDING_SET();
1402 #else
1403 	if (apic_mode == LOCAL_APIC)
1404 		APIC_AV_PENDING_SET();
1405 #endif /* DEBUG */
1406 	apic_shutdown_processors = 1;
1407 	apic_reg_ops->apic_write(APIC_INT_CMD1,
1408 	    AV_NMI | AV_LEVEL | AV_SH_ALL_EXCSELF);
1409 
1410 	/* restore cmos shutdown byte before reboot */
1411 	if (apic_cmos_ssb_set) {
1412 		outb(CMOS_ADDR, SSB);
1413 		outb(CMOS_DATA, 0);
1414 	}
1415 
1416 	ioapic_disable_redirection();
1417 
1418 	/*	disable apic mode if imcr present	*/
1419 	if (apic_imcrp) {
1420 		outb(APIC_IMCR_P1, (uchar_t)APIC_IMCR_SELECT);
1421 		outb(APIC_IMCR_P2, (uchar_t)APIC_IMCR_PIC);
1422 	}
1423 
1424 	apic_disable_local_apic();
1425 
1426 	intr_restore(iflag);
1427 
1428 	/* remainder of function is for shutdown cases only */
1429 	if (cmd != A_SHUTDOWN)
1430 		return;
1431 
1432 	/*
1433 	 * Switch system back into Legacy-Mode if using ACPI and
1434 	 * not powering-off.  Some BIOSes need to remain in ACPI-mode
1435 	 * for power-off to succeed (Dell Dimension 4600)
1436 	 * Do not disable ACPI while doing fastreboot
1437 	 */
1438 	if (apic_enable_acpi && fcn != AD_POWEROFF && fcn != AD_FASTREBOOT)
1439 		(void) AcpiDisable();
1440 
1441 	if (fcn == AD_FASTREBOOT) {
1442 		apic_reg_ops->apic_write(APIC_INT_CMD1,
1443 		    AV_ASSERT | AV_RESET | AV_SH_ALL_EXCSELF);
1444 	}
1445 
1446 	/* remainder of function is for shutdown+poweroff case only */
1447 	if (fcn != AD_POWEROFF)
1448 		return;
1449 
1450 	switch (apic_poweroff_method) {
1451 		case APIC_POWEROFF_VIA_RTC:
1452 
1453 			/* select the extended NVRAM bank in the RTC */
1454 			outb(CMOS_ADDR, RTC_REGA);
1455 			byte = inb(CMOS_DATA);
1456 			outb(CMOS_DATA, (byte | EXT_BANK));
1457 
1458 			outb(CMOS_ADDR, PFR_REG);
1459 
1460 			/* for Predator must toggle the PAB bit */
1461 			byte = inb(CMOS_DATA);
1462 
1463 			/*
1464 			 * clear power active bar, wakeup alarm and
1465 			 * kickstart
1466 			 */
1467 			byte &= ~(PAB_CBIT | WF_FLAG | KS_FLAG);
1468 			outb(CMOS_DATA, byte);
1469 
1470 			/* delay before next write */
1471 			drv_usecwait(1000);
1472 
1473 			/* for S40 the following would suffice */
1474 			byte = inb(CMOS_DATA);
1475 
1476 			/* power active bar control bit */
1477 			byte |= PAB_CBIT;
1478 			outb(CMOS_DATA, byte);
1479 
1480 			break;
1481 
1482 		case APIC_POWEROFF_VIA_ASPEN_BMC:
1483 			restarts = 0;
1484 restart_aspen_bmc:
1485 			if (++restarts == 3)
1486 				break;
1487 			attempts = 0;
1488 			do {
1489 				byte = inb(MISMIC_FLAG_REGISTER);
1490 				byte &= MISMIC_BUSY_MASK;
1491 				if (byte != 0) {
1492 					drv_usecwait(1000);
1493 					if (attempts >= 3)
1494 						goto restart_aspen_bmc;
1495 					++attempts;
1496 				}
1497 			} while (byte != 0);
1498 			outb(MISMIC_CNTL_REGISTER, CC_SMS_GET_STATUS);
1499 			byte = inb(MISMIC_FLAG_REGISTER);
1500 			byte |= 0x1;
1501 			outb(MISMIC_FLAG_REGISTER, byte);
1502 			i = 0;
1503 			for (; i < (sizeof (aspen_bmc)/sizeof (aspen_bmc[0]));
1504 			    i++) {
1505 				attempts = 0;
1506 				do {
1507 					byte = inb(MISMIC_FLAG_REGISTER);
1508 					byte &= MISMIC_BUSY_MASK;
1509 					if (byte != 0) {
1510 						drv_usecwait(1000);
1511 						if (attempts >= 3)
1512 							goto restart_aspen_bmc;
1513 						++attempts;
1514 					}
1515 				} while (byte != 0);
1516 				outb(MISMIC_CNTL_REGISTER, aspen_bmc[i].cntl);
1517 				outb(MISMIC_DATA_REGISTER, aspen_bmc[i].data);
1518 				byte = inb(MISMIC_FLAG_REGISTER);
1519 				byte |= 0x1;
1520 				outb(MISMIC_FLAG_REGISTER, byte);
1521 			}
1522 			break;
1523 
1524 		case APIC_POWEROFF_VIA_SITKA_BMC:
1525 			restarts = 0;
1526 restart_sitka_bmc:
1527 			if (++restarts == 3)
1528 				break;
1529 			attempts = 0;
1530 			do {
1531 				byte = inb(SMS_STATUS_REGISTER);
1532 				byte &= SMS_STATE_MASK;
1533 				if ((byte == SMS_READ_STATE) ||
1534 				    (byte == SMS_WRITE_STATE)) {
1535 					drv_usecwait(1000);
1536 					if (attempts >= 3)
1537 						goto restart_sitka_bmc;
1538 					++attempts;
1539 				}
1540 			} while ((byte == SMS_READ_STATE) ||
1541 			    (byte == SMS_WRITE_STATE));
1542 			outb(SMS_COMMAND_REGISTER, SMS_GET_STATUS);
1543 			i = 0;
1544 			for (; i < (sizeof (sitka_bmc)/sizeof (sitka_bmc[0]));
1545 			    i++) {
1546 				attempts = 0;
1547 				do {
1548 					byte = inb(SMS_STATUS_REGISTER);
1549 					byte &= SMS_IBF_MASK;
1550 					if (byte != 0) {
1551 						drv_usecwait(1000);
1552 						if (attempts >= 3)
1553 							goto restart_sitka_bmc;
1554 						++attempts;
1555 					}
1556 				} while (byte != 0);
1557 				outb(sitka_bmc[i].port, sitka_bmc[i].data);
1558 			}
1559 			break;
1560 
1561 		case APIC_POWEROFF_NONE:
1562 
1563 			/* If no APIC direct method, we will try using ACPI */
1564 			if (apic_enable_acpi) {
1565 				if (acpi_poweroff() == 1)
1566 					return;
1567 			} else
1568 				return;
1569 
1570 			break;
1571 	}
1572 	/*
1573 	 * Wait a limited time here for power to go off.
1574 	 * If the power does not go off, then there was a
1575 	 * problem and we should continue to the halt which
1576 	 * prints a message for the user to press a key to
1577 	 * reboot.
1578 	 */
1579 	drv_usecwait(7000000); /* wait seven seconds */
1580 
1581 }
1582 
/*
 * NOTE(review): presumably the id of a cyclic registered by the APIC code;
 * no use of it is visible in this part of the file -- confirm its users.
 */
cyclic_id_t apic_cyclic_id;
1584 
1585 /*
1586  * The following functions are in the platform specific file so that they
1587  * can be different functions depending on whether we are running on
1588  * bare metal or a hypervisor.
1589  */
1590 
1591 /*
1592  * map an apic for memory-mapped access
1593  */
1594 uint32_t *
mapin_apic(uint32_t addr,size_t len,int flags)1595 mapin_apic(uint32_t addr, size_t len, int flags)
1596 {
1597 	return ((void *)psm_map_phys(addr, len, flags));
1598 }
1599 
1600 uint32_t *
mapin_ioapic(uint32_t addr,size_t len,int flags)1601 mapin_ioapic(uint32_t addr, size_t len, int flags)
1602 {
1603 	return (mapin_apic(addr, len, flags));
1604 }
1605 
/*
 * unmap an apic
 *
 * Hands the len-byte mapping at addr to psm_unmap_phys().
 */
void
mapout_apic(caddr_t addr, size_t len)
{
	psm_unmap_phys(addr, len);
}
1614 
/*
 * Unmap an I/O APIC mapping; simply forwards to mapout_apic().
 */
void
mapout_ioapic(caddr_t addr, size_t len)
{
	mapout_apic(addr, len);
}
1620 
1621 uint32_t
ioapic_read(int ioapic_ix,uint32_t reg)1622 ioapic_read(int ioapic_ix, uint32_t reg)
1623 {
1624 	volatile uint32_t *ioapic;
1625 
1626 	ioapic = apicioadr[ioapic_ix];
1627 	ioapic[APIC_IO_REG] = reg;
1628 	return (ioapic[APIC_IO_DATA]);
1629 }
1630 
1631 void
ioapic_write(int ioapic_ix,uint32_t reg,uint32_t value)1632 ioapic_write(int ioapic_ix, uint32_t reg, uint32_t value)
1633 {
1634 	volatile uint32_t *ioapic;
1635 
1636 	ioapic = apicioadr[ioapic_ix];
1637 	ioapic[APIC_IO_REG] = reg;
1638 	ioapic[APIC_IO_DATA] = value;
1639 }
1640 
1641 void
ioapic_write_eoi(int ioapic_ix,uint32_t value)1642 ioapic_write_eoi(int ioapic_ix, uint32_t value)
1643 {
1644 	volatile uint32_t *ioapic;
1645 
1646 	ioapic = apicioadr[ioapic_ix];
1647 	ioapic[APIC_IO_EOI] = value;
1648 }
1649 
1650 /*
1651  * Round-robin algorithm to find the next CPU with interrupts enabled.
1652  * It can't share the same static variable apic_next_bind_cpu with
1653  * apic_get_next_bind_cpu(), since that will cause all interrupts to be
1654  * bound to CPU1 at boot time.  During boot, only CPU0 is online with
1655  * interrupts enabled when apic_get_next_bind_cpu() and apic_find_cpu()
1656  * are called.  However, the pcplusmp driver assumes that there will be
1657  * boot_ncpus CPUs configured eventually so it tries to distribute all
1658  * interrupts among CPU0 - CPU[boot_ncpus - 1].  Thus to prevent all
1659  * interrupts being targetted at CPU1, we need to use a dedicated static
1660  * variable for find_next_cpu() instead of sharing apic_next_bind_cpu.
1661  */
1662 
1663 processorid_t
apic_find_cpu(int flag)1664 apic_find_cpu(int flag)
1665 {
1666 	int i;
1667 	static processorid_t acid = 0;
1668 
1669 	/* Find the first CPU with the passed-in flag set */
1670 	for (i = 0; i < apic_nproc; i++) {
1671 		if (++acid >= apic_nproc) {
1672 			acid = 0;
1673 		}
1674 		if (apic_cpu_in_range(acid) &&
1675 		    (apic_cpus[acid].aci_status & flag)) {
1676 			break;
1677 		}
1678 	}
1679 
1680 	ASSERT((apic_cpus[acid].aci_status & flag) != 0);
1681 	return (acid);
1682 }
1683 
1684 void
apic_intrmap_init(int apic_mode)1685 apic_intrmap_init(int apic_mode)
1686 {
1687 	int suppress_brdcst_eoi = 0;
1688 
1689 	/*
1690 	 * Intel Software Developer's Manual 3A, 10.12.7:
1691 	 *
1692 	 * Routing of device interrupts to local APIC units operating in
1693 	 * x2APIC mode requires use of the interrupt-remapping architecture
1694 	 * specified in the Intel Virtualization Technology for Directed
1695 	 * I/O, Revision 1.3.  Because of this, BIOS must enumerate support
1696 	 * for and software must enable this interrupt remapping with
1697 	 * Extended Interrupt Mode Enabled before it enabling x2APIC mode in
1698 	 * the local APIC units.
1699 	 *
1700 	 *
1701 	 * In other words, to use the APIC in x2APIC mode, we need interrupt
1702 	 * remapping.  Since we don't start up the IOMMU by default, we
1703 	 * won't be able to do any interrupt remapping and therefore have to
1704 	 * use the APIC in traditional 'local APIC' mode with memory mapped
1705 	 * I/O.
1706 	 */
1707 
1708 	if (psm_vt_ops != NULL) {
1709 		if (((apic_intrmap_ops_t *)psm_vt_ops)->
1710 		    apic_intrmap_init(apic_mode) == DDI_SUCCESS) {
1711 
1712 			apic_vt_ops = psm_vt_ops;
1713 
1714 			/*
1715 			 * We leverage the interrupt remapping engine to
1716 			 * suppress broadcast EOI; thus we must send the
1717 			 * directed EOI with the directed-EOI handler.
1718 			 */
1719 			if (apic_directed_EOI_supported() == 0) {
1720 				suppress_brdcst_eoi = 1;
1721 			}
1722 
1723 			apic_vt_ops->apic_intrmap_enable(suppress_brdcst_eoi);
1724 
1725 			if (apic_detect_x2apic()) {
1726 				apic_enable_x2apic();
1727 			}
1728 
1729 			if (apic_directed_EOI_supported() == 0) {
1730 				apic_set_directed_EOI_handler();
1731 			}
1732 		}
1733 	}
1734 }
1735 
1736 static void
apic_record_ioapic_rdt(void * intrmap_private __unused,ioapic_rdt_t * irdt)1737 apic_record_ioapic_rdt(void *intrmap_private __unused, ioapic_rdt_t *irdt)
1738 {
1739 	irdt->ir_hi <<= APIC_ID_BIT_OFFSET;
1740 }
1741 
1742 static void
apic_record_msi(void * intrmap_private __unused,msi_regs_t * mregs)1743 apic_record_msi(void *intrmap_private __unused, msi_regs_t *mregs)
1744 {
1745 	mregs->mr_addr = MSI_ADDR_HDR |
1746 	    (MSI_ADDR_RH_FIXED << MSI_ADDR_RH_SHIFT) |
1747 	    (MSI_ADDR_DM_PHYSICAL << MSI_ADDR_DM_SHIFT) |
1748 	    (mregs->mr_addr << MSI_ADDR_DEST_SHIFT);
1749 	mregs->mr_data = (MSI_DATA_TM_EDGE << MSI_DATA_TM_SHIFT) |
1750 	    mregs->mr_data;
1751 }
1752 
1753 /*
1754  * Functions from apic_introp.c
1755  *
1756  * Those functions are used by apic_intr_ops().
1757  */
1758 
/*
 * MSI support flag:
 * Reflects whether MSI is supported at the APIC level.
 * It can also be patched through /etc/system.
 *
 *  0 = default value - don't know and need to call apic_check_msi_support()
 *      to find out then set it accordingly
 *  1 = supported
 * -1 = not supported
 */
int	apic_support_msi = 0;

/* Multiple vector support for MSI-X (defaults to 1) */
int	apic_msix_enable = 1;

/* Multiple vector support for MSI (defaults to 1) */
int	apic_multi_msi_enable = 1;
1776 
1777 /*
1778  * Check whether the system supports MSI.
1779  *
1780  * MSI is required for PCI-E and for PCI versions later than 2.2, so if we find
1781  * a PCI-E bus or we find a PCI bus whose version we know is >= 2.2, then we
1782  * return PSM_SUCCESS to indicate this system supports MSI.
1783  *
1784  * (Currently the only way we check whether a given PCI bus supports >= 2.2 is
1785  * by detecting if we are running inside the KVM hypervisor, which guarantees
1786  * this version number.)
1787  */
1788 int
apic_check_msi_support()1789 apic_check_msi_support()
1790 {
1791 	dev_info_t *cdip;
1792 	char dev_type[16];
1793 	int dev_len;
1794 	int hwenv = get_hwenv();
1795 
1796 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support:\n"));
1797 
1798 	/*
1799 	 * check whether the first level children of root_node have
1800 	 * PCI-E or PCI capability.
1801 	 */
1802 	for (cdip = ddi_get_child(ddi_root_node()); cdip != NULL;
1803 	    cdip = ddi_get_next_sibling(cdip)) {
1804 
1805 		DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: cdip: 0x%p,"
1806 		    " driver: %s, binding: %s, nodename: %s\n", (void *)cdip,
1807 		    ddi_driver_name(cdip), ddi_binding_name(cdip),
1808 		    ddi_node_name(cdip)));
1809 		dev_len = sizeof (dev_type);
1810 		if (ddi_getlongprop_buf(DDI_DEV_T_ANY, cdip, DDI_PROP_DONTPASS,
1811 		    "device_type", (caddr_t)dev_type, &dev_len)
1812 		    != DDI_PROP_SUCCESS)
1813 			continue;
1814 		if (strcmp(dev_type, "pciex") == 0)
1815 			return (PSM_SUCCESS);
1816 		if (strcmp(dev_type, "pci") == 0 &&
1817 		    (hwenv == HW_KVM || hwenv == HW_BHYVE))
1818 			return (PSM_SUCCESS);
1819 	}
1820 
1821 	/* MSI is not supported on this system */
1822 	DDI_INTR_IMPLDBG((CE_CONT, "apic_check_msi_support: no 'pciex' "
1823 	    "device_type found\n"));
1824 	return (PSM_FAILURE);
1825 }
1826 
1827 /*
1828  * apic_pci_msi_unconfigure:
1829  *
1830  * This and next two interfaces are copied from pci_intr_lib.c
1831  * Do ensure that these two files stay in sync.
1832  * These needed to be copied over here to avoid a deadlock situation on
1833  * certain mp systems that use MSI interrupts.
1834  *
1835  * IMPORTANT regards next three interfaces:
1836  * i) are called only for MSI/X interrupts.
1837  * ii) called with interrupts disabled, and must not block
1838  */
1839 void
apic_pci_msi_unconfigure(dev_info_t * rdip,int type,int inum)1840 apic_pci_msi_unconfigure(dev_info_t *rdip, int type, int inum)
1841 {
1842 	ushort_t		msi_ctrl;
1843 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1844 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1845 
1846 	ASSERT((handle != NULL) && (cap_ptr != 0));
1847 
1848 	if (type == DDI_INTR_TYPE_MSI) {
1849 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1850 		msi_ctrl &= (~PCI_MSI_MME_MASK);
1851 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1852 		pci_config_put32(handle, cap_ptr + PCI_MSI_ADDR_OFFSET, 0);
1853 
1854 		if (msi_ctrl &  PCI_MSI_64BIT_MASK) {
1855 			pci_config_put16(handle,
1856 			    cap_ptr + PCI_MSI_64BIT_DATA, 0);
1857 			pci_config_put32(handle,
1858 			    cap_ptr + PCI_MSI_ADDR_OFFSET + 4, 0);
1859 		} else {
1860 			pci_config_put16(handle,
1861 			    cap_ptr + PCI_MSI_32BIT_DATA, 0);
1862 		}
1863 
1864 	} else if (type == DDI_INTR_TYPE_MSIX) {
1865 		uintptr_t	off;
1866 		uint32_t	mask;
1867 		ddi_intr_msix_t	*msix_p = i_ddi_get_msix(rdip);
1868 
1869 		ASSERT(msix_p != NULL);
1870 
1871 		/* Offset into "inum"th entry in the MSI-X table & mask it */
1872 		off = (uintptr_t)msix_p->msix_tbl_addr + (inum *
1873 		    PCI_MSIX_VECTOR_SIZE) + PCI_MSIX_VECTOR_CTRL_OFFSET;
1874 
1875 		mask = ddi_get32(msix_p->msix_tbl_hdl, (uint32_t *)off);
1876 
1877 		ddi_put32(msix_p->msix_tbl_hdl, (uint32_t *)off, (mask | 1));
1878 
1879 		/* Offset into the "inum"th entry in the MSI-X table */
1880 		off = (uintptr_t)msix_p->msix_tbl_addr +
1881 		    (inum * PCI_MSIX_VECTOR_SIZE);
1882 
1883 		/* Reset the "data" and "addr" bits */
1884 		ddi_put32(msix_p->msix_tbl_hdl,
1885 		    (uint32_t *)(off + PCI_MSIX_DATA_OFFSET), 0);
1886 		ddi_put64(msix_p->msix_tbl_hdl, (uint64_t *)off, 0);
1887 	}
1888 }
1889 
1890 /*
1891  * apic_pci_msi_disable_mode:
1892  */
1893 void
apic_pci_msi_disable_mode(dev_info_t * rdip,int type)1894 apic_pci_msi_disable_mode(dev_info_t *rdip, int type)
1895 {
1896 	ushort_t		msi_ctrl;
1897 	int			cap_ptr = i_ddi_get_msi_msix_cap_ptr(rdip);
1898 	ddi_acc_handle_t	handle = i_ddi_get_pci_config_handle(rdip);
1899 
1900 	ASSERT((handle != NULL) && (cap_ptr != 0));
1901 
1902 	if (type == DDI_INTR_TYPE_MSI) {
1903 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSI_CTRL);
1904 		if (!(msi_ctrl & PCI_MSI_ENABLE_BIT))
1905 			return;
1906 
1907 		msi_ctrl &= ~PCI_MSI_ENABLE_BIT;	/* MSI disable */
1908 		pci_config_put16(handle, cap_ptr + PCI_MSI_CTRL, msi_ctrl);
1909 
1910 	} else if (type == DDI_INTR_TYPE_MSIX) {
1911 		msi_ctrl = pci_config_get16(handle, cap_ptr + PCI_MSIX_CTRL);
1912 		if (msi_ctrl & PCI_MSIX_ENABLE_BIT) {
1913 			msi_ctrl &= ~PCI_MSIX_ENABLE_BIT;
1914 			pci_config_put16(handle, cap_ptr + PCI_MSIX_CTRL,
1915 			    msi_ctrl);
1916 		}
1917 	}
1918 }
1919 
1920 uint32_t
apic_get_localapicid(uint32_t cpuid)1921 apic_get_localapicid(uint32_t cpuid)
1922 {
1923 	ASSERT(cpuid < apic_nproc && apic_cpus != NULL);
1924 
1925 	return (apic_cpus[cpuid].aci_local_id);
1926 }
1927 
1928 uchar_t
apic_get_ioapicid(uchar_t ioapicindex)1929 apic_get_ioapicid(uchar_t ioapicindex)
1930 {
1931 	ASSERT(ioapicindex < MAX_IO_APIC);
1932 
1933 	return (apic_io_id[ioapicindex]);
1934 }
1935