xref: /illumos-gate/usr/src/uts/intel/io/vmm/io/vhpet.c (revision d515dd77)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
5  * Copyright (c) 2013 Neel Natu <neel@freebsd.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  * $FreeBSD$
30  */
31 
32 /*
33  * Copyright 2018 Joyent, Inc.
34  */
35 
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 
39 #include <sys/param.h>
40 #include <sys/mutex.h>
41 #include <sys/kernel.h>
42 #include <sys/kmem.h>
43 #include <sys/systm.h>
44 
45 #include <dev/acpica/acpi_hpet.h>
46 
47 #include <machine/vmm.h>
48 #include <machine/vmm_dev.h>
49 
50 #include "vmm_lapic.h"
51 #include "vatpic.h"
52 #include "vioapic.h"
53 #include "vhpet.h"
54 
55 
#define	HPET_FREQ	16777216		/* 16.7 (2^24) MHz */
#define	FS_PER_S	1000000000000000ul	/* femtoseconds per second */

/*
 * Timer N Configuration and Capabilities Register
 *
 * Bits of the per-timer register that are preserved across guest writes
 * (see vhpet_timer_update_config()).
 */
#define	HPET_TCAP_RO_MASK	(HPET_TCAP_INT_ROUTE	|	\
				HPET_TCAP_FSB_INT_DEL	|	\
				HPET_TCAP_SIZE		|	\
				HPET_TCAP_PER_INT)
/*
 * HPET requires at least 3 timers and up to 32 timers per block.
 * The compile-time assertion below enforces that range.
 */
#define	VHPET_NUM_TIMERS	8
CTASSERT(VHPET_NUM_TIMERS >= 3 && VHPET_NUM_TIMERS <= 32);
69 
/*
 * Argument handed to vhpet_handler() when a timer's callout fires.  It
 * identifies which timer of which vhpet instance the callout belongs to.
 */
struct vhpet_callout_arg {
	struct vhpet *vhpet;	/* owning virtual HPET */
	int timer_num;		/* index into vhpet->timer[] */
};
74 
/*
 * State for a single HPET timer (comparator).
 */
struct vhpet_timer {
	uint64_t	cap_config;	/* Configuration */
	/*
	 * FSB interrupt routing: the upper and lower 32-bit halves are passed
	 * separately to lapic_intr_msi() when the timer fires in MSI mode.
	 */
	uint64_t	msireg;		/* FSB interrupt routing */
	uint32_t	compval;	/* Comparator */
	/*
	 * Period (in counter ticks) added to 'compval' for periodic timers.
	 * A value of zero is treated as one-shot mode.
	 */
	uint32_t	comprate;
	struct callout	callout;	/* fires vhpet_handler() */
	hrtime_t	callout_expire;	/* time when counter==compval */
	struct vhpet_callout_arg arg;	/* argument passed to vhpet_handler() */
};
84 
/*
 * Per-VM virtual HPET instance.  The register handlers and the callout
 * handler in this file take 'lock' before touching the fields below.
 */
struct vhpet {
	struct vm	*vm;		/* VM this HPET belongs to */
	kmutex_t	lock;		/* see VHPET_LOCK()/VHPET_UNLOCK() */

	uint64_t	config;		/* Configuration */
	uint64_t	isr;		/* Interrupt Status */
	uint32_t	base_count;	/* HPET counter base value */
	hrtime_t	base_time;	/* uptime corresponding to base value */

	struct vhpet_timer timer[VHPET_NUM_TIMERS];
};
96 
/* Acquire/release the mutex embedded in 'struct vhpet'. */
#define	VHPET_LOCK(vhp)		mutex_enter(&((vhp)->lock))
#define	VHPET_UNLOCK(vhp)	mutex_exit(&((vhp)->lock))

/*
 * Forward declaration: vhpet_handler() re-arms the timer via
 * vhpet_start_timer(), which is defined after it.
 */
static void vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter,
    hrtime_t now);
102 
103 static uint64_t
104 vhpet_capabilities(void)
105 {
106 	uint64_t cap = 0;
107 
108 	cap |= 0x8086 << 16;			/* vendor id */
109 	cap |= (VHPET_NUM_TIMERS - 1) << 8;	/* number of timers */
110 	cap |= 1;				/* revision */
111 	cap &= ~HPET_CAP_COUNT_SIZE;		/* 32-bit timer */
112 
113 	cap &= 0xffffffff;
114 	cap |= (FS_PER_S / HPET_FREQ) << 32;	/* tick period in fs */
115 
116 	return (cap);
117 }
118 
119 static __inline bool
120 vhpet_counter_enabled(struct vhpet *vhpet)
121 {
122 
123 	return ((vhpet->config & HPET_CNF_ENABLE) ? true : false);
124 }
125 
126 static __inline bool
127 vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
128 {
129 	const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
130 
131 	if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
132 		return (true);
133 	else
134 		return (false);
135 }
136 
137 static __inline int
138 vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
139 {
140 	/*
141 	 * If the timer is configured to use MSI then treat it as if the
142 	 * timer is not connected to the ioapic.
143 	 */
144 	if (vhpet_timer_msi_enabled(vhpet, n))
145 		return (0);
146 
147 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
148 }
149 
150 static uint32_t
151 vhpet_counter(struct vhpet *vhpet, hrtime_t *nowptr)
152 {
153 	const hrtime_t now = gethrtime();
154 	uint32_t val = vhpet->base_count;
155 
156 	if (vhpet_counter_enabled(vhpet)) {
157 		const hrtime_t delta = now - vhpet->base_time;
158 
159 		ASSERT3S(delta, >=, 0);
160 		val += hrt_freq_count(delta, HPET_FREQ);
161 	} else {
162 		/* Value of the counter is meaningless when it is disabled */
163 	}
164 
165 	if (nowptr != NULL) {
166 		*nowptr = now;
167 	}
168 	return (val);
169 }
170 
171 static void
172 vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
173 {
174 	int pin;
175 
176 	if (vhpet->isr & (1 << n)) {
177 		pin = vhpet_timer_ioapic_pin(vhpet, n);
178 		KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
179 		(void) vioapic_deassert_irq(vhpet->vm, pin);
180 		vhpet->isr &= ~(1 << n);
181 	}
182 }
183 
184 static __inline bool
185 vhpet_periodic_timer(struct vhpet *vhpet, int n)
186 {
187 
188 	return ((vhpet->timer[n].cap_config & HPET_TCNF_TYPE) != 0);
189 }
190 
191 static __inline bool
192 vhpet_timer_interrupt_enabled(struct vhpet *vhpet, int n)
193 {
194 
195 	return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ENB) != 0);
196 }
197 
198 static __inline bool
199 vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
200 {
201 
202 	KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
203 	    "timer %d is using MSI", n));
204 
205 	if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
206 		return (true);
207 	else
208 		return (false);
209 }
210 
211 static void
212 vhpet_timer_interrupt(struct vhpet *vhpet, int n)
213 {
214 	int pin;
215 
216 	/* If interrupts are not enabled for this timer then just return. */
217 	if (!vhpet_timer_interrupt_enabled(vhpet, n))
218 		return;
219 
220 	/*
221 	 * If a level triggered interrupt is already asserted then just return.
222 	 */
223 	if ((vhpet->isr & (1 << n)) != 0) {
224 		return;
225 	}
226 
227 	if (vhpet_timer_msi_enabled(vhpet, n)) {
228 		(void) lapic_intr_msi(vhpet->vm, vhpet->timer[n].msireg >> 32,
229 		    vhpet->timer[n].msireg & 0xffffffff);
230 		return;
231 	}
232 
233 	pin = vhpet_timer_ioapic_pin(vhpet, n);
234 	if (pin == 0) {
235 		/* Interrupt is not routed to IOAPIC */
236 		return;
237 	}
238 
239 	if (vhpet_timer_edge_trig(vhpet, n)) {
240 		(void) vioapic_pulse_irq(vhpet->vm, pin);
241 	} else {
242 		vhpet->isr |= 1 << n;
243 		(void) vioapic_assert_irq(vhpet->vm, pin);
244 	}
245 }
246 
247 static void
248 vhpet_adjust_compval(struct vhpet *vhpet, int n, uint32_t counter)
249 {
250 	uint32_t compval, comprate, compnext;
251 
252 	KASSERT(vhpet->timer[n].comprate != 0, ("hpet t%d is not periodic", n));
253 
254 	compval = vhpet->timer[n].compval;
255 	comprate = vhpet->timer[n].comprate;
256 
257 	/*
258 	 * Calculate the comparator value to be used for the next periodic
259 	 * interrupt.
260 	 *
261 	 * This function is commonly called from the callout handler.
262 	 * In this scenario the 'counter' is ahead of 'compval'. To find
263 	 * the next value to program into the accumulator we divide the
264 	 * number space between 'compval' and 'counter' into 'comprate'
265 	 * sized units. The 'compval' is rounded up such that is "ahead"
266 	 * of 'counter'.
267 	 */
268 	compnext = compval + ((counter - compval) / comprate + 1) * comprate;
269 
270 	vhpet->timer[n].compval = compnext;
271 }
272 
273 static void
274 vhpet_handler(void *a)
275 {
276 	int n;
277 	uint32_t counter;
278 	hrtime_t now;
279 	struct vhpet *vhpet;
280 	struct callout *callout;
281 	struct vhpet_callout_arg *arg;
282 
283 	arg = a;
284 	vhpet = arg->vhpet;
285 	n = arg->timer_num;
286 	callout = &vhpet->timer[n].callout;
287 
288 	VHPET_LOCK(vhpet);
289 
290 	if (callout_pending(callout))		/* callout was reset */
291 		goto done;
292 
293 	if (!callout_active(callout))		/* callout was stopped */
294 		goto done;
295 
296 	callout_deactivate(callout);
297 
298 	if (!vhpet_counter_enabled(vhpet))
299 		panic("vhpet(%p) callout with counter disabled", vhpet);
300 
301 	counter = vhpet_counter(vhpet, &now);
302 	vhpet_start_timer(vhpet, n, counter, now);
303 	vhpet_timer_interrupt(vhpet, n);
304 done:
305 	VHPET_UNLOCK(vhpet);
306 }
307 
308 static void
309 vhpet_stop_timer(struct vhpet *vhpet, int n, hrtime_t now)
310 {
311 	callout_stop(&vhpet->timer[n].callout);
312 
313 	/*
314 	 * If the callout was scheduled to expire in the past but hasn't
315 	 * had a chance to execute yet then trigger the timer interrupt
316 	 * here. Failing to do so will result in a missed timer interrupt
317 	 * in the guest. This is especially bad in one-shot mode because
318 	 * the next interrupt has to wait for the counter to wrap around.
319 	 */
320 	if (vhpet->timer[n].callout_expire < now) {
321 		vhpet_timer_interrupt(vhpet, n);
322 	}
323 }
324 
325 static void
326 vhpet_start_timer(struct vhpet *vhpet, int n, uint32_t counter, hrtime_t now)
327 {
328 	struct vhpet_timer *timer = &vhpet->timer[n];
329 
330 	if (timer->comprate != 0)
331 		vhpet_adjust_compval(vhpet, n, counter);
332 	else {
333 		/*
334 		 * In one-shot mode it is the guest's responsibility to make
335 		 * sure that the comparator value is not in the "past". The
336 		 * hardware doesn't have any belt-and-suspenders to deal with
337 		 * this so we don't either.
338 		 */
339 	}
340 
341 	const hrtime_t delta = hrt_freq_interval(HPET_FREQ,
342 	    timer->compval - counter);
343 	timer->callout_expire = now + delta;
344 	callout_reset_hrtime(&timer->callout, timer->callout_expire,
345 	    vhpet_handler, &timer->arg, C_ABSOLUTE);
346 }
347 
348 static void
349 vhpet_start_counting(struct vhpet *vhpet)
350 {
351 	int i;
352 
353 	vhpet->base_time = gethrtime();
354 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
355 		/*
356 		 * Restart the timers based on the value of the main counter
357 		 * when it stopped counting.
358 		 */
359 		vhpet_start_timer(vhpet, i, vhpet->base_count,
360 		    vhpet->base_time);
361 	}
362 }
363 
364 static void
365 vhpet_stop_counting(struct vhpet *vhpet, uint32_t counter, hrtime_t now)
366 {
367 	int i;
368 
369 	vhpet->base_count = counter;
370 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
371 		vhpet_stop_timer(vhpet, i, now);
372 }
373 
/*
 * Replace the bits of '*regptr' selected by 'mask' with the corresponding
 * bits of 'data'.  Bits outside of 'mask' are left untouched.
 */
static __inline void
update_register(uint64_t *regptr, uint64_t data, uint64_t mask)
{
	const uint64_t kept = *regptr & ~mask;
	const uint64_t replaced = data & mask;

	*regptr = kept | replaced;
}
381 
382 static void
383 vhpet_timer_update_config(struct vhpet *vhpet, int n, uint64_t data,
384     uint64_t mask)
385 {
386 	bool clear_isr;
387 	int old_pin, new_pin;
388 	uint32_t allowed_irqs;
389 	uint64_t oldval, newval;
390 
391 	if (vhpet_timer_msi_enabled(vhpet, n) ||
392 	    vhpet_timer_edge_trig(vhpet, n)) {
393 		if (vhpet->isr & (1 << n))
394 			panic("vhpet timer %d isr should not be asserted", n);
395 	}
396 	old_pin = vhpet_timer_ioapic_pin(vhpet, n);
397 	oldval = vhpet->timer[n].cap_config;
398 
399 	newval = oldval;
400 	update_register(&newval, data, mask);
401 	newval &= ~(HPET_TCAP_RO_MASK | HPET_TCNF_32MODE);
402 	newval |= oldval & HPET_TCAP_RO_MASK;
403 
404 	if (newval == oldval)
405 		return;
406 
407 	vhpet->timer[n].cap_config = newval;
408 
409 	/*
410 	 * Validate the interrupt routing in the HPET_TCNF_INT_ROUTE field.
411 	 * If it does not match the bits set in HPET_TCAP_INT_ROUTE then set
412 	 * it to the default value of 0.
413 	 */
414 	allowed_irqs = vhpet->timer[n].cap_config >> 32;
415 	new_pin = vhpet_timer_ioapic_pin(vhpet, n);
416 	if (new_pin != 0 && (allowed_irqs & (1 << new_pin)) == 0) {
417 		/* Invalid IRQ configured */
418 		new_pin = 0;
419 		vhpet->timer[n].cap_config &= ~HPET_TCNF_INT_ROUTE;
420 	}
421 
422 	if (!vhpet_periodic_timer(vhpet, n))
423 		vhpet->timer[n].comprate = 0;
424 
425 	/*
426 	 * If the timer's ISR bit is set then clear it in the following cases:
427 	 * - interrupt is disabled
428 	 * - interrupt type is changed from level to edge or fsb.
429 	 * - interrupt routing is changed
430 	 *
431 	 * This is to ensure that this timer's level triggered interrupt does
432 	 * not remain asserted forever.
433 	 */
434 	if (vhpet->isr & (1 << n)) {
435 		KASSERT(old_pin != 0, ("timer %d isr asserted to ioapic pin %d",
436 		    n, old_pin));
437 		if (!vhpet_timer_interrupt_enabled(vhpet, n))
438 			clear_isr = true;
439 		else if (vhpet_timer_msi_enabled(vhpet, n))
440 			clear_isr = true;
441 		else if (vhpet_timer_edge_trig(vhpet, n))
442 			clear_isr = true;
443 		else if (vhpet_timer_ioapic_pin(vhpet, n) != old_pin)
444 			clear_isr = true;
445 		else
446 			clear_isr = false;
447 
448 		if (clear_isr) {
449 			(void) vioapic_deassert_irq(vhpet->vm, old_pin);
450 			vhpet->isr &= ~(1 << n);
451 		}
452 	}
453 }
454 
455 int
456 vhpet_mmio_write(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t val,
457     int size)
458 {
459 	struct vhpet *vhpet;
460 	uint64_t data, mask, oldval, val64;
461 	uint32_t isr_clear_mask, old_compval, old_comprate, counter;
462 	hrtime_t now;
463 	int i, offset;
464 
465 	vhpet = vm_hpet(vm);
466 	offset = gpa - VHPET_BASE;
467 
468 	VHPET_LOCK(vhpet);
469 
470 	/* Accesses to the HPET should be 4 or 8 bytes wide */
471 	switch (size) {
472 	case 8:
473 		mask = 0xffffffffffffffff;
474 		data = val;
475 		break;
476 	case 4:
477 		mask = 0xffffffff;
478 		data = val;
479 		if ((offset & 0x4) != 0) {
480 			mask <<= 32;
481 			data <<= 32;
482 		}
483 		break;
484 	default:
485 		/* Invalid MMIO write */
486 		goto done;
487 	}
488 
489 	/* Access to the HPET should be naturally aligned to its width */
490 	if (offset & (size - 1)) {
491 		goto done;
492 	}
493 
494 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
495 		/*
496 		 * Get the most recent value of the counter before updating
497 		 * the 'config' register. If the HPET is going to be disabled
498 		 * then we need to update 'base_count' with the value right
499 		 * before it is disabled.
500 		 */
501 		counter = vhpet_counter(vhpet, &now);
502 		oldval = vhpet->config;
503 		update_register(&vhpet->config, data, mask);
504 
505 		/*
506 		 * LegacyReplacement Routing is not supported so clear the
507 		 * bit explicitly.
508 		 */
509 		vhpet->config &= ~HPET_CNF_LEG_RT;
510 
511 		if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
512 			if (vhpet_counter_enabled(vhpet)) {
513 				vhpet_start_counting(vhpet);
514 			} else {
515 				vhpet_stop_counting(vhpet, counter, now);
516 			}
517 		}
518 		goto done;
519 	}
520 
521 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
522 		isr_clear_mask = vhpet->isr & data;
523 		for (i = 0; i < VHPET_NUM_TIMERS; i++) {
524 			if ((isr_clear_mask & (1 << i)) != 0) {
525 				vhpet_timer_clear_isr(vhpet, i);
526 			}
527 		}
528 		goto done;
529 	}
530 
531 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
532 		/* Zero-extend the counter to 64-bits before updating it */
533 		val64 = vhpet_counter(vhpet, NULL);
534 		update_register(&val64, data, mask);
535 		vhpet->base_count = val64;
536 		if (vhpet_counter_enabled(vhpet))
537 			vhpet_start_counting(vhpet);
538 		goto done;
539 	}
540 
541 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
542 		if (offset == HPET_TIMER_CAP_CNF(i) ||
543 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
544 			vhpet_timer_update_config(vhpet, i, data, mask);
545 			break;
546 		}
547 
548 		if (offset == HPET_TIMER_COMPARATOR(i) ||
549 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
550 			old_compval = vhpet->timer[i].compval;
551 			old_comprate = vhpet->timer[i].comprate;
552 			if (vhpet_periodic_timer(vhpet, i)) {
553 				/*
554 				 * In periodic mode writes to the comparator
555 				 * change the 'compval' register only if the
556 				 * HPET_TCNF_VAL_SET bit is set in the config
557 				 * register.
558 				 */
559 				val64 = vhpet->timer[i].comprate;
560 				update_register(&val64, data, mask);
561 				vhpet->timer[i].comprate = val64;
562 				if ((vhpet->timer[i].cap_config &
563 				    HPET_TCNF_VAL_SET) != 0) {
564 					vhpet->timer[i].compval = val64;
565 				}
566 			} else {
567 				KASSERT(vhpet->timer[i].comprate == 0,
568 				    ("vhpet one-shot timer %d has invalid "
569 				    "rate %u", i, vhpet->timer[i].comprate));
570 				val64 = vhpet->timer[i].compval;
571 				update_register(&val64, data, mask);
572 				vhpet->timer[i].compval = val64;
573 			}
574 			vhpet->timer[i].cap_config &= ~HPET_TCNF_VAL_SET;
575 
576 			if (vhpet->timer[i].compval != old_compval ||
577 			    vhpet->timer[i].comprate != old_comprate) {
578 				if (vhpet_counter_enabled(vhpet)) {
579 					counter = vhpet_counter(vhpet, &now);
580 					vhpet_start_timer(vhpet, i, counter,
581 					    now);
582 				}
583 			}
584 			break;
585 		}
586 
587 		if (offset == HPET_TIMER_FSB_VAL(i) ||
588 		    offset == HPET_TIMER_FSB_ADDR(i)) {
589 			update_register(&vhpet->timer[i].msireg, data, mask);
590 			break;
591 		}
592 	}
593 done:
594 	VHPET_UNLOCK(vhpet);
595 	return (0);
596 }
597 
598 int
599 vhpet_mmio_read(struct vm *vm, int vcpuid, uint64_t gpa, uint64_t *rval,
600     int size)
601 {
602 	int i, offset;
603 	struct vhpet *vhpet;
604 	uint64_t data;
605 
606 	vhpet = vm_hpet(vm);
607 	offset = gpa - VHPET_BASE;
608 
609 	VHPET_LOCK(vhpet);
610 
611 	/* Accesses to the HPET should be 4 or 8 bytes wide */
612 	if (size != 4 && size != 8) {
613 		data = 0;
614 		goto done;
615 	}
616 
617 	/* Access to the HPET should be naturally aligned to its width */
618 	if (offset & (size - 1)) {
619 		data = 0;
620 		goto done;
621 	}
622 
623 	if (offset == HPET_CAPABILITIES || offset == HPET_CAPABILITIES + 4) {
624 		data = vhpet_capabilities();
625 		goto done;
626 	}
627 
628 	if (offset == HPET_CONFIG || offset == HPET_CONFIG + 4) {
629 		data = vhpet->config;
630 		goto done;
631 	}
632 
633 	if (offset == HPET_ISR || offset == HPET_ISR + 4) {
634 		data = vhpet->isr;
635 		goto done;
636 	}
637 
638 	if (offset == HPET_MAIN_COUNTER || offset == HPET_MAIN_COUNTER + 4) {
639 		data = vhpet_counter(vhpet, NULL);
640 		goto done;
641 	}
642 
643 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
644 		if (offset == HPET_TIMER_CAP_CNF(i) ||
645 		    offset == HPET_TIMER_CAP_CNF(i) + 4) {
646 			data = vhpet->timer[i].cap_config;
647 			break;
648 		}
649 
650 		if (offset == HPET_TIMER_COMPARATOR(i) ||
651 		    offset == HPET_TIMER_COMPARATOR(i) + 4) {
652 			data = vhpet->timer[i].compval;
653 			break;
654 		}
655 
656 		if (offset == HPET_TIMER_FSB_VAL(i) ||
657 		    offset == HPET_TIMER_FSB_ADDR(i)) {
658 			data = vhpet->timer[i].msireg;
659 			break;
660 		}
661 	}
662 
663 	if (i >= VHPET_NUM_TIMERS)
664 		data = 0;
665 done:
666 	VHPET_UNLOCK(vhpet);
667 
668 	if (size == 4) {
669 		if (offset & 0x4)
670 			data >>= 32;
671 	}
672 	*rval = data;
673 	return (0);
674 }
675 
676 struct vhpet *
677 vhpet_init(struct vm *vm)
678 {
679 	int i, pincount;
680 	struct vhpet *vhpet;
681 	uint64_t allowed_irqs;
682 	struct vhpet_callout_arg *arg;
683 
684 	vhpet = kmem_zalloc(sizeof (struct vhpet), KM_SLEEP);
685 	vhpet->vm = vm;
686 	mutex_init(&vhpet->lock, NULL, MUTEX_ADAPTIVE, NULL);
687 
688 	pincount = vioapic_pincount(vm);
689 	if (pincount >= 32)
690 		allowed_irqs = 0xff000000;	/* irqs 24-31 */
691 	else if (pincount >= 20)
692 		allowed_irqs = 0xf << (pincount - 4);	/* 4 upper irqs */
693 	else
694 		allowed_irqs = 0;
695 
696 	/*
697 	 * Initialize HPET timer hardware state.
698 	 */
699 	for (i = 0; i < VHPET_NUM_TIMERS; i++) {
700 		vhpet->timer[i].cap_config = allowed_irqs << 32;
701 		vhpet->timer[i].cap_config |= HPET_TCAP_PER_INT;
702 		vhpet->timer[i].cap_config |= HPET_TCAP_FSB_INT_DEL;
703 
704 		vhpet->timer[i].compval = 0xffffffff;
705 		callout_init(&vhpet->timer[i].callout, 1);
706 
707 		arg = &vhpet->timer[i].arg;
708 		arg->vhpet = vhpet;
709 		arg->timer_num = i;
710 	}
711 
712 	return (vhpet);
713 }
714 
715 void
716 vhpet_cleanup(struct vhpet *vhpet)
717 {
718 	int i;
719 
720 	for (i = 0; i < VHPET_NUM_TIMERS; i++)
721 		callout_drain(&vhpet->timer[i].callout);
722 
723 	mutex_destroy(&vhpet->lock);
724 	kmem_free(vhpet, sizeof (*vhpet));
725 }
726 
727 int
728 vhpet_getcap(struct vm_hpet_cap *cap)
729 {
730 
731 	cap->capabilities = vhpet_capabilities();
732 	return (0);
733 }
734 void
735 vhpet_localize_resources(struct vhpet *vhpet)
736 {
737 	for (uint_t i = 0; i < VHPET_NUM_TIMERS; i++) {
738 		vmm_glue_callout_localize(&vhpet->timer[i].callout);
739 	}
740 }
741 
742 static int
743 vhpet_data_read(void *datap, const vmm_data_req_t *req)
744 {
745 	VERIFY3U(req->vdr_class, ==, VDC_HPET);
746 	VERIFY3U(req->vdr_version, ==, 1);
747 	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_hpet_v1));
748 
749 	struct vhpet *vhpet = datap;
750 	struct vdi_hpet_v1 *out = req->vdr_data;
751 
752 	VHPET_LOCK(vhpet);
753 	out->vh_config = vhpet->config;
754 	out->vh_isr = vhpet->isr;
755 	out->vh_count_base = vhpet->base_count;
756 	out->vh_time_base = vm_normalize_hrtime(vhpet->vm, vhpet->base_time);
757 	for (uint_t i = 0; i < 8; i++) {
758 		const struct vhpet_timer *timer = &vhpet->timer[i];
759 		struct vdi_hpet_timer_v1 *timer_out = &out->vh_timers[i];
760 
761 		timer_out->vht_config = timer->cap_config;
762 		timer_out->vht_msi = timer->msireg;
763 		timer_out->vht_comp_val = timer->compval;
764 		timer_out->vht_comp_rate = timer->comprate;
765 		if (callout_pending(&timer->callout)) {
766 			timer_out->vht_time_target =
767 			    vm_normalize_hrtime(vhpet->vm,
768 			    timer->callout_expire);
769 		} else {
770 			timer_out->vht_time_target = 0;
771 		}
772 	}
773 	VHPET_UNLOCK(vhpet);
774 
775 	return (0);
776 }
777 
/*
 * Result of vhpet_data_validate(), identifying the first check that an
 * imported HPET payload failed.
 */
enum vhpet_validation_error {
	VVE_OK,			/* payload passed all checks */
	VVE_BAD_CONFIG,		/* LegacyReplacement routing requested */
	VVE_BAD_BASE_TIME,	/* base time lies in the future */
	VVE_BAD_ISR,		/* ISR bit set for a non-existent timer */
	VVE_BAD_TIMER_CONFIG,	/* level-triggered MSI or invalid irq route */
	VVE_BAD_TIMER_ISR,	/* ISR asserted for a non-level timer */
	VVE_BAD_TIMER_TIME,	/* timer target earlier than the base time */
};
787 
788 static enum vhpet_validation_error
789 vhpet_data_validate(const vmm_data_req_t *req, struct vm *vm)
790 {
791 	ASSERT(req->vdr_version == 1 &&
792 	    req->vdr_len == sizeof (struct vdi_hpet_v1));
793 	const struct vdi_hpet_v1 *src = req->vdr_data;
794 
795 	/* LegacyReplacement Routing is not supported */
796 	if ((src->vh_config & HPET_CNF_LEG_RT) != 0) {
797 		return (VVE_BAD_CONFIG);
798 	}
799 
800 	/* A base time in the future makes no sense */
801 	const hrtime_t base_time = vm_denormalize_hrtime(vm, src->vh_time_base);
802 	if (base_time > gethrtime()) {
803 		return (VVE_BAD_BASE_TIME);
804 	}
805 
806 	/* All asserted ISRs must be associated with an existing timer */
807 	if ((src->vh_isr & ~(uint64_t)((1 << VHPET_NUM_TIMERS) - 1)) != 0) {
808 		return (VVE_BAD_ISR);
809 	}
810 
811 	for (uint_t i = 0; i < 8; i++) {
812 		const struct vdi_hpet_timer_v1 *timer = &src->vh_timers[i];
813 
814 		const bool msi_enabled =
815 		    (timer->vht_config & HPET_TCNF_FSB_EN) != 0;
816 		const bool level_triggered =
817 		    (timer->vht_config & HPET_TCNF_INT_TYPE) != 0;
818 		const bool irq_asserted = (src->vh_isr & (1 << i)) != 0;
819 		const uint32_t allowed_irqs = (timer->vht_config >> 32);
820 		const uint32_t irq_pin =
821 		    (timer->vht_config & HPET_TCNF_INT_ROUTE) >> 9;
822 
823 		if (msi_enabled) {
824 			if (level_triggered) {
825 				return (VVE_BAD_TIMER_CONFIG);
826 			}
827 		} else {
828 			/*
829 			 * Ensure interrupt route is valid as ensured by the
830 			 * logic in vhpet_timer_update_config.
831 			 */
832 			if (irq_pin != 0 &&
833 			    (allowed_irqs & (1 << irq_pin)) == 0) {
834 				return (VVE_BAD_TIMER_CONFIG);
835 			}
836 		}
837 		if (irq_asserted && !level_triggered) {
838 			return (VVE_BAD_TIMER_ISR);
839 		}
840 
841 		if (timer->vht_time_target != 0) {
842 			/*
843 			 * A timer scheduled earlier than the base time of the
844 			 * entire HPET makes no sense.
845 			 */
846 			const uint64_t timer_target =
847 			    vm_denormalize_hrtime(vm, timer->vht_time_target);
848 			if (timer_target < base_time) {
849 				return (VVE_BAD_TIMER_TIME);
850 			}
851 		}
852 	}
853 
854 	return (VVE_OK);
855 }
856 
857 static int
858 vhpet_data_write(void *datap, const vmm_data_req_t *req)
859 {
860 	VERIFY3U(req->vdr_class, ==, VDC_HPET);
861 	VERIFY3U(req->vdr_version, ==, 1);
862 	VERIFY3U(req->vdr_len, ==, sizeof (struct vdi_hpet_v1));
863 
864 	struct vhpet *vhpet = datap;
865 
866 	if (vhpet_data_validate(req, vhpet->vm) != VVE_OK) {
867 		return (EINVAL);
868 	}
869 	const struct vdi_hpet_v1 *src = req->vdr_data;
870 
871 	VHPET_LOCK(vhpet);
872 	vhpet->config = src->vh_config;
873 	vhpet->isr = src->vh_isr;
874 	vhpet->base_count = src->vh_count_base;
875 	vhpet->base_time = vm_denormalize_hrtime(vhpet->vm, src->vh_time_base);
876 
877 	for (uint_t i = 0; i < 8; i++) {
878 		struct vhpet_timer *timer = &vhpet->timer[i];
879 		const struct vdi_hpet_timer_v1 *timer_src = &src->vh_timers[i];
880 
881 		timer->cap_config = timer_src->vht_config;
882 		timer->msireg = timer_src->vht_msi;
883 		timer->compval = timer_src->vht_comp_val;
884 		timer->comprate = timer_src->vht_comp_rate;
885 
886 		/*
887 		 * For now, any state associating an IOAPIC pin with a given
888 		 * timer is not kept in sync. (We will not increment or
889 		 * decrement a pin level based on the timer state.)  It is left
890 		 * to the consumer to keep those pin levels maintained if
891 		 * modifying either the HPET or the IOAPIC.
892 		 *
893 		 * If both the HPET and IOAPIC are exported and then imported,
894 		 * this will occur naturally, as any asserted IOAPIC pin level
895 		 * from the HPET would come along for the ride.
896 		 */
897 
898 		/* TODO: properly configure timer */
899 		if (timer_src->vht_time_target != 0) {
900 			timer->callout_expire = vm_denormalize_hrtime(vhpet->vm,
901 			    timer_src->vht_time_target);
902 		} else {
903 			timer->callout_expire = 0;
904 		}
905 	}
906 	VHPET_UNLOCK(vhpet);
907 	return (0);
908 }
909 
/*
 * vmm-data descriptor for version 1 of the HPET payload: ties the VDC_HPET
 * class and the expected payload size to the read/write handlers above.
 */
static const vmm_data_version_entry_t hpet_v1 = {
	.vdve_class = VDC_HPET,
	.vdve_version = 1,
	.vdve_len_expect = sizeof (struct vdi_hpet_v1),
	.vdve_readf = vhpet_data_read,
	.vdve_writef = vhpet_data_write,
};
VMM_DATA_VERSION(hpet_v1);
918