xref: /illumos-gate/usr/src/uts/intel/io/vmm/io/ppt.c (revision 32640292)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * Copyright 2019 Joyent, Inc.
31  * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
32  */
33 
34 #include <sys/cdefs.h>
35 
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/kmem.h>
40 #include <sys/module.h>
41 #include <sys/bus.h>
42 #include <sys/pciio.h>
43 #include <sys/sysctl.h>
44 
45 #include <dev/pci/pcivar.h>
46 #include <dev/pci/pcireg.h>
47 
48 #include <machine/vmm.h>
49 #include <machine/vmm_dev.h>
50 
51 #include <sys/conf.h>
52 #include <sys/ddi.h>
53 #include <sys/stat.h>
54 #include <sys/sunddi.h>
55 #include <sys/pci.h>
56 #include <sys/pci_cap.h>
57 #include <sys/pcie_impl.h>
58 #include <sys/ppt_dev.h>
59 #include <sys/mkdev.h>
60 #include <sys/sysmacros.h>
61 
62 #include "vmm_lapic.h"
63 
64 #include "iommu.h"
65 #include "ppt.h"
66 
67 #define	MAX_MSIMSGS	32
68 
69 /*
70  * If the MSI-X table is located in the middle of a BAR then that MMIO
71  * region gets split into two segments - one segment above the MSI-X table
72  * and the other segment below the MSI-X table - with a hole in place of
73  * the MSI-X table so accesses to it can be trapped and emulated.
74  *
75  * So, allocate a MMIO segment for each BAR register + 1 additional segment.
76  */
77 #define	MAX_MMIOSEGS	((PCIR_MAX_BAR_0 + 1) + 1)
78 
79 struct pptintr_arg {
80 	struct pptdev	*pptdev;
81 	uint64_t	addr;
82 	uint64_t	msg_data;
83 };
84 
85 struct pptseg {
86 	vm_paddr_t	gpa;
87 	size_t		len;
88 	int		wired;
89 };
90 
91 struct pptbar {
92 	uint64_t base;
93 	uint64_t size;
94 	uint_t type;
95 	ddi_acc_handle_t io_handle;
96 	caddr_t io_ptr;
97 	uint_t ddireg;
98 };
99 
100 struct pptdev {
101 	dev_info_t		*pptd_dip;
102 	list_node_t		pptd_node;
103 	ddi_acc_handle_t	pptd_cfg;
104 	struct pptbar		pptd_bars[PCI_BASE_NUM];
105 	struct vm		*vm;
106 	struct pptseg mmio[MAX_MMIOSEGS];
107 	struct {
108 		int	num_msgs;		/* guest state */
109 		boolean_t is_fixed;
110 		size_t	inth_sz;
111 		ddi_intr_handle_t *inth;
112 		struct pptintr_arg arg[MAX_MSIMSGS];
113 	} msi;
114 
115 	struct {
116 		int num_msgs;
117 		size_t inth_sz;
118 		size_t arg_sz;
119 		ddi_intr_handle_t *inth;
120 		struct pptintr_arg *arg;
121 	} msix;
122 };
123 
124 
125 static major_t		ppt_major;
126 static void		*ppt_state;
127 static kmutex_t		pptdev_mtx;
128 static list_t		pptdev_list;
129 
130 #define	PPT_MINOR_NAME	"ppt"
131 
132 static ddi_device_acc_attr_t ppt_attr = {
133 	DDI_DEVICE_ATTR_V0,
134 	DDI_NEVERSWAP_ACC,
135 	DDI_STORECACHING_OK_ACC,
136 	DDI_DEFAULT_ACC
137 };
138 
139 static int
ppt_open(dev_t * devp,int flag,int otyp,cred_t * cr)140 ppt_open(dev_t *devp, int flag, int otyp, cred_t *cr)
141 {
142 	/* XXX: require extra privs? */
143 	return (0);
144 }
145 
/* Translate a BAR's config-space offset into its index in pptd_bars[]. */
#define	BAR_TO_IDX(bar)	(((bar) - PCI_CONF_BASE0) / PCI_BAR_SZ_32)
/*
 * A config-space offset names a BAR if it lies between PCI_CONF_BASE0 and
 * PCI_CONF_BASE5 (inclusive) and is a multiple of PCI_BAR_SZ_32.
 */
#define	BAR_VALID(b)	\
	((b) >= PCI_CONF_BASE0 && (b) <= PCI_CONF_BASE5 &&	\
	((b) & (PCI_BAR_SZ_32 - 1)) == 0)
151 
152 static int
ppt_ioctl(dev_t dev,int cmd,intptr_t arg,int md,cred_t * cr,int * rv)153 ppt_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
154 {
155 	minor_t minor = getminor(dev);
156 	struct pptdev *ppt;
157 	void *data = (void *)arg;
158 
159 	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL) {
160 		return (ENOENT);
161 	}
162 
163 	switch (cmd) {
164 	case PPT_CFG_READ: {
165 		struct ppt_cfg_io cio;
166 		ddi_acc_handle_t cfg = ppt->pptd_cfg;
167 
168 		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
169 			return (EFAULT);
170 		}
171 		switch (cio.pci_width) {
172 		case 4:
173 			cio.pci_data = pci_config_get32(cfg, cio.pci_off);
174 			break;
175 		case 2:
176 			cio.pci_data = pci_config_get16(cfg, cio.pci_off);
177 			break;
178 		case 1:
179 			cio.pci_data = pci_config_get8(cfg, cio.pci_off);
180 			break;
181 		default:
182 			return (EINVAL);
183 		}
184 
185 		if (ddi_copyout(&cio, data, sizeof (cio), md) != 0) {
186 			return (EFAULT);
187 		}
188 		return (0);
189 	}
190 	case PPT_CFG_WRITE: {
191 		struct ppt_cfg_io cio;
192 		ddi_acc_handle_t cfg = ppt->pptd_cfg;
193 
194 		if (ddi_copyin(data, &cio, sizeof (cio), md) != 0) {
195 			return (EFAULT);
196 		}
197 		switch (cio.pci_width) {
198 		case 4:
199 			pci_config_put32(cfg, cio.pci_off, cio.pci_data);
200 			break;
201 		case 2:
202 			pci_config_put16(cfg, cio.pci_off, cio.pci_data);
203 			break;
204 		case 1:
205 			pci_config_put8(cfg, cio.pci_off, cio.pci_data);
206 			break;
207 		default:
208 			return (EINVAL);
209 		}
210 
211 		return (0);
212 	}
213 	case PPT_BAR_QUERY: {
214 		struct ppt_bar_query barg;
215 		struct pptbar *pbar;
216 
217 		if (ddi_copyin(data, &barg, sizeof (barg), md) != 0) {
218 			return (EFAULT);
219 		}
220 		if (barg.pbq_baridx >= PCI_BASE_NUM) {
221 			return (EINVAL);
222 		}
223 		pbar = &ppt->pptd_bars[barg.pbq_baridx];
224 
225 		if (pbar->base == 0 || pbar->size == 0) {
226 			return (ENOENT);
227 		}
228 		barg.pbq_type = pbar->type;
229 		barg.pbq_base = pbar->base;
230 		barg.pbq_size = pbar->size;
231 
232 		if (ddi_copyout(&barg, data, sizeof (barg), md) != 0) {
233 			return (EFAULT);
234 		}
235 		return (0);
236 	}
237 	case PPT_BAR_READ: {
238 		struct ppt_bar_io bio;
239 		struct pptbar *pbar;
240 		void *addr;
241 		uint_t rnum;
242 		ddi_acc_handle_t cfg;
243 
244 		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
245 			return (EFAULT);
246 		}
247 		rnum = bio.pbi_bar;
248 		if (rnum >= PCI_BASE_NUM) {
249 			return (EINVAL);
250 		}
251 		pbar = &ppt->pptd_bars[rnum];
252 		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
253 			return (EINVAL);
254 		}
255 		addr = pbar->io_ptr + bio.pbi_off;
256 
257 		switch (bio.pbi_width) {
258 		case 4:
259 			bio.pbi_data = ddi_get32(pbar->io_handle, addr);
260 			break;
261 		case 2:
262 			bio.pbi_data = ddi_get16(pbar->io_handle, addr);
263 			break;
264 		case 1:
265 			bio.pbi_data = ddi_get8(pbar->io_handle, addr);
266 			break;
267 		default:
268 			return (EINVAL);
269 		}
270 
271 		if (ddi_copyout(&bio, data, sizeof (bio), md) != 0) {
272 			return (EFAULT);
273 		}
274 		return (0);
275 	}
276 	case PPT_BAR_WRITE: {
277 		struct ppt_bar_io bio;
278 		struct pptbar *pbar;
279 		void *addr;
280 		uint_t rnum;
281 		ddi_acc_handle_t cfg;
282 
283 		if (ddi_copyin(data, &bio, sizeof (bio), md) != 0) {
284 			return (EFAULT);
285 		}
286 		rnum = bio.pbi_bar;
287 		if (rnum >= PCI_BASE_NUM) {
288 			return (EINVAL);
289 		}
290 		pbar = &ppt->pptd_bars[rnum];
291 		if (pbar->type != PCI_ADDR_IO || pbar->io_handle == NULL) {
292 			return (EINVAL);
293 		}
294 		addr = pbar->io_ptr + bio.pbi_off;
295 
296 		switch (bio.pbi_width) {
297 		case 4:
298 			ddi_put32(pbar->io_handle, addr, bio.pbi_data);
299 			break;
300 		case 2:
301 			ddi_put16(pbar->io_handle, addr, bio.pbi_data);
302 			break;
303 		case 1:
304 			ddi_put8(pbar->io_handle, addr, bio.pbi_data);
305 			break;
306 		default:
307 			return (EINVAL);
308 		}
309 
310 		return (0);
311 	}
312 
313 	default:
314 		return (ENOTTY);
315 	}
316 
317 	return (0);
318 }
319 
320 static int
ppt_find_msix_table_bar(struct pptdev * ppt)321 ppt_find_msix_table_bar(struct pptdev *ppt)
322 {
323 	uint16_t base;
324 	uint32_t off;
325 
326 	if (PCI_CAP_LOCATE(ppt->pptd_cfg, PCI_CAP_ID_MSI_X, &base) !=
327 	    DDI_SUCCESS)
328 		return (-1);
329 
330 	off = pci_config_get32(ppt->pptd_cfg, base + PCI_MSIX_TBL_OFFSET);
331 
332 	if (off == PCI_EINVAL32)
333 		return (-1);
334 
335 	return (off & PCI_MSIX_TBL_BIR_MASK);
336 }
337 
338 static int
ppt_devmap(dev_t dev,devmap_cookie_t dhp,offset_t off,size_t len,size_t * maplen,uint_t model)339 ppt_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
340     size_t *maplen, uint_t model)
341 {
342 	minor_t minor;
343 	struct pptdev *ppt;
344 	int err, bar;
345 	uint_t ddireg;
346 
347 	minor = getminor(dev);
348 
349 	if ((ppt = ddi_get_soft_state(ppt_state, minor)) == NULL)
350 		return (ENXIO);
351 
352 #ifdef _MULTI_DATAMODEL
353 	if (ddi_model_convert_from(model) != DDI_MODEL_NONE)
354 		return (ENXIO);
355 #endif
356 
357 	if (off < 0 || off != P2ALIGN(off, PAGESIZE))
358 		return (EINVAL);
359 
360 	if ((bar = ppt_find_msix_table_bar(ppt)) == -1)
361 		return (EINVAL);
362 
363 	ddireg = ppt->pptd_bars[bar].ddireg;
364 
365 	if (ddireg == 0)
366 		return (EINVAL);
367 
368 	err = devmap_devmem_setup(dhp, ppt->pptd_dip, NULL, ddireg, off, len,
369 	    PROT_USER | PROT_READ | PROT_WRITE, IOMEM_DATA_CACHED, &ppt_attr);
370 
371 	if (err == DDI_SUCCESS)
372 		*maplen = len;
373 
374 	return (err);
375 }
376 
377 static void
ppt_bar_wipe(struct pptdev * ppt)378 ppt_bar_wipe(struct pptdev *ppt)
379 {
380 	uint_t i;
381 
382 	for (i = 0; i < PCI_BASE_NUM; i++) {
383 		struct pptbar *pbar = &ppt->pptd_bars[i];
384 		if (pbar->type == PCI_ADDR_IO && pbar->io_handle != NULL) {
385 			ddi_regs_map_free(&pbar->io_handle);
386 		}
387 	}
388 	bzero(&ppt->pptd_bars, sizeof (ppt->pptd_bars));
389 }
390 
391 static int
ppt_bar_crawl(struct pptdev * ppt)392 ppt_bar_crawl(struct pptdev *ppt)
393 {
394 	pci_regspec_t *regs;
395 	uint_t rcount, i;
396 	int err = 0, rlen;
397 
398 	if (ddi_getlongprop(DDI_DEV_T_ANY, ppt->pptd_dip, DDI_PROP_DONTPASS,
399 	    "assigned-addresses", (caddr_t)&regs, &rlen) != DDI_PROP_SUCCESS) {
400 		return (EIO);
401 	}
402 
403 	VERIFY3S(rlen, >, 0);
404 	rcount = rlen / sizeof (pci_regspec_t);
405 	for (i = 0; i < rcount; i++) {
406 		pci_regspec_t *reg = &regs[i];
407 		struct pptbar *pbar;
408 		uint_t bar, rnum;
409 
410 		DTRACE_PROBE1(ppt__crawl__reg, pci_regspec_t *, reg);
411 		bar = PCI_REG_REG_G(reg->pci_phys_hi);
412 		if (!BAR_VALID(bar)) {
413 			continue;
414 		}
415 
416 		rnum = BAR_TO_IDX(bar);
417 		pbar = &ppt->pptd_bars[rnum];
418 		/* is this somehow already populated? */
419 		if (pbar->base != 0 || pbar->size != 0) {
420 			err = EEXIST;
421 			break;
422 		}
423 
424 		/*
425 		 * Register 0 corresponds to the PCI config space.
426 		 * The registers which match the assigned-addresses list are
427 		 * offset by 1.
428 		 */
429 		pbar->ddireg = i + 1;
430 
431 		pbar->type = reg->pci_phys_hi & PCI_ADDR_MASK;
432 		pbar->base = ((uint64_t)reg->pci_phys_mid << 32) |
433 		    (uint64_t)reg->pci_phys_low;
434 		pbar->size = ((uint64_t)reg->pci_size_hi << 32) |
435 		    (uint64_t)reg->pci_size_low;
436 		if (pbar->type == PCI_ADDR_IO) {
437 			err = ddi_regs_map_setup(ppt->pptd_dip, rnum,
438 			    &pbar->io_ptr, 0, 0, &ppt_attr, &pbar->io_handle);
439 			if (err != 0) {
440 				break;
441 			}
442 		}
443 	}
444 	kmem_free(regs, rlen);
445 
446 	if (err != 0) {
447 		ppt_bar_wipe(ppt);
448 	}
449 	return (err);
450 }
451 
452 static boolean_t
ppt_bar_verify_mmio(struct pptdev * ppt,uint64_t base,uint64_t size)453 ppt_bar_verify_mmio(struct pptdev *ppt, uint64_t base, uint64_t size)
454 {
455 	const uint64_t map_end = base + size;
456 
457 	/* Zero-length or overflow mappings are not valid */
458 	if (map_end <= base) {
459 		return (B_FALSE);
460 	}
461 	/* MMIO bounds should be page-aligned */
462 	if ((base & PAGEOFFSET) != 0 || (size & PAGEOFFSET) != 0) {
463 		return (B_FALSE);
464 	}
465 
466 	for (uint_t i = 0; i < PCI_BASE_NUM; i++) {
467 		const struct pptbar *bar = &ppt->pptd_bars[i];
468 		const uint64_t bar_end = bar->base + bar->size;
469 
470 		/* Only memory BARs can be mapped */
471 		if (bar->type != PCI_ADDR_MEM32 &&
472 		    bar->type != PCI_ADDR_MEM64) {
473 			continue;
474 		}
475 
476 		/* Does the mapping fit within this BAR? */
477 		if (base < bar->base || base >= bar_end ||
478 		    map_end < bar->base || map_end > bar_end) {
479 			continue;
480 		}
481 
482 		/* This BAR satisfies the provided map */
483 		return (B_TRUE);
484 	}
485 	return (B_FALSE);
486 }
487 
488 static int
ppt_ddi_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)489 ppt_ddi_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
490 {
491 	struct pptdev *ppt = NULL;
492 	char name[PPT_MAXNAMELEN];
493 	int inst;
494 
495 	if (cmd != DDI_ATTACH)
496 		return (DDI_FAILURE);
497 
498 	inst = ddi_get_instance(dip);
499 
500 	if (ddi_soft_state_zalloc(ppt_state, inst) != DDI_SUCCESS) {
501 		goto fail;
502 	}
503 	VERIFY(ppt = ddi_get_soft_state(ppt_state, inst));
504 	ppt->pptd_dip = dip;
505 	ddi_set_driver_private(dip, ppt);
506 
507 	if (pci_config_setup(dip, &ppt->pptd_cfg) != DDI_SUCCESS) {
508 		goto fail;
509 	}
510 	if (ppt_bar_crawl(ppt) != 0) {
511 		goto fail;
512 	}
513 	if (ddi_create_minor_node(dip, PPT_MINOR_NAME, S_IFCHR, inst,
514 	    DDI_PSEUDO, 0) != DDI_SUCCESS) {
515 		goto fail;
516 	}
517 
518 	mutex_enter(&pptdev_mtx);
519 	list_insert_tail(&pptdev_list, ppt);
520 	mutex_exit(&pptdev_mtx);
521 
522 	return (DDI_SUCCESS);
523 
524 fail:
525 	if (ppt != NULL) {
526 		ddi_remove_minor_node(dip, NULL);
527 		if (ppt->pptd_cfg != NULL) {
528 			pci_config_teardown(&ppt->pptd_cfg);
529 		}
530 		ppt_bar_wipe(ppt);
531 		ddi_soft_state_free(ppt_state, inst);
532 	}
533 	return (DDI_FAILURE);
534 }
535 
536 static int
ppt_ddi_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)537 ppt_ddi_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
538 {
539 	struct pptdev *ppt;
540 	int inst;
541 
542 	if (cmd != DDI_DETACH)
543 		return (DDI_FAILURE);
544 
545 	ppt = ddi_get_driver_private(dip);
546 	inst = ddi_get_instance(dip);
547 
548 	ASSERT3P(ddi_get_soft_state(ppt_state, inst), ==, ppt);
549 
550 	mutex_enter(&pptdev_mtx);
551 	if (ppt->vm != NULL) {
552 		mutex_exit(&pptdev_mtx);
553 		return (DDI_FAILURE);
554 	}
555 	list_remove(&pptdev_list, ppt);
556 	mutex_exit(&pptdev_mtx);
557 
558 	ddi_remove_minor_node(dip, PPT_MINOR_NAME);
559 	ppt_bar_wipe(ppt);
560 	pci_config_teardown(&ppt->pptd_cfg);
561 	ddi_set_driver_private(dip, NULL);
562 	ddi_soft_state_free(ppt_state, inst);
563 
564 	return (DDI_SUCCESS);
565 }
566 
567 static int
ppt_ddi_info(dev_info_t * dip,ddi_info_cmd_t cmd,void * arg,void ** result)568 ppt_ddi_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
569 {
570 	int error = DDI_FAILURE;
571 	int inst = getminor((dev_t)arg);
572 
573 	switch (cmd) {
574 	case DDI_INFO_DEVT2DEVINFO: {
575 		struct pptdev *ppt = ddi_get_soft_state(ppt_state, inst);
576 
577 		if (ppt != NULL) {
578 			*result = (void *)ppt->pptd_dip;
579 			error = DDI_SUCCESS;
580 		}
581 		break;
582 	}
583 	case DDI_INFO_DEVT2INSTANCE: {
584 		*result = (void *)(uintptr_t)inst;
585 		error = DDI_SUCCESS;
586 		break;
587 	}
588 	default:
589 		break;
590 	}
591 	return (error);
592 }
593 
594 static struct cb_ops ppt_cb_ops = {
595 	ppt_open,
596 	nulldev,	/* close */
597 	nodev,		/* strategy */
598 	nodev,		/* print */
599 	nodev,		/* dump */
600 	nodev,		/* read */
601 	nodev,		/* write */
602 	ppt_ioctl,
603 	ppt_devmap,	/* devmap */
604 	NULL,		/* mmap */
605 	NULL,		/* segmap */
606 	nochpoll,	/* poll */
607 	ddi_prop_op,
608 	NULL,
609 	D_NEW | D_MP | D_64BIT | D_DEVMAP,
610 	CB_REV
611 };
612 
613 static struct dev_ops ppt_ops = {
614 	DEVO_REV,
615 	0,
616 	ppt_ddi_info,
617 	nulldev,	/* identify */
618 	nulldev,	/* probe */
619 	ppt_ddi_attach,
620 	ppt_ddi_detach,
621 	nodev,		/* reset */
622 	&ppt_cb_ops,
623 	(struct bus_ops *)NULL
624 };
625 
626 static struct modldrv modldrv = {
627 	&mod_driverops,
628 	"bhyve pci pass-thru",
629 	&ppt_ops
630 };
631 
632 static struct modlinkage modlinkage = {
633 	MODREV_1,
634 	&modldrv,
635 	NULL
636 };
637 
638 int
_init(void)639 _init(void)
640 {
641 	int error;
642 
643 	mutex_init(&pptdev_mtx, NULL, MUTEX_DRIVER, NULL);
644 	list_create(&pptdev_list, sizeof (struct pptdev),
645 	    offsetof(struct pptdev, pptd_node));
646 
647 	error = ddi_soft_state_init(&ppt_state, sizeof (struct pptdev), 0);
648 	if (error) {
649 		goto fail;
650 	}
651 
652 	error = mod_install(&modlinkage);
653 
654 	ppt_major = ddi_name_to_major("ppt");
655 fail:
656 	if (error) {
657 		ddi_soft_state_fini(&ppt_state);
658 	}
659 	return (error);
660 }
661 
662 int
_fini(void)663 _fini(void)
664 {
665 	int error;
666 
667 	error = mod_remove(&modlinkage);
668 	if (error)
669 		return (error);
670 	ddi_soft_state_fini(&ppt_state);
671 
672 	return (0);
673 }
674 
675 int
_info(struct modinfo * modinfop)676 _info(struct modinfo *modinfop)
677 {
678 	return (mod_info(&modlinkage, modinfop));
679 }
680 
681 static boolean_t
ppt_wait_for_pending_txn(dev_info_t * dip,uint_t max_delay_us)682 ppt_wait_for_pending_txn(dev_info_t *dip, uint_t max_delay_us)
683 {
684 	uint16_t cap_ptr, devsts;
685 	ddi_acc_handle_t hdl;
686 
687 	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
688 		return (B_FALSE);
689 
690 	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS) {
691 		pci_config_teardown(&hdl);
692 		return (B_FALSE);
693 	}
694 
695 	devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
696 	while ((devsts & PCIE_DEVSTS_TRANS_PENDING) != 0) {
697 		if (max_delay_us == 0) {
698 			pci_config_teardown(&hdl);
699 			return (B_FALSE);
700 		}
701 
702 		/* Poll once every 100 milliseconds up to the timeout. */
703 		if (max_delay_us > 100000) {
704 			delay(drv_usectohz(100000));
705 			max_delay_us -= 100000;
706 		} else {
707 			delay(drv_usectohz(max_delay_us));
708 			max_delay_us = 0;
709 		}
710 		devsts = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVSTS);
711 	}
712 
713 	pci_config_teardown(&hdl);
714 	return (B_TRUE);
715 }
716 
717 static uint_t
ppt_max_completion_tmo_us(dev_info_t * dip)718 ppt_max_completion_tmo_us(dev_info_t *dip)
719 {
720 	uint_t timo = 0;
721 	uint16_t cap_ptr;
722 	ddi_acc_handle_t hdl;
723 	uint_t timo_ranges[] = {	/* timeout ranges */
724 		50000,		/* 50ms */
725 		100,		/* 100us */
726 		10000,		/* 10ms */
727 		0,
728 		0,
729 		55000,		/* 55ms */
730 		210000,		/* 210ms */
731 		0,
732 		0,
733 		900000,		/* 900ms */
734 		3500000,	/* 3.5s */
735 		0,
736 		0,
737 		13000000,	/* 13s */
738 		64000000,	/* 64s */
739 		0
740 	};
741 
742 	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
743 		return (50000); /* default 50ms */
744 
745 	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
746 		goto out;
747 
748 	if ((PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_PCIECAP) &
749 	    PCIE_PCIECAP_VER_MASK) < PCIE_PCIECAP_VER_2_0)
750 		goto out;
751 
752 	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP2) &
753 	    PCIE_DEVCTL2_COM_TO_RANGE_MASK) == 0)
754 		goto out;
755 
756 	timo = timo_ranges[PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL2) &
757 	    PCIE_DEVCAP2_COM_TO_RANGE_MASK];
758 
759 out:
760 	if (timo == 0)
761 		timo = 50000; /* default 50ms */
762 
763 	pci_config_teardown(&hdl);
764 	return (timo);
765 }
766 
767 static boolean_t
ppt_flr(dev_info_t * dip,boolean_t force)768 ppt_flr(dev_info_t *dip, boolean_t force)
769 {
770 	uint16_t cap_ptr, ctl, cmd;
771 	ddi_acc_handle_t hdl;
772 	uint_t compl_delay = 0, max_delay_us;
773 
774 	if (pci_config_setup(dip, &hdl) != DDI_SUCCESS)
775 		return (B_FALSE);
776 
777 	if (PCI_CAP_LOCATE(hdl, PCI_CAP_ID_PCI_E, &cap_ptr) != DDI_SUCCESS)
778 		goto fail;
779 
780 	if ((PCI_CAP_GET32(hdl, 0, cap_ptr, PCIE_DEVCAP) & PCIE_DEVCAP_FLR)
781 	    == 0)
782 		goto fail;
783 
784 	max_delay_us = MAX(ppt_max_completion_tmo_us(dip), 10000);
785 
786 	/*
787 	 * Disable busmastering to prevent generation of new transactions while
788 	 * waiting for the device to go idle.  If the idle timeout fails, the
789 	 * command register is restored which will re-enable busmastering.
790 	 */
791 	cmd = pci_config_get16(hdl, PCI_CONF_COMM);
792 	pci_config_put16(hdl, PCI_CONF_COMM, cmd & ~PCI_COMM_ME);
793 	if (!ppt_wait_for_pending_txn(dip, max_delay_us)) {
794 		if (!force) {
795 			pci_config_put16(hdl, PCI_CONF_COMM, cmd);
796 			goto fail;
797 		}
798 		dev_err(dip, CE_WARN,
799 		    "?Resetting with transactions pending after %u us\n",
800 		    max_delay_us);
801 
802 		/*
803 		 * Extend the post-FLR delay to cover the maximum Completion
804 		 * Timeout delay of anything in flight during the FLR delay.
805 		 * Enforce a minimum delay of at least 10ms.
806 		 */
807 		compl_delay = MAX(10, (ppt_max_completion_tmo_us(dip) / 1000));
808 	}
809 
810 	/* Initiate the reset. */
811 	ctl = PCI_CAP_GET16(hdl, 0, cap_ptr, PCIE_DEVCTL);
812 	(void) PCI_CAP_PUT16(hdl, 0, cap_ptr, PCIE_DEVCTL,
813 	    ctl | PCIE_DEVCTL_INITIATE_FLR);
814 
815 	/* Wait for at least 100ms */
816 	delay(drv_usectohz((100 + compl_delay) * 1000));
817 
818 	pci_config_teardown(&hdl);
819 	return (B_TRUE);
820 
821 fail:
822 	/*
823 	 * TODO: If the FLR fails for some reason, we should attempt a reset
824 	 * using the PCI power management facilities (if possible).
825 	 */
826 	pci_config_teardown(&hdl);
827 	return (B_FALSE);
828 }
829 
830 static int
ppt_findf(struct vm * vm,int fd,struct pptdev ** pptp)831 ppt_findf(struct vm *vm, int fd, struct pptdev **pptp)
832 {
833 	struct pptdev *ppt = NULL;
834 	file_t *fp;
835 	vattr_t va;
836 	int err = 0;
837 
838 	ASSERT(MUTEX_HELD(&pptdev_mtx));
839 
840 	if ((fp = getf(fd)) == NULL)
841 		return (EBADF);
842 
843 	va.va_mask = AT_RDEV;
844 	if (VOP_GETATTR(fp->f_vnode, &va, NO_FOLLOW, fp->f_cred, NULL) != 0 ||
845 	    getmajor(va.va_rdev) != ppt_major) {
846 		err = EBADF;
847 		goto fail;
848 	}
849 
850 	ppt = ddi_get_soft_state(ppt_state, getminor(va.va_rdev));
851 
852 	if (ppt == NULL) {
853 		err = EBADF;
854 		goto fail;
855 	}
856 
857 	if (ppt->vm != vm) {
858 		err = EBUSY;
859 		goto fail;
860 	}
861 
862 	*pptp = ppt;
863 	return (0);
864 
865 fail:
866 	releasef(fd);
867 	return (err);
868 }
869 
870 static void
ppt_unmap_all_mmio(struct vm * vm,struct pptdev * ppt)871 ppt_unmap_all_mmio(struct vm *vm, struct pptdev *ppt)
872 {
873 	int i;
874 	struct pptseg *seg;
875 
876 	for (i = 0; i < MAX_MMIOSEGS; i++) {
877 		seg = &ppt->mmio[i];
878 		if (seg->len == 0)
879 			continue;
880 		(void) vm_unmap_mmio(vm, seg->gpa, seg->len);
881 		bzero(seg, sizeof (struct pptseg));
882 	}
883 }
884 
885 static void
ppt_teardown_msi(struct pptdev * ppt)886 ppt_teardown_msi(struct pptdev *ppt)
887 {
888 	int i;
889 
890 	if (ppt->msi.num_msgs == 0)
891 		return;
892 
893 	for (i = 0; i < ppt->msi.num_msgs; i++) {
894 		int intr_cap;
895 
896 		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
897 		if (intr_cap & DDI_INTR_FLAG_BLOCK)
898 			ddi_intr_block_disable(&ppt->msi.inth[i], 1);
899 		else
900 			ddi_intr_disable(ppt->msi.inth[i]);
901 
902 		ddi_intr_remove_handler(ppt->msi.inth[i]);
903 		ddi_intr_free(ppt->msi.inth[i]);
904 
905 		ppt->msi.inth[i] = NULL;
906 	}
907 
908 	kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
909 	ppt->msi.inth = NULL;
910 	ppt->msi.inth_sz = 0;
911 	ppt->msi.is_fixed = B_FALSE;
912 
913 	ppt->msi.num_msgs = 0;
914 }
915 
916 static void
ppt_teardown_msix_intr(struct pptdev * ppt,int idx)917 ppt_teardown_msix_intr(struct pptdev *ppt, int idx)
918 {
919 	if (ppt->msix.inth != NULL && ppt->msix.inth[idx] != NULL) {
920 		int intr_cap;
921 
922 		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
923 		if (intr_cap & DDI_INTR_FLAG_BLOCK)
924 			ddi_intr_block_disable(&ppt->msix.inth[idx], 1);
925 		else
926 			ddi_intr_disable(ppt->msix.inth[idx]);
927 
928 		ddi_intr_remove_handler(ppt->msix.inth[idx]);
929 	}
930 }
931 
932 static void
ppt_teardown_msix(struct pptdev * ppt)933 ppt_teardown_msix(struct pptdev *ppt)
934 {
935 	uint_t i;
936 
937 	if (ppt->msix.num_msgs == 0)
938 		return;
939 
940 	for (i = 0; i < ppt->msix.num_msgs; i++)
941 		ppt_teardown_msix_intr(ppt, i);
942 
943 	if (ppt->msix.inth) {
944 		for (i = 0; i < ppt->msix.num_msgs; i++)
945 			ddi_intr_free(ppt->msix.inth[i]);
946 		kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
947 		ppt->msix.inth = NULL;
948 		ppt->msix.inth_sz = 0;
949 		kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
950 		ppt->msix.arg = NULL;
951 		ppt->msix.arg_sz = 0;
952 	}
953 
954 	ppt->msix.num_msgs = 0;
955 }
956 
957 int
ppt_assigned_devices(struct vm * vm)958 ppt_assigned_devices(struct vm *vm)
959 {
960 	struct pptdev *ppt;
961 	uint_t num = 0;
962 
963 	mutex_enter(&pptdev_mtx);
964 	for (ppt = list_head(&pptdev_list); ppt != NULL;
965 	    ppt = list_next(&pptdev_list, ppt)) {
966 		if (ppt->vm == vm) {
967 			num++;
968 		}
969 	}
970 	mutex_exit(&pptdev_mtx);
971 	return (num);
972 }
973 
974 boolean_t
ppt_is_mmio(struct vm * vm,vm_paddr_t gpa)975 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
976 {
977 	struct pptdev *ppt = list_head(&pptdev_list);
978 
979 	/* XXX: this should probably be restructured to avoid the lock */
980 	mutex_enter(&pptdev_mtx);
981 	for (ppt = list_head(&pptdev_list); ppt != NULL;
982 	    ppt = list_next(&pptdev_list, ppt)) {
983 		if (ppt->vm != vm) {
984 			continue;
985 		}
986 
987 		for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
988 			struct pptseg *seg = &ppt->mmio[i];
989 
990 			if (seg->len == 0)
991 				continue;
992 			if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) {
993 				mutex_exit(&pptdev_mtx);
994 				return (B_TRUE);
995 			}
996 		}
997 	}
998 
999 	mutex_exit(&pptdev_mtx);
1000 	return (B_FALSE);
1001 }
1002 
1003 int
ppt_assign_device(struct vm * vm,int pptfd)1004 ppt_assign_device(struct vm *vm, int pptfd)
1005 {
1006 	struct pptdev *ppt;
1007 	int err = 0;
1008 
1009 	mutex_enter(&pptdev_mtx);
1010 	/* Passing NULL requires the device to be unowned. */
1011 	err = ppt_findf(NULL, pptfd, &ppt);
1012 	if (err != 0) {
1013 		mutex_exit(&pptdev_mtx);
1014 		return (err);
1015 	}
1016 
1017 	if (pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
1018 		err = EIO;
1019 		goto done;
1020 	}
1021 	ppt_flr(ppt->pptd_dip, B_TRUE);
1022 
1023 	/*
1024 	 * Restore the device state after reset and then perform another save
1025 	 * so the "pristine" state can be restored when the device is removed
1026 	 * from the guest.
1027 	 */
1028 	if (pci_restore_config_regs(ppt->pptd_dip) != DDI_SUCCESS ||
1029 	    pci_save_config_regs(ppt->pptd_dip) != DDI_SUCCESS) {
1030 		err = EIO;
1031 		goto done;
1032 	}
1033 
1034 	ppt->vm = vm;
1035 	iommu_remove_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
1036 	iommu_add_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
1037 	pf_set_passthru(ppt->pptd_dip, B_TRUE);
1038 
1039 done:
1040 	releasef(pptfd);
1041 	mutex_exit(&pptdev_mtx);
1042 	return (err);
1043 }
1044 
1045 static void
ppt_reset_pci_power_state(dev_info_t * dip)1046 ppt_reset_pci_power_state(dev_info_t *dip)
1047 {
1048 	ddi_acc_handle_t cfg;
1049 	uint16_t cap_ptr;
1050 
1051 	if (pci_config_setup(dip, &cfg) != DDI_SUCCESS)
1052 		return;
1053 
1054 	if (PCI_CAP_LOCATE(cfg, PCI_CAP_ID_PM, &cap_ptr) == DDI_SUCCESS) {
1055 		uint16_t val;
1056 
1057 		val = PCI_CAP_GET16(cfg, 0, cap_ptr, PCI_PMCSR);
1058 		if ((val & PCI_PMCSR_STATE_MASK) != PCI_PMCSR_D0) {
1059 			val = (val & ~PCI_PMCSR_STATE_MASK) | PCI_PMCSR_D0;
1060 			(void) PCI_CAP_PUT16(cfg, 0, cap_ptr, PCI_PMCSR,
1061 			    val);
1062 		}
1063 	}
1064 
1065 	pci_config_teardown(&cfg);
1066 }
1067 
1068 static void
ppt_do_unassign(struct pptdev * ppt)1069 ppt_do_unassign(struct pptdev *ppt)
1070 {
1071 	struct vm *vm = ppt->vm;
1072 
1073 	ASSERT3P(vm, !=, NULL);
1074 	ASSERT(MUTEX_HELD(&pptdev_mtx));
1075 
1076 
1077 	ppt_flr(ppt->pptd_dip, B_TRUE);
1078 
1079 	/*
1080 	 * Restore from the state saved during device assignment.
1081 	 * If the device power state has been altered, that must be remedied
1082 	 * first, as it will reset register state during the transition.
1083 	 */
1084 	ppt_reset_pci_power_state(ppt->pptd_dip);
1085 	(void) pci_restore_config_regs(ppt->pptd_dip);
1086 
1087 	pf_set_passthru(ppt->pptd_dip, B_FALSE);
1088 
1089 	ppt_unmap_all_mmio(vm, ppt);
1090 	ppt_teardown_msi(ppt);
1091 	ppt_teardown_msix(ppt);
1092 	iommu_remove_device(vm_iommu_domain(vm), pci_get_bdf(ppt->pptd_dip));
1093 	iommu_add_device(iommu_host_domain(), pci_get_bdf(ppt->pptd_dip));
1094 	ppt->vm = NULL;
1095 }
1096 
1097 int
ppt_unassign_device(struct vm * vm,int pptfd)1098 ppt_unassign_device(struct vm *vm, int pptfd)
1099 {
1100 	struct pptdev *ppt;
1101 	int err = 0;
1102 
1103 	mutex_enter(&pptdev_mtx);
1104 	err = ppt_findf(vm, pptfd, &ppt);
1105 	if (err != 0) {
1106 		mutex_exit(&pptdev_mtx);
1107 		return (err);
1108 	}
1109 
1110 	ppt_do_unassign(ppt);
1111 
1112 	releasef(pptfd);
1113 	mutex_exit(&pptdev_mtx);
1114 	return (err);
1115 }
1116 
1117 void
ppt_unassign_all(struct vm * vm)1118 ppt_unassign_all(struct vm *vm)
1119 {
1120 	struct pptdev *ppt;
1121 
1122 	mutex_enter(&pptdev_mtx);
1123 	for (ppt = list_head(&pptdev_list); ppt != NULL;
1124 	    ppt = list_next(&pptdev_list, ppt)) {
1125 		if (ppt->vm == vm) {
1126 			ppt_do_unassign(ppt);
1127 		}
1128 	}
1129 	mutex_exit(&pptdev_mtx);
1130 }
1131 
1132 int
ppt_map_mmio(struct vm * vm,int pptfd,vm_paddr_t gpa,size_t len,vm_paddr_t hpa)1133 ppt_map_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len,
1134     vm_paddr_t hpa)
1135 {
1136 	struct pptdev *ppt;
1137 	int err = 0;
1138 
1139 	if ((len & PAGEOFFSET) != 0 || len == 0 || (gpa & PAGEOFFSET) != 0 ||
1140 	    (hpa & PAGEOFFSET) != 0 || gpa + len < gpa || hpa + len < hpa) {
1141 		return (EINVAL);
1142 	}
1143 
1144 	mutex_enter(&pptdev_mtx);
1145 	err = ppt_findf(vm, pptfd, &ppt);
1146 	if (err != 0) {
1147 		mutex_exit(&pptdev_mtx);
1148 		return (err);
1149 	}
1150 
1151 	/*
1152 	 * Ensure that the host-physical range of the requested mapping fits
1153 	 * within one of the MMIO BARs of the device.
1154 	 */
1155 	if (!ppt_bar_verify_mmio(ppt, hpa, len)) {
1156 		err = EINVAL;
1157 		goto done;
1158 	}
1159 
1160 	for (uint_t i = 0; i < MAX_MMIOSEGS; i++) {
1161 		struct pptseg *seg = &ppt->mmio[i];
1162 
1163 		if (seg->len == 0) {
1164 			err = vm_map_mmio(vm, gpa, len, hpa);
1165 			if (err == 0) {
1166 				seg->gpa = gpa;
1167 				seg->len = len;
1168 			}
1169 			goto done;
1170 		}
1171 	}
1172 	err = ENOSPC;
1173 
1174 done:
1175 	releasef(pptfd);
1176 	mutex_exit(&pptdev_mtx);
1177 	return (err);
1178 }
1179 
1180 int
ppt_unmap_mmio(struct vm * vm,int pptfd,vm_paddr_t gpa,size_t len)1181 ppt_unmap_mmio(struct vm *vm, int pptfd, vm_paddr_t gpa, size_t len)
1182 {
1183 	struct pptdev *ppt;
1184 	int err = 0;
1185 	uint_t i;
1186 
1187 	mutex_enter(&pptdev_mtx);
1188 	err = ppt_findf(vm, pptfd, &ppt);
1189 	if (err != 0) {
1190 		mutex_exit(&pptdev_mtx);
1191 		return (err);
1192 	}
1193 
1194 	for (i = 0; i < MAX_MMIOSEGS; i++) {
1195 		struct pptseg *seg = &ppt->mmio[i];
1196 
1197 		if (seg->gpa == gpa && seg->len == len) {
1198 			err = vm_unmap_mmio(vm, seg->gpa, seg->len);
1199 			if (err == 0) {
1200 				seg->gpa = 0;
1201 				seg->len = 0;
1202 			}
1203 			goto out;
1204 		}
1205 	}
1206 	err = ENOENT;
1207 out:
1208 	releasef(pptfd);
1209 	mutex_exit(&pptdev_mtx);
1210 	return (err);
1211 }
1212 
1213 static uint_t
pptintr(caddr_t arg,caddr_t unused)1214 pptintr(caddr_t arg, caddr_t unused)
1215 {
1216 	struct pptintr_arg *pptarg = (struct pptintr_arg *)arg;
1217 	struct pptdev *ppt = pptarg->pptdev;
1218 
1219 	if (ppt->vm != NULL) {
1220 		lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data);
1221 	} else {
1222 		/*
1223 		 * XXX
1224 		 * This is not expected to happen - panic?
1225 		 */
1226 	}
1227 
1228 	/*
1229 	 * For legacy interrupts give other filters a chance in case
1230 	 * the interrupt was not generated by the passthrough device.
1231 	 */
1232 	return (ppt->msi.is_fixed ? DDI_INTR_UNCLAIMED : DDI_INTR_CLAIMED);
1233 }
1234 
1235 int
ppt_setup_msi(struct vm * vm,int vcpu,int pptfd,uint64_t addr,uint64_t msg,int numvec)1236 ppt_setup_msi(struct vm *vm, int vcpu, int pptfd, uint64_t addr, uint64_t msg,
1237     int numvec)
1238 {
1239 	int i, msi_count, intr_type;
1240 	struct pptdev *ppt;
1241 	int err = 0;
1242 
1243 	if (numvec < 0 || numvec > MAX_MSIMSGS)
1244 		return (EINVAL);
1245 
1246 	mutex_enter(&pptdev_mtx);
1247 	err = ppt_findf(vm, pptfd, &ppt);
1248 	if (err != 0) {
1249 		mutex_exit(&pptdev_mtx);
1250 		return (err);
1251 	}
1252 
1253 	/* Reject attempts to enable MSI while MSI-X is active. */
1254 	if (ppt->msix.num_msgs != 0 && numvec != 0) {
1255 		err = EBUSY;
1256 		goto done;
1257 	}
1258 
1259 	/* Free any allocated resources */
1260 	ppt_teardown_msi(ppt);
1261 
1262 	if (numvec == 0) {
1263 		/* nothing more to do */
1264 		goto done;
1265 	}
1266 
1267 	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
1268 	    &msi_count) != DDI_SUCCESS) {
1269 		if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_FIXED,
1270 		    &msi_count) != DDI_SUCCESS) {
1271 			err = EINVAL;
1272 			goto done;
1273 		}
1274 
1275 		intr_type = DDI_INTR_TYPE_FIXED;
1276 		ppt->msi.is_fixed = B_TRUE;
1277 	} else {
1278 		intr_type = DDI_INTR_TYPE_MSI;
1279 	}
1280 
1281 	/*
1282 	 * The device must be capable of supporting the number of vectors
1283 	 * the guest wants to allocate.
1284 	 */
1285 	if (numvec > msi_count) {
1286 		err = EINVAL;
1287 		goto done;
1288 	}
1289 
1290 	ppt->msi.inth_sz = numvec * sizeof (ddi_intr_handle_t);
1291 	ppt->msi.inth = kmem_zalloc(ppt->msi.inth_sz, KM_SLEEP);
1292 	if (ddi_intr_alloc(ppt->pptd_dip, ppt->msi.inth, intr_type, 0,
1293 	    numvec, &msi_count, 0) != DDI_SUCCESS) {
1294 		kmem_free(ppt->msi.inth, ppt->msi.inth_sz);
1295 		err = EINVAL;
1296 		goto done;
1297 	}
1298 
1299 	/* Verify that we got as many vectors as the guest requested */
1300 	if (numvec != msi_count) {
1301 		ppt_teardown_msi(ppt);
1302 		err = EINVAL;
1303 		goto done;
1304 	}
1305 
1306 	/* Set up & enable interrupt handler for each vector. */
1307 	for (i = 0; i < numvec; i++) {
1308 		int res, intr_cap = 0;
1309 
1310 		ppt->msi.num_msgs = i + 1;
1311 		ppt->msi.arg[i].pptdev = ppt;
1312 		ppt->msi.arg[i].addr = addr;
1313 		ppt->msi.arg[i].msg_data = msg + i;
1314 
1315 		if (ddi_intr_add_handler(ppt->msi.inth[i], pptintr,
1316 		    &ppt->msi.arg[i], NULL) != DDI_SUCCESS)
1317 			break;
1318 
1319 		(void) ddi_intr_get_cap(ppt->msi.inth[i], &intr_cap);
1320 		if (intr_cap & DDI_INTR_FLAG_BLOCK)
1321 			res = ddi_intr_block_enable(&ppt->msi.inth[i], 1);
1322 		else
1323 			res = ddi_intr_enable(ppt->msi.inth[i]);
1324 
1325 		if (res != DDI_SUCCESS)
1326 			break;
1327 	}
1328 	if (i < numvec) {
1329 		ppt_teardown_msi(ppt);
1330 		err = ENXIO;
1331 	}
1332 
1333 done:
1334 	releasef(pptfd);
1335 	mutex_exit(&pptdev_mtx);
1336 	return (err);
1337 }
1338 
1339 int
ppt_setup_msix(struct vm * vm,int vcpu,int pptfd,int idx,uint64_t addr,uint64_t msg,uint32_t vector_control)1340 ppt_setup_msix(struct vm *vm, int vcpu, int pptfd, int idx, uint64_t addr,
1341     uint64_t msg, uint32_t vector_control)
1342 {
1343 	struct pptdev *ppt;
1344 	int numvec, alloced;
1345 	int err = 0;
1346 
1347 	mutex_enter(&pptdev_mtx);
1348 	err = ppt_findf(vm, pptfd, &ppt);
1349 	if (err != 0) {
1350 		mutex_exit(&pptdev_mtx);
1351 		return (err);
1352 	}
1353 
1354 	/* Reject attempts to enable MSI-X while MSI is active. */
1355 	if (ppt->msi.num_msgs != 0) {
1356 		err = EBUSY;
1357 		goto done;
1358 	}
1359 
1360 	/*
1361 	 * First-time configuration:
1362 	 *	Allocate the MSI-X table
1363 	 *	Allocate the IRQ resources
1364 	 *	Set up some variables in ppt->msix
1365 	 */
1366 	if (ppt->msix.num_msgs == 0) {
1367 		dev_info_t *dip = ppt->pptd_dip;
1368 
1369 		if (ddi_intr_get_navail(dip, DDI_INTR_TYPE_MSIX,
1370 		    &numvec) != DDI_SUCCESS) {
1371 			err = EINVAL;
1372 			goto done;
1373 		}
1374 
1375 		ppt->msix.num_msgs = numvec;
1376 
1377 		ppt->msix.arg_sz = numvec * sizeof (ppt->msix.arg[0]);
1378 		ppt->msix.arg = kmem_zalloc(ppt->msix.arg_sz, KM_SLEEP);
1379 		ppt->msix.inth_sz = numvec * sizeof (ddi_intr_handle_t);
1380 		ppt->msix.inth = kmem_zalloc(ppt->msix.inth_sz, KM_SLEEP);
1381 
1382 		if (ddi_intr_alloc(dip, ppt->msix.inth, DDI_INTR_TYPE_MSIX, 0,
1383 		    numvec, &alloced, 0) != DDI_SUCCESS) {
1384 			kmem_free(ppt->msix.arg, ppt->msix.arg_sz);
1385 			kmem_free(ppt->msix.inth, ppt->msix.inth_sz);
1386 			ppt->msix.arg = NULL;
1387 			ppt->msix.inth = NULL;
1388 			ppt->msix.arg_sz = ppt->msix.inth_sz = 0;
1389 			err = EINVAL;
1390 			goto done;
1391 		}
1392 
1393 		if (numvec != alloced) {
1394 			ppt_teardown_msix(ppt);
1395 			err = EINVAL;
1396 			goto done;
1397 		}
1398 	}
1399 
1400 	if (idx >= ppt->msix.num_msgs) {
1401 		err = EINVAL;
1402 		goto done;
1403 	}
1404 
1405 	if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
1406 		int intr_cap, res;
1407 
1408 		/* Tear down the IRQ if it's already set up */
1409 		ppt_teardown_msix_intr(ppt, idx);
1410 
1411 		ppt->msix.arg[idx].pptdev = ppt;
1412 		ppt->msix.arg[idx].addr = addr;
1413 		ppt->msix.arg[idx].msg_data = msg;
1414 
1415 		/* Setup the MSI-X interrupt */
1416 		if (ddi_intr_add_handler(ppt->msix.inth[idx], pptintr,
1417 		    &ppt->msix.arg[idx], NULL) != DDI_SUCCESS) {
1418 			err = ENXIO;
1419 			goto done;
1420 		}
1421 
1422 		(void) ddi_intr_get_cap(ppt->msix.inth[idx], &intr_cap);
1423 		if (intr_cap & DDI_INTR_FLAG_BLOCK)
1424 			res = ddi_intr_block_enable(&ppt->msix.inth[idx], 1);
1425 		else
1426 			res = ddi_intr_enable(ppt->msix.inth[idx]);
1427 
1428 		if (res != DDI_SUCCESS) {
1429 			ddi_intr_remove_handler(ppt->msix.inth[idx]);
1430 			err = ENXIO;
1431 			goto done;
1432 		}
1433 	} else {
1434 		/* Masked, tear it down if it's already been set up */
1435 		ppt_teardown_msix_intr(ppt, idx);
1436 	}
1437 
1438 done:
1439 	releasef(pptfd);
1440 	mutex_exit(&pptdev_mtx);
1441 	return (err);
1442 }
1443 
1444 int
ppt_get_limits(struct vm * vm,int pptfd,int * msilimit,int * msixlimit)1445 ppt_get_limits(struct vm *vm, int pptfd, int *msilimit, int *msixlimit)
1446 {
1447 	struct pptdev *ppt;
1448 	int err = 0;
1449 
1450 	mutex_enter(&pptdev_mtx);
1451 	err = ppt_findf(vm, pptfd, &ppt);
1452 	if (err != 0) {
1453 		mutex_exit(&pptdev_mtx);
1454 		return (err);
1455 	}
1456 
1457 	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSI,
1458 	    msilimit) != DDI_SUCCESS) {
1459 		*msilimit = -1;
1460 	}
1461 	if (ddi_intr_get_navail(ppt->pptd_dip, DDI_INTR_TYPE_MSIX,
1462 	    msixlimit) != DDI_SUCCESS) {
1463 		*msixlimit = -1;
1464 	}
1465 
1466 	releasef(pptfd);
1467 	mutex_exit(&pptdev_mtx);
1468 	return (err);
1469 }
1470 
1471 int
ppt_disable_msix(struct vm * vm,int pptfd)1472 ppt_disable_msix(struct vm *vm, int pptfd)
1473 {
1474 	struct pptdev *ppt;
1475 	int err = 0;
1476 
1477 	mutex_enter(&pptdev_mtx);
1478 	err = ppt_findf(vm, pptfd, &ppt);
1479 	if (err != 0) {
1480 		mutex_exit(&pptdev_mtx);
1481 		return (err);
1482 	}
1483 
1484 	ppt_teardown_msix(ppt);
1485 
1486 	releasef(pptfd);
1487 	mutex_exit(&pptdev_mtx);
1488 	return (err);
1489 }
1490