/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 */

/*
 * VIRTIO FRAMEWORK
 *
 * For design and usage documentation, see the comments in "virtio.h".
 */

#include <sys/conf.h>
#include <sys/kmem.h>
#include <sys/debug.h>
#include <sys/modctl.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/avintr.h>
#include <sys/spl.h>
#include <sys/promif.h>
#include <sys/list.h>
#include <sys/bootconf.h>
#include <sys/bootsvcs.h>
#include <sys/sysmacros.h>
#include <sys/pci.h>

#include "virtio.h"
#include "virtio_impl.h"


/*
 * Linkage structures
 */
static struct modlmisc virtio_modlmisc = {
	.misc_modops = &mod_miscops,
	.misc_linkinfo = "VIRTIO common routines",
};

static struct modlinkage virtio_modlinkage = {
	.ml_rev = MODREV_1,
	.ml_linkage = { &virtio_modlmisc, NULL }
};

int
_init(void)
{
	return (mod_install(&virtio_modlinkage));
}

int
_fini(void)
{
	return (mod_remove(&virtio_modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&virtio_modlinkage, modinfop));
}


static void virtio_set_status(virtio_t *, uint8_t);
static int virtio_chain_append_impl(virtio_chain_t *, uint64_t, size_t,
    uint16_t);
static int virtio_interrupts_setup(virtio_t *, int);
static void virtio_interrupts_teardown(virtio_t *);
static void virtio_interrupts_disable_locked(virtio_t *);
static void virtio_queue_free(virtio_queue_t *);
static void virtio_device_reset_locked(virtio_t *);

/*
 * We use the same device access attributes for BAR mapping and access to the
 * virtqueue memory.
 */
ddi_device_acc_attr_t virtio_acc_attr = {
	.devacc_attr_version = DDI_DEVICE_ATTR_V1,
	.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC,
	.devacc_attr_dataorder = DDI_STORECACHING_OK_ACC,
	.devacc_attr_access = DDI_DEFAULT_ACC
};


/*
 * DMA attributes for the memory given to the device for queue management.
 */
ddi_dma_attr_t virtio_dma_attr_queue = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	/*
	 * Queue memory is aligned on VIRTIO_PAGE_SIZE with the address shifted
	 * down by VIRTIO_PAGE_SHIFT before being passed to the device in a
	 * 32-bit register.
	 */
	.dma_attr_addr_hi = 0x00000FFFFFFFF000,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = VIRTIO_PAGE_SIZE,
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};

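/*
 * To make the constraint above concrete (a worked example, not part of the
 * original source): with VIRTIO_PAGE_SHIFT of 12, the device receives the
 * queue address as a 32-bit page frame number, so the highest usable
 * physical address is 0xFFFFFFFFULL << 12 == 0x00000FFFFFFFF000.  This
 * mirrors the register write performed in virtio_queue_alloc() below:
 *
 *	uint64_t pa = virtio_dma_cookie_pa(&viq->viq_dma, 0);
 *
 *	VERIFY3U(pa & (VIRTIO_PAGE_SIZE - 1), ==, 0);
 *	virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
 *	    pa >> VIRTIO_PAGE_SHIFT);
 */
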
/*
 * DMA attributes for the allocation of indirect descriptor lists.  The
 * indirect list is referenced by a regular descriptor entry: the physical
 * address field is 64 bits wide, but the length field is only 32 bits.  Each
 * descriptor is 16 bytes long.
 */
ddi_dma_attr_t virtio_dma_attr_indirect = {
	.dma_attr_version = DMA_ATTR_V0,
	.dma_attr_addr_lo = 0x0000000000000000,
	.dma_attr_addr_hi = 0xFFFFFFFFFFFFFFFF,
	.dma_attr_count_max = 0x00000000FFFFFFFF,
	.dma_attr_align = sizeof (struct virtio_vq_desc),
	.dma_attr_burstsizes = 1,
	.dma_attr_minxfer = 1,
	.dma_attr_maxxfer = 0x00000000FFFFFFFF,
	.dma_attr_seg = 0x00000000FFFFFFFF,
	.dma_attr_sgllen = 1,
	.dma_attr_granular = 1,
	.dma_attr_flags = 0
};


uint8_t
virtio_get8(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset)));
}

uint16_t
virtio_get16(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset)));
}

uint32_t
virtio_get32(virtio_t *vio, uintptr_t offset)
{
	return (ddi_get32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset)));
}

void
virtio_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	ddi_put8(vio->vio_barh, (uint8_t *)(vio->vio_bar + offset), value);
}

void
virtio_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	ddi_put16(vio->vio_barh, (uint16_t *)(vio->vio_bar + offset), value);
}

void
virtio_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	ddi_put32(vio->vio_barh, (uint32_t *)(vio->vio_bar + offset), value);
}

void
virtio_fini(virtio_t *vio, boolean_t failed)
{
	mutex_enter(&vio->vio_mutex);

	virtio_interrupts_teardown(vio);

	virtio_queue_t *viq;
	while ((viq = list_remove_head(&vio->vio_queues)) != NULL) {
		virtio_queue_free(viq);
	}
	list_destroy(&vio->vio_queues);

	if (failed) {
		/*
		 * Signal to the host that device setup failed.
		 */
		virtio_set_status(vio, VIRTIO_STATUS_FAILED);
	} else {
		virtio_device_reset_locked(vio);
	}

	/*
	 * We don't need to do anything for the provider initlevel, as it
	 * merely records the fact that virtio_init_complete() was called.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_PROVIDER;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_REGS) {
		/*
		 * Unmap PCI BAR0.
		 */
		ddi_regs_map_free(&vio->vio_barh);

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_REGS;
	}

	/*
	 * Ensure we have torn down everything we set up.
	 */
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_SHUTDOWN;
	VERIFY0(vio->vio_initlevel);

	mutex_exit(&vio->vio_mutex);
	mutex_destroy(&vio->vio_mutex);

	kmem_free(vio, sizeof (*vio));
}

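/*
 * Illustrative teardown order for a client driver's detach(9E) routine (a
 * sketch, not part of the original source; the "mydrv" names are
 * hypothetical):
 *
 *	virtio_shutdown(vio);
 *	for (;;) {
 *		virtio_chain_t *vic;
 *
 *		if ((vic = virtio_queue_evacuate(myq)) == NULL) {
 *			break;
 *		}
 *		mydrv_free_buffer(virtio_chain_data(vic));
 *		virtio_chain_free(vic);
 *	}
 *	virtio_fini(vio, B_FALSE);
 */
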
/*
 * Early device initialisation for legacy (pre-1.0 specification) virtio
 * devices.
 */
virtio_t *
virtio_init(dev_info_t *dip, uint64_t driver_features, boolean_t allow_indirect)
{
	int r;

	/*
	 * First, confirm that this is a legacy device.
	 */
	ddi_acc_handle_t pci;
	if (pci_config_setup(dip, &pci) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "pci_config_setup failed");
		return (NULL);
	}

	uint8_t revid;
	if ((revid = pci_config_get8(pci, PCI_CONF_REVID)) == PCI_EINVAL8) {
		dev_err(dip, CE_WARN, "could not read config space");
		pci_config_teardown(&pci);
		return (NULL);
	}

	pci_config_teardown(&pci);

	/*
	 * The legacy specification requires that the device advertise as PCI
	 * Revision 0.
	 */
	if (revid != 0) {
		dev_err(dip, CE_WARN, "PCI Revision %u incorrect for "
		    "legacy virtio device", (uint_t)revid);
		return (NULL);
	}

	virtio_t *vio = kmem_zalloc(sizeof (*vio), KM_SLEEP);
	vio->vio_dip = dip;

	/*
	 * Map PCI BAR0 for legacy device access.
	 */
	if ((r = ddi_regs_map_setup(dip, VIRTIO_LEGACY_PCI_BAR0,
	    (caddr_t *)&vio->vio_bar, 0, 0, &virtio_acc_attr,
	    &vio->vio_barh)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "ddi_regs_map_setup failure (%d)", r);
		kmem_free(vio, sizeof (*vio));
		return (NULL);
	}
	vio->vio_initlevel |= VIRTIO_INITLEVEL_REGS;

	/*
	 * We initialise the mutex without an interrupt priority to ease the
	 * implementation of some of the configuration space access routines.
	 * Drivers using the virtio framework MUST make a call to
	 * "virtio_init_complete()" prior to spawning other threads or enabling
	 * interrupt handlers, at which time we will destroy and reinitialise
	 * the mutex for use in our interrupt handlers.
	 */
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, NULL);

	list_create(&vio->vio_queues, sizeof (virtio_queue_t),
	    offsetof(virtio_queue_t, viq_link));

	/*
	 * Legacy virtio devices require a few common steps before we can
	 * negotiate device features.
	 */
	virtio_device_reset(vio);
	virtio_set_status(vio, VIRTIO_STATUS_ACKNOWLEDGE);
	virtio_set_status(vio, VIRTIO_STATUS_DRIVER);

	/*
	 * Negotiate features with the device.  Record the original supported
	 * feature set for debugging purposes.
	 */
	vio->vio_features_device = virtio_get32(vio,
	    VIRTIO_LEGACY_FEATURES_DEVICE);
	if (allow_indirect) {
		driver_features |= VIRTIO_F_RING_INDIRECT_DESC;
	}
	vio->vio_features = vio->vio_features_device & driver_features;
	virtio_put32(vio, VIRTIO_LEGACY_FEATURES_DRIVER, vio->vio_features);

	/*
	 * The device-specific configuration begins at an offset into the BAR
	 * that depends on whether we have enabled MSI-X interrupts or not.
	 * Start out with the offset for pre-MSI-X operation so that we can
	 * read device configuration space prior to configuring interrupts.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;

	return (vio);
}

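/*
 * Typical call sequence in a client driver's attach(9E) routine (a sketch
 * under assumed names; the "mydrv"/"MYDRV" identifiers are hypothetical):
 *
 *	virtio_t *vio;
 *	virtio_queue_t *myq;
 *
 *	if ((vio = virtio_init(dip, MYDRV_WANTED_FEATURES, B_TRUE)) == NULL) {
 *		return (DDI_FAILURE);
 *	}
 *	myq = virtio_queue_alloc(vio, 0, "requests", mydrv_q_isr, mydrv,
 *	    B_FALSE, MYDRV_MAX_SEGS);
 *	if (myq == NULL ||
 *	    virtio_init_complete(vio, 0) != DDI_SUCCESS ||
 *	    virtio_interrupts_enable(vio) != DDI_SUCCESS) {
 *		virtio_fini(vio, B_TRUE);
 *		return (DDI_FAILURE);
 *	}
 */
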
/*
 * This function must be called by the driver once it has completed early setup
 * calls.
 */
int
virtio_init_complete(virtio_t *vio, int allowed_interrupt_types)
{
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER));
	vio->vio_initlevel |= VIRTIO_INITLEVEL_PROVIDER;

	if (!list_is_empty(&vio->vio_queues)) {
		/*
		 * Set up interrupts for the queues that have been registered.
		 */
		if (virtio_interrupts_setup(vio, allowed_interrupt_types) !=
		    DDI_SUCCESS) {
			return (DDI_FAILURE);
		}
	}

	/*
	 * Now that we know the interrupt priority, we can reinitialise the
	 * mutexes with it.
	 */
	mutex_destroy(&vio->vio_mutex);
	mutex_init(&vio->vio_mutex, NULL, MUTEX_DRIVER, virtio_intr_pri(vio));
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_destroy(&viq->viq_mutex);
		mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER,
		    virtio_intr_pri(vio));
	}

	virtio_set_status(vio, VIRTIO_STATUS_DRIVER_OK);

	return (DDI_SUCCESS);
}

boolean_t
virtio_feature_present(virtio_t *vio, uint64_t feature_mask)
{
	return ((vio->vio_features & feature_mask) != 0);
}

void *
virtio_intr_pri(virtio_t *vio)
{
	VERIFY(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED);

	return (DDI_INTR_PRI(vio->vio_interrupt_priority));
}

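/*
 * For example (illustrative only), a driver that requested an optional
 * feature bit in virtio_init() can test the outcome of negotiation with:
 *
 *	if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC)) {
 *		... the device accepted indirect descriptors ...
 *	}
 */
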
/*
 * Enable a bit in the device status register.  Each bit signals a level of
 * guest readiness to the host.  Use the VIRTIO_STATUS_* constants for
 * "status".  To zero the status field use virtio_device_reset().
 */
static void
virtio_set_status(virtio_t *vio, uint8_t status)
{
	VERIFY3U(status, !=, 0);

	mutex_enter(&vio->vio_mutex);

	uint8_t old = virtio_get8(vio, VIRTIO_LEGACY_DEVICE_STATUS);
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, status | old);

	mutex_exit(&vio->vio_mutex);
}

static void
virtio_device_reset_locked(virtio_t *vio)
{
	virtio_put8(vio, VIRTIO_LEGACY_DEVICE_STATUS, VIRTIO_STATUS_RESET);
}

void
virtio_device_reset(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_device_reset_locked(vio);
	mutex_exit(&vio->vio_mutex);
}

/*
 * Some queues are effectively long-polled; the driver submits a series of
 * buffers and the device only returns them when there is data available.
 * During detach, we need to coordinate the return of these buffers.  Calling
 * "virtio_shutdown()" will reset the device, then allow the removal of all
 * buffers that were in flight at the time of shutdown via
 * "virtio_queue_evacuate()".
 */
void
virtio_shutdown(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Shutdown has been performed already.
		 */
		mutex_exit(&vio->vio_mutex);
		return;
	}

	/*
	 * First, mark all of the queues as shutdown.  This will prevent any
	 * further activity.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		mutex_enter(&viq->viq_mutex);
		viq->viq_shutdown = B_TRUE;
		mutex_exit(&viq->viq_mutex);
	}

	/*
	 * Now, reset the device.  This removes any queue configuration on the
	 * device side.
	 */
	virtio_device_reset_locked(vio);
	vio->vio_initlevel |= VIRTIO_INITLEVEL_SHUTDOWN;
	mutex_exit(&vio->vio_mutex);
}

/*
 * Common implementation of quiesce(9E) for simple Virtio-based devices.
 */
int
virtio_quiesce(virtio_t *vio)
{
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
		/*
		 * Device has already been reset.
		 */
		return (DDI_SUCCESS);
	}

	/*
	 * When we reset the device, it should immediately stop using any DMA
	 * memory we've previously passed to it.  All queue configuration is
	 * discarded.  This is good enough for quiesce(9E).
	 */
	virtio_device_reset_locked(vio);

	return (DDI_SUCCESS);
}

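/*
 * A minimal quiesce(9E) entry point for a client driver might simply be (a
 * sketch; the "mydrv" names are hypothetical):
 *
 *	static int
 *	mydrv_quiesce(dev_info_t *dip)
 *	{
 *		mydrv_t *m;
 *
 *		if ((m = ddi_get_driver_private(dip)) == NULL) {
 *			return (DDI_FAILURE);
 *		}
 *		return (virtio_quiesce(m->m_virtio));
 *	}
 */
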
/*
 * DEVICE-SPECIFIC REGISTER ACCESS
 *
 * Note that these functions take the mutex to avoid racing with interrupt
 * enable/disable, when the device-specific offset can potentially change.
 */

uint8_t
virtio_dev_get8(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint8_t r = virtio_get8(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint16_t
virtio_dev_get16(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint16_t r = virtio_get16(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint32_t
virtio_dev_get32(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	uint32_t r = virtio_get32(vio, vio->vio_config_offset + offset);
	mutex_exit(&vio->vio_mutex);

	return (r);
}

uint64_t
virtio_dev_get64(virtio_t *vio, uintptr_t offset)
{
	mutex_enter(&vio->vio_mutex);
	/*
	 * On at least some systems, a 64-bit read or write to this BAR is not
	 * possible.  For legacy devices, there is no generation number to use
	 * to determine if configuration may have changed half-way through a
	 * read.  We need to continue to read both halves of the value until we
	 * read the same value at least twice.
	 */
	uintptr_t o_lo = vio->vio_config_offset + offset;
	uintptr_t o_hi = o_lo + 4;

	uint64_t val = virtio_get32(vio, o_lo) |
	    ((uint64_t)virtio_get32(vio, o_hi) << 32);

	for (;;) {
		uint64_t tval = virtio_get32(vio, o_lo) |
		    ((uint64_t)virtio_get32(vio, o_hi) << 32);

		if (tval == val) {
			break;
		}

		val = tval;
	}

	mutex_exit(&vio->vio_mutex);
	return (val);
}

void
virtio_dev_put8(virtio_t *vio, uintptr_t offset, uint8_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put8(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put16(virtio_t *vio, uintptr_t offset, uint16_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put16(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

void
virtio_dev_put32(virtio_t *vio, uintptr_t offset, uint32_t value)
{
	mutex_enter(&vio->vio_mutex);
	virtio_put32(vio, vio->vio_config_offset + offset, value);
	mutex_exit(&vio->vio_mutex);
}

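/*
 * As an illustration (not from the original source), a block-style device
 * that exposes a 64-bit capacity field at offset 0 of its device-specific
 * configuration would read it with:
 *
 *	uint64_t capacity = virtio_dev_get64(vio, 0);
 *
 * The offset is relative to vio_config_offset, so the same call works
 * whether or not MSI-X has been enabled.
 */
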
/*
 * VIRTQUEUE MANAGEMENT
 */

static int
virtio_inflight_compar(const void *lp, const void *rp)
{
	const virtio_chain_t *l = lp;
	const virtio_chain_t *r = rp;

	if (l->vic_head < r->vic_head) {
		return (-1);
	} else if (l->vic_head > r->vic_head) {
		return (1);
	} else {
		return (0);
	}
}

virtio_queue_t *
virtio_queue_alloc(virtio_t *vio, uint16_t qidx, const char *name,
    ddi_intr_handler_t *func, void *funcarg, boolean_t force_direct,
    uint_t max_segs)
{
	uint16_t qsz;
	char space_name[256];

	if (max_segs < 1) {
		/*
		 * Every descriptor, direct or indirect, needs to refer to at
		 * least one buffer.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "segment count must be at least 1", name, (uint_t)qidx);
		return (NULL);
	}

	mutex_enter(&vio->vio_mutex);

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_PROVIDER) {
		/*
		 * Cannot configure any more queues once initial setup is
		 * complete and interrupts have been allocated.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "alloc after init complete", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	/*
	 * There is no way to negotiate a different queue size for legacy
	 * devices.  We must read and use the native queue size of the device.
	 */
	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	if ((qsz = virtio_get16(vio, VIRTIO_LEGACY_QUEUE_SIZE)) == 0) {
		/*
		 * A size of zero means the device does not have a queue with
		 * this index.
		 */
		dev_err(vio->vio_dip, CE_WARN, "queue \"%s\" (%u) "
		    "does not exist on device", name, (uint_t)qidx);
		mutex_exit(&vio->vio_mutex);
		return (NULL);
	}

	mutex_exit(&vio->vio_mutex);

	virtio_queue_t *viq = kmem_zalloc(sizeof (*viq), KM_SLEEP);
	viq->viq_virtio = vio;
	viq->viq_name = name;
	viq->viq_index = qidx;
	viq->viq_size = qsz;
	viq->viq_func = func;
	viq->viq_funcarg = funcarg;
	viq->viq_max_segs = max_segs;
	avl_create(&viq->viq_inflight, virtio_inflight_compar,
	    sizeof (virtio_chain_t), offsetof(virtio_chain_t, vic_node));

	/*
	 * Allocate the mutex without an interrupt priority for now, as we do
	 * with "vio_mutex".  We'll reinitialise it in
	 * "virtio_init_complete()".
	 */
	mutex_init(&viq->viq_mutex, NULL, MUTEX_DRIVER, NULL);

	if (virtio_feature_present(vio, VIRTIO_F_RING_INDIRECT_DESC) &&
	    !force_direct) {
		/*
		 * If we were able to negotiate the indirect descriptor
		 * feature, and the caller has not explicitly forced the use of
		 * direct descriptors, we'll allocate indirect descriptor lists
		 * for each chain.
		 */
		viq->viq_indirect = B_TRUE;
	}

	/*
	 * Track descriptor usage in an identifier space.
	 */
	(void) snprintf(space_name, sizeof (space_name), "%s%d_vq_%s",
	    ddi_get_name(vio->vio_dip), ddi_get_instance(vio->vio_dip), name);
	if ((viq->viq_descmap = id_space_create(space_name, 0, qsz)) == NULL) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate descriptor "
		    "ID space");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * For legacy devices, memory for the queue has a strict layout
	 * determined by the queue size.
	 */
	size_t sz_descs = sizeof (virtio_vq_desc_t) * qsz;
	size_t sz_driver = P2ROUNDUP_TYPED(sz_descs +
	    sizeof (virtio_vq_driver_t) +
	    sizeof (uint16_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);
	size_t sz_device = P2ROUNDUP_TYPED(sizeof (virtio_vq_device_t) +
	    sizeof (virtio_vq_elem_t) * qsz,
	    VIRTIO_PAGE_SIZE, size_t);

	if (virtio_dma_init(vio, &viq->viq_dma, sz_driver + sz_device,
	    &virtio_dma_attr_queue, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
	    KM_SLEEP) != DDI_SUCCESS) {
		dev_err(vio->vio_dip, CE_WARN, "could not allocate queue "
		    "DMA memory");
		virtio_queue_free(viq);
		return (NULL);
	}

	/*
	 * NOTE: The viq_dma_* members below are used by
	 * VIRTQ_DMA_SYNC_FORDEV() and VIRTQ_DMA_SYNC_FORKERNEL() to calculate
	 * offsets into the DMA allocation for partial synchronisation.  If the
	 * ordering of, or relationship between, these pointers changes, the
	 * macros must be kept in sync.
	 */
	viq->viq_dma_descs = virtio_dma_va(&viq->viq_dma, 0);
	viq->viq_dma_driver = virtio_dma_va(&viq->viq_dma, sz_descs);
	viq->viq_dma_device = virtio_dma_va(&viq->viq_dma, sz_driver);

	/*
	 * Install in the per-device list of queues.
	 */
	mutex_enter(&vio->vio_mutex);
	for (virtio_queue_t *chkvq = list_head(&vio->vio_queues); chkvq != NULL;
	    chkvq = list_next(&vio->vio_queues, chkvq)) {
		if (chkvq->viq_index == qidx) {
			dev_err(vio->vio_dip, CE_WARN, "attempt to register "
			    "queue \"%s\" with same index (%d) as queue \"%s\"",
			    name, qidx, chkvq->viq_name);
			mutex_exit(&vio->vio_mutex);
			virtio_queue_free(viq);
			return (NULL);
		}
	}
	list_insert_tail(&vio->vio_queues, viq);

	/*
	 * Ensure the zeroing of the queue memory is visible to the host before
	 * we inform the device of the queue address.
	 */
	membar_producer();
	VIRTQ_DMA_SYNC_FORDEV(viq);

	virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qidx);
	virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS,
	    virtio_dma_cookie_pa(&viq->viq_dma, 0) >> VIRTIO_PAGE_SHIFT);

	mutex_exit(&vio->vio_mutex);
	return (viq);
}

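/*
 * To make the layout computation above concrete (a worked example, assuming
 * the usual legacy ring layout: 16-byte descriptors, a 4-byte header on each
 * of the driver and device rings, 2-byte available-ring entries and 8-byte
 * used-ring entries), a queue of 256 entries needs:
 *
 *	sz_descs  = 16 * 256                          = 4096 bytes
 *	sz_driver = roundup(4096 + 4 + 2 * 256, 4096) = 8192 bytes
 *	sz_device = roundup(4 + 8 * 256, 4096)        = 4096 bytes
 *
 * for a single 12 KiB physically contiguous allocation.
 */
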
static void
virtio_queue_free(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	/*
	 * We are going to destroy the queue mutex.  Make sure we've already
	 * removed the interrupt handlers.
	 */
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED));

	mutex_enter(&viq->viq_mutex);

	/*
	 * If the device has not already been reset as part of a shutdown,
	 * detach the queue from the device now.
	 */
	if (!viq->viq_shutdown) {
		virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, viq->viq_index);
		virtio_put32(vio, VIRTIO_LEGACY_QUEUE_ADDRESS, 0);
	}

	virtio_dma_fini(&viq->viq_dma);

	VERIFY(avl_is_empty(&viq->viq_inflight));
	avl_destroy(&viq->viq_inflight);
	if (viq->viq_descmap != NULL) {
		id_space_destroy(viq->viq_descmap);
	}

	mutex_exit(&viq->viq_mutex);
	mutex_destroy(&viq->viq_mutex);

	kmem_free(viq, sizeof (*viq));
}

void
virtio_queue_no_interrupt(virtio_queue_t *viq, boolean_t stop_interrupts)
{
	mutex_enter(&viq->viq_mutex);

	if (stop_interrupts) {
		viq->viq_dma_driver->vqdr_flags |= VIRTQ_AVAIL_F_NO_INTERRUPT;
	} else {
		viq->viq_dma_driver->vqdr_flags &= ~VIRTQ_AVAIL_F_NO_INTERRUPT;
	}
	VIRTQ_DMA_SYNC_FORDEV(viq);

	mutex_exit(&viq->viq_mutex);
}

static virtio_chain_t *
virtio_queue_complete(virtio_queue_t *viq, uint_t index)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	virtio_chain_t *vic;

	virtio_chain_t search;
	bzero(&search, sizeof (search));
	search.vic_head = index;

	if ((vic = avl_find(&viq->viq_inflight, &search, NULL)) == NULL) {
		return (NULL);
	}
	avl_remove(&viq->viq_inflight, vic);

	return (vic);
}

uint_t
virtio_queue_size(virtio_queue_t *viq)
{
	return (viq->viq_size);
}

uint_t
virtio_queue_nactive(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	uint_t r = avl_numnodes(&viq->viq_inflight);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

virtio_chain_t *
virtio_queue_poll(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	if (viq->viq_shutdown) {
		/*
		 * The device has been reset by virtio_shutdown(), and queue
		 * processing has been halted.  Any previously submitted chains
		 * will be evacuated using virtio_queue_evacuate().
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (viq->viq_device_index == viq->viq_dma_device->vqde_index) {
		/*
		 * If the device index has not changed since the last poll,
		 * there are no new chains to process.
		 */
		mutex_exit(&viq->viq_mutex);
		return (NULL);
	}

	/*
	 * We need to ensure that all reads from the descriptor (vqde_ring[])
	 * and any referenced memory by the descriptor occur after we have read
	 * the descriptor index value above (vqde_index).
	 */
	membar_consumer();

	uint16_t index = (viq->viq_device_index++) % viq->viq_size;
	uint16_t start = viq->viq_dma_device->vqde_ring[index].vqe_start;
	uint32_t len = viq->viq_dma_device->vqde_ring[index].vqe_len;

	virtio_chain_t *vic;
	if ((vic = virtio_queue_complete(viq, start)) == NULL) {
		/*
		 * We could not locate a chain for this descriptor index, which
		 * suggests that something has gone horribly wrong.
		 */
		dev_err(viq->viq_virtio->vio_dip, CE_PANIC,
		    "queue \"%s\" ring entry %u (descriptor %u) has no chain",
		    viq->viq_name, (uint16_t)index, (uint16_t)start);
	}

	vic->vic_received_length = len;

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

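/*
 * A per-queue interrupt handler in a client driver would typically drain the
 * queue with virtio_queue_poll() (a sketch under assumed names; the "mydrv"
 * identifiers are hypothetical):
 *
 *	static uint_t
 *	mydrv_q_isr(caddr_t arg0, caddr_t arg1)
 *	{
 *		mydrv_t *m = (mydrv_t *)arg0;
 *		virtio_chain_t *vic;
 *
 *		while ((vic = virtio_queue_poll(m->m_queue)) != NULL) {
 *			size_t len = virtio_chain_received_length(vic);
 *
 *			mydrv_process(virtio_chain_data(vic), len);
 *			virtio_chain_free(vic);
 *		}
 *		return (DDI_INTR_CLAIMED);
 *	}
 */
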
/*
 * After a call to "virtio_shutdown()", the driver must retrieve any previously
 * submitted chains and free any associated resources.
 */
virtio_chain_t *
virtio_queue_evacuate(virtio_queue_t *viq)
{
	virtio_t *vio = viq->viq_virtio;

	mutex_enter(&vio->vio_mutex);
	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN)) {
		dev_err(vio->vio_dip, CE_PANIC,
		    "virtio_queue_evacuate() without virtio_shutdown()");
	}
	mutex_exit(&vio->vio_mutex);

	mutex_enter(&viq->viq_mutex);
	VERIFY(viq->viq_shutdown);

	virtio_chain_t *vic = avl_first(&viq->viq_inflight);
	if (vic != NULL) {
		avl_remove(&viq->viq_inflight, vic);
	}

	mutex_exit(&viq->viq_mutex);

	return (vic);
}

/*
 * VIRTQUEUE DESCRIPTOR CHAIN MANAGEMENT
 */

/*
 * When the device returns a descriptor chain to the driver, it may provide the
 * length in bytes of data written into the chain.  Client drivers should use
 * this value with care; the specification suggests some device implementations
 * have not always provided a useful or correct value.
 */
size_t
virtio_chain_received_length(virtio_chain_t *vic)
{
	return (vic->vic_received_length);
}

/*
 * Allocate a descriptor chain for use with this queue.  The "kmflags" value
 * may be KM_SLEEP or KM_NOSLEEP as per kmem_alloc(9F).
 */
virtio_chain_t *
virtio_chain_alloc(virtio_queue_t *viq, int kmflags)
{
	virtio_t *vio = viq->viq_virtio;
	virtio_chain_t *vic;
	uint_t cap;

	/*
	 * Direct descriptors are known by their index in the descriptor table
	 * for the queue.  We use the variable-length array member at the end
	 * of the chain tracking object to hold the list of direct descriptors
	 * assigned to this chain.
	 */
	if (viq->viq_indirect) {
		/*
		 * When using indirect descriptors we still need one direct
		 * descriptor entry to hold the physical address and length of
		 * the indirect descriptor table.
		 */
		cap = 1;
	} else {
		/*
		 * For direct descriptors we need to be able to track a
		 * descriptor for each possible segment in a single chain.
		 */
		cap = viq->viq_max_segs;
	}

	size_t vicsz = sizeof (*vic) + sizeof (uint16_t) * cap;
	if ((vic = kmem_zalloc(vicsz, kmflags)) == NULL) {
		return (NULL);
	}
	vic->vic_vq = viq;
	vic->vic_direct_capacity = cap;

	if (viq->viq_indirect) {
		/*
		 * Allocate an indirect descriptor list with the appropriate
		 * number of entries.
		 */
		if (virtio_dma_init(vio, &vic->vic_indirect_dma,
		    sizeof (virtio_vq_desc_t) * viq->viq_max_segs,
		    &virtio_dma_attr_indirect,
		    DDI_DMA_CONSISTENT | DDI_DMA_WRITE,
		    kmflags) != DDI_SUCCESS) {
			goto fail;
		}

		/*
		 * Allocate a single descriptor to hold the indirect list.
		 * Leave the length as zero for now; it will be set to include
		 * any occupied entries at push time.
		 */
		mutex_enter(&viq->viq_mutex);
		if (virtio_chain_append_impl(vic,
		    virtio_dma_cookie_pa(&vic->vic_indirect_dma, 0), 0,
		    VIRTQ_DESC_F_INDIRECT) != DDI_SUCCESS) {
			mutex_exit(&viq->viq_mutex);
			goto fail;
		}
		mutex_exit(&viq->viq_mutex);
		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * Don't set the indirect capacity until after we've installed
		 * the direct descriptor which points at the indirect list, or
		 * virtio_chain_append_impl() will be confused.
		 */
		vic->vic_indirect_capacity = viq->viq_max_segs;
	}

	return (vic);

fail:
	virtio_dma_fini(&vic->vic_indirect_dma);
	kmem_free(vic, vicsz);
	return (NULL);
}

void *
virtio_chain_data(virtio_chain_t *vic)
{
	return (vic->vic_data);
}

void
virtio_chain_data_set(virtio_chain_t *vic, void *data)
{
	vic->vic_data = data;
}

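/*
 * The opaque data pointer is the usual way for a driver to map a returned
 * chain back to its own per-request state; for instance (illustrative only,
 * with a hypothetical "mydrv_req_t"):
 *
 *	virtio_chain_t *vic = virtio_chain_alloc(viq, KM_SLEEP);
 *
 *	virtio_chain_data_set(vic, myreq);
 *
 * and later, when the device returns the chain:
 *
 *	mydrv_req_t *myreq = virtio_chain_data(vic);
 */
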
void
virtio_chain_clear(virtio_chain_t *vic)
{
	if (vic->vic_indirect_capacity != 0) {
		/*
		 * There should only be one direct descriptor, which points at
		 * our indirect descriptor list.  We don't want to clear it
		 * here.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);

		if (vic->vic_indirect_used > 0) {
			/*
			 * Clear out the indirect descriptor table.
			 */
			vic->vic_indirect_used = 0;
			bzero(virtio_dma_va(&vic->vic_indirect_dma, 0),
			    virtio_dma_size(&vic->vic_indirect_dma));
		}

	} else if (vic->vic_direct_capacity > 0) {
		/*
		 * Release any descriptors that were assigned to us previously.
		 */
		for (uint_t i = 0; i < vic->vic_direct_used; i++) {
			id_free(vic->vic_vq->viq_descmap, vic->vic_direct[i]);
			vic->vic_direct[i] = 0;
		}
		vic->vic_direct_used = 0;
	}
}

void
virtio_chain_free(virtio_chain_t *vic)
{
	/*
	 * First ensure that we have released any descriptors used by this
	 * chain.
	 */
	virtio_chain_clear(vic);

	if (vic->vic_indirect_capacity > 0) {
		/*
		 * Release the direct descriptor that points to our indirect
		 * descriptor list.
		 */
		VERIFY3U(vic->vic_direct_capacity, ==, 1);
		id_free(vic->vic_vq->viq_descmap, vic->vic_direct[0]);

		virtio_dma_fini(&vic->vic_indirect_dma);
	}

	size_t vicsz = sizeof (*vic) +
	    vic->vic_direct_capacity * sizeof (uint16_t);

	kmem_free(vic, vicsz);
}

static inline int
virtio_queue_descmap_alloc(virtio_queue_t *viq, uint_t *indexp)
{
	id_t index;

	if ((index = id_alloc_nosleep(viq->viq_descmap)) == -1) {
		return (ENOMEM);
	}

	VERIFY3S(index, >=, 0);
	VERIFY3S(index, <, viq->viq_size);

	*indexp = (uint_t)index;
	return (0);
}

static int
virtio_chain_append_impl(virtio_chain_t *vic, uint64_t pa, size_t len,
    uint16_t flags)
{
	virtio_queue_t *viq = vic->vic_vq;
	virtio_vq_desc_t *vqd;
	uint_t index;

	/*
	 * We're modifying the queue-wide descriptor list so make sure we have
	 * the appropriate lock.
	 */
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	if (vic->vic_indirect_capacity != 0) {
		/*
		 * Use indirect descriptors.
		 */
		if (vic->vic_indirect_used >= vic->vic_indirect_capacity) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&vic->vic_indirect_dma, 0);

		if ((index = vic->vic_indirect_used++) > 0) {
			/*
			 * Chain the current last indirect descriptor to the
			 * new one.
			 */
			vqd[index - 1].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[index - 1].vqd_next = index;
		}

	} else {
		/*
		 * Use direct descriptors.
		 */
		if (vic->vic_direct_used >= vic->vic_direct_capacity) {
			return (DDI_FAILURE);
		}

		if (virtio_queue_descmap_alloc(viq, &index) != 0) {
			return (DDI_FAILURE);
		}

		vqd = virtio_dma_va(&viq->viq_dma, 0);

		if (vic->vic_direct_used > 0) {
			/*
			 * This is not the first entry.  Chain the current
			 * descriptor to the next one.
			 */
			uint16_t p = vic->vic_direct[vic->vic_direct_used - 1];

			vqd[p].vqd_flags |= VIRTQ_DESC_F_NEXT;
			vqd[p].vqd_next = index;
		}
		vic->vic_direct[vic->vic_direct_used++] = index;
	}

	vqd[index].vqd_addr = pa;
	vqd[index].vqd_len = len;
	vqd[index].vqd_flags = flags;
	vqd[index].vqd_next = 0;

	return (DDI_SUCCESS);
}

int
virtio_chain_append(virtio_chain_t *vic, uint64_t pa, size_t len,
    virtio_direction_t dir)
{
	virtio_queue_t *viq = vic->vic_vq;
	uint16_t flags = 0;

	switch (dir) {
	case VIRTIO_DIR_DEVICE_WRITES:
		flags |= VIRTQ_DESC_F_WRITE;
		break;

	case VIRTIO_DIR_DEVICE_READS:
		break;

	default:
		panic("unknown direction value %u", dir);
	}

	mutex_enter(&viq->viq_mutex);
	int r = virtio_chain_append_impl(vic, pa, len, flags);
	mutex_exit(&viq->viq_mutex);

	return (r);
}

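/*
 * A driver appends one entry per physical buffer segment, with the direction
 * expressing which side will access the memory.  For example (a sketch; the
 * request and buffer addresses are hypothetical):
 *
 *	if (virtio_chain_append(vic, req_pa, sizeof (mydrv_req_t),
 *	    VIRTIO_DIR_DEVICE_READS) != DDI_SUCCESS ||
 *	    virtio_chain_append(vic, buf_pa, buflen,
 *	    VIRTIO_DIR_DEVICE_WRITES) != DDI_SUCCESS) {
 *		virtio_chain_clear(vic);
 *		return (ENOMEM);
 *	}
 *	virtio_chain_submit(vic, B_TRUE);
 */
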
static void
virtio_queue_flush_locked(virtio_queue_t *viq)
{
	VERIFY(MUTEX_HELD(&viq->viq_mutex));

	/*
	 * Make sure any writes we have just made to the descriptors
	 * (vqdr_ring[]) are visible to the device before we update the ring
	 * pointer (vqdr_index).
	 */
	membar_producer();
	viq->viq_dma_driver->vqdr_index = viq->viq_driver_index;
	VIRTQ_DMA_SYNC_FORDEV(viq);

	/*
	 * Determine whether the device expects us to notify it of new
	 * descriptors.
	 */
	VIRTQ_DMA_SYNC_FORKERNEL(viq);
	if (!(viq->viq_dma_device->vqde_flags & VIRTQ_USED_F_NO_NOTIFY)) {
		virtio_put16(viq->viq_virtio, VIRTIO_LEGACY_QUEUE_NOTIFY,
		    viq->viq_index);
	}
}

void
virtio_queue_flush(virtio_queue_t *viq)
{
	mutex_enter(&viq->viq_mutex);
	virtio_queue_flush_locked(viq);
	mutex_exit(&viq->viq_mutex);
}

void
virtio_chain_submit(virtio_chain_t *vic, boolean_t flush)
{
	virtio_queue_t *viq = vic->vic_vq;

	mutex_enter(&viq->viq_mutex);

	if (vic->vic_indirect_capacity != 0) {
		virtio_vq_desc_t *vqd = virtio_dma_va(&viq->viq_dma, 0);

		VERIFY3U(vic->vic_direct_used, ==, 1);

		/*
		 * This is an indirect descriptor queue.  The length in bytes
		 * of the descriptor must extend to cover the populated
		 * indirect descriptor entries.
		 */
		vqd[vic->vic_direct[0]].vqd_len =
		    sizeof (virtio_vq_desc_t) * vic->vic_indirect_used;

		virtio_dma_sync(&vic->vic_indirect_dma, DDI_DMA_SYNC_FORDEV);
	}

	/*
	 * Populate the next available slot in the driver-owned ring for this
	 * chain.  The updated value of viq_driver_index is not yet visible to
	 * the device until a subsequent queue flush.
	 */
	uint16_t index = (viq->viq_driver_index++) % viq->viq_size;
	viq->viq_dma_driver->vqdr_ring[index] = vic->vic_direct[0];

	vic->vic_head = vic->vic_direct[0];
	avl_add(&viq->viq_inflight, vic);

	if (flush) {
		virtio_queue_flush_locked(vic->vic_vq);
	}

	mutex_exit(&viq->viq_mutex);
}

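/*
 * Deferring the flush allows a driver to batch several submissions into a
 * single doorbell write.  A sketch (loop bounds and names are illustrative):
 *
 *	for (uint_t i = 0; i < n; i++) {
 *		virtio_chain_submit(vics[i], B_FALSE);
 *	}
 *	virtio_queue_flush(viq);
 */
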
/*
 * INTERRUPTS MANAGEMENT
 */

static const char *
virtio_interrupt_type_name(int type)
{
	switch (type) {
	case DDI_INTR_TYPE_MSIX:
		return ("MSI-X");
	case DDI_INTR_TYPE_MSI:
		return ("MSI");
	case DDI_INTR_TYPE_FIXED:
		return ("fixed");
	default:
		return ("?");
	}
}

static int
virtio_interrupts_alloc(virtio_t *vio, int type, int nrequired)
{
	dev_info_t *dip = vio->vio_dip;
	int nintrs = 0;
	int navail = 0;

	VERIFY(MUTEX_HELD(&vio->vio_mutex));
	VERIFY(!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC));

	if (ddi_intr_get_nintrs(dip, type, &nintrs) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (nintrs < 1) {
		dev_err(dip, CE_WARN, "no %s interrupts supported",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}

	if (ddi_intr_get_navail(dip, type, &navail) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not count available %s interrupts",
		    virtio_interrupt_type_name(type));
		return (DDI_FAILURE);
	}
	if (navail < nrequired) {
		dev_err(dip, CE_WARN, "need %d %s interrupts, but only %d "
		    "available", nrequired, virtio_interrupt_type_name(type),
		    navail);
		return (DDI_FAILURE);
	}

	VERIFY3P(vio->vio_interrupts, ==, NULL);
	vio->vio_interrupts = kmem_zalloc(
	    sizeof (ddi_intr_handle_t) * nrequired, KM_SLEEP);

	int r;
	if ((r = ddi_intr_alloc(dip, vio->vio_interrupts, type, 0, nrequired,
	    &vio->vio_ninterrupts, DDI_INTR_ALLOC_STRICT)) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "%s interrupt allocation failure (%d)",
		    virtio_interrupt_type_name(type), r);
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * nrequired);
		vio->vio_interrupts = NULL;
		return (DDI_FAILURE);
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ALLOC;
	vio->vio_interrupt_type = type;
	return (DDI_SUCCESS);
}

static uint_t
virtio_shared_isr(caddr_t arg0, caddr_t arg1)
{
	virtio_t *vio = (virtio_t *)arg0;
	uint_t r = DDI_INTR_UNCLAIMED;
	uint8_t isr;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Check the ISR status to see if the interrupt applies to us.  Reading
	 * this field resets it to zero.
	 */
	isr = virtio_get8(vio, VIRTIO_LEGACY_ISR_STATUS);
	if ((isr & VIRTIO_ISR_CHECK_QUEUES) == 0) {
		goto done;
	}

	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			mutex_exit(&vio->vio_mutex);
			if (viq->viq_func(viq->viq_funcarg, arg0) ==
			    DDI_INTR_CLAIMED) {
				r = DDI_INTR_CLAIMED;
			}
			mutex_enter(&vio->vio_mutex);

			if (vio->vio_initlevel & VIRTIO_INITLEVEL_SHUTDOWN) {
				/*
				 * The device was shut down while in a queue
				 * handler routine.
				 */
				goto done;
			}
		}
	}

done:
	mutex_exit(&vio->vio_mutex);
	return (r);
}

static int
virtio_interrupts_setup(virtio_t *vio, int allow_types)
{
	dev_info_t *dip = vio->vio_dip;
	int types;
	int count = 0;

	mutex_enter(&vio->vio_mutex);

	/*
	 * Determine the number of interrupts we'd like based on the number of
	 * virtqueues.
	 */
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func != NULL) {
			count++;
		}
	}

	if (ddi_intr_get_supported_types(dip, &types) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "could not get supported interrupts");
		mutex_exit(&vio->vio_mutex);
		return (DDI_FAILURE);
	}

	if (allow_types != 0) {
		/*
		 * Restrict the possible interrupt types at the request of the
		 * driver.
		 */
		types &= allow_types;
	}

	/*
	 * Try each potential interrupt type in descending order of preference.
	 * Note that the specification does not appear to allow for the use of
	 * classical MSI, so we are limited to either MSI-X or fixed
	 * interrupts.
	 */
	if (types & DDI_INTR_TYPE_MSIX) {
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_MSIX,
		    count) == DDI_SUCCESS) {
			goto add_handlers;
		}
	}
	if (types & DDI_INTR_TYPE_FIXED) {
		/*
		 * If fixed interrupts are all that are available, we'll just
		 * ask for one.
		 */
		if (virtio_interrupts_alloc(vio, DDI_INTR_TYPE_FIXED, 1) ==
		    DDI_SUCCESS) {
			goto add_handlers;
		}
	}

	dev_err(dip, CE_WARN, "interrupt allocation failed");
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);

add_handlers:
	/*
	 * Ensure that we have not been given any high-level interrupts as our
	 * interrupt handlers do not support them.
	 */
	for (int i = 0; i < vio->vio_ninterrupts; i++) {
		uint_t ipri;

		if (ddi_intr_get_pri(vio->vio_interrupts[i], &ipri) !=
		    DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "could not determine interrupt "
			    "priority");
			goto fail;
		}

		if (ipri >= ddi_intr_get_hilevel_pri()) {
			dev_err(dip, CE_WARN, "high level interrupts not "
			    "supported");
			goto fail;
		}

		/*
		 * Record the highest priority we've been allocated to use for
		 * mutex initialisation.
		 */
		if (i == 0 || ipri > vio->vio_interrupt_priority) {
			vio->vio_interrupt_priority = ipri;
		}
	}

	/*
	 * Get the interrupt capabilities from the first handle to determine
	 * whether we need to use ddi_intr_block_enable(9F).
	 */
	if (ddi_intr_get_cap(vio->vio_interrupts[0],
	    &vio->vio_interrupt_cap) != DDI_SUCCESS) {
		dev_err(dip, CE_WARN, "failed to get interrupt capabilities");
		goto fail;
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		VERIFY3S(vio->vio_ninterrupts, ==, 1);
		/*
		 * For fixed interrupts, we need to use our shared handler to
		 * multiplex the per-queue handlers provided by the driver.
		 */
		if (ddi_intr_add_handler(vio->vio_interrupts[0],
		    virtio_shared_isr, (caddr_t)vio, NULL) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding shared %s interrupt "
			    "handler failed", virtio_interrupt_type_name(
			    vio->vio_interrupt_type));
			goto fail;
		}

		goto done;
	}

	VERIFY3S(vio->vio_ninterrupts, ==, count);

	uint_t n = 0;
	for (virtio_queue_t *viq = list_head(&vio->vio_queues); viq != NULL;
	    viq = list_next(&vio->vio_queues, viq)) {
		if (viq->viq_func == NULL) {
			continue;
		}

		if (ddi_intr_add_handler(vio->vio_interrupts[n],
		    viq->viq_func, (caddr_t)viq->viq_funcarg,
		    (caddr_t)vio) != DDI_SUCCESS) {
			dev_err(dip, CE_WARN, "adding interrupt %u (%s) failed",
			    n, viq->viq_name);
			goto fail;
		}

		viq->viq_handler_index = n;
		viq->viq_handler_added = B_TRUE;
		n++;
	}

done:
	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ADDED;
	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);

fail:
	virtio_interrupts_teardown(vio);
	mutex_exit(&vio->vio_mutex);
	return (DDI_FAILURE);
}

static void
virtio_interrupts_teardown(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	virtio_interrupts_disable_locked(vio);

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_FIXED) {
		/*
		 * Remove the multiplexing interrupt handler.
		 */
		if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ADDED) {
			int r;

			VERIFY3S(vio->vio_ninterrupts, ==, 1);

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[0])) != DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "shared interrupt handler failed (%d)", r);
			}
		}
	} else {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			int r;

			if (!viq->viq_handler_added) {
				continue;
			}

			if ((r = ddi_intr_remove_handler(
			    vio->vio_interrupts[viq->viq_handler_index])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "removing "
				    "interrupt handler (%s) failed (%d)",
				    viq->viq_name, r);
			}

			viq->viq_handler_added = B_FALSE;
		}
	}
	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ADDED;

	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ALLOC) {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			int r;

			if ((r = ddi_intr_free(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				dev_err(vio->vio_dip, CE_WARN, "freeing "
				    "interrupt %u failed (%d)", i, r);
			}
		}
		kmem_free(vio->vio_interrupts,
		    sizeof (ddi_intr_handle_t) * vio->vio_ninterrupts);
		vio->vio_interrupts = NULL;
		vio->vio_ninterrupts = 0;
		vio->vio_interrupt_type = 0;
		vio->vio_interrupt_cap = 0;
		vio->vio_interrupt_priority = 0;

		vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ALLOC;
	}
}

static void
virtio_interrupts_unwind(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT,
			    viq->viq_index);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE,
			    VIRTIO_LEGACY_MSI_NO_VECTOR);
		}
	}

	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		(void) ddi_intr_block_disable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			(void) ddi_intr_disable(vio->vio_interrupts[i]);
		}
	}

	/*
	 * Disabling the interrupts makes the MSI-X fields disappear from the
	 * BAR once more.
	 */
	vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET;
}

int
virtio_interrupts_enable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	if (vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED) {
		mutex_exit(&vio->vio_mutex);
		return (DDI_SUCCESS);
	}

	int r = DDI_SUCCESS;
	if (vio->vio_interrupt_cap & DDI_INTR_FLAG_BLOCK) {
		r = ddi_intr_block_enable(vio->vio_interrupts,
		    vio->vio_ninterrupts);
	} else {
		for (int i = 0; i < vio->vio_ninterrupts; i++) {
			if ((r = ddi_intr_enable(vio->vio_interrupts[i])) !=
			    DDI_SUCCESS) {
				/*
				 * Disable the interrupts we have enabled so
				 * far.
				 */
				for (i--; i >= 0; i--) {
					(void) ddi_intr_disable(
					    vio->vio_interrupts[i]);
				}
				break;
			}
		}
	}

	if (r != DDI_SUCCESS) {
		mutex_exit(&vio->vio_mutex);
		return (r);
	}

	if (vio->vio_interrupt_type == DDI_INTR_TYPE_MSIX) {
		/*
		 * When asked to enable the interrupts, the system enables
		 * MSI-X in the PCI configuration for the device.  While
		 * enabled, the extra MSI-X configuration table fields appear
		 * between the general and the device-specific regions of the
		 * BAR.
		 */
		vio->vio_config_offset = VIRTIO_LEGACY_CFG_OFFSET_MSIX;

		for (virtio_queue_t *viq = list_head(&vio->vio_queues);
		    viq != NULL; viq = list_next(&vio->vio_queues, viq)) {
			if (!viq->viq_handler_added) {
				continue;
			}

			uint16_t qi = viq->viq_index;
			uint16_t msi = viq->viq_handler_index;

			/*
			 * Route interrupts for this queue to the assigned
			 * MSI-X vector number.
			 */
			virtio_put16(vio, VIRTIO_LEGACY_QUEUE_SELECT, qi);
			virtio_put16(vio, VIRTIO_LEGACY_MSIX_QUEUE, msi);

			/*
			 * The device may not actually accept the vector number
			 * we're attempting to program.  We need to confirm
			 * that configuration was successful by re-reading the
			 * configuration we just wrote.
			 */
			if (virtio_get16(vio, VIRTIO_LEGACY_MSIX_QUEUE) !=
			    msi) {
				dev_err(vio->vio_dip, CE_WARN,
				    "failed to configure MSI-X vector %u for "
				    "queue \"%s\" (#%u)", (uint_t)msi,
				    viq->viq_name, (uint_t)qi);

				virtio_interrupts_unwind(vio);
				mutex_exit(&vio->vio_mutex);
				return (DDI_FAILURE);
			}
		}
	}

	vio->vio_initlevel |= VIRTIO_INITLEVEL_INT_ENABLED;

	mutex_exit(&vio->vio_mutex);
	return (DDI_SUCCESS);
}

static void
virtio_interrupts_disable_locked(virtio_t *vio)
{
	VERIFY(MUTEX_HELD(&vio->vio_mutex));

	if (!(vio->vio_initlevel & VIRTIO_INITLEVEL_INT_ENABLED)) {
		return;
	}

	virtio_interrupts_unwind(vio);

	vio->vio_initlevel &= ~VIRTIO_INITLEVEL_INT_ENABLED;
}

void
virtio_interrupts_disable(virtio_t *vio)
{
	mutex_enter(&vio->vio_mutex);
	virtio_interrupts_disable_locked(vio);
	mutex_exit(&vio->vio_mutex);
}