/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019, Joyent, Inc.
 */

/*
 * xHCI DMA Management Routines
 *
 * Please see the big theory statement in xhci.c for more information.
 */

#include <sys/usb/hcd/xhci/xhci.h>

int
xhci_check_dma_handle(xhci_t *xhcip, xhci_dma_buffer_t *xdb)
{
	ddi_fm_error_t de;

	if (!DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps))
		return (0);

	ddi_fm_dma_err_get(xdb->xdb_dma_handle, &de, DDI_FME_VERSION);
	return (de.fme_status);
}

void
xhci_dma_acc_attr(xhci_t *xhcip, ddi_device_acc_attr_t *accp)
{
	accp->devacc_attr_version = DDI_DEVICE_ATTR_V0;
	accp->devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
	accp->devacc_attr_dataorder = DDI_STRICTORDER_ACC;

	if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) {
		accp->devacc_attr_access = DDI_FLAGERR_ACC;
	} else {
		accp->devacc_attr_access = DDI_DEFAULT_ACC;
	}
}

/*
 * These are the DMA attributes that we assign when making a transfer. The SGL
 * length is variable and supplied by the caller; it varies based on the type
 * of transfer we're doing.
 */
void
xhci_dma_transfer_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp, uint_t sgl)
{
	VERIFY3U(sgl, >, 0);
	VERIFY3U(sgl, <=, XHCI_TRANSFER_DMA_SGL);
	attrp->dma_attr_version = DMA_ATTR_V0;

	/*
	 * The range of addresses that we can use depends on what the hardware
	 * supports.
	 */
	attrp->dma_attr_addr_lo = 0x0;
	if (xhcip->xhci_caps.xcap_flags & XCAP_AC64) {
		attrp->dma_attr_addr_hi = UINT64_MAX;
	} else {
		attrp->dma_attr_addr_hi = UINT32_MAX;
	}

	/*
	 * The count max indicates the total amount that will fit into one
	 * cookie, which is one TRB in our world. In other words, 64 KiB.
	 */
	attrp->dma_attr_count_max = XHCI_TRB_MAX_TRANSFER;

	/*
	 * The alignment and segment are related. The alignment describes the
	 * alignment of the PA. The segment describes a boundary that the DMA
	 * allocation cannot cross. In other words, for a given chunk of
	 * allocated DMA memory, the buffer cannot cross a 64 KiB aligned
	 * boundary. However, the physical address only needs to be aligned to
	 * 64 bytes. (A worked example follows this function.)
	 */
	attrp->dma_attr_align = XHCI_DMA_ALIGN;
	attrp->dma_attr_seg = XHCI_TRB_MAX_TRANSFER - 1;

	attrp->dma_attr_burstsizes = 0xfff;

	/*
	 * This is the maximum we can send. Technically this is limited by the
	 * descriptors and not by hardware, which is why we use a large value
	 * for the max that's larger than any memory allocation we'll ever
	 * throw at it.
	 */
	attrp->dma_attr_minxfer = 0x1;
	attrp->dma_attr_maxxfer = UINT32_MAX;

	/*
	 * This is determined by the caller.
	 */
	attrp->dma_attr_sgllen = sgl;

	/*
	 * The granularity describes the addressing granularity, i.e. whether
	 * requests can be made in units of this number of bytes. For PCI this
	 * should always be one.
	 */
	attrp->dma_attr_granular = 1;

	if (DDI_FM_DMA_ERR_CAP(xhcip->xhci_fm_caps)) {
		attrp->dma_attr_flags = DDI_DMA_FLAGERR;
	} else {
		attrp->dma_attr_flags = 0;
	}
}
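/*
 * As an illustrative sketch of the alignment/segment interaction above (the
 * addresses are hypothetical, not taken from any particular controller): with
 * dma_attr_align = 64 and dma_attr_seg = 0xffff, a cookie may start at PA
 * 0x12348000 (64-byte aligned), but no cookie may span the 64 KiB boundary at
 * 0x12350000. A 48 KiB buffer starting at 0x12348000 would therefore be bound
 * as two cookies, one covering 0x12348000-0x1234ffff and one covering
 * 0x12350000-0x12353fff.
 */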
/*
 * This routine creates DMA attributes for normal allocations of data
 * structures and the like. By default we use the same values as the transfer
 * attributes, with explicit comments about where they differ.
 */
void
xhci_dma_dma_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp)
{
	/*
	 * Note, we always use a single SGL for these DMA allocations as these
	 * are used for small data structures.
	 */
	xhci_dma_transfer_attr(xhcip, attrp, XHCI_DEF_DMA_SGL);

	/*
	 * The maximum size of any of these structures is 4 KiB as opposed to
	 * the 64 KiB max described above. Similarly, the boundary requirement
	 * is reduced to 4 KiB.
	 */
	attrp->dma_attr_count_max = xhcip->xhci_caps.xcap_pagesize;
	attrp->dma_attr_maxxfer = xhcip->xhci_caps.xcap_pagesize;
	attrp->dma_attr_seg = xhcip->xhci_caps.xcap_pagesize - 1;
}

/*
 * Fill in attributes for a scratchpad entry. The scratchpad entries are
 * somewhat different insofar as they are closest to a normal DMA attribute,
 * except they have a stricter alignment, needing to be page sized.
 *
 * In addition, because we never access this memory ourselves, we can just mark
 * it all as relaxed ordering.
 */
void
xhci_dma_scratchpad_attr(xhci_t *xhcip, ddi_dma_attr_t *attrp)
{
	xhci_dma_dma_attr(xhcip, attrp);
	attrp->dma_attr_align = xhcip->xhci_caps.xcap_pagesize;
	attrp->dma_attr_flags |= DDI_DMA_RELAXED_ORDERING;
}

/*
 * This should be used for the simple case of a single SGL entry, which is the
 * vast majority of the non-transfer allocations.
 */
uint64_t
xhci_dma_pa(xhci_dma_buffer_t *xdb)
{
	ASSERT(xdb->xdb_ncookies == 1);
	return (xdb->xdb_cookies[0].dmac_laddress);
}

void
xhci_dma_free(xhci_dma_buffer_t *xdb)
{
	if (xdb->xdb_ncookies != 0) {
		VERIFY(xdb->xdb_dma_handle != NULL);
		(void) ddi_dma_unbind_handle(xdb->xdb_dma_handle);
		xdb->xdb_ncookies = 0;
		bzero(xdb->xdb_cookies,
		    sizeof (ddi_dma_cookie_t) * XHCI_TRANSFER_DMA_SGL);
		xdb->xdb_len = 0;
	}

	if (xdb->xdb_acc_handle != NULL) {
		ddi_dma_mem_free(&xdb->xdb_acc_handle);
		xdb->xdb_acc_handle = NULL;
		xdb->xdb_va = NULL;
	}

	if (xdb->xdb_dma_handle != NULL) {
		ddi_dma_free_handle(&xdb->xdb_dma_handle);
		xdb->xdb_dma_handle = NULL;
	}

	ASSERT(xdb->xdb_va == NULL);
	ASSERT(xdb->xdb_ncookies == 0);
	ASSERT(xdb->xdb_cookies[0].dmac_laddress == 0);
	ASSERT(xdb->xdb_len == 0);
}

boolean_t
xhci_dma_alloc(xhci_t *xhcip, xhci_dma_buffer_t *xdb, ddi_dma_attr_t *attrp,
    ddi_device_acc_attr_t *accp, boolean_t zero, size_t size, boolean_t wait)
{
	int ret, i;
	uint_t flags = DDI_DMA_CONSISTENT;
	size_t len;
	ddi_dma_cookie_t cookie;
	uint_t ncookies;
	int (*memcb)(caddr_t);

	if (wait == B_TRUE) {
		memcb = DDI_DMA_SLEEP;
	} else {
		memcb = DDI_DMA_DONTWAIT;
	}

	ret = ddi_dma_alloc_handle(xhcip->xhci_dip, attrp, memcb, NULL,
	    &xdb->xdb_dma_handle);
	if (ret != 0) {
		xhci_log(xhcip, "!failed to allocate DMA handle: %d", ret);
		xdb->xdb_dma_handle = NULL;
		return (B_FALSE);
	}

	ret = ddi_dma_mem_alloc(xdb->xdb_dma_handle, size, accp, flags, memcb,
	    NULL, &xdb->xdb_va, &len, &xdb->xdb_acc_handle);
	if (ret != DDI_SUCCESS) {
		xhci_log(xhcip, "!failed to allocate DMA memory: %d", ret);
		xdb->xdb_va = NULL;
		xdb->xdb_acc_handle = NULL;
		xhci_dma_free(xdb);
		return (B_FALSE);
	}

	if (zero == B_TRUE)
		bzero(xdb->xdb_va, len);

	ret = ddi_dma_addr_bind_handle(xdb->xdb_dma_handle, NULL, xdb->xdb_va,
	    len, DDI_DMA_RDWR | flags, memcb, NULL, &cookie, &ncookies);
	if (ret != 0) {
		xhci_log(xhcip, "!failed to bind DMA memory: %d", ret);
		xhci_dma_free(xdb);
		return (B_FALSE);
	}

	/*
	 * Note we explicitly store the logical length of this allocation. The
	 * physical length is available via the cookies.
	 */
	xdb->xdb_len = size;
	xdb->xdb_ncookies = ncookies;
	xdb->xdb_cookies[0] = cookie;
	for (i = 1; i < ncookies; i++) {
		ddi_dma_nextcookie(xdb->xdb_dma_handle, &xdb->xdb_cookies[i]);
	}

	return (B_TRUE);
}
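/*
 * As a hypothetical usage sketch (not a verbatim caller from this driver), a
 * small, zeroed data structure would typically be allocated by combining the
 * helpers above along these lines:
 *
 *	ddi_device_acc_attr_t acc;
 *	ddi_dma_attr_t attr;
 *	xhci_dma_buffer_t xdb;
 *
 *	xhci_dma_acc_attr(xhcip, &acc);
 *	xhci_dma_dma_attr(xhcip, &attr);
 *	if (!xhci_dma_alloc(xhcip, &xdb, &attr, &acc, B_TRUE, len, B_TRUE))
 *		return (B_FALSE);
 *	pa = xhci_dma_pa(&xdb);
 *
 * with the buffer eventually released via xhci_dma_free(&xdb). Here len and pa
 * are placeholder locals for illustration.
 */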
void
xhci_transfer_free(xhci_t *xhcip, xhci_transfer_t *xt)
{
	if (xt == NULL)
		return;

	VERIFY(xhcip != NULL);
	xhci_dma_free(&xt->xt_buffer);
	if (xt->xt_isoc != NULL) {
		ASSERT3U(xt->xt_ntrbs, >, 0);
		kmem_free(xt->xt_isoc, sizeof (usb_isoc_pkt_descr_t) *
		    xt->xt_ntrbs);
		xt->xt_isoc = NULL;
	}
	if (xt->xt_trbs != NULL) {
		ASSERT3U(xt->xt_ntrbs, >, 0);
		kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * xt->xt_ntrbs);
		xt->xt_trbs = NULL;
	}
	if (xt->xt_trbs_pa != NULL) {
		ASSERT3U(xt->xt_ntrbs, >, 0);
		kmem_free(xt->xt_trbs_pa, sizeof (uint64_t) * xt->xt_ntrbs);
		xt->xt_trbs_pa = NULL;
	}
	kmem_free(xt, sizeof (xhci_transfer_t));
}

xhci_transfer_t *
xhci_transfer_alloc(xhci_t *xhcip, xhci_endpoint_t *xep, size_t size,
    uint_t trbs, int usb_flags)
{
	int kmflags;
	boolean_t dmawait;
	xhci_transfer_t *xt;
	ddi_device_acc_attr_t acc;
	ddi_dma_attr_t attr;

	if (usb_flags & USB_FLAGS_SLEEP) {
		kmflags = KM_SLEEP;
		dmawait = B_TRUE;
	} else {
		kmflags = KM_NOSLEEP;
		dmawait = B_FALSE;
	}

	xt = kmem_zalloc(sizeof (xhci_transfer_t), kmflags);
	if (xt == NULL)
		return (NULL);

	if (size != 0) {
		int sgl = XHCI_DEF_DMA_SGL;

		/*
		 * For BULK transfers, we always increase the number of SGL
		 * entries that we support to make things easier for the
		 * kernel. However, for control transfers, we currently opt to
		 * keep things a bit simpler and use our default of one SGL.
		 * There's no good technical reason for this; it just keeps
		 * things a bit easier.
		 *
		 * To simplify things, we don't use additional SGL entries for
		 * ISOC transfers. While this isn't the best, it isn't too far
		 * off from what ehci and co. have done before. If this becomes
		 * a technical issue, it's certainly possible to increase the
		 * SGL entry count.
		 *
		 * When we use the larger SGL count, we change our strategy for
		 * being notified. In such a case we will opt to use an event
		 * data packet. This helps deal with cases where some
		 * controllers don't properly generate events for the last
		 * entry in a TD with IOC when IOSP is set.
		 */
		if (xep->xep_type == USB_EP_ATTR_BULK) {
			sgl = XHCI_TRANSFER_DMA_SGL;
			trbs++;
		}

		xhci_dma_acc_attr(xhcip, &acc);
		xhci_dma_transfer_attr(xhcip, &attr, sgl);
		if (xhci_dma_alloc(xhcip, &xt->xt_buffer, &attr, &acc, B_FALSE,
		    size, dmawait) == B_FALSE) {
			kmem_free(xt, sizeof (xhci_transfer_t));
			return (NULL);
		}

		/*
		 * ISOC transfers are a bit special and don't need additional
		 * TRBs for data.
		 */
		if (xep->xep_type != USB_EP_ATTR_ISOCH)
			trbs += xt->xt_buffer.xdb_ncookies;
	}

	xt->xt_trbs = kmem_zalloc(sizeof (xhci_trb_t) * trbs, kmflags);
	if (xt->xt_trbs == NULL) {
		xhci_dma_free(&xt->xt_buffer);
		kmem_free(xt, sizeof (xhci_transfer_t));
		return (NULL);
	}

	xt->xt_trbs_pa = kmem_zalloc(sizeof (uint64_t) * trbs, kmflags);
	if (xt->xt_trbs_pa == NULL) {
		kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs);
		xhci_dma_free(&xt->xt_buffer);
		kmem_free(xt, sizeof (xhci_transfer_t));
		return (NULL);
	}

	/*
	 * For ISOCH transfers, we need to also allocate the results data.
	 */
	if (xep->xep_type == USB_EP_ATTR_ISOCH) {
		xt->xt_isoc = kmem_zalloc(sizeof (usb_isoc_pkt_descr_t) * trbs,
		    kmflags);
		if (xt->xt_isoc == NULL) {
			kmem_free(xt->xt_trbs_pa, sizeof (uint64_t) * trbs);
			kmem_free(xt->xt_trbs, sizeof (xhci_trb_t) * trbs);
			xhci_dma_free(&xt->xt_buffer);
			kmem_free(xt, sizeof (xhci_transfer_t));
			return (NULL);
		}
	}

	xt->xt_ntrbs = trbs;
	xt->xt_cr = USB_CR_OK;

	return (xt);
}
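/*
 * To make the TRB accounting above concrete, a hypothetical example
 * (illustrative numbers, not from a real device): a bulk transfer where the
 * caller passes trbs = 1 and whose data buffer binds to 3 DMA cookies ends up
 * with trbs = 1 + 1 (event data TRB) + 3 (one per cookie) = 5. An ISOC
 * transfer with the same request keeps the caller's count and instead gets a
 * matching array of usb_isoc_pkt_descr_t results.
 */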
/*
 * Abstract the notion of copying out to handle the case of multiple DMA
 * cookies. If tobuf is true, we are copying to the kernel-provided buffer,
 * otherwise we're copying into the DMA memory.
 */
void
xhci_transfer_copy(xhci_transfer_t *xt, void *buf, size_t len,
    boolean_t tobuf)
{
	void *dmabuf = xt->xt_buffer.xdb_va;
	if (tobuf == B_TRUE)
		bcopy(dmabuf, buf, len);
	else
		bcopy(buf, dmabuf, len);
}

int
xhci_transfer_sync(xhci_t *xhcip, xhci_transfer_t *xt, uint_t type)
{
	XHCI_DMA_SYNC(xt->xt_buffer, type);
	return (xhci_check_dma_handle(xhcip, &xt->xt_buffer));
}

/*
 * We're required to try to inform the xHCI controller about the number of data
 * packets that are required. The algorithm to use is described in xHCI 1.1 /
 * 4.11.2.4. While it might be tempting to just calculate the number of packets
 * by simple rounding of the remaining number of bytes, that misses a critical
 * problem -- DMA boundaries may cause us to need additional packets that are
 * missed initially. Consider a transfer made up of four different DMA buffers
 * sized in bytes: 4096, 4096, 256, 256, with a 512-byte packet size.
 *
 *	Remain		4608	512	256	0
 *	Bytes		4096	4096	256	256
 *	Naive TD	9	1	1	0
 *	Act TD		10	2	1	0
 *
 * This means that the only safe way forward here is to work backwards and
 * count how many packets remain after the current cookie.
 */
static uint_t
xhci_transfer_get_tdsize(xhci_transfer_t *xt, uint_t off, uint_t mps)
{
	int i;
	uint_t npkt = 0;

	/*
	 * There are always zero packets for the last TRB.
	 */
	ASSERT(xt->xt_buffer.xdb_ncookies > 0);
	for (i = xt->xt_buffer.xdb_ncookies - 1; i > off; i--) {
		size_t len = roundup(xt->xt_buffer.xdb_cookies[i].dmac_size,
		    mps);
		npkt += len / mps;
	}

	/*
	 * Make sure to clamp this value otherwise we risk truncation.
	 */
	if (npkt >= XHCI_MAX_TDSIZE)
		return (XHCI_MAX_TDSIZE);

	return (npkt);
}
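/*
 * As a hypothetical trace of the function above, reusing the cookie sizes from
 * the comment (4096, 4096, 256, 256) with mps = 512: for off = 0 the loop sums
 * roundup(256, 512) / 512 + roundup(256, 512) / 512 + roundup(4096, 512) / 512
 * = 1 + 1 + 8 = 10, which matches the "Act TD" row rather than the naive
 * value of 9.
 */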
void
xhci_transfer_trb_fill_data(xhci_endpoint_t *xep, xhci_transfer_t *xt, int off,
    boolean_t in)
{
	uint_t mps, tdsize, flags;
	int i;

	VERIFY(xt->xt_buffer.xdb_ncookies > 0);
	VERIFY(xep->xep_pipe != NULL);
	VERIFY(off + xt->xt_buffer.xdb_ncookies <= xt->xt_ntrbs);
	mps = xep->xep_pipe->p_ep.wMaxPacketSize;

	if (in == B_TRUE) {
		xt->xt_data_tohost = B_TRUE;
	}

	/*
	 * We assume that if we have a non-bulk endpoint, then we should only
	 * have a single cookie. This falls out from the default SGL length
	 * that we use for these other device types.
	 */
	if (xep->xep_type != USB_EP_ATTR_BULK) {
		VERIFY3U(xt->xt_buffer.xdb_ncookies, ==, 1);
	}

	for (i = 0; i < xt->xt_buffer.xdb_ncookies; i++) {
		uint64_t pa, dmasz;

		pa = xt->xt_buffer.xdb_cookies[i].dmac_laddress;
		dmasz = xt->xt_buffer.xdb_cookies[i].dmac_size;

		tdsize = xhci_transfer_get_tdsize(xt, i, mps);

		flags = XHCI_TRB_TYPE_NORMAL;
		if (i == 0 && xep->xep_type == USB_EP_ATTR_CONTROL) {
			flags = XHCI_TRB_TYPE_DATA;
			if (in == B_TRUE)
				flags |= XHCI_TRB_DIR_IN;
		}

		/*
		 * If we have more than one cookie, then we need to set
		 * chaining on every TRB and the last TRB will turn into an
		 * event data TRB. If we only have a single TRB, then we just
		 * set interrupt on completion (IOC). There's no need to
		 * specifically set interrupt on short packet (IOSP) in that
		 * case, as we'll always get the event notification. We still
		 * need the chain bit set on the last packet, so we can chain
		 * into the event data. Even if all the data on a bulk endpoint
		 * (the only endpoint type that uses chaining today) fits in a
		 * single cookie, we'll still schedule an event data block.
		 */
		if (xep->xep_type == USB_EP_ATTR_BULK ||
		    xt->xt_buffer.xdb_ncookies > 1) {
			flags |= XHCI_TRB_CHAIN;
		}

		/*
		 * What we set for the last TRB depends on the type of the
		 * endpoint. If it's a bulk endpoint, then we have to set
		 * evaluate next trb (ENT) so we successfully process the event
		 * data TRB we'll set up. Otherwise, we need to make sure that
		 * we set interrupt on completion, so we get the event.
		 * However, we don't set the event on control endpoints, as the
		 * status stage TD will be the one where we get the event. But,
		 * we do still need an interrupt on short packet, because
		 * technically the status stage is in its own TD.
		 */
		if (i + 1 == xt->xt_buffer.xdb_ncookies) {
			switch (xep->xep_type) {
			case USB_EP_ATTR_BULK:
				flags |= XHCI_TRB_ENT;
				break;
			case USB_EP_ATTR_CONTROL:
				flags |= XHCI_TRB_ISP;
				break;
			default:
				flags |= XHCI_TRB_IOC;
				break;
			}
		}

		xt->xt_trbs[off + i].trb_addr = LE_64(pa);
		xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_LEN(dmasz) |
		    XHCI_TRB_TDREM(tdsize) | XHCI_TRB_INTR(0));
		xt->xt_trbs[off + i].trb_flags = LE_32(flags);
	}

	/*
	 * The last TRB in any bulk transfer is the Event Data TRB.
	 */
	if (xep->xep_type == USB_EP_ATTR_BULK) {
		VERIFY(off + xt->xt_buffer.xdb_ncookies + 1 <= xt->xt_ntrbs);
		xt->xt_trbs[off + i].trb_addr = LE_64((uintptr_t)xt);
		xt->xt_trbs[off + i].trb_status = LE_32(XHCI_TRB_INTR(0));
		xt->xt_trbs[off + i].trb_flags = LE_32(XHCI_TRB_TYPE_EVENT |
		    XHCI_TRB_IOC);
	}
}

/*
 * This is a utility function for isochronous transfers to help calculate the
 * transfer burst count (TBC) and transfer last burst packet count (TLBPC)
 * entries for an isochronous TRB. See xHCI 1.1 / 4.11.2.3 for how to calculate
 * them.
 */
void
xhci_transfer_calculate_isoc(xhci_device_t *xd, xhci_endpoint_t *xep,
    uint_t trb_len, uint_t *tbc, uint_t *tlbpc)
{
	uint_t mps, tdpc, burst;

	/*
	 * Even if we're asked to send no data, that actually requires the
	 * equivalent of sending one byte of data.
	 */
	if (trb_len == 0)
		trb_len = 1;

	mps = XHCI_EPCTX_GET_MPS(xd->xd_endout[xep->xep_num]->xec_info2);
	burst = XHCI_EPCTX_GET_MAXB(xd->xd_endout[xep->xep_num]->xec_info2);

	/*
	 * This is supposed to correspond to the Transfer Descriptor Packet
	 * Count from xHCI 1.1 / 4.14.1.
	 */
	tdpc = howmany(trb_len, mps);
	*tbc = howmany(tdpc, burst + 1) - 1;

	if ((tdpc % (burst + 1)) == 0)
		*tlbpc = burst;
	else
		*tlbpc = (tdpc % (burst + 1)) - 1;
}
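/*
 * A hypothetical worked example for the calculation above (illustrative
 * numbers only): with mps = 1024, burst = 2 (i.e. up to three packets per
 * burst) and trb_len = 3000, tdpc = howmany(3000, 1024) = 3, so
 * *tbc = howmany(3, 3) - 1 = 0 and, since 3 % 3 == 0, *tlbpc = burst = 2.
 * A trb_len of 4000 instead gives tdpc = 4, *tbc = 1, and *tlbpc = 0.
 */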