1 /*
2  * This file and its contents are supplied under the terms of the
3  * Common Development and Distribution License ("CDDL"), version 1.0.
4  * You may only use this file in accordance with the terms of version
5  * 1.0 of the CDDL.
6  *
7  * A full copy of the text of the CDDL should have accompanied this
8  * source.  A copy of the CDDL is also available via the Internet at
9  * http://www.illumos.org/license/CDDL.
10  */
11 
12 /*
13  * Copyright (c) 2018, Joyent, Inc.
14  */
15 
16 /*
17  * -----------------------------
18  * xHCI Ring Management Routines
19  * -----------------------------
20  *
21  * There are three major different types of rings for xHCI, these are:
22  *
23  * 1) Command Rings
24  * 2) Event Rings
25  * 3) Transfer Rings
26  *
27  * Command and Transfer rings function in similar ways while the event rings are
28  * different. The difference comes in who is the consumer and who is the
29  * producer. In the case of command and transfer rings, the driver is the
30  * producer. For the event ring the driver is the consumer.
31  *
32  * Each ring in xhci has a synthetic head and tail register. Each entry in a
33  * ring has a bit that's often referred to as the 'Cycle bit'. The cycle bit is
34  * toggled as a means of saying that a given entry needs to be consumed.
35  *
 * When a ring is created, all of the data in it is initialized to zero and the
 * producer and consumer agree that when the cycle bit is toggled, the ownership
 * of the entry is transferred from the producer to the consumer.  For
 * example, the command ring defaults to saying that a cycle bit of one is what
 * indicates the command is owned by the hardware. So as the driver (the
 * producer) fills in entries, the driver toggles the cycle bit from 0->1 as
 * part of writing out the TRB.  When the command ring's doorbell is rung, the
 * hardware (the consumer) begins processing commands. It will process them
 * until one of two things happens:
45  *
46  * 1) The hardware encounters an entry with the old cycle bit (0 in this case)
47  *
48  * 2) The hardware hits the last entry in the ring which is a special kind of
49  * entry called a LINK TRB.
50  *
51  * A LINK TRB has two purposes:
52  *
53  * 1) Indicate where processing should be redirected. This can potentially be to
54  * another memory segment; however, this driver always programs LINK TRBs to
55  * point back to the start of the ring.
56  *
57  * 2) Indicate whether or not the cycle bit should be changed. We always
58  * indicate that the cycle bit should be toggled when a LINK TRB is processed.
59  *
60  * In this same example, whereas the driver (the producer) would be setting the
61  * cycle to 1 to indicate that an entry is to be processed, the driver would now
62  * set it to 0. Similarly, the hardware (the consumer) would be looking for a
63  * 0 to determine whether or not it should process the entry.
64  *
 * Currently, when the driver allocates rings, it always allocates a single page
 * for the ring. The entire page is dedicated to ring use, and its size is
 * determined by the device's PAGESIZE register. The last entry in a given page
 * is always configured as a LINK TRB. As each entry in a ring is 16 bytes, this
 * gives us 255 usable descriptors on x86 and 511 on SPARC, as PAGESIZE is 4k
 * and 8k respectively.
71  *
72  * The driver is always the producer for all rings except for the event ring,
73  * where it is the consumer.
74  *
75  * ----------------------
76  * Head and Tail Pointers
77  * ----------------------
78  *
 * Now, while we have the cycle bits for the ring explained, we still need to
 * keep track of what we consider the head and tail pointers, what the xHCI
 * specification calls enqueue (head) and dequeue (tail) pointers. In all the
 * cases here, the actual tracking of the head pointer is basically done by
 * the cycle bit; however, we maintain an actual offset in the xhci_ring_t
 * structure. The tail is usually less synthetic; however, who is responsible
 * for maintaining it differs from ring to ring.
86  *
87  * We handle the command and transfer rings the same way. The head pointer
88  * indicates where we should insert the next TRB to transfer. The tail pointer
89  * indicates the last thing that hardware has told us it has processed. If the
90  * head and tail point to the same index, then we know the ring is empty.
91  *
92  * We increment the head pointer whenever we insert an entry. Note that we do
93  * not tell hardware about this in any way, it's just maintained by the cycle
94  * bit. Then, we keep track of what hardware has processed in our tail pointer,
95  * incrementing it only when we have an interrupt that indicates that it's been
96  * processed.
97  *
98  * One oddity here is that we only get notified of this via the event ring. So
99  * when the event ring encounters this information, it needs to go back and
100  * increment our command and transfer ring tails after processing events.
101  *
102  * For the event ring, we handle things differently. We still initialize
103  * everything to zero; however, we start processing things and looking at cycle
104  * bits only when we get an interrupt from hardware. With the event ring, we do
105  * *not* maintain a head pointer (it's still in the structure, but unused).  We
106  * always start processing at the tail pointer and use the cycle bit to indicate
107  * what we should process. Once we're done incrementing things, we go and notify
108  * the hardware of how far we got with this process by updating the tail for the
109  * event ring via a memory mapped register.
110  */
111 
112 #include <sys/usb/hcd/xhci/xhci.h>
113 
114 void
xhci_ring_free(xhci_ring_t * xrp)115 xhci_ring_free(xhci_ring_t *xrp)
116 {
117 	if (xrp->xr_trb != NULL) {
118 		xhci_dma_free(&xrp->xr_dma);
119 		xrp->xr_trb = NULL;
120 	}
121 	xrp->xr_ntrb = 0;
122 	xrp->xr_head = 0;
123 	xrp->xr_tail = 0;
124 	xrp->xr_cycle = 0;
125 }
126 
127 /*
128  * Initialize a ring that hasn't been used and set up its link pointer back to
129  * it.
130  */
131 int
xhci_ring_reset(xhci_t * xhcip,xhci_ring_t * xrp)132 xhci_ring_reset(xhci_t *xhcip, xhci_ring_t *xrp)
133 {
134 	xhci_trb_t *ltrb;
135 
136 	ASSERT(xrp->xr_trb != NULL);
137 
138 	bzero(xrp->xr_trb, sizeof (xhci_trb_t) * xrp->xr_ntrb);
139 	xrp->xr_head = 0;
140 	xrp->xr_tail = 0;
141 	xrp->xr_cycle = 1;
142 
143 	/*
144 	 * Set up the link TRB back to ourselves.
145 	 */
146 	ltrb = &xrp->xr_trb[xrp->xr_ntrb - 1];
147 	ltrb->trb_addr = LE_64(xhci_dma_pa(&xrp->xr_dma));
148 	ltrb->trb_flags = LE_32(XHCI_TRB_TYPE_LINK | XHCI_TRB_LINKSEG);
149 
150 	XHCI_DMA_SYNC(xrp->xr_dma, DDI_DMA_SYNC_FORDEV);
151 	if (xhci_check_dma_handle(xhcip, &xrp->xr_dma) != DDI_FM_OK) {
152 		ddi_fm_service_impact(xhcip->xhci_dip, DDI_SERVICE_LOST);
153 		return (EIO);
154 	}
155 
156 	return (0);
157 }
158 
159 int
xhci_ring_alloc(xhci_t * xhcip,xhci_ring_t * xrp)160 xhci_ring_alloc(xhci_t *xhcip, xhci_ring_t *xrp)
161 {
162 	ddi_dma_attr_t attr;
163 	ddi_device_acc_attr_t acc;
164 
165 	/*
166 	 * We use a transfer attribute for the rings as they require 64-byte
167 	 * boundaries.
168 	 */
169 	xhci_dma_acc_attr(xhcip, &acc);
170 	xhci_dma_transfer_attr(xhcip, &attr, XHCI_DEF_DMA_SGL);
171 	bzero(xrp, sizeof (xhci_ring_t));
172 	if (xhci_dma_alloc(xhcip, &xrp->xr_dma, &attr, &acc, B_FALSE,
173 	    xhcip->xhci_caps.xcap_pagesize, B_FALSE) == B_FALSE)
174 		return (ENOMEM);
175 	xrp->xr_trb = (xhci_trb_t *)xrp->xr_dma.xdb_va;
176 	xrp->xr_ntrb = xhcip->xhci_caps.xcap_pagesize / sizeof (xhci_trb_t);
177 	return (0);
178 }
179 
180 /*
181  * Note, caller should have already synced our DMA memory. This should not be
182  * used for the command ring, as its cycle is maintained by the cycling of the
183  * head. This function is only used for managing the event ring.
184  */
185 xhci_trb_t *
xhci_ring_event_advance(xhci_ring_t * xrp)186 xhci_ring_event_advance(xhci_ring_t *xrp)
187 {
188 	xhci_trb_t *trb = &xrp->xr_trb[xrp->xr_tail];
189 	VERIFY(xrp->xr_tail < xrp->xr_ntrb);
190 
191 	if (xrp->xr_cycle != (LE_32(trb->trb_flags) & XHCI_TRB_CYCLE))
192 		return (NULL);
193 
194 	/*
195 	 * The event ring does not use a link TRB. It instead always uses
196 	 * information based on the table to wrap. That means that the last
197 	 * entry is in fact going to contain data, so we shouldn't wrap and
198 	 * toggle the cycle until after we've processed that, in other words the
199 	 * tail equals the total number of entries.
200 	 */
201 	xrp->xr_tail++;
202 	if (xrp->xr_tail == xrp->xr_ntrb) {
203 		xrp->xr_cycle ^= 1;
204 		xrp->xr_tail = 0;
205 	}
206 
207 	return (trb);
208 }
209 
210 /*
211  * When processing the command ring, we're going to get a single event for each
212  * entry in it. As we've submitted things in order, we need to make sure that
213  * this address matches the DMA address that we'd expect of the current tail.
214  */
215 boolean_t
xhci_ring_trb_tail_valid(xhci_ring_t * xrp,uint64_t dma)216 xhci_ring_trb_tail_valid(xhci_ring_t *xrp, uint64_t dma)
217 {
218 	uint64_t tail;
219 
220 	tail = xhci_dma_pa(&xrp->xr_dma) + xrp->xr_tail * sizeof (xhci_trb_t);
221 	return (dma == tail);
222 }
223 
224 /*
225  * A variant on the above that checks for a given message within a range of
226  * entries and returns the offset to it from the tail.
227  */
228 int
xhci_ring_trb_valid_range(xhci_ring_t * xrp,uint64_t dma,uint_t range)229 xhci_ring_trb_valid_range(xhci_ring_t *xrp, uint64_t dma, uint_t range)
230 {
231 	uint_t i;
232 	uint_t tail = xrp->xr_tail;
233 	uint64_t taddr;
234 
235 	VERIFY(range < xrp->xr_ntrb);
236 	for (i = 0; i < range; i++) {
237 		taddr = xhci_dma_pa(&xrp->xr_dma) + tail * sizeof (xhci_trb_t);
238 		if (taddr == dma)
239 			return (i);
240 
241 		tail++;
242 		if (tail == xrp->xr_ntrb - 1)
243 			tail = 0;
244 	}
245 
246 	return (-1);
247 }
248 
249 /*
250  * Determine whether or not we have enough space for this request in a given
251  * ring for the given request. Note, we have to be a bit careful here and ensure
252  * that we properly handle cases where we cross the link TRB and that we don't
253  * count it.
254  *
255  * To determine if we have enough space for a given number of trbs, we need to
256  * logically advance the head pointer and make sure that we don't cross the tail
257  * pointer. In other words, if after advancement, head == tail, we're in
258  * trouble and don't have enough space.
259  */
260 boolean_t
xhci_ring_trb_space(xhci_ring_t * xrp,uint_t ntrb)261 xhci_ring_trb_space(xhci_ring_t *xrp, uint_t ntrb)
262 {
263 	uint_t i;
264 	uint_t head = xrp->xr_head;
265 
266 	VERIFY(ntrb > 0);
267 	/* We use < to ignore the link TRB */
268 	VERIFY(ntrb < xrp->xr_ntrb);
269 
270 	for (i = 0; i < ntrb; i++) {
271 		head++;
272 		if (head == xrp->xr_ntrb - 1) {
273 			head = 0;
274 		}
275 
276 		if (head == xrp->xr_tail)
277 			return (B_FALSE);
278 	}
279 
280 	return (B_TRUE);
281 }
282 
283 /*
284  * Fill in a TRB in the ring at offset trboff. If cycle is currently set to
285  * B_TRUE, then we fill in the appropriate cycle bit to tell the system to
286  * advance, otherwise we leave the existing cycle bit untouched so the system
287  * doesn't accidentally advance until we have everything filled in.
288  */
289 void
xhci_ring_trb_fill(xhci_ring_t * xrp,uint_t trboff,xhci_trb_t * host_trb,uint64_t * trb_pap,boolean_t put_cycle)290 xhci_ring_trb_fill(xhci_ring_t *xrp, uint_t trboff, xhci_trb_t *host_trb,
291     uint64_t *trb_pap, boolean_t put_cycle)
292 {
293 	uint_t i;
294 	uint32_t flags;
295 	uint_t ent = xrp->xr_head;
296 	uint8_t cycle = xrp->xr_cycle;
297 	xhci_trb_t *trb;
298 
299 	for (i = 0; i < trboff; i++) {
300 		ent++;
301 		if (ent == xrp->xr_ntrb - 1) {
302 			ent = 0;
303 			cycle ^= 1;
304 		}
305 	}
306 
307 	/*
308 	 * If we're being asked to not update the cycle for it to be valid to be
309 	 * produced, we need to xor this once again to get to the inappropriate
310 	 * value.
311 	 */
312 	if (put_cycle == B_FALSE)
313 		cycle ^= 1;
314 
315 	trb = &xrp->xr_trb[ent];
316 
317 	trb->trb_addr = host_trb->trb_addr;
318 	trb->trb_status = host_trb->trb_status;
319 	flags = host_trb->trb_flags;
320 	if (cycle == 0) {
321 		flags &= ~LE_32(XHCI_TRB_CYCLE);
322 	} else {
323 		flags |= LE_32(XHCI_TRB_CYCLE);
324 	}
325 
326 	trb->trb_flags = flags;
327 
328 	if (trb_pap != NULL) {
329 		uint64_t pa;
330 
331 		/*
332 		 * This logic only works if we have a single cookie address.
333 		 * However, this is prettty tightly assumed for rings through
334 		 * the xhci driver at this time.
335 		 */
336 		ASSERT3U(xrp->xr_dma.xdb_ncookies, ==, 1);
337 		pa = xrp->xr_dma.xdb_cookies[0].dmac_laddress;
338 		pa += ((uintptr_t)trb - (uintptr_t)&xrp->xr_trb[0]);
339 		*trb_pap = pa;
340 	}
341 }
342 
343 /*
344  * Update our metadata for the ring and verify the cycle bit is correctly set
345  * for the first trb. It is expected that it is incorrectly set.
346  */
347 void
xhci_ring_trb_produce(xhci_ring_t * xrp,uint_t ntrb)348 xhci_ring_trb_produce(xhci_ring_t *xrp, uint_t ntrb)
349 {
350 	uint_t i, ohead;
351 	xhci_trb_t *trb;
352 
353 	VERIFY(ntrb > 0);
354 
355 	ohead = xrp->xr_head;
356 
357 	/*
358 	 * As part of updating the head, we need to make sure we correctly
359 	 * update the cycle bit of the link TRB. So we always do this first
360 	 * before we update the old head, to try and get a consistent view of
361 	 * the cycle bit.
362 	 */
363 	for (i = 0; i < ntrb; i++) {
364 		xrp->xr_head++;
365 		/*
366 		 * If we're updating the link TRB, we also need to make sure
367 		 * that the Chain bit is set if we're in the middle of a TD
368 		 * comprised of multiple TRDs. Thankfully the algorithmn here is
369 		 * simple: set it to the value of the previous TRB.
370 		 */
371 		if (xrp->xr_head == xrp->xr_ntrb - 1) {
372 			trb = &xrp->xr_trb[xrp->xr_ntrb - 1];
373 			if (xrp->xr_trb[xrp->xr_ntrb - 2].trb_flags &
374 			    XHCI_TRB_CHAIN) {
375 				trb->trb_flags |= XHCI_TRB_CHAIN;
376 			} else {
377 				trb->trb_flags &= ~XHCI_TRB_CHAIN;
378 
379 			}
380 			trb->trb_flags ^= LE_32(XHCI_TRB_CYCLE);
381 			xrp->xr_cycle ^= 1;
382 			xrp->xr_head = 0;
383 		}
384 	}
385 
386 	trb = &xrp->xr_trb[ohead];
387 	trb->trb_flags ^= LE_32(XHCI_TRB_CYCLE);
388 }
389 
390 /*
391  * This is a convenience wrapper for the single TRB case to make callers less
392  * likely to mess up some of the required semantics.
393  */
394 void
xhci_ring_trb_put(xhci_ring_t * xrp,xhci_trb_t * trb)395 xhci_ring_trb_put(xhci_ring_t *xrp, xhci_trb_t *trb)
396 {
397 	xhci_ring_trb_fill(xrp, 0U, trb, NULL, B_FALSE);
398 	xhci_ring_trb_produce(xrp, 1U);
399 }
400 
401 /*
402  * Update the tail pointer for a ring based on the DMA address of a consumed
403  * entry. Note, this entry indicates what we just processed, therefore we should
404  * bump the tail entry to the next one.
405  */
406 boolean_t
xhci_ring_trb_consumed(xhci_ring_t * xrp,uint64_t dma)407 xhci_ring_trb_consumed(xhci_ring_t *xrp, uint64_t dma)
408 {
409 	uint64_t pa = xhci_dma_pa(&xrp->xr_dma);
410 	uint64_t high = pa + xrp->xr_ntrb * sizeof (xhci_trb_t);
411 
412 	if (dma < pa || dma >= high ||
413 	    dma % sizeof (xhci_trb_t) != 0)
414 		return (B_FALSE);
415 
416 	dma -= pa;
417 	dma /= sizeof (xhci_trb_t);
418 
419 	VERIFY(dma < xrp->xr_ntrb);
420 
421 	xrp->xr_tail = dma + 1;
422 	if (xrp->xr_tail == xrp->xr_ntrb - 1)
423 		xrp->xr_tail = 0;
424 
425 	return (B_TRUE);
426 }
427 
428 /*
429  * The ring represented here has been reset and we're being asked to basically
430  * skip all outstanding entries. Note, this shouldn't be used for the event
431  * ring. Because the cycle bit is toggled whenever the head moves past the link
432  * trb, the cycle bit is already correct. So in this case, it's really just a
433  * matter of setting the current tail equal to the head, at which point we
434  * consider things empty.
435  */
436 void
xhci_ring_skip(xhci_ring_t * xrp)437 xhci_ring_skip(xhci_ring_t *xrp)
438 {
439 	xrp->xr_tail = xrp->xr_head;
440 }
441 
442 /*
443  * A variant on the normal skip. This basically just tells us to make sure that
444  * that everything this transfer represents has been skipped. Callers need to
445  * make sure that this is actually the first transfer in the ring. Like above,
446  * we don't need to touch the cycle bit.
447  */
448 void
xhci_ring_skip_transfer(xhci_ring_t * xrp,xhci_transfer_t * xt)449 xhci_ring_skip_transfer(xhci_ring_t *xrp, xhci_transfer_t *xt)
450 {
451 	uint_t i;
452 
453 	for (i = 0; i < xt->xt_ntrbs; i++) {
454 		xrp->xr_tail++;
455 		if (xrp->xr_tail == xrp->xr_ntrb - 1)
456 			xrp->xr_tail = 0;
457 	}
458 }
459