19da57d7bSbt /*
29da57d7bSbt * CDDL HEADER START
39da57d7bSbt *
49da57d7bSbt * The contents of this file are subject to the terms of the
59da57d7bSbt * Common Development and Distribution License (the "License").
69da57d7bSbt * You may not use this file except in compliance with the License.
79da57d7bSbt *
8da14cebeSEric Cheng * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9da14cebeSEric Cheng * or http://www.opensolaris.org/os/licensing.
109da57d7bSbt * See the License for the specific language governing permissions
119da57d7bSbt * and limitations under the License.
129da57d7bSbt *
13da14cebeSEric Cheng * When distributing Covered Code, include this CDDL HEADER in each
14da14cebeSEric Cheng * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
159da57d7bSbt * If applicable, add the following below this CDDL HEADER, with the
169da57d7bSbt * fields enclosed by brackets "[]" replaced with your own identifying
179da57d7bSbt * information: Portions Copyright [yyyy] [name of copyright owner]
189da57d7bSbt *
199da57d7bSbt * CDDL HEADER END
209da57d7bSbt */
219da57d7bSbt
229da57d7bSbt /*
235b6dd21fSchenlu chen - Sun Microsystems - Beijing China * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
245b6dd21fSchenlu chen - Sun Microsystems - Beijing China */
255b6dd21fSchenlu chen - Sun Microsystems - Beijing China
265b6dd21fSchenlu chen - Sun Microsystems - Beijing China /*
275b6dd21fSchenlu chen - Sun Microsystems - Beijing China * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
2869b5a878SDan McDonald * Copyright 2012 Nexenta Systems, Inc. All rights reserved.
29dc0cb1cdSDale Ghent * Copyright 2016 OmniTI Computer Consulting, Inc. All rights reserved.
30*1dd392ebSJason King * Copyright 2021 Joyent, Inc.
31da14cebeSEric Cheng */
329da57d7bSbt
339da57d7bSbt #include "ixgbe_sw.h"
349da57d7bSbt
35e39a7b5bSJason King static int ixgbe_tx_copy(ixgbe_tx_ring_t *, tx_control_block_t **,
36e39a7b5bSJason King link_list_t *, const void *, size_t);
37e39a7b5bSJason King static int ixgbe_tx_bind(ixgbe_tx_ring_t *, tx_control_block_t **,
38e39a7b5bSJason King link_list_t *, uint8_t *, size_t);
39e39a7b5bSJason King static uint_t ixgbe_tcb_done(tx_control_block_t *);
409da57d7bSbt static int ixgbe_tx_fill_ring(ixgbe_tx_ring_t *, link_list_t *,
41c971fb7eSgg ixgbe_tx_context_t *, size_t);
429da57d7bSbt static void ixgbe_save_desc(tx_control_block_t *, uint64_t, size_t);
43e39a7b5bSJason King static tx_control_block_t *ixgbe_get_free_list(ixgbe_tx_ring_t *,
44e39a7b5bSJason King link_list_t *);
459da57d7bSbt
46c971fb7eSgg static int ixgbe_get_context(mblk_t *, ixgbe_tx_context_t *);
47c971fb7eSgg static boolean_t ixgbe_check_context(ixgbe_tx_ring_t *,
48c971fb7eSgg ixgbe_tx_context_t *);
49c971fb7eSgg static void ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *,
5073cd555cSBin Tu - Sun Microsystems - Beijing China ixgbe_tx_context_t *);
519da57d7bSbt
529da57d7bSbt #ifndef IXGBE_DEBUG
539da57d7bSbt #pragma inline(ixgbe_save_desc)
54c971fb7eSgg #pragma inline(ixgbe_get_context)
55c971fb7eSgg #pragma inline(ixgbe_check_context)
56c971fb7eSgg #pragma inline(ixgbe_fill_context)
579da57d7bSbt #endif
589da57d7bSbt
599da57d7bSbt /*
60da14cebeSEric Cheng * ixgbe_ring_tx
619da57d7bSbt *
62da14cebeSEric Cheng * To transmit one mblk through one specified ring.
639da57d7bSbt *
649da57d7bSbt * One mblk can consist of several fragments, each fragment
659da57d7bSbt * will be processed with different methods based on the size.
669da57d7bSbt * For the fragments with size less than the bcopy threshold,
679da57d7bSbt * they will be processed by using bcopy; otherwise, they will
689da57d7bSbt * be processed by using DMA binding.
699da57d7bSbt *
70e39a7b5bSJason King * To process the mblk, for each fragment, we pass a pointer to the location
71e39a7b5bSJason King * of the current transmit control block (tcb) (initialized to NULL) to either
72e39a7b5bSJason King * ixgbe_tx_copy() or ixgbe_tx_bind() (based on the size of the mblk fragment).
 * ixgbe_tx_copy() and ixgbe_tx_bind() will either continue to use the
 * current tcb if possible, or close out the current tcb, allocate a new
 * tcb, and update the passed location (tx_control_block_t **) to reflect
 * the new current tcb.
769da57d7bSbt *
77e39a7b5bSJason King * Since bound mblk fragments require their own tcb, the close, allocate new,
78e39a7b5bSJason King * and update steps occur on every call to ixgbe_tx_bind(), but since
79e39a7b5bSJason King * consecutive small mblk fragments can be combined into a single tcb, the
80e39a7b5bSJason King * close, allocate new, and update steps may not occur on every call to
81e39a7b5bSJason King * ixgbe_tx_copy(). If the current tcb is already being used to copy data and
82e39a7b5bSJason King * we call ixgbe_tx_copy(), if there is enough room in the current tcb for
83e39a7b5bSJason King * the current mblk fragment, we append the data from the mblk fragment. If
84e39a7b5bSJason King * we call ixgbe_tx_copy() and the current tcb isn't being used to copy (i.e.
85e39a7b5bSJason King * the previous iteration of the loop called ixgbe_tx_bind()), or doesn't
86e39a7b5bSJason King * have enough space for the mblk fragment, we close out the current tcb,
87e39a7b5bSJason King * grab a new tcb from the free list, and update the current tcb to the
88e39a7b5bSJason King * newly obtained tcb.
899da57d7bSbt *
90e39a7b5bSJason King * When LSO (large segment offload) is enabled, we first copy the packet
91e39a7b5bSJason King * headers (ethernet, IP, and TCP/UDP) into their own descriptor before
92e39a7b5bSJason King * processing the remainder of the packet. The remaining bytes of the packet
93e39a7b5bSJason King * are then copied or mapped based on the fragment size as described above.
94e39a7b5bSJason King *
95e39a7b5bSJason King * Through the entire processing of a packet, we keep track of the number of
96e39a7b5bSJason King * DMA descriptors being used (either bound or pre-bound buffers used for
97e39a7b5bSJason King * copying) by this packet. Each tcb requires at least one DMA descriptor, but
98e39a7b5bSJason King * may require more than one. When a tcb is closed by ixgbe_tx_bind() or
99e39a7b5bSJason King * ixgbe_tx_copy(), it does so by calling ixgbe_tcb_done() which returns the
100e39a7b5bSJason King * number of DMA descriptors that are closed (ready for the HW). Since the
101e39a7b5bSJason King * hardware limits the number of descriptors that can be used to transmit a
 * single packet, if the total number of DMA descriptors required to transmit
103e39a7b5bSJason King * this packet exceeds this limit, we perform a msgpullup() and try again.
104e39a7b5bSJason King * Since our DMA attributes limit the number of DMA cookies allowed to
105e39a7b5bSJason King * map a single span of memory to a value (MAX_COOKIE) less than the
106e39a7b5bSJason King * maximum number of descriptors allowed for a packet (IXGBE_TX_DESC_LIMIT),
107e39a7b5bSJason King * as long as sufficient tcbs are available, we should always be able to
108e39a7b5bSJason King * process a packet that's contained in a single mblk_t (no additional
109e39a7b5bSJason King * fragments).
110e39a7b5bSJason King *
111e39a7b5bSJason King * Once all of the tcbs have been setup, ixgbe_tx_fill_ring() is called to
112e39a7b5bSJason King * setup the tx ring to transmit the tcbs and then tell the HW to start
113e39a7b5bSJason King * transmitting. When transmission is complete, an interrupt is triggered
114e39a7b5bSJason King * which calls the appropriate recycle routine to place the tcbs that were
 * used in transmission back in the free list. We may also try to
116e39a7b5bSJason King * recycle any available tcbs when the size of the tcb free list gets low
117e39a7b5bSJason King * or if the watchdog timer triggers.
1189da57d7bSbt *
1199da57d7bSbt */
mblk_t *
ixgbe_ring_tx(void *arg, mblk_t *orig_mp)
{
	ixgbe_tx_ring_t *tx_ring = (ixgbe_tx_ring_t *)arg;
	ixgbe_t *ixgbe = tx_ring->ixgbe;
	mblk_t *mp = orig_mp;
	mblk_t *pull_mp = NULL;
	tx_control_block_t *tcb;
	size_t mbsize, offset, len;
	uint32_t desc_total;
	uint32_t copy_thresh;
	int desc_num;
	ixgbe_tx_context_t tx_context, *ctx = NULL;
	link_list_t pending_list;
	boolean_t limit_retry = B_FALSE;

	ASSERT(mp->b_next == NULL);

	/*
	 * If the adapter is suspended, errored, overheated, not started,
	 * or the link is down, there is no way to transmit; drop the
	 * packet and report it consumed (return NULL).
	 */
	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
	    !(ixgbe->ixgbe_state & IXGBE_STARTED) ||
	    ixgbe->link_state != LINK_STATE_UP) {
		freemsg(mp);
		return (NULL);
	}

	copy_thresh = ixgbe->tx_copy_thresh;

	mbsize = msgsize(mp);

	if (ixgbe->tx_hcksum_enable) {
		/*
		 * Retrieve checksum context information from the mblk
		 * that will be used to decide whether/how to fill the
		 * context descriptor.
		 */
		ctx = &tx_context;
		if (ixgbe_get_context(mp, ctx) < 0) {
			freemsg(mp);
			return (NULL);
		}

		/*
		 * If the mblk size exceeds the max size ixgbe could
		 * process, then discard this mblk, and return NULL.
		 */
		if ((ctx->lso_flag &&
		    ((mbsize - ctx->mac_hdr_len) > IXGBE_LSO_MAXLEN)) ||
		    (!ctx->lso_flag &&
		    (mbsize > (ixgbe->max_frame_size - ETHERFCSL)))) {
			freemsg(mp);
			IXGBE_DEBUGLOG_0(ixgbe, "ixgbe_tx: packet oversize");
			return (NULL);
		}
	}

	/*
	 * If we use too many descriptors (see comments below), we may do
	 * pull_mp = msgpullup(orig_mp, -1), and jump back to here. As such,
	 * any time we error return past here, we should check and free
	 * pull_mp if != NULL.
	 */
retry:
	/*
	 * Check and recycle tx descriptors.
	 * The recycle threshold here should be selected carefully
	 */
	if (tx_ring->tbd_free < ixgbe->tx_recycle_thresh) {
		tx_ring->tx_recycle(tx_ring);
	}

	/*
	 * After the recycling, if the tbd_free is less than the
	 * overload_threshold, assert overload, return mp;
	 * and we need to re-schedule the tx again.
	 */
	if (tx_ring->tbd_free < ixgbe->tx_overload_thresh) {
		tx_ring->reschedule = B_TRUE;
		tx_ring->stat_overload++;
		if (pull_mp != NULL)
			freemsg(pull_mp);
		return (orig_mp);
	}

	/*
	 * The pending_list is a linked list that is used to save
	 * the tx control blocks that have packet data processed
	 * but have not put the data to the tx descriptor ring.
	 * It is used to reduce the lock contention of the tx_lock.
	 */
	LINK_LIST_INIT(&pending_list);

	tcb = NULL;
	desc_num = 0;
	desc_total = 0;
	offset = 0;

	/*
	 * For LSO, we always copy the packet header (Ethernet + IP + TCP/UDP)
	 * into a single descriptor separate from the remaining data.
	 */
	if ((ctx != NULL) && ctx->lso_flag) {
		size_t hdr_len;

		hdr_len = ctx->ip_hdr_len + ctx->mac_hdr_len + ctx->l4_hdr_len;

		/*
		 * copy the first hdr_len bytes of mp (i.e. the Ethernet, IP,
		 * and TCP/UDP headers) into tcb.
		 */
		for (len = hdr_len; mp != NULL && len > 0; mp = mp->b_cont) {
			size_t mlen = MBLKL(mp);
			size_t amt = MIN(mlen, len);
			int ret;

			ret = ixgbe_tx_copy(tx_ring, &tcb, &pending_list,
			    mp->b_rptr, amt);
			/*
			 * Since we're trying to copy all of the headers into
			 * a single buffer in a single tcb, if ixgbe_tx_copy()
			 * returns anything but 0, it means either no tcbs
			 * are available (< 0), or while copying, we spilled
			 * over and couldn't fit all the headers into a
			 * single tcb.
			 */
			if (ret != 0) {
				if (ret > 0)
					tx_ring->stat_lso_header_fail++;
				goto tx_failure;
			}

			len -= amt;

			/*
			 * If we copy less than the full amount of this
			 * mblk_t, we have some amount to copy below.
			 */
			if (amt < mlen) {
				offset = amt;
				break;
			}
		}

		ASSERT0(len);

		/*
		 * Finish off the header tcb, and start anew for the
		 * rest of the packet.
		 */
		desc_total += ixgbe_tcb_done(tcb);
		tcb = NULL;
	}

	/*
	 * Process each remaining segment in the packet -- either binding
	 * the dblk_t or copying the contents of the dblk_t to an already
	 * bound buffer. When we copy, we will accumulate consecutive small
	 * (less than copy_thresh bytes) segments into a single tcb buffer
	 * until no more can fit (or we encounter a segment larger than
	 * copy_thresh and bind the dblk_t).
	 *
	 * Both ixgbe_tx_bind() and ixgbe_tx_copy() will allocate new
	 * transmit control blocks (tcb)s as needed (and append them onto
	 * 'pending_list'). Both functions also replace 'tcb' with the new
	 * tcb when they allocate a new tcb.
	 *
	 * We stop trying to process the packet once the number of descriptors
	 * used equals IXGBE_TX_DESC_LIMIT. Even if we're copying into the
	 * IXGBE_TX_DESC_LIMIT-th descriptor, we won't have room to add a
	 * context descriptor (since we're already at the limit), so there's
	 * no point in continuing. We'll pull up the mblk_t (see below)
	 * and try again.
	 */
	while (mp != NULL && desc_total < IXGBE_TX_DESC_LIMIT) {
		uint8_t *rptr = mp->b_rptr + offset;
		int ret;

		/*
		 * 'offset' is non-zero only for the first fragment after
		 * the LSO header copy above; it skips the header bytes
		 * already consumed from this mblk.
		 */
		len = MBLKL(mp) - offset;
		offset = 0;

		if (len > copy_thresh) {
			ret = ixgbe_tx_bind(tx_ring, &tcb, &pending_list, rptr,
			    len);
		} else {
			ret = ixgbe_tx_copy(tx_ring, &tcb, &pending_list, rptr,
			    len);
		}

		if (ret < 0)
			goto tx_failure;

		desc_total += ret;
		mp = mp->b_cont;
	}

	/* Finish off the last tcb */
	desc_total += ixgbe_tcb_done(tcb);

	/*
	 * 82598/82599 chipset has a limitation that no more than 32 tx
	 * descriptors can be transmitted out at one time. As noted above,
	 * we need to include space for a context descriptor in case its
	 * necessary, so we do this even if desc_total == IXGBE_TX_DESC_LIMIT
	 * as well as when it exceeds the limit.
	 *
	 * If we exceed this limit, we take the hit, do a msgpullup(), and
	 * then try again. Our DMA attributes guarantee we should never use
	 * more than MAX_COOKIE (18) descriptors to map a single mblk_t, so we
	 * should only need to retry once.
	 */
	if (desc_total >= IXGBE_TX_DESC_LIMIT) {
		/* We shouldn't hit this path twice */
		VERIFY0(limit_retry);

		tx_ring->stat_break_tbd_limit++;

		/* Release all the tcbs we used previously */
		ixgbe_put_free_list(tx_ring, &pending_list);
		desc_total = 0;
		offset = 0;

		pull_mp = msgpullup(orig_mp, -1);
		if (pull_mp == NULL) {
			tx_ring->reschedule = B_TRUE;
			return (orig_mp);
		}

		mp = pull_mp;
		limit_retry = B_TRUE;
		goto retry;
	}

	/*
	 * Before filling the tx descriptor ring with the data, we need to
	 * ensure there are adequate free descriptors for transmit
	 * (including one context descriptor).
	 * Do not use up all the tx descriptors.
	 * Otherwise tx recycle will fail and cause false hang.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->tx_recycle(tx_ring);
	}

	mutex_enter(&tx_ring->tx_lock);
	/*
	 * If the number of free tx descriptors is not enough for transmit
	 * then return mp.
	 *
	 * Note: we must put this check under the mutex protection to
	 * ensure the correctness when multiple threads access it in
	 * parallel.
	 */
	if (tx_ring->tbd_free <= (desc_total + 1)) {
		tx_ring->stat_fail_no_tbd++;
		mutex_exit(&tx_ring->tx_lock);
		goto tx_failure;
	}

	/*
	 * Attach the mblk_t we've setup to the last control block.
	 * This is only done once we know there are enough free descriptors
	 * to transmit so that the cleanup in tx_failure doesn't try to
	 * call freemsg() on mp (since we will want to return it).
	 */
	tcb->mp = (pull_mp != NULL) ? pull_mp : orig_mp;

	desc_num = ixgbe_tx_fill_ring(tx_ring, &pending_list, ctx,
	    mbsize);

	ASSERT((desc_num == desc_total) || (desc_num == (desc_total + 1)));

	tx_ring->stat_obytes += mbsize;
	tx_ring->stat_opackets++;

	mutex_exit(&tx_ring->tx_lock);

	/*
	 * Now that tx is done, if we pulled up the original message, we
	 * can free the original message since it is no longer being
	 * used.
	 */
	if (pull_mp != NULL) {
		freemsg(orig_mp);
	}

	return (NULL);

tx_failure:
	/*
	 * If transmission fails, free the pulled-up copy of the message
	 * (if any); ownership of the original stays with the caller.
	 */
	if (pull_mp) {
		freemsg(pull_mp);
	}

	/*
	 * Return the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	/* Transmit failed, do not drop the mblk, reschedule the transmit */
	tx_ring->reschedule = B_TRUE;

	return (orig_mp);
}
4269da57d7bSbt
/*
 * ixgbe_tx_copy
 *
 * Copy the mblk fragment to the pre-allocated tx buffer. Return -1 on error
 * (no free tcb available), otherwise return the number of descriptors
 * completed (closed out) in this call.
 */
static int
ixgbe_tx_copy(ixgbe_tx_ring_t *tx_ring, tx_control_block_t **tcbp,
    link_list_t *pending_list, const void *buf, size_t len)
{
	tx_control_block_t *tcb = *tcbp;
	dma_buffer_t *tx_buf;
	uint32_t desc_num = 0;

	/*
	 * We need a new tcb -- either the current one (tcb) is NULL because
	 * we just started, tcb is being used for DMA, or tcb isn't large enough
	 * to hold the contents we need to copy.
	 */
	if (tcb == NULL || tcb->tx_type == USE_DMA ||
	    tcb->tx_buf.len + len > tcb->tx_buf.size) {
		tx_control_block_t *newtcb;

		newtcb = ixgbe_get_free_list(tx_ring, pending_list);
		if (newtcb == NULL)
			return (-1);

		newtcb->tx_type = USE_COPY;

		/*
		 * Close out the old tcb (if any), counting the
		 * descriptors it consumed, before switching to the
		 * newly allocated one.
		 */
		if (tcb != NULL)
			desc_num += ixgbe_tcb_done(tcb);
		*tcbp = tcb = newtcb;
	}

	ASSERT3S(tcb->tx_type, ==, USE_COPY);
	tx_buf = &tcb->tx_buf;

	/*
	 * Copy the packet data of the mblk fragment into the
	 * pre-allocated tx buffer, which is maintained by the
	 * tx control block.
	 *
	 * Several mblk fragments can be copied into one tx buffer.
	 * The destination address of the current copied fragment in
	 * the tx buffer is next to the end of the previous copied
	 * fragment.
	 */
	if (len > 0) {
		bcopy(buf, tx_buf->address + tx_buf->len, len);

		tx_buf->len += len;
		tcb->frag_num++;
	}

	return (desc_num);
}
4839da57d7bSbt
/*
 * ixgbe_tx_bind
 *
 * Bind the mblk fragment with DMA. Returns -1 on error, otherwise it
 * returns the number of descriptors completed in this call. This count
 * can include descriptors that weren't filled in by the current call to
 * ixgbe_tx_bind() but were being used (but not yet completed) in previous
 * calls to ixgbe_tx_bind() or ixgbe_tx_copy().
 */
static int
ixgbe_tx_bind(ixgbe_tx_ring_t *tx_ring, tx_control_block_t **tcbp,
    link_list_t *pending_list, uint8_t *buf, size_t len)
{
	tx_control_block_t *tcb = NULL;
	uint_t desc_num = 0;
	int status;

	/*
	 * A bound fragment always gets its own tcb. On failure the tcb
	 * is already on 'pending_list', so the caller's error path
	 * (ixgbe_put_free_list()) will reclaim it.
	 */
	tcb = ixgbe_get_free_list(tx_ring, pending_list);
	if (tcb == NULL)
		return (-1);

	/*
	 * Use DMA binding to process the mblk fragment
	 */
	status = ddi_dma_addr_bind_handle(tcb->tx_dma_handle, NULL,
	    (caddr_t)buf, len,
	    DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_DONTWAIT,
	    0, NULL, NULL);

	if (status != DDI_DMA_MAPPED) {
		tx_ring->stat_fail_dma_bind++;
		return (-1);
	}

	tcb->frag_num++;
	tcb->tx_type = USE_DMA;

	/*
	 * If there was an old tcb, we're about to replace it. Finish
	 * setting up the old tcb so we can replace it with the new one.
	 */
	if (*tcbp != NULL)
		desc_num += ixgbe_tcb_done(*tcbp);

	*tcbp = tcb;
	return (desc_num);
}
531e39a7b5bSJason King
/*
 * Once we're done populating a tcb (either by binding or copying into
 * a buffer in the tcb), get it ready for tx and return the number of
 * descriptors used: one per DMA cookie for a bound tcb, exactly one
 * for a copy tcb.
 */
static uint_t
ixgbe_tcb_done(tx_control_block_t *tcb)
{
	uint_t desc_num = 0;

	if (tcb->tx_type == USE_DMA) {
		const ddi_dma_cookie_t *c;

		/* Walk every DMA cookie produced by the earlier bind. */
		for (c = ddi_dma_cookie_iter(tcb->tx_dma_handle, NULL);
		    c != NULL;
		    c = ddi_dma_cookie_iter(tcb->tx_dma_handle, c)) {
			/*
			 * Save the address and length to the private data
			 * structure of the tx control block, which will be
			 * used to fill the tx descriptor ring after all the
			 * fragments are processed.
			 */
			ixgbe_save_desc(tcb, c->dmac_laddress, c->dmac_size);
			desc_num++;
		}
	} else if (tcb->tx_type == USE_COPY) {
		dma_buffer_t *tx_buf = &tcb->tx_buf;

		/* Flush the copied data to the device before handoff. */
		DMA_SYNC(tx_buf, DDI_DMA_SYNC_FORDEV);
		ixgbe_save_desc(tcb, tx_buf->dma_address, tx_buf->len);
		desc_num++;
	} else {
		panic("invalid tcb type");
	}

	return (desc_num);
}
5699da57d7bSbt
5709da57d7bSbt /*
571c971fb7eSgg * ixgbe_get_context
5729da57d7bSbt *
573c971fb7eSgg * Get the context information from the mblk
5749da57d7bSbt */
575c971fb7eSgg static int
ixgbe_get_context(mblk_t * mp,ixgbe_tx_context_t * ctx)576c971fb7eSgg ixgbe_get_context(mblk_t *mp, ixgbe_tx_context_t *ctx)
5779da57d7bSbt {
5789da57d7bSbt uint32_t start;
579da14cebeSEric Cheng uint32_t hckflags;
580da14cebeSEric Cheng uint32_t lsoflags;
58185f496faSRobert Mustacchi uint32_t lsocksum;
582da14cebeSEric Cheng uint32_t mss;
5839da57d7bSbt uint32_t len;
5849da57d7bSbt uint32_t size;
5859da57d7bSbt uint32_t offset;
5869da57d7bSbt unsigned char *pos;
5879da57d7bSbt ushort_t etype;
5889da57d7bSbt uint32_t mac_hdr_len;
5899da57d7bSbt uint32_t l4_proto;
590c971fb7eSgg uint32_t l4_hdr_len;
5919da57d7bSbt
5929da57d7bSbt ASSERT(mp != NULL);
5939da57d7bSbt
5940dc2366fSVenugopal Iyer mac_hcksum_get(mp, &start, NULL, NULL, NULL, &hckflags);
595c971fb7eSgg bzero(ctx, sizeof (ixgbe_tx_context_t));
5969da57d7bSbt
59773cd555cSBin Tu - Sun Microsystems - Beijing China if (hckflags == 0) {
598c971fb7eSgg return (0);
59973cd555cSBin Tu - Sun Microsystems - Beijing China }
60073cd555cSBin Tu - Sun Microsystems - Beijing China
601da14cebeSEric Cheng ctx->hcksum_flags = hckflags;
602c971fb7eSgg
6030dc2366fSVenugopal Iyer mac_lso_get(mp, &mss, &lsoflags);
604da14cebeSEric Cheng ctx->mss = mss;
605da14cebeSEric Cheng ctx->lso_flag = (lsoflags == HW_LSO);
606c971fb7eSgg
6079da57d7bSbt etype = 0;
6089da57d7bSbt mac_hdr_len = 0;
6099da57d7bSbt l4_proto = 0;
6109da57d7bSbt
6119da57d7bSbt /*
6129da57d7bSbt * Firstly get the position of the ether_type/ether_tpid.
6139da57d7bSbt * Here we don't assume the ether (VLAN) header is fully included
 * in one mblk fragment, so we go through the fragments to parse
6159da57d7bSbt * the ether type.
6169da57d7bSbt */
617da14cebeSEric Cheng size = len = MBLKL(mp);
6189da57d7bSbt offset = offsetof(struct ether_header, ether_type);
6199da57d7bSbt while (size <= offset) {
6209da57d7bSbt mp = mp->b_cont;
6219da57d7bSbt ASSERT(mp != NULL);
622da14cebeSEric Cheng len = MBLKL(mp);
6239da57d7bSbt size += len;
6249da57d7bSbt }
6259da57d7bSbt pos = mp->b_rptr + offset + len - size;
6269da57d7bSbt
6279da57d7bSbt etype = ntohs(*(ushort_t *)(uintptr_t)pos);
6289da57d7bSbt if (etype == ETHERTYPE_VLAN) {
6299da57d7bSbt /*
6309da57d7bSbt * Get the position of the ether_type in VLAN header
6319da57d7bSbt */
6329da57d7bSbt offset = offsetof(struct ether_vlan_header, ether_type);
6339da57d7bSbt while (size <= offset) {
6349da57d7bSbt mp = mp->b_cont;
6359da57d7bSbt ASSERT(mp != NULL);
636da14cebeSEric Cheng len = MBLKL(mp);
6379da57d7bSbt size += len;
6389da57d7bSbt }
6399da57d7bSbt pos = mp->b_rptr + offset + len - size;
6409da57d7bSbt
6419da57d7bSbt etype = ntohs(*(ushort_t *)(uintptr_t)pos);
6429da57d7bSbt mac_hdr_len = sizeof (struct ether_vlan_header);
6439da57d7bSbt } else {
6449da57d7bSbt mac_hdr_len = sizeof (struct ether_header);
6459da57d7bSbt }
6469da57d7bSbt
6479da57d7bSbt /*
648da14cebeSEric Cheng * Here we don't assume the IP(V6) header is fully included in
649c971fb7eSgg * one mblk fragment.
6509da57d7bSbt */
65185f496faSRobert Mustacchi lsocksum = HCK_PARTIALCKSUM;
65285f496faSRobert Mustacchi ctx->l3_proto = etype;
6539da57d7bSbt switch (etype) {
6549da57d7bSbt case ETHERTYPE_IP:
655c971fb7eSgg if (ctx->lso_flag) {
656da14cebeSEric Cheng offset = offsetof(ipha_t, ipha_length) + mac_hdr_len;
657da14cebeSEric Cheng while (size <= offset) {
658da14cebeSEric Cheng mp = mp->b_cont;
659da14cebeSEric Cheng ASSERT(mp != NULL);
660da14cebeSEric Cheng len = MBLKL(mp);
661da14cebeSEric Cheng size += len;
662da14cebeSEric Cheng }
663da14cebeSEric Cheng pos = mp->b_rptr + offset + len - size;
664da14cebeSEric Cheng *((uint16_t *)(uintptr_t)(pos)) = 0;
665da14cebeSEric Cheng
666da14cebeSEric Cheng offset = offsetof(ipha_t, ipha_hdr_checksum) +
667da14cebeSEric Cheng mac_hdr_len;
668da14cebeSEric Cheng while (size <= offset) {
669da14cebeSEric Cheng mp = mp->b_cont;
670da14cebeSEric Cheng ASSERT(mp != NULL);
671da14cebeSEric Cheng len = MBLKL(mp);
672da14cebeSEric Cheng size += len;
673da14cebeSEric Cheng }
674da14cebeSEric Cheng pos = mp->b_rptr + offset + len - size;
675da14cebeSEric Cheng *((uint16_t *)(uintptr_t)(pos)) = 0;
676c971fb7eSgg
677c971fb7eSgg /*
678c971fb7eSgg * To perform ixgbe LSO, here also need to fill
679c971fb7eSgg * the tcp checksum field of the packet with the
680c971fb7eSgg * following pseudo-header checksum:
681c971fb7eSgg * (ip_source_addr, ip_destination_addr, l4_proto)
682c971fb7eSgg * Currently the tcp/ip stack has done it.
683c971fb7eSgg */
68485f496faSRobert Mustacchi lsocksum |= HCK_IPV4_HDRCKSUM;
685c971fb7eSgg }
686c971fb7eSgg
687da14cebeSEric Cheng offset = offsetof(ipha_t, ipha_protocol) + mac_hdr_len;
688da14cebeSEric Cheng while (size <= offset) {
689da14cebeSEric Cheng mp = mp->b_cont;
690da14cebeSEric Cheng ASSERT(mp != NULL);
691da14cebeSEric Cheng len = MBLKL(mp);
692da14cebeSEric Cheng size += len;
693da14cebeSEric Cheng }
694da14cebeSEric Cheng pos = mp->b_rptr + offset + len - size;
695da14cebeSEric Cheng
696da14cebeSEric Cheng l4_proto = *(uint8_t *)pos;
6979da57d7bSbt break;
6989da57d7bSbt case ETHERTYPE_IPV6:
69985f496faSRobert Mustacchi /*
70085f496faSRobert Mustacchi * We need to zero out the length in the header.
70185f496faSRobert Mustacchi */
70285f496faSRobert Mustacchi if (ctx->lso_flag) {
70385f496faSRobert Mustacchi offset = offsetof(ip6_t, ip6_plen) + mac_hdr_len;
70485f496faSRobert Mustacchi while (size <= offset) {
70585f496faSRobert Mustacchi mp = mp->b_cont;
70685f496faSRobert Mustacchi ASSERT(mp != NULL);
70785f496faSRobert Mustacchi len = MBLKL(mp);
70885f496faSRobert Mustacchi size += len;
70985f496faSRobert Mustacchi }
71085f496faSRobert Mustacchi pos = mp->b_rptr + offset + len - size;
71185f496faSRobert Mustacchi *((uint16_t *)(uintptr_t)(pos)) = 0;
71285f496faSRobert Mustacchi }
71385f496faSRobert Mustacchi
7149da57d7bSbt offset = offsetof(ip6_t, ip6_nxt) + mac_hdr_len;
7159da57d7bSbt while (size <= offset) {
7169da57d7bSbt mp = mp->b_cont;
7179da57d7bSbt ASSERT(mp != NULL);
718da14cebeSEric Cheng len = MBLKL(mp);
7199da57d7bSbt size += len;
7209da57d7bSbt }
7219da57d7bSbt pos = mp->b_rptr + offset + len - size;
7229da57d7bSbt
7239da57d7bSbt l4_proto = *(uint8_t *)pos;
7249da57d7bSbt break;
7259da57d7bSbt default:
7269da57d7bSbt /* Unrecoverable error */
7279da57d7bSbt IXGBE_DEBUGLOG_0(NULL, "Ether type error with tx hcksum");
728c971fb7eSgg return (-2);
729c971fb7eSgg }
730c971fb7eSgg
731c971fb7eSgg if (ctx->lso_flag) {
73285f496faSRobert Mustacchi /*
73385f496faSRobert Mustacchi * LSO relies on tx h/w checksum, so here will drop the packet
73485f496faSRobert Mustacchi * if h/w checksum flag is not declared.
73585f496faSRobert Mustacchi */
73685f496faSRobert Mustacchi if ((ctx->hcksum_flags & lsocksum) != lsocksum) {
73785f496faSRobert Mustacchi IXGBE_DEBUGLOG_2(NULL, "ixgbe_tx: h/w checksum flags "
73885f496faSRobert Mustacchi "are not set for LSO, found 0x%x, needed bits 0x%x",
73985f496faSRobert Mustacchi ctx->hcksum_flags, lsocksum);
74085f496faSRobert Mustacchi return (-1);
74185f496faSRobert Mustacchi }
74285f496faSRobert Mustacchi
74385f496faSRobert Mustacchi
744c971fb7eSgg offset = mac_hdr_len + start;
745c971fb7eSgg while (size <= offset) {
746c971fb7eSgg mp = mp->b_cont;
747c971fb7eSgg ASSERT(mp != NULL);
748da14cebeSEric Cheng len = MBLKL(mp);
749c971fb7eSgg size += len;
750c971fb7eSgg }
751c971fb7eSgg pos = mp->b_rptr + offset + len - size;
752c971fb7eSgg
753c971fb7eSgg l4_hdr_len = TCP_HDR_LENGTH((tcph_t *)pos);
754c971fb7eSgg } else {
755c971fb7eSgg /*
756c971fb7eSgg * l4 header length is only required for LSO
757c971fb7eSgg */
758c971fb7eSgg l4_hdr_len = 0;
7599da57d7bSbt }
7609da57d7bSbt
761c971fb7eSgg ctx->mac_hdr_len = mac_hdr_len;
762c971fb7eSgg ctx->ip_hdr_len = start;
763c971fb7eSgg ctx->l4_proto = l4_proto;
764c971fb7eSgg ctx->l4_hdr_len = l4_hdr_len;
765c971fb7eSgg
766c971fb7eSgg return (0);
7679da57d7bSbt }
7689da57d7bSbt
7699da57d7bSbt /*
770c971fb7eSgg * ixgbe_check_context
7719da57d7bSbt *
7729da57d7bSbt * Check if a new context descriptor is needed
7739da57d7bSbt */
7749da57d7bSbt static boolean_t
ixgbe_check_context(ixgbe_tx_ring_t * tx_ring,ixgbe_tx_context_t * ctx)775c971fb7eSgg ixgbe_check_context(ixgbe_tx_ring_t *tx_ring, ixgbe_tx_context_t *ctx)
7769da57d7bSbt {
777c971fb7eSgg ixgbe_tx_context_t *last;
7789da57d7bSbt
779c971fb7eSgg if (ctx == NULL)
7809da57d7bSbt return (B_FALSE);
7819da57d7bSbt
7829da57d7bSbt /*
783da14cebeSEric Cheng * Compare the context data retrieved from the mblk and the
784da14cebeSEric Cheng * stored data of the last context descriptor. The data need
785da14cebeSEric Cheng * to be checked are:
7869da57d7bSbt * hcksum_flags
7879da57d7bSbt * l4_proto
7889da57d7bSbt * mac_hdr_len
7899da57d7bSbt * ip_hdr_len
790da14cebeSEric Cheng * lso_flag
791c971fb7eSgg * mss (only checked for LSO)
792c971fb7eSgg * l4_hr_len (only checked for LSO)
7939da57d7bSbt * Either one of the above data is changed, a new context descriptor
7949da57d7bSbt * will be needed.
7959da57d7bSbt */
796c971fb7eSgg last = &tx_ring->tx_context;
7979da57d7bSbt
798da14cebeSEric Cheng if ((ctx->hcksum_flags != last->hcksum_flags) ||
799da14cebeSEric Cheng (ctx->l4_proto != last->l4_proto) ||
80085f496faSRobert Mustacchi (ctx->l3_proto != last->l3_proto) ||
801da14cebeSEric Cheng (ctx->mac_hdr_len != last->mac_hdr_len) ||
802da14cebeSEric Cheng (ctx->ip_hdr_len != last->ip_hdr_len) ||
803da14cebeSEric Cheng (ctx->lso_flag != last->lso_flag) ||
804da14cebeSEric Cheng (ctx->lso_flag && ((ctx->mss != last->mss) ||
805da14cebeSEric Cheng (ctx->l4_hdr_len != last->l4_hdr_len)))) {
806da14cebeSEric Cheng return (B_TRUE);
8079da57d7bSbt }
8089da57d7bSbt
8099da57d7bSbt return (B_FALSE);
8109da57d7bSbt }
8119da57d7bSbt
8129da57d7bSbt /*
813c971fb7eSgg * ixgbe_fill_context
8149da57d7bSbt *
8159da57d7bSbt * Fill the context descriptor with hardware checksum informations
8169da57d7bSbt */
8179da57d7bSbt static void
ixgbe_fill_context(struct ixgbe_adv_tx_context_desc * ctx_tbd,ixgbe_tx_context_t * ctx)818c971fb7eSgg ixgbe_fill_context(struct ixgbe_adv_tx_context_desc *ctx_tbd,
81973cd555cSBin Tu - Sun Microsystems - Beijing China ixgbe_tx_context_t *ctx)
8209da57d7bSbt {
8219da57d7bSbt /*
8229da57d7bSbt * Fill the context descriptor with the checksum
823da14cebeSEric Cheng * context information we've got.
8249da57d7bSbt */
825c971fb7eSgg ctx_tbd->vlan_macip_lens = ctx->ip_hdr_len;
826c971fb7eSgg ctx_tbd->vlan_macip_lens |= ctx->mac_hdr_len <<
8279da57d7bSbt IXGBE_ADVTXD_MACLEN_SHIFT;
8289da57d7bSbt
8299da57d7bSbt ctx_tbd->type_tucmd_mlhl =
8309da57d7bSbt IXGBE_ADVTXD_DCMD_DEXT | IXGBE_ADVTXD_DTYP_CTXT;
83185f496faSRobert Mustacchi /*
83285f496faSRobert Mustacchi * When we have a TX context set up, we enforce that the ethertype is
83385f496faSRobert Mustacchi * either IPv4 or IPv6 in ixgbe_get_tx_context().
83485f496faSRobert Mustacchi */
83585f496faSRobert Mustacchi if (ctx->lso_flag || ctx->hcksum_flags & HCK_IPV4_HDRCKSUM) {
83685f496faSRobert Mustacchi if (ctx->l3_proto == ETHERTYPE_IP) {
83785f496faSRobert Mustacchi ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4;
83885f496faSRobert Mustacchi } else {
83985f496faSRobert Mustacchi ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6;
84085f496faSRobert Mustacchi }
84185f496faSRobert Mustacchi }
8429da57d7bSbt
84385f496faSRobert Mustacchi if (ctx->lso_flag || ctx->hcksum_flags & HCK_PARTIALCKSUM) {
844c971fb7eSgg switch (ctx->l4_proto) {
8459da57d7bSbt case IPPROTO_TCP:
8469da57d7bSbt ctx_tbd->type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_L4T_TCP;
8479da57d7bSbt break;
8489da57d7bSbt case IPPROTO_UDP:
8499da57d7bSbt /*
8509da57d7bSbt * We don't have to explicitly set:
8519da57d7bSbt * ctx_tbd->type_tucmd_mlhl |=
8529da57d7bSbt * IXGBE_ADVTXD_TUCMD_L4T_UDP;
8539da57d7bSbt * Because IXGBE_ADVTXD_TUCMD_L4T_UDP == 0b
8549da57d7bSbt */
8559da57d7bSbt break;
8569da57d7bSbt default:
8579da57d7bSbt /* Unrecoverable error */
8589da57d7bSbt IXGBE_DEBUGLOG_0(NULL, "L4 type error with tx hcksum");
8599da57d7bSbt break;
8609da57d7bSbt }
8619da57d7bSbt }
8629da57d7bSbt
8639da57d7bSbt ctx_tbd->seqnum_seed = 0;
864da14cebeSEric Cheng
865c971fb7eSgg if (ctx->lso_flag) {
86673cd555cSBin Tu - Sun Microsystems - Beijing China ctx_tbd->mss_l4len_idx =
867c971fb7eSgg (ctx->l4_hdr_len << IXGBE_ADVTXD_L4LEN_SHIFT) |
868c971fb7eSgg (ctx->mss << IXGBE_ADVTXD_MSS_SHIFT);
86973cd555cSBin Tu - Sun Microsystems - Beijing China } else {
87073cd555cSBin Tu - Sun Microsystems - Beijing China ctx_tbd->mss_l4len_idx = 0;
871c971fb7eSgg }
8729da57d7bSbt }
8739da57d7bSbt
/*
 * ixgbe_tx_fill_ring
 *
 * Fill the tx descriptor ring with the data
 *
 * tx_ring      - transmit ring to fill; caller must hold tx_ring->tx_lock
 *                (asserted below)
 * pending_list - tx control blocks whose saved address/length pairs are
 *                copied into hardware descriptors; the blocks are moved
 *                onto the ring's work list here
 * ctx          - parsed checksum/LSO context, or NULL when no offload is
 *                requested for this packet
 * mbsize       - total packet size in bytes, used for the paylen field
 *
 * Returns the number of descriptors written, including a context
 * descriptor if one was needed; that count has already been reserved
 * from tbd_free before the hardware TDT pointer is advanced.
 *
 * NOTE(review): the code below dereferences first_tcb unconditionally,
 * so it assumes pending_list is non-empty — presumably guaranteed by
 * the caller; confirm against the tx entry path.
 */
static int
ixgbe_tx_fill_ring(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list,
    ixgbe_tx_context_t *ctx, size_t mbsize)
{
	struct ixgbe_hw *hw = &tx_ring->ixgbe->hw;
	boolean_t load_context;
	uint32_t index, tcb_index, desc_num;
	union ixgbe_adv_tx_desc *tbd, *first_tbd;
	tx_control_block_t *tcb, *first_tcb;
	uint32_t hcksum_flags;
	int i;

	ASSERT(mutex_owned(&tx_ring->tx_lock));

	tbd = NULL;
	first_tbd = NULL;
	first_tcb = NULL;
	desc_num = 0;
	hcksum_flags = 0;
	load_context = B_FALSE;

	/*
	 * Get the index of the first tx descriptor that will be filled,
	 * and the index of the first work list item that will be attached
	 * with the first used tx control block in the pending list.
	 * Note: the two indexes are the same.
	 */
	index = tx_ring->tbd_tail;
	tcb_index = tx_ring->tbd_tail;

	if (ctx != NULL) {
		hcksum_flags = ctx->hcksum_flags;

		/*
		 * Check if a new context descriptor is needed for this packet
		 */
		load_context = ixgbe_check_context(tx_ring, ctx);

		if (load_context) {
			tbd = &tx_ring->tbd_ring[index];

			/*
			 * Fill the context descriptor with the
			 * hardware checksum offload information.
			 */
			ixgbe_fill_context(
			    (struct ixgbe_adv_tx_context_desc *)tbd, ctx);

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;

			/*
			 * Store the checksum context data if
			 * a new context descriptor is added
			 */
			tx_ring->tx_context = *ctx;
		}
	}

	first_tbd = &tx_ring->tbd_ring[index];

	/*
	 * Fill tx data descriptors with the data saved in the pending list.
	 * The tx control blocks in the pending list are added to the work list
	 * at the same time.
	 *
	 * The work list is strictly 1:1 corresponding to the descriptor ring.
	 * One item of the work list corresponds to one tx descriptor. Because
	 * one tx control block can span multiple tx descriptors, the tx
	 * control block will be added to the first work list item that
	 * corresponds to the first tx descriptor generated from that tx
	 * control block.
	 */
	tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	first_tcb = tcb;
	while (tcb != NULL) {

		for (i = 0; i < tcb->desc_num; i++) {
			tbd = &tx_ring->tbd_ring[index];

			tbd->read.buffer_addr = tcb->desc[i].address;
			tbd->read.cmd_type_len = tcb->desc[i].length;

			tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_DEXT
			    | IXGBE_ADVTXD_DTYP_DATA;

			tbd->read.olinfo_status = 0;

			index = NEXT_INDEX(index, 1, tx_ring->ring_size);
			desc_num++;
		}

		/*
		 * Add the tx control block to the work list
		 */
		ASSERT(tx_ring->work_list[tcb_index] == NULL);
		tx_ring->work_list[tcb_index] = tcb;

		tcb_index = index;
		tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
	}

	if (load_context) {
		/*
		 * Count the context descriptor for
		 * the first tx control block.
		 */
		first_tcb->desc_num++;
	}
	/*
	 * Remember the index of this packet's last descriptor so the
	 * recycle path can test the Descriptor Done bit at packet
	 * granularity.
	 */
	first_tcb->last_index = PREV_INDEX(index, 1, tx_ring->ring_size);

	/*
	 * The Insert Ethernet CRC (IFCS) bit and the checksum fields are only
	 * valid in the first descriptor of the packet.
	 * Setting paylen in every first_tbd for all parts.
	 * 82599, X540 and X550 require the packet length in paylen field
	 * with or without LSO and 82598 will ignore it in non-LSO mode.
	 */
	ASSERT(first_tbd != NULL);
	first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_IFCS;

	switch (hw->mac.type) {
	case ixgbe_mac_82598EB:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		}
		break;

	case ixgbe_mac_82599EB:
	case ixgbe_mac_X540:
	case ixgbe_mac_X550:
	case ixgbe_mac_X550EM_x:
	case ixgbe_mac_X550EM_a:
		if (ctx != NULL && ctx->lso_flag) {
			first_tbd->read.cmd_type_len |= IXGBE_ADVTXD_DCMD_TSE;
			first_tbd->read.olinfo_status |=
			    (mbsize - ctx->mac_hdr_len - ctx->ip_hdr_len
			    - ctx->l4_hdr_len) << IXGBE_ADVTXD_PAYLEN_SHIFT;
		} else {
			first_tbd->read.olinfo_status |=
			    (mbsize << IXGBE_ADVTXD_PAYLEN_SHIFT);
		}
		break;

	default:
		break;
	}

	/* Set hardware checksum bits */
	if (hcksum_flags != 0) {
		if (hcksum_flags & HCK_IPV4_HDRCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_IXSM;
		if (hcksum_flags & HCK_PARTIALCKSUM)
			first_tbd->read.olinfo_status |=
			    IXGBE_ADVTXD_POPTS_TXSM;
	}

	/*
	 * The last descriptor of packet needs End Of Packet (EOP),
	 * and Report Status (RS) bits set
	 */
	ASSERT(tbd != NULL);
	tbd->read.cmd_type_len |=
	    IXGBE_ADVTXD_DCMD_EOP | IXGBE_ADVTXD_DCMD_RS;

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORDEV);

	/*
	 * Update the number of the free tx descriptors.
	 * The mutual exclusion between the transmission and the recycling
	 * (for the tx descriptor ring and the work list) is implemented
	 * with the atomic operation on the number of the free tx descriptors.
	 *
	 * Note: we should always decrement the counter tbd_free before
	 * advancing the hardware TDT pointer to avoid the race condition -
	 * before the counter tbd_free is decremented, the transmit of the
	 * tx descriptors has done and the counter tbd_free is increased by
	 * the tx recycling.
	 */
	i = ixgbe_atomic_reserve(&tx_ring->tbd_free, desc_num);
	ASSERT(i >= 0);

	tx_ring->tbd_tail = index;

	/*
	 * Advance the hardware TDT pointer of the tx descriptor ring
	 */
	IXGBE_WRITE_REG(hw, IXGBE_TDT(tx_ring->index), index);

	if (ixgbe_check_acc_handle(tx_ring->ixgbe->osdep.reg_handle) !=
	    DDI_FM_OK) {
		ddi_fm_service_impact(tx_ring->ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&tx_ring->ixgbe->ixgbe_state, IXGBE_ERROR);
	}

	return (desc_num);
}
10849da57d7bSbt
10859da57d7bSbt /*
10869da57d7bSbt * ixgbe_save_desc
10879da57d7bSbt *
10889da57d7bSbt * Save the address/length pair to the private array
10899da57d7bSbt * of the tx control block. The address/length pairs
10909da57d7bSbt * will be filled into the tx descriptor ring later.
10919da57d7bSbt */
10929da57d7bSbt static void
ixgbe_save_desc(tx_control_block_t * tcb,uint64_t address,size_t length)10939da57d7bSbt ixgbe_save_desc(tx_control_block_t *tcb, uint64_t address, size_t length)
10949da57d7bSbt {
10959da57d7bSbt sw_desc_t *desc;
10969da57d7bSbt
10979da57d7bSbt desc = &tcb->desc[tcb->desc_num];
10989da57d7bSbt desc->address = address;
10999da57d7bSbt desc->length = length;
11009da57d7bSbt
11019da57d7bSbt tcb->desc_num++;
11029da57d7bSbt }
11039da57d7bSbt
/*
 * ixgbe_tx_recycle_legacy
 *
 * Recycle the tx descriptors and tx control blocks.
 *
 * The work list is traversed to check if the corresponding
 * tx descriptors have been transmitted. If so, the resources
 * bound to the tx control blocks will be freed, and those
 * tx control blocks will be returned to the free list.
 *
 * Returns the number of tx descriptors recycled (0 when nothing could
 * be reclaimed). Takes tx_ring->recycle_lock for the duration; also
 * re-enables blocked transmit (mac_tx_ring_update) when enough
 * descriptors become free and a reschedule was pending.
 */
uint32_t
ixgbe_tx_recycle_legacy(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index, last_index, prev_index;
	int desc_num;
	boolean_t desc_done;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	/* Ring already fully free: just clear counters and wake mac */
	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 */
	DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head; /* Index of next tbd/tcb to recycle */

	tcb = tx_ring->work_list[index];
	ASSERT(tcb != NULL);

	while (tcb != NULL) {
		/*
		 * Get the last tx descriptor of this packet.
		 * If the last tx descriptor is done, then
		 * we can recycle all descriptors of a packet
		 * which usually includes several tx control blocks.
		 * For 82599, LSO descriptors can not be recycled
		 * unless the whole packet's transmission is done.
		 * That's why packet level recycling is used here.
		 * For 82598, there's not such limit.
		 */
		last_index = tcb->last_index;
		/*
		 * MAX_TX_RING_SIZE is used to judge whether
		 * the index is a valid value or not.
		 */
		if (last_index == MAX_TX_RING_SIZE)
			break;

		/*
		 * Check if the Descriptor Done bit is set
		 */
		desc_done = tx_ring->tbd_ring[last_index].wb.status &
		    IXGBE_TXD_STAT_DD;
		if (desc_done) {
			/*
			 * recycle all descriptors of the packet
			 */
			while (tcb != NULL) {
				/*
				 * Strip off the tx control block from
				 * the work list, and add it to the
				 * pending list.
				 */
				tx_ring->work_list[index] = NULL;
				LIST_PUSH_TAIL(&pending_list, &tcb->link);

				/*
				 * Count the total number of the tx
				 * descriptors recycled
				 */
				desc_num += tcb->desc_num;

				index = NEXT_INDEX(index, tcb->desc_num,
				    tx_ring->ring_size);

				tcb = tx_ring->work_list[index];

				/*
				 * Stop once we have stepped past the
				 * packet's last descriptor.
				 */
				prev_index = PREV_INDEX(index, 1,
				    tx_ring->ring_size);
				if (prev_index == last_index)
					break;
			}
		} else {
			break;
		}
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}
12549da57d7bSbt
/*
 * ixgbe_tx_recycle_head_wb
 *
 * Check the head write-back, and recycle all the transmitted
 * tx descriptors and tx control blocks.
 *
 * Instead of reading the Descriptor Done bit of each descriptor, this
 * path reads the head value the hardware writes back into the extra
 * slot at the end of the descriptor DMA area, and reclaims every
 * control block that lies entirely before that head.
 *
 * Returns the number of tx descriptors recycled (0 when nothing could
 * be reclaimed). Takes tx_ring->recycle_lock; re-enables blocked
 * transmit (mac_tx_ring_update) when enough descriptors become free
 * and a reschedule was pending.
 */
uint32_t
ixgbe_tx_recycle_head_wb(ixgbe_tx_ring_t *tx_ring)
{
	uint32_t index;
	uint32_t head_wb;
	int desc_num;
	tx_control_block_t *tcb;
	link_list_t pending_list;
	ixgbe_t *ixgbe = tx_ring->ixgbe;

	mutex_enter(&tx_ring->recycle_lock);

	ASSERT(tx_ring->tbd_free <= tx_ring->ring_size);

	/* Ring already fully free: just clear counters and wake mac */
	if (tx_ring->tbd_free == tx_ring->ring_size) {
		tx_ring->recycle_fail = 0;
		tx_ring->stall_watchdog = 0;
		if (tx_ring->reschedule) {
			tx_ring->reschedule = B_FALSE;
			mac_tx_ring_update(ixgbe->mac_hdl,
			    tx_ring->ring_handle);
		}
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	/*
	 * Sync the DMA buffer of the tx descriptor ring
	 *
	 * Note: For head write-back mode, the tx descriptors will not
	 * be written back, but the head write-back value is stored at
	 * the last extra tbd at the end of the DMA area, we still need
	 * to sync the head write-back value for kernel.
	 *
	 * DMA_SYNC(&tx_ring->tbd_area, DDI_DMA_SYNC_FORKERNEL);
	 */
	(void) ddi_dma_sync(tx_ring->tbd_area.dma_handle,
	    sizeof (union ixgbe_adv_tx_desc) * tx_ring->ring_size,
	    sizeof (uint32_t),
	    DDI_DMA_SYNC_FORKERNEL);

	if (ixgbe_check_dma_handle(tx_ring->tbd_area.dma_handle) != DDI_FM_OK) {
		mutex_exit(&tx_ring->recycle_lock);
		ddi_fm_service_impact(ixgbe->dip,
		    DDI_SERVICE_DEGRADED);
		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
		return (0);
	}

	LINK_LIST_INIT(&pending_list);
	desc_num = 0;
	index = tx_ring->tbd_head; /* Next index to clean */

	/*
	 * Get the value of head write-back
	 */
	head_wb = *tx_ring->tbd_head_wb;
	while (index != head_wb) {
		tcb = tx_ring->work_list[index];
		ASSERT(tcb != NULL);

		if (OFFSET(index, head_wb, tx_ring->ring_size) <
		    tcb->desc_num) {
			/*
			 * The current tx control block is not
			 * completely transmitted, stop recycling
			 */
			break;
		}

		/*
		 * Strip off the tx control block from the work list,
		 * and add it to the pending list.
		 */
		tx_ring->work_list[index] = NULL;
		LIST_PUSH_TAIL(&pending_list, &tcb->link);

		/*
		 * Advance the index of the tx descriptor ring
		 */
		index = NEXT_INDEX(index, tcb->desc_num, tx_ring->ring_size);

		/*
		 * Count the total number of the tx descriptors recycled
		 */
		desc_num += tcb->desc_num;
	}

	/*
	 * If no tx descriptors are recycled, no need to do more processing
	 */
	if (desc_num == 0) {
		tx_ring->recycle_fail++;
		mutex_exit(&tx_ring->recycle_lock);
		return (0);
	}

	tx_ring->recycle_fail = 0;
	tx_ring->stall_watchdog = 0;

	/*
	 * Update the head index of the tx descriptor ring
	 */
	tx_ring->tbd_head = index;

	/*
	 * Update the number of the free tx descriptors with atomic operations
	 */
	atomic_add_32(&tx_ring->tbd_free, desc_num);

	if ((tx_ring->tbd_free >= ixgbe->tx_resched_thresh) &&
	    (tx_ring->reschedule)) {
		tx_ring->reschedule = B_FALSE;
		mac_tx_ring_update(ixgbe->mac_hdl,
		    tx_ring->ring_handle);
	}
	mutex_exit(&tx_ring->recycle_lock);

	/*
	 * Add the tx control blocks in the pending list to the free list.
	 */
	ixgbe_put_free_list(tx_ring, &pending_list);

	return (desc_num);
}
13869da57d7bSbt
13879da57d7bSbt /*
13889da57d7bSbt * ixgbe_free_tcb - free up the tx control block
13899da57d7bSbt *
13909da57d7bSbt * Free the resources of the tx control block, including
13919da57d7bSbt * unbind the previously bound DMA handle, and reset other
13929da57d7bSbt * control fields.
13939da57d7bSbt */
13949da57d7bSbt void
ixgbe_free_tcb(tx_control_block_t * tcb)13959da57d7bSbt ixgbe_free_tcb(tx_control_block_t *tcb)
13969da57d7bSbt {
1397e39a7b5bSJason King if (tcb == NULL)
1398e39a7b5bSJason King return;
1399e39a7b5bSJason King
14009da57d7bSbt switch (tcb->tx_type) {
14019da57d7bSbt case USE_COPY:
14029da57d7bSbt /*
14039da57d7bSbt * Reset the buffer length that is used for copy
14049da57d7bSbt */
14059da57d7bSbt tcb->tx_buf.len = 0;
14069da57d7bSbt break;
14079da57d7bSbt case USE_DMA:
14089da57d7bSbt /*
14099da57d7bSbt * Release the DMA resource that is used for
14109da57d7bSbt * DMA binding.
14119da57d7bSbt */
14129da57d7bSbt (void) ddi_dma_unbind_handle(tcb->tx_dma_handle);
14139da57d7bSbt break;
14149da57d7bSbt default:
14159da57d7bSbt break;
14169da57d7bSbt }
14179da57d7bSbt
14189da57d7bSbt /*
14199da57d7bSbt * Free the mblk
14209da57d7bSbt */
14219da57d7bSbt if (tcb->mp != NULL) {
14229da57d7bSbt freemsg(tcb->mp);
14239da57d7bSbt tcb->mp = NULL;
14249da57d7bSbt }
14259da57d7bSbt
14269da57d7bSbt tcb->tx_type = USE_NONE;
1427edf70dc9SPaul Guo tcb->last_index = MAX_TX_RING_SIZE;
14289da57d7bSbt tcb->frag_num = 0;
14299da57d7bSbt tcb->desc_num = 0;
14309da57d7bSbt }
14319da57d7bSbt
14329da57d7bSbt /*
1433e39a7b5bSJason King * ixgbe_get_free_list - Get a free tx control block from the free list.
1434e39a7b5bSJason King * Returns the tx control block and appends it to list.
14359da57d7bSbt *
14369da57d7bSbt * The atomic operation on the number of the available tx control block
14379da57d7bSbt * in the free list is used to keep this routine mutual exclusive with
14389da57d7bSbt * the routine ixgbe_put_check_list.
14399da57d7bSbt */
14409da57d7bSbt static tx_control_block_t *
ixgbe_get_free_list(ixgbe_tx_ring_t * tx_ring,link_list_t * list)1441e39a7b5bSJason King ixgbe_get_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *list)
14429da57d7bSbt {
14439da57d7bSbt tx_control_block_t *tcb;
14449da57d7bSbt
14459da57d7bSbt /*
14469da57d7bSbt * Check and update the number of the free tx control block
14479da57d7bSbt * in the free list.
14489da57d7bSbt */
1449e39a7b5bSJason King if (ixgbe_atomic_reserve(&tx_ring->tcb_free, 1) < 0) {
1450e39a7b5bSJason King tx_ring->stat_fail_no_tcb++;
14519da57d7bSbt return (NULL);
1452e39a7b5bSJason King }
14539da57d7bSbt
14549da57d7bSbt mutex_enter(&tx_ring->tcb_head_lock);
14559da57d7bSbt
14569da57d7bSbt tcb = tx_ring->free_list[tx_ring->tcb_head];
14579da57d7bSbt ASSERT(tcb != NULL);
14589da57d7bSbt tx_ring->free_list[tx_ring->tcb_head] = NULL;
14599da57d7bSbt tx_ring->tcb_head = NEXT_INDEX(tx_ring->tcb_head, 1,
14609da57d7bSbt tx_ring->free_list_size);
14619da57d7bSbt
14629da57d7bSbt mutex_exit(&tx_ring->tcb_head_lock);
14639da57d7bSbt
1464e39a7b5bSJason King LIST_PUSH_TAIL(list, &tcb->link);
14659da57d7bSbt return (tcb);
14669da57d7bSbt }
14679da57d7bSbt
14689da57d7bSbt /*
14699da57d7bSbt * ixgbe_put_free_list
14709da57d7bSbt *
14719da57d7bSbt * Put a list of used tx control blocks back to the free list
14729da57d7bSbt *
14739da57d7bSbt * A mutex is used here to ensure the serialization. The mutual exclusion
14749da57d7bSbt * between ixgbe_get_free_list and ixgbe_put_free_list is implemented with
14759da57d7bSbt * the atomic operation on the counter tcb_free.
14769da57d7bSbt */
14779da57d7bSbt void
ixgbe_put_free_list(ixgbe_tx_ring_t * tx_ring,link_list_t * pending_list)14789da57d7bSbt ixgbe_put_free_list(ixgbe_tx_ring_t *tx_ring, link_list_t *pending_list)
14799da57d7bSbt {
14809da57d7bSbt uint32_t index;
14819da57d7bSbt int tcb_num;
14829da57d7bSbt tx_control_block_t *tcb;
14839da57d7bSbt
1484e39a7b5bSJason King for (tcb = (tx_control_block_t *)LIST_GET_HEAD(pending_list);
1485e39a7b5bSJason King tcb != NULL;
1486e39a7b5bSJason King tcb = (tx_control_block_t *)LIST_GET_NEXT(pending_list, tcb)) {
1487e39a7b5bSJason King /*
1488e39a7b5bSJason King * Despite the name, ixgbe_free_tcb() just releases the
1489e39a7b5bSJason King * resources in tcb, but does not free tcb itself.
1490e39a7b5bSJason King */
1491e39a7b5bSJason King ixgbe_free_tcb(tcb);
1492e39a7b5bSJason King }
1493e39a7b5bSJason King
14949da57d7bSbt mutex_enter(&tx_ring->tcb_tail_lock);
14959da57d7bSbt
14969da57d7bSbt index = tx_ring->tcb_tail;
14979da57d7bSbt
14989da57d7bSbt tcb_num = 0;
14999da57d7bSbt tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
15009da57d7bSbt while (tcb != NULL) {
15019da57d7bSbt ASSERT(tx_ring->free_list[index] == NULL);
15029da57d7bSbt tx_ring->free_list[index] = tcb;
15039da57d7bSbt
15049da57d7bSbt tcb_num++;
15059da57d7bSbt
15069da57d7bSbt index = NEXT_INDEX(index, 1, tx_ring->free_list_size);
15079da57d7bSbt
15089da57d7bSbt tcb = (tx_control_block_t *)LIST_POP_HEAD(pending_list);
15099da57d7bSbt }
15109da57d7bSbt
15119da57d7bSbt tx_ring->tcb_tail = index;
15129da57d7bSbt
15139da57d7bSbt /*
15149da57d7bSbt * Update the number of the free tx control block
15159da57d7bSbt * in the free list. This operation must be placed
15169da57d7bSbt * under the protection of the lock.
15179da57d7bSbt */
15189da57d7bSbt atomic_add_32(&tx_ring->tcb_free, tcb_num);
15199da57d7bSbt
15209da57d7bSbt mutex_exit(&tx_ring->tcb_tail_lock);
15219da57d7bSbt }
1522