/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * This file is part of the Chelsio T1 Ethernet driver.
 *
 * Copyright (C) 2003-2005 Chelsio Communications.  All rights reserved.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/cmn_err.h>
#include <sys/sunddi.h>
#include <sys/kmem.h>
#include <sys/byteorder.h>
#include <sys/atomic.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/dlpi.h>
#include <sys/kstat.h>
#include <sys/ethernet.h>
#include <netinet/in.h>
#include <netinet/udp.h>
#include <inet/common.h>
#include <inet/nd.h>
#include <inet/ip.h>
#include <inet/tcp.h>
#include <sys/gld.h>
#include "ostypes.h"
#include "common.h"
#ifdef CONFIG_CHELSIO_T1_1G
#include "fpga_defs.h"
#endif
#include "regs.h"
#include "suni1x10gexp_regs.h"
#include "sge.h"
#include "espi.h"

#include "ch.h"

extern uint32_t buffers_in_use[];

uint32_t sge_cmdq0_cnt = SGE_CMDQ0_E_N;
uint32_t sge_cmdq1_cnt = SGE_CMDQ1_E_N;
uint32_t sge_flq0_cnt = SGE_FREELQ0_E_N;
uint32_t sge_flq1_cnt = SGE_FREELQ1_E_N;
uint32_t sge_respq_cnt = SGE_RESPQ_E_N;

uint32_t sge_cmdq0_cnt_orig = SGE_CMDQ0_E_N;
uint32_t sge_cmdq1_cnt_orig = SGE_CMDQ1_E_N;
uint32_t sge_flq0_cnt_orig = SGE_FREELQ0_E_N;
uint32_t sge_flq1_cnt_orig = SGE_FREELQ1_E_N;
uint32_t sge_respq_cnt_orig = SGE_RESPQ_E_N;

#ifdef HOST_PAUSE
uint32_t do_host_pause = 1;
uint32_t flq_pause_window = 64;
#endif
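
/*
 * The *_cnt globals above size the SGE rings; the *_orig copies preserve
 * the compiled-in defaults.  As plain driver globals they can presumably
 * be tuned at boot (e.g. via /etc/system on Solaris) without rebuilding
 * the driver.
 */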

static uint64_t os_freelist_buffer_alloc(ch_t *sa, int sz, mblk_t **mb,
    ulong_t *dh);
void pe_os_free_contig(ch_t *, size_t, void *, uint64_t, ulong_t, ulong_t);

static inline uint32_t t1_sge_rx(pesge *sge, freelQ_t *Q,
    unsigned int len, unsigned int offload);
#ifdef HOST_PAUSE
static void t1_sge_check_pause(pesge *sge, struct freelQ *Q);
#endif
static void alloc_freelQ_buffers(pesge *sge, struct freelQ *Q);
static void freelQs_empty(pesge *sge);
static void free_cmdQ_buffers(pesge *sge, cmdQ_t *Q, uint32_t credits_pend);
static int alloc_rx_resources(pesge *sge, struct sge_params *p);
static int alloc_tx_resources(pesge *sge, struct sge_params *p);
static inline void setup_ring_params(ch_t *adapter, u64 addr, u32 size,
    int base_reg_lo, int base_reg_hi, int size_reg);
static void configure_sge(pesge *sge, struct sge_params *p);
static void free_freelQ_buffers(pesge *sge, struct freelQ *Q);
static void free_rx_resources(pesge *sge);
static void free_tx_resources(pesge *sge);
static inline unsigned int jumbo_payload_capacity(pesge *sge);
#ifdef SUN_KSTATS
static int sge_kstat_setup(pesge *);
static void sge_kstat_remove(pesge *);
static int sge_kstat_update(p_kstat_t, int);
#endif
static uint16_t calc_ocsum(mblk_t *, int);

/*
 * Local routines.
 */
static inline void sge_ring_doorbell(pesge *sge, u32 control_reg);

static inline void
sge_ring_doorbell(pesge *sge, u32 control_reg)
{
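	/*
	 * Make sure all prior descriptor stores are globally visible
	 * before ringing the doorbell.
	 */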
	membar_producer();
	t1_write_reg_4(sge->obj, A_SG_DOORBELL, control_reg);
}

/*
 * DESC:    Allocate and initialize an SGE instance.
 *
 * NOTES:   Must have at least 1 command queue and 1 freelist queue.
 *
 */
pesge *
t1_sge_create(ch_t *sa, struct sge_params *p)
{
	pesge *sge;

	sge = t1_os_malloc_wait_zero(sizeof (pesge));

	if (sge == NULL)
		goto error_no_mem;

	memset(sge, 0, sizeof (*sge));

	/*
	 * PR2928 & PR3309
	 * Set the default timeout value - 20 msec.
	 * We set the initial value to 1, which guarantees at least one tick.
	 */
	if (is_T2(sa))
		sge->ptimeout = 1;

	sge->obj = sa;
#ifdef SUN_KSTATS
	if (sge_kstat_setup(sge) != 0)
		goto t1_sge_create_fail1;
#endif
	p->cmdQ_size[0] = sge_cmdq0_cnt;
	p->cmdQ_size[1] = sge_cmdq1_cnt;

	/* note that jumbo frame index is inverted for T2 */
	if (is_T2(sa)) {
		p->freelQ_size[1] = sge_flq0_cnt;
		p->freelQ_size[0] = sge_flq1_cnt;
	} else {
		p->freelQ_size[0] = sge_flq0_cnt;
		p->freelQ_size[1] = sge_flq1_cnt;
	}

#if CH_DEBUG
	/* DEBUG only */
	cmn_err(CE_NOTE, "sge: %p\n", sge);
	cmn_err(CE_NOTE, "&sge->cmdQ[0]: %p\n", &sge->cmdQ[0]);
	cmn_err(CE_NOTE, "&sge->freelQ[0]: %p\n", &sge->freelQ[0]);
	cmn_err(CE_NOTE, "&sge->freelQ[1]: %p\n", &sge->freelQ[1]);
	cmn_err(CE_NOTE, "&sge->respQ: %p\n", &sge->respQ);
	cmn_err(CE_NOTE, "&sge->intr_cnt: %p\n", &sge->intr_cnt);
#endif
#ifdef SUN_KSTATS
	goto error_no_mem;

t1_sge_create_fail1:
	t1_os_free(sge, sizeof (pesge));
	sge = NULL;
#endif
error_no_mem:
	return (sge);
}

int
t1_sge_destroy(pesge *sge)
{
	if (sge != NULL) {
		free_tx_resources(sge);
		free_rx_resources(sge);

		/* PR2928 & PR3309 */
		if ((is_T2(sge->obj)) && (sge->pskb))
			pe_free_fake_arp(sge->pskb);
#ifdef SUN_KSTATS
		sge_kstat_remove(sge);
#endif
		t1_os_free(sge, sizeof (pesge));
	}
	return (0);
}

/*
 * PR2928 & PR3309
 * Call out event from timeout.
 *
 * There is a potential race between the timeout and the close.
 * Unless we protect the timeout, the close could occur at the
 * same time.  Then, if the timeout service routine was slow or
 * interrupted, sge_stop() could complete with a timeout ID
 * that has expired, letting another timeout occur.  If the
 * service routine was delayed still further, a detach could occur.
 * The second timeout could then end up accessing memory that has been
 * released back to the system.  Bad things could then occur.  We
 * set a flag in sge_stop() to tell the service routine not to
 * issue further timeouts.  sge_stop() will block until a timeout
 * has occurred.  If the command Q is full, then we shouldn't put out
 * an arp.
 */

void
t1_espi_workaround(ch_t *adapter)
{
	pesge *sge = adapter->sge;
	ch_t *chp = (ch_t *)sge->obj;
	int rv = 1;

	if ((chp->ch_state == PERUNNING) &&
	    atomic_read(&sge->cmdQ[0].cq_asleep)) {
		u32 seop;
		seop = t1_espi_get_mon(adapter, 0x930, 0);
		if ((seop & 0xfff0fff) == 0xfff) {
			/* after first arp */
			if (sge->pskb) {
				rv = pe_start(adapter, (mblk_t *)sge->pskb,
				    CH_ARP);
				if (!rv)
					sge->intr_cnt.arp_sent++;
			}
		}
	}
#ifdef HOST_PAUSE
	/*
	 * If we are already in sge_data_in, then we can skip calling
	 * t1_sge_check_pause() this tick.  lockstat showed that we were
	 * blocking on the mutex ~2% of the time.
	 */
	if (mutex_tryenter(&adapter->ch_intr)) {
		t1_sge_check_pause(sge, &sge->freelQ[0]);
		t1_sge_check_pause(sge, &sge->freelQ[1]);
		mutex_exit(&adapter->ch_intr);
	}
#endif
}

int
sge_start(pesge *sge)
{
	t1_write_reg_4(sge->obj, A_SG_CONTROL, sge->sge_control);
	/* PR2928 & PR3309, also need to avoid Pause deadlock */
	ch_init_cyclic(sge->obj, &sge->espi_wa_cyclic,
	    (void (*)(void *))t1_espi_workaround, sge->obj);
	ch_start_cyclic(&sge->espi_wa_cyclic, sge->ptimeout);
	return (0);
}

/*
 * Disables SGE queues.
 */
int
sge_stop(pesge *sge)
{
	uint32_t status;
	int loops;

	DBGASSERT(sge);

	/* PR2928 & PR3309, also need to avoid Pause deadlock */
	t1_write_reg_4(sge->obj, A_SG_CONTROL, 0x0);

	/* wait until there are no more interrupts pending */
	loops = 0;
	do {
		status = t1_read_reg_4(sge->obj, A_SG_INT_CAUSE);
		t1_write_reg_4(sge->obj, A_SG_INT_CAUSE, status);
		drv_usecwait(125);
		loops++;
	} while (status && (loops < 1000));

	ch_stop_cyclic(&sge->espi_wa_cyclic);

	return (0);
}

uint32_t sge_cmdq_send_fail;

int
sge_data_out(pesge *sge, int qid, mblk_t *m0,
    cmdQ_ce_t *cmp, int count, uint32_t flg)
{
	struct cmdQ *Q = &sge->cmdQ[qid];
	ddi_dma_handle_t dh = (ddi_dma_handle_t)sge->cmdQ[qid].cq_dh;
	spinlock_t *qlock = &Q->cq_qlock;
	cmdQ_e *e;
	cmdQ_e *q = Q->cq_entries;
	uint32_t credits;
	uint32_t pidx;
	uint32_t genbit;
	uint32_t entries_n = Q->cq_entries_n;
	cmdQ_ce_t *ce;
	cmdQ_ce_t *cq = Q->cq_centries;
	dma_addr_t mapping;
	uint32_t j = 0;
	uint32_t offset;
#if defined(TX_CKSUM_FIX)
	uint16_t csum;
	uint16_t *csum_loc;
#endif
#ifdef TX_THREAD_RECLAIM
	uint32_t reclaim_cnt;
#endif

	/*
	 * We must exit if we don't have enough free command queue entries
	 * available.
	 */

	spin_lock(qlock);

#if defined(TX_CKSUM_FIX)
	/*
	 * This checksum fix addresses a fragmented-datagram checksum
	 * error, which would otherwise corrupt the checksum of the next
	 * packet after the last packet that has the More Fragments bit
	 * set.  When a packet reaches this point, the 'flg' variable
	 * indicates whether a checksum is needed or not.  The algorithm
	 * is as follows: if the current packet has More Fragments set,
	 * set the count of packets to be checksummed after it to 3.  If
	 * it doesn't, and the count is more than 0, then calculate the
	 * checksum in software (if a hardware checksum was requested)
	 * and decrement the count.  The same algorithm applies to TCP.
	 */
	if (flg & CH_UDP_MF) {
		sge->do_udp_csum = 3;
	} else if ((flg & CH_UDP) && (sge->do_udp_csum != 0)) {
		if ((flg & CH_NO_HWCKSUM) == 0) {
			/*
			 *  Calc Checksum here.
			 */
			csum = calc_ocsum(m0,
			    sizeof (struct ether_header) + CPL_FORMAT_0_SIZE);
			csum_loc = (uint16_t *)(m0->b_rptr +
			    sizeof (struct ether_header) + CPL_FORMAT_0_SIZE);
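			/*
			 * Skip the IP header: the low nibble of the first
			 * IP byte is the IHL in 32-bit words, so advancing
			 * a uint16_t pointer by IHL*2 entries lands on the
			 * UDP header.
			 */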
			csum_loc += (((*(char *)csum_loc) & 0x0f) << 1);

			sge->intr_cnt.tx_soft_cksums++;
			((struct udphdr *)(csum_loc))->uh_sum = csum;
			((struct cpl_tx_pkt *)m0->b_rptr)->l4_csum_dis = 1;
		}
		sge->do_udp_csum--;
	} else if (flg & CH_TCP_MF) {
		sge->do_tcp_csum = 3;
	} else if (sge->do_tcp_csum != 0) {
		if ((flg & CH_NO_HWCKSUM) == 0) {
			sge->intr_cnt.tx_soft_cksums++;
			/*
			 *  Calc Checksum here.
			 */
		}
		sge->do_tcp_csum--;
	}
#endif	/* TX_CKSUM_FIX */
#ifdef TX_THREAD_RECLAIM
	reclaim_cnt = Q->cq_complete;
	if (reclaim_cnt > SGE_BATCH_THRESH) {
		sge->intr_cnt.tx_reclaims[qid]++;
		free_cmdQ_buffers(sge, Q, reclaim_cnt);
		Q->cq_complete = 0;
	}
#endif
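	/*
	 * The generation bit marks entries the hardware has not yet
	 * consumed: it flips each time the producer index wraps around
	 * the ring, so entries left over from the previous pass are
	 * ignored.
	 */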
	genbit = Q->cq_genbit;
	pidx = Q->cq_pidx;
	credits = Q->cq_credits;

	if ((credits - 1) < count) {
		spin_unlock(qlock);
		sge->intr_cnt.cmdQ_full[qid]++;
		return (1);
	}

	atomic_sub(count, &Q->cq_credits);
	Q->cq_pidx += count;
	if (Q->cq_pidx >= entries_n) {
		Q->cq_pidx -= entries_n;
		Q->cq_genbit ^= 1;
	}

	spin_unlock(qlock);

#ifdef SUN_KSTATS
	if (count > MBLK_MAX)
		sge->intr_cnt.tx_descs[MBLK_MAX - 1]++;
	else
		sge->intr_cnt.tx_descs[count]++;
#endif

	ce = &cq[pidx];
	*ce = *cmp;
	mapping = cmp->ce_pa;
	j++;

	e = &q[pidx];

	offset = (caddr_t)e - (caddr_t)q;

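	/* first descriptor of the packet: mark start-of-packet */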
	e->Sop = 1;
	e->DataValid = 1;
	e->BufferLength = cmp->ce_len;
	e->AddrHigh = ((u64)mapping >> 32);
	e->AddrLow = ((u64)mapping & 0xffffffff);

	--count;
	if (count > 0) {
		unsigned int i;

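		/*
		 * Write the generation bits only after the rest of the
		 * descriptor (note the wmb()), so the hardware never sees
		 * a partially built entry.
		 */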
		e->Eop = 0;
		wmb();
		e->GenerationBit = e->GenerationBit2 = genbit;

		for (i = 0; i < count; i++) {

			ce++;
			e++;
			cmp++;
			if (++pidx == entries_n) {
				pidx = 0;
				genbit ^= 1;
				/* sync from offset to end of cmdQ */
				(void) ddi_dma_sync(dh, (off_t)(offset),
				    j*sizeof (*e), DDI_DMA_SYNC_FORDEV);
				offset = j = 0;
				ce = cq;
				e = q;
			}

			*ce = *cmp;
			mapping = cmp->ce_pa;
			j++;
			e->Sop = 0;
			e->DataValid = 1;
			e->BufferLength = cmp->ce_len;
			e->AddrHigh = ((u64)mapping >> 32);
			e->AddrLow = ((u64)mapping & 0xffffffff);

			if (i < (count - 1)) {
				e->Eop = 0;
				wmb();
				e->GenerationBit = e->GenerationBit2 = genbit;
			}
		}
	}

	ce->ce_mp = m0;

	e->Eop = 1;
	wmb();
	e->GenerationBit = e->GenerationBit2 = genbit;

	(void) ddi_dma_sync(dh, (off_t)(offset), j*sizeof (*e),
	    DDI_DMA_SYNC_FORDEV);

	/*
	 * We always ring the doorbell for cmdQ1.  For cmdQ0, we only ring
	 * the doorbell if the Q is asleep.  There is a natural race where
	 * the hardware goes to sleep just after we check; in that case the
	 * interrupt handler will detect the outstanding TX packet and ring
	 * the doorbell for us.
	 */
	if (qid) {
		doorbell_pio(sge, F_CMDQ1_ENABLE);
	} else {
		if (atomic_read(Q->cq_asleep)) {
			atomic_set(&Q->cq_asleep, 0);
/* NOT YET		doorbell_pio(sge, F_CMDQ0_ENABLE); */
			atomic_set(&Q->cq_pio_pidx, Q->cq_pidx);
		}
	}
	doorbell_pio(sge, F_CMDQ0_ENABLE);

	return (0);
}

#define	SGE_PL_INTR_MASK (F_PL_INTR_SGE_ERR | F_PL_INTR_SGE_DATA)

/*
 * Disable SGE error interrupts.
 */
int
t1_sge_intr_disable(pesge *sge)
{
	u32 val = t1_read_reg_4(sge->obj, A_PL_ENABLE);

	t1_write_reg_4(sge->obj, A_PL_ENABLE, val & ~SGE_PL_INTR_MASK);
	t1_write_reg_4(sge->obj, A_SG_INT_ENABLE, 0);
	return (0);
}

#define	SGE_INT_ENABLE (F_RESPQ_EXHAUSTED | F_RESPQ_OVERFLOW | \
	F_FL_EXHAUSTED | F_PACKET_TOO_BIG | F_PACKET_MISMATCH)

/*
 * Enable SGE error interrupts.
 */
int
t1_sge_intr_enable(pesge *sge)
{
	u32 en = SGE_INT_ENABLE;
	u32 val = t1_read_reg_4(sge->obj, A_PL_ENABLE);

	t1_write_reg_4(sge->obj, A_PL_ENABLE, val | SGE_PL_INTR_MASK);

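	/*
	 * TSO frames may legitimately exceed the normal maximum packet
	 * size, so the PACKET_TOO_BIG error is presumably masked on
	 * TSO-capable adapters to avoid spurious fatal errors.
	 */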
	if (sge->obj->ch_flags & TSO_CAPABLE)
		en &= ~F_PACKET_TOO_BIG;
	t1_write_reg_4(sge->obj, A_SG_INT_ENABLE, en);
	return (0);
}

/*
 * Clear SGE error interrupts.
 */
int
t1_sge_intr_clear(pesge *sge)
{
	t1_write_reg_4(sge->obj, A_PL_CAUSE, SGE_PL_INTR_MASK);
	t1_write_reg_4(sge->obj, A_SG_INT_CAUSE, 0xffffffff);
	return (0);
}

#define	SGE_INT_FATAL (F_RESPQ_OVERFLOW | F_PACKET_TOO_BIG | F_PACKET_MISMATCH)

int
t1_sge_intr_error_handler(pesge *sge)
{
	peobj *obj = sge->obj;
	u32 cause = t1_read_reg_4(obj, A_SG_INT_CAUSE);

	if (cause & F_RESPQ_EXHAUSTED)
		sge->intr_cnt.respQ_empty++;
	if (cause & F_RESPQ_OVERFLOW) {
		sge->intr_cnt.respQ_overflow++;
		cmn_err(CE_WARN, "%s: SGE response queue overflow\n",
		    obj->ch_name);
	}
	if (cause & F_FL_EXHAUSTED) {
		sge->intr_cnt.freelistQ_empty++;
		freelQs_empty(sge);
	}
	if (cause & F_PACKET_TOO_BIG) {
		sge->intr_cnt.pkt_too_big++;
		cmn_err(CE_WARN, "%s: SGE max packet size exceeded\n",
		    obj->ch_name);
	}
	if (cause & F_PACKET_MISMATCH) {
		sge->intr_cnt.pkt_mismatch++;
		cmn_err(CE_WARN, "%s: SGE packet mismatch\n",
		    obj->ch_name);
	}
	if (cause & SGE_INT_FATAL)
		t1_fatal_err(obj);

	t1_write_reg_4(obj, A_SG_INT_CAUSE, cause);
	return (0);
}

/*
 * DESC:    Process SGE response queue entries.
 *
 * PARAM:   sge     - SGE instance pointer.
 */
int
sge_data_in(pesge *sge)
{
	peobj *adapter = sge->obj;
	struct respQ *Q = &sge->respQ;
	respQ_e *e;				/* response queue entry */
	respQ_e *q = Q->rq_entries;		/* base response queue */
	uint32_t cidx = Q->rq_cidx;
	uint32_t genbit = Q->rq_genbit;
	uint32_t entries_n = Q->rq_entries_n;
	uint32_t credits = Q->rq_credits;
	uint32_t credits_thresh = Q->rq_credits_thresh;
	uint32_t ret = 0;
#ifndef TX_THREAD_RECLAIM
	uint32_t credits_pend[2] = {0, 0};
#endif
	uint32_t flags = 0;
	uint32_t flagt;
	ddi_dma_handle_t dh = (ddi_dma_handle_t)Q->rq_dh;

	t1_write_reg_4(adapter, A_PL_CAUSE, F_PL_INTR_SGE_DATA);

	/*
	 * Catch the case where an interrupt arrives
	 * early.
	 */
	if ((q == NULL) || (dh == NULL)) {
		goto check_slow_ints;
	}

	/* initial response queue entry */
	e = &q[cidx];

	/* pull physical memory of response queue entry into cache */
	(void) ddi_dma_sync(dh, (off_t)((caddr_t)e - (caddr_t)q),
	    sizeof (*e), DDI_DMA_SYNC_FORKERNEL);

	while (e->GenerationBit == genbit) {
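		/*
		 * Return completed entries to the hardware in a batch once
		 * the free response queue credits drop below the threshold.
		 */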
		if (--credits < credits_thresh) {
			uint32_t n = entries_n - credits - 1;
			t1_write_reg_4(adapter, A_SG_RSPQUEUECREDIT, n);
			credits += n;
		}
		if (likely(e->DataValid)) {
			(void) t1_sge_rx(sge, &sge->freelQ[e->FreelistQid],
			    e->BufferLength, e->Offload);
			if ((e->Sop != 1) || (e->Eop != 1)) {
				sge->intr_cnt.rx_badEopSop++;
				cmn_err(CE_WARN, "bad Sop %d or Eop %d: %d",
				    e->Sop, e->Eop, e->BufferLength);
			}
		}
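		/*
		 * Note which queues the SGE reported as sleeping so they
		 * can be serviced after the loop.
		 */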
		flagt = e->Qsleeping;
		flags |= flagt;
		if (flagt & F_CMDQ0_ENABLE)
			sge->intr_cnt.rx_cmdq0++;
		if (flagt & F_CMDQ1_ENABLE)
			sge->intr_cnt.rx_cmdq1++;
		if (flagt & F_FL0_ENABLE)
			sge->intr_cnt.rx_flq0++;
		if (flagt & F_FL1_ENABLE)
			sge->intr_cnt.rx_flq1++;
#ifdef TX_THREAD_RECLAIM
		spin_lock(&sge->cmdQ[0].cq_qlock);
		sge->cmdQ[0].cq_complete += e->Cmdq0CreditReturn;
		spin_unlock(&sge->cmdQ[0].cq_qlock);
		spin_lock(&sge->cmdQ[1].cq_qlock);
		sge->cmdQ[1].cq_complete += e->Cmdq1CreditReturn;
		if ((adapter->ch_blked) &&
		    (sge->cmdQ[0].cq_complete +
		    sge->cmdQ[1].cq_complete) > 16) {
			adapter->ch_blked = 0;
			ch_gld_ok(adapter);
		}
		spin_unlock(&sge->cmdQ[1].cq_qlock);
#else
		credits_pend[0] += e->Cmdq0CreditReturn;
		credits_pend[1] += e->Cmdq1CreditReturn;
#ifdef CONFIG_SMP
		if (unlikely(credits_pend[0] > SGE_BATCH_THRESH)) {
			free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0]);
			credits_pend[0] = 0;
		}
		if (unlikely(credits_pend[1] > SGE_BATCH_THRESH)) {
			free_cmdQ_buffers(sge, &sge->cmdQ[1], credits_pend[1]);
			credits_pend[1] = 0;
		}
#endif
#endif
#ifdef HOST_PAUSE
		t1_sge_check_pause(sge, &sge->freelQ[e->FreelistQid]);
#endif
		e++;
		if (unlikely(++cidx == entries_n)) {
			cidx = 0;
			genbit ^= 1;
			e = q;
		}

		/* pull physical memory of response queue entry into cache */
		(void) ddi_dma_sync(dh, (off_t)((caddr_t)e - (caddr_t)q),
		    sizeof (*e), DDI_DMA_SYNC_FORKERNEL);

		ret = 1;
	}

#ifndef TX_THREAD_RECLAIM
	if (credits_pend[0])
		free_cmdQ_buffers(sge, &sge->cmdQ[0], credits_pend[0]);
	if (credits_pend[1])
		free_cmdQ_buffers(sge, &sge->cmdQ[1], credits_pend[1]);
#endif
	if (flags & F_CMDQ0_ENABLE) {
		struct cmdQ *cmdQ = &sge->cmdQ[0];
		atomic_set(&cmdQ->cq_asleep, 1);
		if (atomic_read(cmdQ->cq_pio_pidx) != cmdQ->cq_pidx) {
			doorbell_pio(sge, F_CMDQ0_ENABLE);
			atomic_set(&cmdQ->cq_pio_pidx, cmdQ->cq_pidx);
		}
	}

	/* the SGE told us one of the free lists is empty */
	if (unlikely(flags & (F_FL0_ENABLE | F_FL1_ENABLE)))
		freelQs_empty(sge);

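	/*
	 * Flow-control unblock: once both command queues have regained
	 * enough free entries, tell GLD it may resume transmitting.
	 */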
#ifdef CONFIG_CHELSIO_T1_OFFLOAD
	if (adapter->ch_tx_overflow_mutex)
		mutex_enter(adapter->ch_tx_overflow_mutex);
	if (adapter->ch_blked &&
	    (sge->cmdQ[0].cq_credits > (sge->cmdQ[0].cq_entries_n>>2)) &&
	    (sge->cmdQ[1].cq_credits > (sge->cmdQ[1].cq_entries_n>>2))) {
		adapter->ch_blked = 0;
		if (adapter->ch_tx_overflow_cv)
			cv_broadcast(adapter->ch_tx_overflow_cv);
		ch_gld_ok(adapter);
	}
	if (adapter->ch_tx_overflow_mutex)
		mutex_exit(adapter->ch_tx_overflow_mutex);
#else
#ifndef TX_THREAD_RECLAIM
	if (adapter->ch_blked &&
	    (sge->cmdQ[0].cq_credits > (sge->cmdQ[0].cq_entries_n>>1)) &&
	    (sge->cmdQ[1].cq_credits > (sge->cmdQ[1].cq_entries_n>>1))) {
		adapter->ch_blked = 0;
		ch_gld_ok(adapter);
	}
#endif
#endif	/* CONFIG_CHELSIO_T1_OFFLOAD */

	Q->rq_genbit = genbit;
	Q->rq_cidx = cidx;
	Q->rq_credits = credits;

	t1_write_reg_4(adapter, A_SG_SLEEPING, cidx);

check_slow_ints:
	/* handle non-data interrupts */
	if (unlikely(!ret))
		ret = t1_slow_intr_handler(adapter);

	return (ret);
}

/*
 * Allocate an mblk wrapped around a pre-mapped DMA buffer.
 * When checksum offload is enabled, we start the DMA at a 2-byte offset so
 * the IP header will be aligned.  We do this for sparc only.
 */
static uint64_t
os_freelist_buffer_alloc(ch_t *sa, int sz, mblk_t **mb, ulong_t *dh)
{
	ch_esb_t *ch_get_small_rbuf(ch_t *sa);
	ch_esb_t *ch_get_big_rbuf(ch_t *sa);
	ch_esb_t *rbp;
	uint32_t rxoff = sa->sge->rx_offset;

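	/*
	 * Two pre-mapped buffer pools are kept: small buffers for normal
	 * frames and big buffers for jumbo frames.  desballoc() wraps an
	 * mblk around the existing buffer; cs_frtn returns the buffer to
	 * its pool when the mblk is freed.
	 */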
	if (sz == SGE_SM_BUF_SZ(sa)) {
		/* get pre-mapped buffer */
		if ((rbp = ch_get_small_rbuf(sa)) == NULL) {
			sa->norcvbuf++;
			return ((uint64_t)0);
		}

		*mb = desballoc((unsigned char *)rbp->cs_buf + rxoff,
		    SGE_SM_BUF_SZ(sa)-rxoff, BPRI_MED, &rbp->cs_frtn);
		if (*mb == NULL) {
			mutex_enter(&sa->ch_small_esbl);
			rbp->cs_next = sa->ch_small_esb_free;
			sa->ch_small_esb_free = rbp;
			mutex_exit(&sa->ch_small_esbl);
			return ((uint64_t)0);
		}
		*dh = rbp->cs_dh;

		return (rbp->cs_pa + rxoff);
	} else {
		/* get pre-mapped buffer */
		if ((rbp = ch_get_big_rbuf(sa)) == NULL) {
			sa->norcvbuf++;
			return ((uint64_t)0);
		}

		*mb = desballoc((unsigned char *)rbp->cs_buf + rxoff,
		    SGE_BG_BUF_SZ(sa)-rxoff, BPRI_MED, &rbp->cs_frtn);
		if (*mb == NULL) {
			mutex_enter(&sa->ch_big_esbl);
			rbp->cs_next = sa->ch_big_esb_free;
			sa->ch_big_esb_free = rbp;
			mutex_exit(&sa->ch_big_esbl);
			return ((uint64_t)0);
		}
		*dh = rbp->cs_dh;

		return (rbp->cs_pa + rxoff);
	}
}

static inline unsigned int
t1_sge_rx(pesge *sge, struct freelQ *Q, unsigned int len, unsigned int offload)
{
	mblk_t *skb;
	peobj *adapter = sge->obj;
	struct freelQ_ce *cq = Q->fq_centries;
	struct freelQ_ce *ce = &cq[Q->fq_cidx];
	ddi_dma_handle_t dh = (ddi_dma_handle_t)ce->fe_dh;
	uint32_t cidx = Q->fq_cidx;
	uint32_t entries_n = Q->fq_entries_n;
	uint32_t sz = Q->fq_rx_buffer_size;
	uint32_t useit = 1;
	uint32_t rxoff = sge->rx_offset;
#ifdef CONFIG_CHELSIO_T1_OFFLOAD
	uint32_t rv;
#endif

	if (Q->fq_id)
		sge->intr_cnt.rx_flq1_cnt++;
	else
		sge->intr_cnt.rx_flq0_cnt++;
	/*
	 * If the pkt size falls below the threshold, then we'll copy the
	 * data into a new mblk and reuse the DMA-mapped buffer.
	 *
	 * NOTE that rxoff is 2 for T1 adapters.  We align the start of the
	 * DMA buffer at offset rxoff for T1 cards instead of at the
	 * beginning of the buffer, thus the length of the received
	 * data does not include this offset.  We therefore always add