xref: /illumos-gate/usr/src/uts/common/io/ixgbe/ixgbe_rx.c (revision a9bfd41d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright(c) 2007-2010 Intel Corporation. All rights reserved.
24  */
25 
26 /*
27  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Copyright 2017 Joyent, Inc.
29  */
30 
31 #include "ixgbe_sw.h"
32 
33 /* function prototypes */
34 static mblk_t *ixgbe_rx_bind(ixgbe_rx_data_t *, uint32_t, uint32_t);
35 static mblk_t *ixgbe_rx_copy(ixgbe_rx_data_t *, uint32_t, uint32_t);
36 static void ixgbe_rx_assoc_hcksum(mblk_t *, uint32_t);
37 static mblk_t *ixgbe_lro_bind(ixgbe_rx_data_t *, uint32_t, uint32_t, uint32_t);
38 static mblk_t *ixgbe_lro_copy(ixgbe_rx_data_t *, uint32_t, uint32_t, uint32_t);
39 static int ixgbe_lro_get_start(ixgbe_rx_data_t *, uint32_t);
40 static uint32_t ixgbe_lro_get_first(ixgbe_rx_data_t *, uint32_t);
41 
42 #ifndef IXGBE_DEBUG
43 #pragma inline(ixgbe_rx_assoc_hcksum)
44 #pragma inline(ixgbe_lro_get_start)
45 #pragma inline(ixgbe_lro_get_first)
46 #endif
47 
48 /*
49  * ixgbe_rx_recycle - The call-back function to reclaim rx buffer.
50  *
51  * This function is called when an mp is freed by the user thru
52  * freeb call (Only for mp constructed through desballoc call).
53  * It returns back the freed buffer to the free list.
54  */
55 void
ixgbe_rx_recycle(caddr_t arg)56 ixgbe_rx_recycle(caddr_t arg)
57 {
58 	ixgbe_t *ixgbe;
59 	ixgbe_rx_ring_t *rx_ring;
60 	ixgbe_rx_data_t	*rx_data;
61 	rx_control_block_t *recycle_rcb;
62 	uint32_t free_index;
63 	uint32_t ref_cnt;
64 
65 	recycle_rcb = (rx_control_block_t *)(uintptr_t)arg;
66 	rx_data = recycle_rcb->rx_data;
67 	rx_ring = rx_data->rx_ring;
68 	ixgbe = rx_ring->ixgbe;
69 
70 	if (recycle_rcb->ref_cnt == 0) {
71 		/*
72 		 * This case only happens when rx buffers are being freed
73 		 * in ixgbe_stop() and freemsg() is called.
74 		 */
75 		return;
76 	}
77 
78 	ASSERT(recycle_rcb->mp == NULL);
79 
80 	/*
81 	 * Using the recycled data buffer to generate a new mblk
82 	 */
83 	recycle_rcb->mp = desballoc((unsigned char *)
84 	    recycle_rcb->rx_buf.address,
85 	    recycle_rcb->rx_buf.size,
86 	    0, &recycle_rcb->free_rtn);
87 
88 	/*
89 	 * Put the recycled rx control block into free list
90 	 */
91 	mutex_enter(&rx_data->recycle_lock);
92 
93 	free_index = rx_data->rcb_tail;
94 	ASSERT(rx_data->free_list[free_index] == NULL);
95 
96 	rx_data->free_list[free_index] = recycle_rcb;
97 	rx_data->rcb_tail = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
98 
99 	mutex_exit(&rx_data->recycle_lock);
100 
101 	/*
102 	 * The atomic operation on the number of the available rx control
103 	 * blocks in the free list is used to make the recycling mutual
104 	 * exclusive with the receiving.
105 	 */
106 	atomic_inc_32(&rx_data->rcb_free);
107 	ASSERT(rx_data->rcb_free <= rx_data->free_list_size);
108 
109 	/*
110 	 * Considering the case that the interface is unplumbed
111 	 * and there are still some buffers held by the upper layer.
112 	 * When the buffer is returned back, we need to free it.
113 	 */
114 	ref_cnt = atomic_dec_32_nv(&recycle_rcb->ref_cnt);
115 	if (ref_cnt == 0) {
116 		if (recycle_rcb->mp != NULL) {
117 			freemsg(recycle_rcb->mp);
118 			recycle_rcb->mp = NULL;
119 		}
120 
121 		ixgbe_free_dma_buffer(&recycle_rcb->rx_buf);
122 
123 		mutex_enter(&ixgbe->rx_pending_lock);
124 		atomic_dec_32(&rx_data->rcb_pending);
125 		atomic_dec_32(&ixgbe->rcb_pending);
126 
127 		/*
128 		 * When there is not any buffer belonging to this rx_data
129 		 * held by the upper layer, the rx_data can be freed.
130 		 */
131 		if ((rx_data->flag & IXGBE_RX_STOPPED) &&
132 		    (rx_data->rcb_pending == 0))
133 			ixgbe_free_rx_ring_data(rx_data);
134 
135 		mutex_exit(&ixgbe->rx_pending_lock);
136 	}
137 }
138 
139 /*
140  * ixgbe_rx_copy - Use copy to process the received packet.
141  *
142  * This function will use bcopy to process the packet
143  * and send the copied packet upstream.
144  */
145 static mblk_t *
ixgbe_rx_copy(ixgbe_rx_data_t * rx_data,uint32_t index,uint32_t pkt_len)146 ixgbe_rx_copy(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
147 {
148 	ixgbe_t *ixgbe;
149 	rx_control_block_t *current_rcb;
150 	mblk_t *mp;
151 
152 	ixgbe = rx_data->rx_ring->ixgbe;
153 	current_rcb = rx_data->work_list[index];
154 
155 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
156 
157 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
158 	    DDI_FM_OK) {
159 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
160 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
161 		return (NULL);
162 	}
163 
164 	/*
165 	 * Allocate buffer to receive this packet
166 	 */
167 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
168 	if (mp == NULL) {
169 		ixgbe_log(ixgbe, "ixgbe_rx_copy: allocate buffer failed");
170 		return (NULL);
171 	}
172 
173 	/*
174 	 * Copy the data received into the new cluster
175 	 */
176 	mp->b_rptr += IPHDR_ALIGN_ROOM;
177 	bcopy(current_rcb->rx_buf.address, mp->b_rptr, pkt_len);
178 	mp->b_wptr = mp->b_rptr + pkt_len;
179 
180 	return (mp);
181 }
182 
183 /*
184  * ixgbe_rx_bind - Use existing DMA buffer to build mblk for receiving.
185  *
186  * This function will use pre-bound DMA buffer to receive the packet
187  * and build mblk that will be sent upstream.
188  */
189 static mblk_t *
ixgbe_rx_bind(ixgbe_rx_data_t * rx_data,uint32_t index,uint32_t pkt_len)190 ixgbe_rx_bind(ixgbe_rx_data_t *rx_data, uint32_t index, uint32_t pkt_len)
191 {
192 	rx_control_block_t *current_rcb;
193 	rx_control_block_t *free_rcb;
194 	uint32_t free_index;
195 	mblk_t *mp;
196 	ixgbe_t	*ixgbe = rx_data->rx_ring->ixgbe;
197 
198 	/*
199 	 * If the free list is empty, we cannot proceed to send
200 	 * the current DMA buffer upstream. We'll have to return
201 	 * and use bcopy to process the packet.
202 	 */
203 	if (ixgbe_atomic_reserve(&rx_data->rcb_free, 1) < 0)
204 		return (NULL);
205 
206 	current_rcb = rx_data->work_list[index];
207 	/*
208 	 * If the mp of the rx control block is NULL, try to do
209 	 * desballoc again.
210 	 */
211 	if (current_rcb->mp == NULL) {
212 		current_rcb->mp = desballoc((unsigned char *)
213 		    current_rcb->rx_buf.address,
214 		    current_rcb->rx_buf.size,
215 		    0, &current_rcb->free_rtn);
216 		/*
217 		 * If it is failed to built a mblk using the current
218 		 * DMA buffer, we have to return and use bcopy to
219 		 * process the packet.
220 		 */
221 		if (current_rcb->mp == NULL) {
222 			atomic_inc_32(&rx_data->rcb_free);
223 			return (NULL);
224 		}
225 	}
226 	/*
227 	 * Sync up the data received
228 	 */
229 	DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
230 
231 	if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
232 	    DDI_FM_OK) {
233 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
234 		atomic_inc_32(&rx_data->rcb_free);
235 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
236 		return (NULL);
237 	}
238 
239 	mp = current_rcb->mp;
240 	current_rcb->mp = NULL;
241 	atomic_inc_32(&current_rcb->ref_cnt);
242 
243 	mp->b_wptr = mp->b_rptr + pkt_len;
244 	mp->b_next = mp->b_cont = NULL;
245 
246 	/*
247 	 * Strip off one free rx control block from the free list
248 	 */
249 	free_index = rx_data->rcb_head;
250 	free_rcb = rx_data->free_list[free_index];
251 	ASSERT(free_rcb != NULL);
252 	rx_data->free_list[free_index] = NULL;
253 	rx_data->rcb_head = NEXT_INDEX(free_index, 1, rx_data->free_list_size);
254 
255 	/*
256 	 * Put the rx control block to the work list
257 	 */
258 	rx_data->work_list[index] = free_rcb;
259 
260 	return (mp);
261 }
262 
263 /*
264  * ixgbe_lro_bind - Use existing DMA buffer to build LRO mblk for receiving.
265  *
266  * This function will use pre-bound DMA buffers to receive the packet
267  * and build LRO mblk that will be sent upstream.
268  */
269 static mblk_t *
ixgbe_lro_bind(ixgbe_rx_data_t * rx_data,uint32_t lro_start,uint32_t lro_num,uint32_t pkt_len)270 ixgbe_lro_bind(ixgbe_rx_data_t *rx_data, uint32_t lro_start,
271     uint32_t lro_num, uint32_t pkt_len)
272 {
273 	rx_control_block_t *current_rcb;
274 	union ixgbe_adv_rx_desc *current_rbd;
275 	rx_control_block_t *free_rcb;
276 	uint32_t free_index;
277 	int lro_next;
278 	uint32_t last_pkt_len;
279 	uint32_t i;
280 	mblk_t *mp;
281 	mblk_t *mblk_head;
282 	mblk_t **mblk_tail;
283 	ixgbe_t	*ixgbe = rx_data->rx_ring->ixgbe;
284 
285 	/*
286 	 * If the free list is empty, we cannot proceed to send
287 	 * the current DMA buffer upstream. We'll have to return
288 	 * and use bcopy to process the packet.
289 	 */
290 	if (ixgbe_atomic_reserve(&rx_data->rcb_free, lro_num) < 0)
291 		return (NULL);
292 	current_rcb = rx_data->work_list[lro_start];
293 
294 	/*
295 	 * If any one of the rx data blocks can not support
296 	 * lro bind  operation,  We'll have to return and use
297 	 * bcopy to process the lro  packet.
298 	 */
299 	for (i = lro_num; i > 0; i--) {
300 		/*
301 		 * Sync up the data received
302 		 */
303 		DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
304 
305 		if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
306 		    DDI_FM_OK) {
307 			ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
308 			atomic_add_32(&rx_data->rcb_free, lro_num);
309 			atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
310 			return (NULL);
311 		}
312 
313 		/*
314 		 * If the mp of the rx control block is NULL, try to do
315 		 * desballoc again.
316 		 */
317 		if (current_rcb->mp == NULL) {
318 			current_rcb->mp = desballoc((unsigned char *)
319 			    current_rcb->rx_buf.address,
320 			    current_rcb->rx_buf.size,
321 			    0, &current_rcb->free_rtn);
322 			/*
323 			 * If it is failed to built a mblk using the current
324 			 * DMA buffer, we have to return and use bcopy to
325 			 * process the packet.
326 			 */
327 			if (current_rcb->mp == NULL) {
328 				atomic_add_32(&rx_data->rcb_free, lro_num);
329 				return (NULL);
330 			}
331 		}
332 		if (current_rcb->lro_next != -1)
333 			lro_next = current_rcb->lro_next;
334 		current_rcb = rx_data->work_list[lro_next];
335 	}
336 
337 	mblk_head = NULL;
338 	mblk_tail = &mblk_head;
339 	lro_next = lro_start;
340 	last_pkt_len = pkt_len - ixgbe->rx_buf_size * (lro_num - 1);
341 	current_rcb = rx_data->work_list[lro_next];
342 	current_rbd = &rx_data->rbd_ring[lro_next];
343 	while (lro_num --) {
344 		mp = current_rcb->mp;
345 		current_rcb->mp = NULL;
346 		atomic_inc_32(&current_rcb->ref_cnt);
347 		if (lro_num != 0)
348 			mp->b_wptr = mp->b_rptr + ixgbe->rx_buf_size;
349 		else
350 			mp->b_wptr = mp->b_rptr + last_pkt_len;
351 		mp->b_next = mp->b_cont = NULL;
352 		*mblk_tail = mp;
353 		mblk_tail = &mp->b_cont;
354 
355 		/*
356 		 * Strip off one free rx control block from the free list
357 		 */
358 		free_index = rx_data->rcb_head;
359 		free_rcb = rx_data->free_list[free_index];
360 		ASSERT(free_rcb != NULL);
361 		rx_data->free_list[free_index] = NULL;
362 		rx_data->rcb_head = NEXT_INDEX(free_index, 1,
363 		    rx_data->free_list_size);
364 
365 		/*
366 		 * Put the rx control block to the work list
367 		 */
368 		rx_data->work_list[lro_next] = free_rcb;
369 		lro_next = current_rcb->lro_next;
370 		current_rcb->lro_next = -1;
371 		current_rcb->lro_prev = -1;
372 		current_rcb->lro_pkt = B_FALSE;
373 		current_rbd->read.pkt_addr = free_rcb->rx_buf.dma_address;
374 		current_rbd->read.hdr_addr = 0;
375 		if (lro_next == -1)
376 			break;
377 		current_rcb = rx_data->work_list[lro_next];
378 		current_rbd = &rx_data->rbd_ring[lro_next];
379 	}
380 	return (mblk_head);
381 }
382 
383 /*
384  * ixgbe_lro_copy - Use copy to process the received LRO packet.
385  *
386  * This function will use bcopy to process the LRO  packet
387  * and send the copied packet upstream.
388  */
389 static mblk_t *
ixgbe_lro_copy(ixgbe_rx_data_t * rx_data,uint32_t lro_start,uint32_t lro_num,uint32_t pkt_len)390 ixgbe_lro_copy(ixgbe_rx_data_t *rx_data, uint32_t lro_start,
391     uint32_t lro_num, uint32_t pkt_len)
392 {
393 	ixgbe_t *ixgbe;
394 	rx_control_block_t *current_rcb;
395 	union ixgbe_adv_rx_desc *current_rbd;
396 	mblk_t *mp;
397 	uint32_t last_pkt_len;
398 	int lro_next;
399 	uint32_t i;
400 
401 	ixgbe = rx_data->rx_ring->ixgbe;
402 
403 	/*
404 	 * Allocate buffer to receive this LRO packet
405 	 */
406 	mp = allocb(pkt_len + IPHDR_ALIGN_ROOM, 0);
407 	if (mp == NULL) {
408 		ixgbe_log(ixgbe, "LRO copy MP alloc failed");
409 		return (NULL);
410 	}
411 
412 	current_rcb = rx_data->work_list[lro_start];
413 
414 	/*
415 	 * Sync up the LRO packet data received
416 	 */
417 	for (i = lro_num; i > 0; i--) {
418 		DMA_SYNC(&current_rcb->rx_buf, DDI_DMA_SYNC_FORKERNEL);
419 
420 		if (ixgbe_check_dma_handle(current_rcb->rx_buf.dma_handle) !=
421 		    DDI_FM_OK) {
422 			ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
423 			atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
424 			return (NULL);
425 		}
426 		if (current_rcb->lro_next != -1)
427 			lro_next = current_rcb->lro_next;
428 		current_rcb = rx_data->work_list[lro_next];
429 	}
430 	lro_next = lro_start;
431 	current_rcb = rx_data->work_list[lro_next];
432 	current_rbd = &rx_data->rbd_ring[lro_next];
433 	last_pkt_len = pkt_len - ixgbe->rx_buf_size * (lro_num - 1);
434 
435 	/*
436 	 * Copy the data received into the new cluster
437 	 */
438 	mp->b_rptr += IPHDR_ALIGN_ROOM;
439 	mp->b_wptr += IPHDR_ALIGN_ROOM;
440 	while (lro_num --) {
441 		if (lro_num != 0) {
442 			bcopy(current_rcb->rx_buf.address, mp->b_wptr,
443 			    ixgbe->rx_buf_size);
444 			mp->b_wptr += ixgbe->rx_buf_size;
445 		} else {
446 			bcopy(current_rcb->rx_buf.address, mp->b_wptr,
447 			    last_pkt_len);
448 			mp->b_wptr += last_pkt_len;
449 		}
450 		lro_next = current_rcb->lro_next;
451 		current_rcb->lro_next = -1;
452 		current_rcb->lro_prev = -1;
453 		current_rcb->lro_pkt = B_FALSE;
454 		current_rbd->read.pkt_addr = current_rcb->rx_buf.dma_address;
455 		current_rbd->read.hdr_addr = 0;
456 		if (lro_next == -1)
457 			break;
458 		current_rcb = rx_data->work_list[lro_next];
459 		current_rbd = &rx_data->rbd_ring[lro_next];
460 	}
461 
462 	return (mp);
463 }
464 
465 /*
466  * ixgbe_lro_get_start - get the start rcb index in one LRO packet
467  */
468 static int
ixgbe_lro_get_start(ixgbe_rx_data_t * rx_data,uint32_t rx_next)469 ixgbe_lro_get_start(ixgbe_rx_data_t *rx_data, uint32_t rx_next)
470 {
471 	int lro_prev;
472 	int lro_start;
473 	uint32_t lro_num = 1;
474 	rx_control_block_t *prev_rcb;
475 	rx_control_block_t *current_rcb = rx_data->work_list[rx_next];
476 	lro_prev = current_rcb->lro_prev;
477 
478 	while (lro_prev != -1) {
479 		lro_num ++;
480 		prev_rcb = rx_data->work_list[lro_prev];
481 		lro_start = lro_prev;
482 		lro_prev = prev_rcb->lro_prev;
483 	}
484 	rx_data->lro_num = lro_num;
485 	return (lro_start);
486 }
487 
488 /*
489  * ixgbe_lro_get_first - get the first LRO rcb index
490  */
491 static uint32_t
ixgbe_lro_get_first(ixgbe_rx_data_t * rx_data,uint32_t rx_next)492 ixgbe_lro_get_first(ixgbe_rx_data_t *rx_data, uint32_t rx_next)
493 {
494 	rx_control_block_t *current_rcb;
495 	uint32_t lro_first;
496 	lro_first = rx_data->lro_first;
497 	current_rcb = rx_data->work_list[lro_first];
498 	while ((!current_rcb->lro_pkt) && (lro_first != rx_next)) {
499 		lro_first =  NEXT_INDEX(lro_first, 1, rx_data->ring_size);
500 		current_rcb = rx_data->work_list[lro_first];
501 	}
502 	rx_data->lro_first = lro_first;
503 	return (lro_first);
504 }
505 
506 /*
507  * ixgbe_rx_assoc_hcksum - Check the rx hardware checksum status and associate
508  * the hcksum flags.
509  */
510 static void
ixgbe_rx_assoc_hcksum(mblk_t * mp,uint32_t status_error)511 ixgbe_rx_assoc_hcksum(mblk_t *mp, uint32_t status_error)
512 {
513 	uint32_t hcksum_flags = 0;
514 
515 	/*
516 	 * Check TCP/UDP checksum
517 	 */
518 	if ((status_error & IXGBE_RXD_STAT_L4CS) &&
519 	    !(status_error & IXGBE_RXDADV_ERR_TCPE))
520 		hcksum_flags |= HCK_FULLCKSUM_OK;
521 
522 	/*
523 	 * Check IP Checksum
524 	 */
525 	if ((status_error & IXGBE_RXD_STAT_IPCS) &&
526 	    !(status_error & IXGBE_RXDADV_ERR_IPE))
527 		hcksum_flags |= HCK_IPV4_HDRCKSUM_OK;
528 
529 	if (hcksum_flags != 0) {
530 		mac_hcksum_set(mp, 0, 0, 0, 0, hcksum_flags);
531 	}
532 }
533 
534 /*
535  * ixgbe_ring_rx - Receive the data of one ring.
536  *
537  * This function goes throught h/w descriptor in one specified rx ring,
538  * receives the data if the descriptor status shows the data is ready.
539  * It returns a chain of mblks containing the received data, to be
540  * passed up to mac_rx().
541  */
542 mblk_t *
ixgbe_ring_rx(ixgbe_rx_ring_t * rx_ring,int poll_bytes)543 ixgbe_ring_rx(ixgbe_rx_ring_t *rx_ring, int poll_bytes)
544 {
545 	union ixgbe_adv_rx_desc *current_rbd;
546 	rx_control_block_t *current_rcb;
547 	mblk_t *mp;
548 	mblk_t *mblk_head;
549 	mblk_t **mblk_tail;
550 	uint32_t rx_next;
551 	uint32_t rx_tail;
552 	uint32_t pkt_len;
553 	uint32_t status_error;
554 	uint32_t pkt_num;
555 	uint32_t rsc_cnt;
556 	uint32_t lro_first;
557 	uint32_t lro_start;
558 	uint32_t lro_next;
559 	boolean_t lro_eop;
560 	uint32_t received_bytes;
561 	ixgbe_t *ixgbe = rx_ring->ixgbe;
562 	ixgbe_rx_data_t *rx_data;
563 
564 	if ((ixgbe->ixgbe_state & IXGBE_SUSPENDED) ||
565 	    (ixgbe->ixgbe_state & IXGBE_ERROR) ||
566 	    (ixgbe->ixgbe_state & IXGBE_OVERTEMP) ||
567 	    !(ixgbe->ixgbe_state & IXGBE_STARTED))
568 		return (NULL);
569 
570 	rx_data = rx_ring->rx_data;
571 	lro_eop = B_FALSE;
572 	mblk_head = NULL;
573 	mblk_tail = &mblk_head;
574 
575 	/*
576 	 * Sync the receive descriptors before accepting the packets
577 	 */
578 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORKERNEL);
579 
580 	if (ixgbe_check_dma_handle(rx_data->rbd_area.dma_handle) != DDI_FM_OK) {
581 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
582 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
583 		return (NULL);
584 	}
585 
586 	/*
587 	 * Get the start point of rx bd ring which should be examined
588 	 * during this cycle.
589 	 */
590 	rx_next = rx_data->rbd_next;
591 	current_rbd = &rx_data->rbd_ring[rx_next];
592 	received_bytes = 0;
593 	pkt_num = 0;
594 	status_error = current_rbd->wb.upper.status_error;
595 	while (status_error & IXGBE_RXD_STAT_DD) {
596 		/*
597 		 * If adapter has found errors, but the error
598 		 * is hardware checksum error, this does not discard the
599 		 * packet: let upper layer compute the checksum;
600 		 * Otherwise discard the packet.
601 		 */
602 		if ((status_error & IXGBE_RXDADV_ERR_FRAME_ERR_MASK) ||
603 		    ((!ixgbe->lro_enable) &&
604 		    (!(status_error & IXGBE_RXD_STAT_EOP)))) {
605 			rx_ring->stat_frame_error++;
606 			goto rx_discard;
607 		}
608 
609 		if ((status_error & IXGBE_RXDADV_ERR_TCPE) ||
610 		    (status_error & IXGBE_RXDADV_ERR_IPE))
611 			rx_ring->stat_cksum_error++;
612 
613 		if (ixgbe->lro_enable) {
614 			rsc_cnt =  (current_rbd->wb.lower.lo_dword.data &
615 			    IXGBE_RXDADV_RSCCNT_MASK) >>
616 			    IXGBE_RXDADV_RSCCNT_SHIFT;
617 			if (rsc_cnt != 0) {
618 				if (status_error & IXGBE_RXD_STAT_EOP) {
619 					pkt_len = current_rbd->wb.upper.length;
620 					if (rx_data->work_list[rx_next]->
621 					    lro_prev != -1) {
622 						lro_start =
623 						    ixgbe_lro_get_start(rx_data,
624 						    rx_next);
625 						ixgbe->lro_pkt_count++;
626 						pkt_len +=
627 						    (rx_data->lro_num  - 1) *
628 						    ixgbe->rx_buf_size;
629 						lro_eop = B_TRUE;
630 					}
631 				} else {
632 					lro_next = (status_error &
633 					    IXGBE_RXDADV_NEXTP_MASK) >>
634 					    IXGBE_RXDADV_NEXTP_SHIFT;
635 					rx_data->work_list[lro_next]->lro_prev
636 					    = rx_next;
637 					rx_data->work_list[rx_next]->lro_next =
638 					    lro_next;
639 					rx_data->work_list[rx_next]->lro_pkt =
640 					    B_TRUE;
641 					goto rx_discard;
642 				}
643 
644 			} else {
645 				pkt_len = current_rbd->wb.upper.length;
646 			}
647 		} else {
648 			pkt_len = current_rbd->wb.upper.length;
649 		}
650 
651 
652 		if ((poll_bytes != IXGBE_POLL_NULL) &&
653 		    ((received_bytes + pkt_len) > poll_bytes))
654 			break;
655 
656 		received_bytes += pkt_len;
657 		mp = NULL;
658 
659 		/*
660 		 * For packets with length more than the copy threshold,
661 		 * we'll first try to use the existing DMA buffer to build
662 		 * an mblk and send the mblk upstream.
663 		 *
664 		 * If the first method fails, or the packet length is less
665 		 * than the copy threshold, we'll allocate a new mblk and
666 		 * copy the packet data to the new mblk.
667 		 */
668 		if (lro_eop) {
669 			mp = ixgbe_lro_bind(rx_data, lro_start,
670 			    rx_data->lro_num, pkt_len);
671 			if (mp == NULL)
672 				mp = ixgbe_lro_copy(rx_data, lro_start,
673 				    rx_data->lro_num, pkt_len);
674 			lro_eop = B_FALSE;
675 			rx_data->lro_num = 0;
676 
677 		} else {
678 			if (pkt_len > ixgbe->rx_copy_thresh)
679 				mp = ixgbe_rx_bind(rx_data, rx_next, pkt_len);
680 
681 			if (mp == NULL)
682 				mp = ixgbe_rx_copy(rx_data, rx_next, pkt_len);
683 		}
684 		if (mp != NULL) {
685 			/*
686 			 * Check h/w checksum offload status
687 			 */
688 			if (ixgbe->rx_hcksum_enable)
689 				ixgbe_rx_assoc_hcksum(mp, status_error);
690 
691 			*mblk_tail = mp;
692 			mblk_tail = &mp->b_next;
693 		}
694 
695 rx_discard:
696 		/*
697 		 * Reset rx descriptor read bits
698 		 */
699 		current_rcb = rx_data->work_list[rx_next];
700 		if (ixgbe->lro_enable) {
701 			if (!current_rcb->lro_pkt) {
702 				current_rbd->read.pkt_addr =
703 				    current_rcb->rx_buf.dma_address;
704 				current_rbd->read.hdr_addr = 0;
705 			}
706 		} else {
707 			current_rbd->read.pkt_addr =
708 			    current_rcb->rx_buf.dma_address;
709 			current_rbd->read.hdr_addr = 0;
710 		}
711 
712 		rx_next = NEXT_INDEX(rx_next, 1, rx_data->ring_size);
713 
714 		/*
715 		 * The receive function is in interrupt context, so here
716 		 * rx_limit_per_intr is used to avoid doing receiving too long
717 		 * per interrupt.
718 		 */
719 		if (++pkt_num > ixgbe->rx_limit_per_intr) {
720 			rx_ring->stat_exceed_pkt++;
721 			break;
722 		}
723 
724 		current_rbd = &rx_data->rbd_ring[rx_next];
725 		status_error = current_rbd->wb.upper.status_error;
726 	}
727 
728 	rx_ring->stat_rbytes += received_bytes;
729 	rx_ring->stat_ipackets += pkt_num;
730 
731 	DMA_SYNC(&rx_data->rbd_area, DDI_DMA_SYNC_FORDEV);
732 
733 	rx_data->rbd_next = rx_next;
734 
735 	/*
736 	 * Update the h/w tail accordingly
737 	 */
738 	if (ixgbe->lro_enable) {
739 		lro_first = ixgbe_lro_get_first(rx_data, rx_next);
740 		rx_tail = PREV_INDEX(lro_first, 1, rx_data->ring_size);
741 	} else
742 		rx_tail = PREV_INDEX(rx_next, 1, rx_data->ring_size);
743 
744 	IXGBE_WRITE_REG(&ixgbe->hw, IXGBE_RDT(rx_ring->hw_index), rx_tail);
745 
746 	if (ixgbe_check_acc_handle(ixgbe->osdep.reg_handle) != DDI_FM_OK) {
747 		ddi_fm_service_impact(ixgbe->dip, DDI_SERVICE_DEGRADED);
748 		atomic_or_32(&ixgbe->ixgbe_state, IXGBE_ERROR);
749 	}
750 
751 	return (mblk_head);
752 }
753 
754 mblk_t *
ixgbe_ring_rx_poll(void * arg,int n_bytes)755 ixgbe_ring_rx_poll(void *arg, int n_bytes)
756 {
757 	ixgbe_rx_ring_t *rx_ring = (ixgbe_rx_ring_t *)arg;
758 	mblk_t *mp = NULL;
759 
760 	ASSERT(n_bytes >= 0);
761 
762 	if (n_bytes == 0)
763 		return (NULL);
764 
765 	mutex_enter(&rx_ring->rx_lock);
766 	mp = ixgbe_ring_rx(rx_ring, n_bytes);
767 	mutex_exit(&rx_ring->rx_lock);
768 
769 	return (mp);
770 }
771