1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 26 #pragma ident "%Z%%M% %I% %E% SMI" 27 28 #include <sys/nxge/nxge_impl.h> 29 30 extern uint32_t nxge_reclaim_pending; 31 extern uint32_t nxge_bcopy_thresh; 32 extern uint32_t nxge_dvma_thresh; 33 extern uint32_t nxge_dma_stream_thresh; 34 extern uint32_t nxge_tx_minfree; 35 extern uint32_t nxge_tx_intr_thres; 36 extern uint32_t nxge_tx_max_gathers; 37 extern uint32_t nxge_tx_tiny_pack; 38 extern uint32_t nxge_tx_use_bcopy; 39 extern uint32_t nxge_tx_lb_policy; 40 extern uint32_t nxge_no_tx_lb; 41 42 typedef struct _mac_tx_hint { 43 uint16_t sap; 44 uint16_t vid; 45 void *hash; 46 } mac_tx_hint_t, *p_mac_tx_hint_t; 47 48 int nxge_tx_lb_ring_1(p_mblk_t, uint32_t, p_mac_tx_hint_t); 49 50 int 51 nxge_start(p_nxge_t nxgep, p_tx_ring_t tx_ring_p, p_mblk_t mp) 52 { 53 int status = 0; 54 p_tx_desc_t tx_desc_ring_vp; 55 npi_handle_t npi_desc_handle; 56 nxge_os_dma_handle_t tx_desc_dma_handle; 57 p_tx_desc_t tx_desc_p; 58 p_tx_msg_t tx_msg_ring; 59 p_tx_msg_t tx_msg_p; 60 tx_desc_t tx_desc, *tmp_desc_p; 61 tx_desc_t 
sop_tx_desc, *sop_tx_desc_p; 62 p_tx_pkt_header_t hdrp; 63 p_tx_pkt_hdr_all_t pkthdrp; 64 uint8_t npads = 0; 65 uint64_t dma_ioaddr; 66 uint32_t dma_flags; 67 int last_bidx; 68 uint8_t *b_rptr; 69 caddr_t kaddr; 70 uint32_t nmblks; 71 uint32_t ngathers; 72 uint32_t clen; 73 int len; 74 uint32_t pkt_len, pack_len, min_len; 75 uint32_t bcopy_thresh; 76 int i, cur_index, sop_index; 77 uint16_t tail_index; 78 boolean_t tail_wrap = B_FALSE; 79 nxge_dma_common_t desc_area; 80 nxge_os_dma_handle_t dma_handle; 81 ddi_dma_cookie_t dma_cookie; 82 npi_handle_t npi_handle; 83 p_mblk_t nmp; 84 p_mblk_t t_mp; 85 uint32_t ncookies; 86 boolean_t good_packet; 87 boolean_t mark_mode = B_FALSE; 88 p_nxge_stats_t statsp; 89 p_nxge_tx_ring_stats_t tdc_stats; 90 t_uscalar_t start_offset = 0; 91 t_uscalar_t stuff_offset = 0; 92 t_uscalar_t end_offset = 0; 93 t_uscalar_t value = 0; 94 t_uscalar_t cksum_flags = 0; 95 boolean_t cksum_on = B_FALSE; 96 uint32_t boff = 0; 97 uint64_t tot_xfer_len = 0, tmp_len = 0; 98 boolean_t header_set = B_FALSE; 99 #ifdef NXGE_DEBUG 100 p_tx_desc_t tx_desc_ring_pp; 101 p_tx_desc_t tx_desc_pp; 102 tx_desc_t *save_desc_p; 103 int dump_len; 104 int sad_len; 105 uint64_t sad; 106 int xfer_len; 107 uint32_t msgsize; 108 #endif 109 110 NXGE_DEBUG_MSG((nxgep, TX_CTL, 111 "==> nxge_start: tx dma channel %d", tx_ring_p->tdc)); 112 NXGE_DEBUG_MSG((nxgep, TX_CTL, 113 "==> nxge_start: Starting tdc %d desc pending %d", 114 tx_ring_p->tdc, tx_ring_p->descs_pending)); 115 116 statsp = nxgep->statsp; 117 118 if (nxgep->statsp->port_stats.lb_mode == nxge_lb_normal) { 119 if ((!statsp->mac_stats.link_up) || 120 (FM_GET_DEVSTATE(nxgep) <= DDI_DEVSTATE_DEGRADED)) { 121 freemsg(mp); 122 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: " 123 "link not up or LB mode")); 124 goto nxge_start_fail1; 125 } 126 } 127 128 hcksum_retrieve(mp, NULL, NULL, &start_offset, 129 &stuff_offset, &end_offset, &value, &cksum_flags); 130 if (!NXGE_IS_VLAN_PACKET(mp->b_rptr)) { 131 start_offset += 
sizeof (ether_header_t); 132 stuff_offset += sizeof (ether_header_t); 133 } else { 134 start_offset += sizeof (struct ether_vlan_header); 135 stuff_offset += sizeof (struct ether_vlan_header); 136 } 137 138 if (cksum_flags & HCK_PARTIALCKSUM) { 139 NXGE_DEBUG_MSG((nxgep, TX_CTL, 140 "==> nxge_start: cksum_flags 0x%x (partial checksum) ", 141 cksum_flags)); 142 cksum_on = B_TRUE; 143 } 144 145 #ifdef NXGE_DEBUG 146 if (tx_ring_p->descs_pending) { 147 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: " 148 "desc pending %d ", tx_ring_p->descs_pending)); 149 } 150 151 dump_len = (int)(MBLKL(mp)); 152 dump_len = (dump_len > 128) ? 128: dump_len; 153 154 NXGE_DEBUG_MSG((nxgep, TX_CTL, 155 "==> nxge_start: tdc %d: dumping ...: b_rptr $%p " 156 "(Before header reserve: ORIGINAL LEN %d)", 157 tx_ring_p->tdc, 158 mp->b_rptr, 159 dump_len)); 160 161 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: dump packets " 162 "(IP ORIGINAL b_rptr $%p): %s", mp->b_rptr, 163 nxge_dump_packet((char *)mp->b_rptr, dump_len))); 164 #endif 165 166 MUTEX_ENTER(&tx_ring_p->lock); 167 tdc_stats = tx_ring_p->tdc_stats; 168 mark_mode = (tx_ring_p->descs_pending && 169 ((tx_ring_p->tx_ring_size - tx_ring_p->descs_pending) 170 < nxge_tx_minfree)); 171 172 NXGE_DEBUG_MSG((nxgep, TX_CTL, 173 "TX Descriptor ring is channel %d mark mode %d", 174 tx_ring_p->tdc, mark_mode)); 175 176 if (!nxge_txdma_reclaim(nxgep, tx_ring_p, nxge_tx_minfree)) { 177 NXGE_DEBUG_MSG((nxgep, TX_CTL, 178 "TX Descriptor ring is full: channel %d", 179 tx_ring_p->tdc)); 180 cas32((uint32_t *)&tx_ring_p->queueing, 0, 1); 181 tdc_stats->tx_no_desc++; 182 MUTEX_EXIT(&tx_ring_p->lock); 183 if (nxgep->resched_needed && !nxgep->resched_running) { 184 nxgep->resched_running = B_TRUE; 185 ddi_trigger_softintr(nxgep->resched_id); 186 } 187 status = 1; 188 goto nxge_start_fail1; 189 } 190 191 nmp = mp; 192 i = sop_index = tx_ring_p->wr_index; 193 nmblks = 0; 194 ngathers = 0; 195 pkt_len = 0; 196 pack_len = 0; 197 clen = 0; 198 last_bidx = 
-1; 199 good_packet = B_TRUE; 200 201 desc_area = tx_ring_p->tdc_desc; 202 npi_handle = desc_area.npi_handle; 203 npi_desc_handle.regh = (nxge_os_acc_handle_t) 204 DMA_COMMON_ACC_HANDLE(desc_area); 205 tx_desc_ring_vp = (p_tx_desc_t)DMA_COMMON_VPTR(desc_area); 206 #ifdef NXGE_DEBUG 207 tx_desc_ring_pp = (p_tx_desc_t)DMA_COMMON_IOADDR(desc_area); 208 #endif 209 tx_desc_dma_handle = (nxge_os_dma_handle_t) 210 DMA_COMMON_HANDLE(desc_area); 211 tx_msg_ring = tx_ring_p->tx_msg_ring; 212 213 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: wr_index %d i %d", 214 sop_index, i)); 215 216 #ifdef NXGE_DEBUG 217 msgsize = msgdsize(nmp); 218 NXGE_DEBUG_MSG((nxgep, TX_CTL, 219 "==> nxge_start(1): wr_index %d i %d msgdsize %d", 220 sop_index, i, msgsize)); 221 #endif 222 /* 223 * The first 16 bytes of the premapped buffer are reserved 224 * for header. No padding will be used. 225 */ 226 pkt_len = pack_len = boff = TX_PKT_HEADER_SIZE; 227 if (nxge_tx_use_bcopy) { 228 bcopy_thresh = (nxge_bcopy_thresh - TX_PKT_HEADER_SIZE); 229 } else { 230 bcopy_thresh = (TX_BCOPY_SIZE - TX_PKT_HEADER_SIZE); 231 } 232 while (nmp) { 233 good_packet = B_TRUE; 234 b_rptr = nmp->b_rptr; 235 len = MBLKL(nmp); 236 if (len <= 0) { 237 nmp = nmp->b_cont; 238 continue; 239 } 240 nmblks++; 241 242 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(1): nmblks %d " 243 "len %d pkt_len %d pack_len %d", 244 nmblks, len, pkt_len, pack_len)); 245 /* 246 * Hardware limits the transfer length to 4K. 247 * If len is more than 4K, we need to break 248 * nmp into two chunks: Make first chunk smaller 249 * than 4K. The second chunk will be broken into 250 * less than 4K (if needed) during the next pass. 
251 */ 252 if (len > TX_MAX_TRANSFER_LENGTH) { 253 t_mp = dupb(nmp); 254 nmp->b_wptr = nmp->b_rptr + TX_MAX_TRANSFER_LENGTH; 255 t_mp->b_rptr = nmp->b_wptr; 256 t_mp->b_cont = nmp->b_cont; 257 nmp->b_cont = t_mp; 258 259 len = MBLKL(nmp); 260 } 261 262 tx_desc.value = 0; 263 tx_desc_p = &tx_desc_ring_vp[i]; 264 #ifdef NXGE_DEBUG 265 tx_desc_pp = &tx_desc_ring_pp[i]; 266 #endif 267 tx_msg_p = &tx_msg_ring[i]; 268 npi_desc_handle.regp = (uint64_t)tx_desc_p; 269 if (!header_set && 270 ((!nxge_tx_use_bcopy && (len > TX_BCOPY_SIZE)) || 271 (len >= bcopy_thresh))) { 272 header_set = B_TRUE; 273 bcopy_thresh += TX_PKT_HEADER_SIZE; 274 boff = 0; 275 pack_len = 0; 276 kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma); 277 hdrp = (p_tx_pkt_header_t)kaddr; 278 clen = pkt_len; 279 dma_handle = tx_msg_p->buf_dma_handle; 280 dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma); 281 (void) ddi_dma_sync(dma_handle, 282 i * nxge_bcopy_thresh, nxge_bcopy_thresh, 283 DDI_DMA_SYNC_FORDEV); 284 285 tx_msg_p->flags.dma_type = USE_BCOPY; 286 goto nxge_start_control_header_only; 287 } 288 289 pkt_len += len; 290 pack_len += len; 291 292 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(3): " 293 "desc entry %d " 294 "DESC IOADDR $%p " 295 "desc_vp $%p tx_desc_p $%p " 296 "desc_pp $%p tx_desc_pp $%p " 297 "len %d pkt_len %d pack_len %d", 298 i, 299 DMA_COMMON_IOADDR(desc_area), 300 tx_desc_ring_vp, tx_desc_p, 301 tx_desc_ring_pp, tx_desc_pp, 302 len, pkt_len, pack_len)); 303 304 if (len < bcopy_thresh) { 305 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(4): " 306 "USE BCOPY: ")); 307 if (nxge_tx_tiny_pack) { 308 uint32_t blst = 309 TXDMA_DESC_NEXT_INDEX(i, -1, 310 tx_ring_p->tx_wrap_mask); 311 NXGE_DEBUG_MSG((nxgep, TX_CTL, 312 "==> nxge_start(5): pack")); 313 if ((pack_len <= bcopy_thresh) && 314 (last_bidx == blst)) { 315 NXGE_DEBUG_MSG((nxgep, TX_CTL, 316 "==> nxge_start: pack(6) " 317 "(pkt_len %d pack_len %d)", 318 pkt_len, pack_len)); 319 i = blst; 320 tx_desc_p = &tx_desc_ring_vp[i]; 
321 #ifdef NXGE_DEBUG 322 tx_desc_pp = &tx_desc_ring_pp[i]; 323 #endif 324 tx_msg_p = &tx_msg_ring[i]; 325 boff = pack_len - len; 326 ngathers--; 327 } else if (pack_len > bcopy_thresh) { 328 pack_len = len; 329 boff = 0; 330 bcopy_thresh = nxge_bcopy_thresh; 331 NXGE_DEBUG_MSG((nxgep, TX_CTL, 332 "==> nxge_start(7): > max NEW " 333 "bcopy thresh %d " 334 "pkt_len %d pack_len %d(next)", 335 bcopy_thresh, 336 pkt_len, pack_len)); 337 } 338 last_bidx = i; 339 } 340 kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma); 341 if ((boff == TX_PKT_HEADER_SIZE) && (nmblks == 1)) { 342 hdrp = (p_tx_pkt_header_t)kaddr; 343 header_set = B_TRUE; 344 NXGE_DEBUG_MSG((nxgep, TX_CTL, 345 "==> nxge_start(7_x2): " 346 "pkt_len %d pack_len %d (new hdrp $%p)", 347 pkt_len, pack_len, hdrp)); 348 } 349 tx_msg_p->flags.dma_type = USE_BCOPY; 350 kaddr += boff; 351 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(8): " 352 "USE BCOPY: before bcopy " 353 "DESC IOADDR $%p entry %d " 354 "bcopy packets %d " 355 "bcopy kaddr $%p " 356 "bcopy ioaddr (SAD) $%p " 357 "bcopy clen %d " 358 "bcopy boff %d", 359 DMA_COMMON_IOADDR(desc_area), i, 360 tdc_stats->tx_hdr_pkts, 361 kaddr, 362 dma_ioaddr, 363 clen, 364 boff)); 365 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: " 366 "1USE BCOPY: ")); 367 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: " 368 "2USE BCOPY: ")); 369 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: " 370 "last USE BCOPY: copy from b_rptr $%p " 371 "to KADDR $%p (len %d offset %d", 372 b_rptr, kaddr, len, boff)); 373 374 bcopy(b_rptr, kaddr, len); 375 376 #ifdef NXGE_DEBUG 377 dump_len = (len > 128) ? 
128: len; 378 NXGE_DEBUG_MSG((nxgep, TX_CTL, 379 "==> nxge_start: dump packets " 380 "(After BCOPY len %d)" 381 "(b_rptr $%p): %s", len, nmp->b_rptr, 382 nxge_dump_packet((char *)nmp->b_rptr, 383 dump_len))); 384 #endif 385 386 dma_handle = tx_msg_p->buf_dma_handle; 387 dma_ioaddr = DMA_COMMON_IOADDR(tx_msg_p->buf_dma); 388 (void) ddi_dma_sync(dma_handle, 389 i * nxge_bcopy_thresh, nxge_bcopy_thresh, 390 DDI_DMA_SYNC_FORDEV); 391 clen = len + boff; 392 tdc_stats->tx_hdr_pkts++; 393 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(9): " 394 "USE BCOPY: " 395 "DESC IOADDR $%p entry %d " 396 "bcopy packets %d " 397 "bcopy kaddr $%p " 398 "bcopy ioaddr (SAD) $%p " 399 "bcopy clen %d " 400 "bcopy boff %d", 401 DMA_COMMON_IOADDR(desc_area), 402 i, 403 tdc_stats->tx_hdr_pkts, 404 kaddr, 405 dma_ioaddr, 406 clen, 407 boff)); 408 } else { 409 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(12): " 410 "USE DVMA: len %d", len)); 411 tx_msg_p->flags.dma_type = USE_DMA; 412 dma_flags = DDI_DMA_WRITE; 413 if (len < nxge_dma_stream_thresh) { 414 dma_flags |= DDI_DMA_CONSISTENT; 415 } else { 416 dma_flags |= DDI_DMA_STREAMING; 417 } 418 419 dma_handle = tx_msg_p->dma_handle; 420 status = ddi_dma_addr_bind_handle(dma_handle, NULL, 421 (caddr_t)b_rptr, len, dma_flags, 422 DDI_DMA_DONTWAIT, NULL, 423 &dma_cookie, &ncookies); 424 if (status == DDI_DMA_MAPPED) { 425 dma_ioaddr = dma_cookie.dmac_laddress; 426 len = (int)dma_cookie.dmac_size; 427 clen = (uint32_t)dma_cookie.dmac_size; 428 NXGE_DEBUG_MSG((nxgep, TX_CTL, 429 "==> nxge_start(12_1): " 430 "USE DVMA: len %d clen %d " 431 "ngathers %d", 432 len, clen, 433 ngathers)); 434 435 npi_desc_handle.regp = (uint64_t)tx_desc_p; 436 while (ncookies > 1) { 437 ngathers++; 438 /* 439 * this is the fix for multiple 440 * cookies, which are basicaly 441 * a descriptor entry, we don't set 442 * SOP bit as well as related fields 443 */ 444 445 (void) npi_txdma_desc_gather_set( 446 npi_desc_handle, 447 &tx_desc, 448 (ngathers -1), 449 mark_mode, 
450 ngathers, 451 dma_ioaddr, 452 clen); 453 454 tx_msg_p->tx_msg_size = clen; 455 #if NXGE_DEBUG 456 NXGE_DEBUG_MSG((nxgep, TX_CTL, 457 "==> nxge_start: DMA " 458 "ncookie %d " 459 "ngathers %d " 460 "dma_ioaddr $%p len %d" 461 "desc $%p descp $%p (%d)", 462 ncookies, 463 ngathers, 464 dma_ioaddr, clen, 465 *tx_desc_p, tx_desc_p, i)); 466 #endif 467 468 469 ddi_dma_nextcookie(dma_handle, 470 &dma_cookie); 471 dma_ioaddr = 472 dma_cookie.dmac_laddress; 473 474 len = (int)dma_cookie.dmac_size; 475 clen = (uint32_t)dma_cookie.dmac_size; 476 NXGE_DEBUG_MSG((nxgep, TX_CTL, 477 "==> nxge_start(12_2): " 478 "USE DVMA: len %d clen %d ", 479 len, clen)); 480 481 i = TXDMA_DESC_NEXT_INDEX(i, 1, 482 tx_ring_p->tx_wrap_mask); 483 tx_desc_p = &tx_desc_ring_vp[i]; 484 485 npi_desc_handle.regp = 486 (uint64_t)tx_desc_p; 487 tx_msg_p = &tx_msg_ring[i]; 488 tx_msg_p->flags.dma_type = USE_NONE; 489 tx_desc.value = 0; 490 491 ncookies--; 492 } 493 tdc_stats->tx_ddi_pkts++; 494 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start:" 495 "DMA: ddi packets %d", 496 tdc_stats->tx_ddi_pkts)); 497 } else { 498 NXGE_ERROR_MSG((nxgep, NXGE_ERR_CTL, 499 "dma mapping failed for %d " 500 "bytes addr $%p flags %x (%d)", 501 len, b_rptr, status, status)); 502 good_packet = B_FALSE; 503 tdc_stats->tx_dma_bind_fail++; 504 tx_msg_p->flags.dma_type = USE_NONE; 505 goto nxge_start_fail2; 506 } 507 } /* ddi dvma */ 508 509 nmp = nmp->b_cont; 510 nxge_start_control_header_only: 511 npi_desc_handle.regp = (uint64_t)tx_desc_p; 512 ngathers++; 513 514 if (ngathers == 1) { 515 #ifdef NXGE_DEBUG 516 save_desc_p = &sop_tx_desc; 517 #endif 518 sop_tx_desc_p = &sop_tx_desc; 519 sop_tx_desc_p->value = 0; 520 sop_tx_desc_p->bits.hdw.tr_len = clen; 521 sop_tx_desc_p->bits.hdw.sad = dma_ioaddr >> 32; 522 sop_tx_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff; 523 } else { 524 #ifdef NXGE_DEBUG 525 save_desc_p = &tx_desc; 526 #endif 527 tmp_desc_p = &tx_desc; 528 tmp_desc_p->value = 0; 529 tmp_desc_p->bits.hdw.tr_len = 
clen; 530 tmp_desc_p->bits.hdw.sad = dma_ioaddr >> 32; 531 tmp_desc_p->bits.ldw.sad = dma_ioaddr & 0xffffffff; 532 533 tx_desc_p->value = tmp_desc_p->value; 534 } 535 536 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(13): " 537 "Desc_entry %d ngathers %d " 538 "desc_vp $%p tx_desc_p $%p " 539 "len %d clen %d pkt_len %d pack_len %d nmblks %d " 540 "dma_ioaddr (SAD) $%p mark %d", 541 i, ngathers, 542 tx_desc_ring_vp, tx_desc_p, 543 len, clen, pkt_len, pack_len, nmblks, 544 dma_ioaddr, mark_mode)); 545 546 #ifdef NXGE_DEBUG 547 npi_desc_handle.nxgep = nxgep; 548 npi_desc_handle.function.function = nxgep->function_num; 549 npi_desc_handle.function.instance = nxgep->instance; 550 sad = (save_desc_p->value & TX_PKT_DESC_SAD_MASK); 551 xfer_len = ((save_desc_p->value & TX_PKT_DESC_TR_LEN_MASK) >> 552 TX_PKT_DESC_TR_LEN_SHIFT); 553 554 555 NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n" 556 "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\t" 557 "mark %d sop %d\n", 558 save_desc_p->value, 559 sad, 560 save_desc_p->bits.hdw.tr_len, 561 xfer_len, 562 save_desc_p->bits.hdw.num_ptr, 563 save_desc_p->bits.hdw.mark, 564 save_desc_p->bits.hdw.sop)); 565 566 npi_txdma_dump_desc_one(npi_desc_handle, NULL, i); 567 #endif 568 569 tx_msg_p->tx_msg_size = clen; 570 i = TXDMA_DESC_NEXT_INDEX(i, 1, tx_ring_p->tx_wrap_mask); 571 if (ngathers > nxge_tx_max_gathers) { 572 good_packet = B_FALSE; 573 hcksum_retrieve(mp, NULL, NULL, &start_offset, 574 &stuff_offset, &end_offset, &value, 575 &cksum_flags); 576 577 NXGE_DEBUG_MSG((NULL, TX_CTL, 578 "==> nxge_start(14): pull msg - " 579 "len %d pkt_len %d ngathers %d", 580 len, pkt_len, ngathers)); 581 /* Pull all message blocks from b_cont */ 582 if ((msgpullup(mp, -1)) == NULL) { 583 goto nxge_start_fail2; 584 } 585 goto nxge_start_fail2; 586 } 587 } /* while (nmp) */ 588 589 tx_msg_p->tx_message = mp; 590 tx_desc_p = &tx_desc_ring_vp[sop_index]; 591 npi_desc_handle.regp = (uint64_t)tx_desc_p; 592 593 pkthdrp = (p_tx_pkt_hdr_all_t)hdrp; 594 
pkthdrp->reserved = 0; 595 hdrp->value = 0; 596 (void) nxge_fill_tx_hdr(mp, B_FALSE, cksum_on, 597 (pkt_len - TX_PKT_HEADER_SIZE), npads, pkthdrp); 598 599 if (pkt_len > NXGE_MTU_DEFAULT_MAX) { 600 tdc_stats->tx_jumbo_pkts++; 601 } 602 603 min_len = (nxgep->msg_min + TX_PKT_HEADER_SIZE + (npads * 2)); 604 if (pkt_len < min_len) { 605 /* Assume we use bcopy to premapped buffers */ 606 kaddr = (caddr_t)DMA_COMMON_VPTR(tx_msg_p->buf_dma); 607 NXGE_DEBUG_MSG((NULL, TX_CTL, 608 "==> nxge_start(14-1): < (msg_min + 16)" 609 "len %d pkt_len %d min_len %d bzero %d ngathers %d", 610 len, pkt_len, min_len, (min_len - pkt_len), ngathers)); 611 bzero((kaddr + pkt_len), (min_len - pkt_len)); 612 pkt_len = tx_msg_p->tx_msg_size = min_len; 613 614 sop_tx_desc_p->bits.hdw.tr_len = min_len; 615 616 NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value); 617 tx_desc_p->value = sop_tx_desc_p->value; 618 619 NXGE_DEBUG_MSG((NULL, TX_CTL, 620 "==> nxge_start(14-2): < msg_min - " 621 "len %d pkt_len %d min_len %d ngathers %d", 622 len, pkt_len, min_len, ngathers)); 623 } 624 625 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: cksum_flags 0x%x ", 626 cksum_flags)); 627 if (cksum_flags & HCK_PARTIALCKSUM) { 628 NXGE_DEBUG_MSG((nxgep, TX_CTL, 629 "==> nxge_start: cksum_flags 0x%x (partial checksum) ", 630 cksum_flags)); 631 cksum_on = B_TRUE; 632 NXGE_DEBUG_MSG((nxgep, TX_CTL, 633 "==> nxge_start: from IP cksum_flags 0x%x " 634 "(partial checksum) " 635 "start_offset %d stuff_offset %d", 636 cksum_flags, start_offset, stuff_offset)); 637 tmp_len = (uint64_t)(start_offset >> 1); 638 hdrp->value |= (tmp_len << TX_PKT_HEADER_L4START_SHIFT); 639 tmp_len = (uint64_t)(stuff_offset >> 1); 640 hdrp->value |= (tmp_len << TX_PKT_HEADER_L4STUFF_SHIFT); 641 642 NXGE_DEBUG_MSG((nxgep, TX_CTL, 643 "==> nxge_start: from IP cksum_flags 0x%x " 644 "(partial checksum) " 645 "after SHIFT start_offset %d stuff_offset %d", 646 cksum_flags, start_offset, stuff_offset)); 647 } 648 { 649 uint64_t tmp_len; 650 
651 /* pkt_len already includes 16 + paddings!! */ 652 /* Update the control header length */ 653 tot_xfer_len = (pkt_len - TX_PKT_HEADER_SIZE); 654 tmp_len = hdrp->value | 655 (tot_xfer_len << TX_PKT_HEADER_TOT_XFER_LEN_SHIFT); 656 657 NXGE_DEBUG_MSG((nxgep, TX_CTL, 658 "==> nxge_start(15_x1): setting SOP " 659 "tot_xfer_len 0x%llx (%d) pkt_len %d tmp_len " 660 "0x%llx hdrp->value 0x%llx", 661 tot_xfer_len, tot_xfer_len, pkt_len, 662 tmp_len, hdrp->value)); 663 #if defined(_BIG_ENDIAN) 664 hdrp->value = ddi_swap64(tmp_len); 665 #else 666 hdrp->value = tmp_len; 667 #endif 668 NXGE_DEBUG_MSG((nxgep, 669 TX_CTL, "==> nxge_start(15_x2): setting SOP " 670 "after SWAP: tot_xfer_len 0x%llx pkt_len %d " 671 "tmp_len 0x%llx hdrp->value 0x%llx", 672 tot_xfer_len, pkt_len, 673 tmp_len, hdrp->value)); 674 } 675 676 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(15): setting SOP " 677 "wr_index %d " 678 "tot_xfer_len (%d) pkt_len %d npads %d", 679 sop_index, 680 tot_xfer_len, pkt_len, 681 npads)); 682 683 sop_tx_desc_p->bits.hdw.sop = 1; 684 sop_tx_desc_p->bits.hdw.mark = mark_mode; 685 sop_tx_desc_p->bits.hdw.num_ptr = ngathers; 686 687 NXGE_MEM_PIO_WRITE64(npi_desc_handle, sop_tx_desc_p->value); 688 689 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start(16): set SOP done")); 690 691 #ifdef NXGE_DEBUG 692 npi_desc_handle.nxgep = nxgep; 693 npi_desc_handle.function.function = nxgep->function_num; 694 npi_desc_handle.function.instance = nxgep->instance; 695 696 NXGE_DEBUG_MSG((nxgep, TX_CTL, "\n\t: value 0x%llx\n" 697 "\t\tsad $%p\ttr_len %d len %d\tnptrs %d\tmark %d sop %d\n", 698 save_desc_p->value, 699 sad, 700 save_desc_p->bits.hdw.tr_len, 701 xfer_len, 702 save_desc_p->bits.hdw.num_ptr, 703 save_desc_p->bits.hdw.mark, 704 save_desc_p->bits.hdw.sop)); 705 (void) npi_txdma_dump_desc_one(npi_desc_handle, NULL, sop_index); 706 707 dump_len = (pkt_len > 128) ? 
128: pkt_len; 708 NXGE_DEBUG_MSG((nxgep, TX_CTL, 709 "==> nxge_start: dump packets(17) (after sop set, len " 710 " (len/dump_len/pkt_len/tot_xfer_len) %d/%d/%d/%d):\n" 711 "ptr $%p: %s", len, dump_len, pkt_len, tot_xfer_len, 712 (char *)hdrp, 713 nxge_dump_packet((char *)hdrp, dump_len))); 714 NXGE_DEBUG_MSG((nxgep, TX_CTL, 715 "==> nxge_start(18): TX desc sync: sop_index %d", 716 sop_index)); 717 #endif 718 719 if ((ngathers == 1) || tx_ring_p->wr_index < i) { 720 (void) ddi_dma_sync(tx_desc_dma_handle, 721 sop_index * sizeof (tx_desc_t), 722 ngathers * sizeof (tx_desc_t), 723 DDI_DMA_SYNC_FORDEV); 724 725 NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(19): sync 1 " 726 "cs_off = 0x%02X cs_s_off = 0x%02X " 727 "pkt_len %d ngathers %d sop_index %d\n", 728 stuff_offset, start_offset, 729 pkt_len, ngathers, sop_index)); 730 } else { /* more than one descriptor and wrap around */ 731 uint32_t nsdescs = tx_ring_p->tx_ring_size - sop_index; 732 (void) ddi_dma_sync(tx_desc_dma_handle, 733 sop_index * sizeof (tx_desc_t), 734 nsdescs * sizeof (tx_desc_t), 735 DDI_DMA_SYNC_FORDEV); 736 737 NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(20): sync 1 " 738 "cs_off = 0x%02X cs_s_off = 0x%02X " 739 "pkt_len %d ngathers %d sop_index %d\n", 740 stuff_offset, start_offset, 741 pkt_len, ngathers, sop_index)); 742 743 (void) ddi_dma_sync(tx_desc_dma_handle, 744 0, 745 (ngathers - nsdescs) * sizeof (tx_desc_t), 746 DDI_DMA_SYNC_FORDEV); 747 NXGE_DEBUG_MSG((nxgep, TX_CTL, "nxge_start(21): sync 2 " 748 "cs_off = 0x%02X cs_s_off = 0x%02X " 749 "pkt_len %d ngathers %d sop_index %d\n", 750 stuff_offset, start_offset, 751 pkt_len, ngathers, sop_index)); 752 } 753 754 tail_index = tx_ring_p->wr_index; 755 tail_wrap = tx_ring_p->wr_index_wrap; 756 757 tx_ring_p->wr_index = i; 758 if (tx_ring_p->wr_index <= tail_index) { 759 tx_ring_p->wr_index_wrap = ((tail_wrap == B_TRUE) ? 
760 B_FALSE : B_TRUE); 761 } 762 763 tx_ring_p->descs_pending += ngathers; 764 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX kick: " 765 "channel %d wr_index %d wrap %d ngathers %d desc_pend %d", 766 tx_ring_p->tdc, 767 tx_ring_p->wr_index, 768 tx_ring_p->wr_index_wrap, 769 ngathers, 770 tx_ring_p->descs_pending)); 771 772 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: TX KICKING: ")); 773 774 { 775 tx_ring_kick_t kick; 776 777 kick.value = 0; 778 kick.bits.ldw.wrap = tx_ring_p->wr_index_wrap; 779 kick.bits.ldw.tail = (uint16_t)tx_ring_p->wr_index; 780 781 /* Kick start the Transmit kick register */ 782 TXDMA_REG_WRITE64(NXGE_DEV_NPI_HANDLE(nxgep), 783 TX_RING_KICK_REG, 784 (uint8_t)tx_ring_p->tdc, 785 kick.value); 786 } 787 788 tdc_stats->tx_starts++; 789 790 MUTEX_EXIT(&tx_ring_p->lock); 791 792 NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start")); 793 794 return (status); 795 796 nxge_start_fail2: 797 if (good_packet == B_FALSE) { 798 cur_index = sop_index; 799 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_start: clean up")); 800 for (i = 0; i < ngathers; i++) { 801 tx_desc_p = &tx_desc_ring_vp[cur_index]; 802 npi_handle.regp = (uint64_t)tx_desc_p; 803 tx_msg_p = &tx_msg_ring[cur_index]; 804 (void) npi_txdma_desc_set_zero(npi_handle, 1); 805 if (tx_msg_p->flags.dma_type == USE_DVMA) { 806 NXGE_DEBUG_MSG((nxgep, TX_CTL, 807 "tx_desc_p = %X index = %d", 808 tx_desc_p, tx_ring_p->rd_index)); 809 (void) dvma_unload( 810 tx_msg_p->dvma_handle, 811 0, -1); 812 tx_msg_p->dvma_handle = NULL; 813 if (tx_ring_p->dvma_wr_index == 814 tx_ring_p->dvma_wrap_mask) 815 tx_ring_p->dvma_wr_index = 0; 816 else 817 tx_ring_p->dvma_wr_index++; 818 tx_ring_p->dvma_pending--; 819 } else if (tx_msg_p->flags.dma_type == 820 USE_DMA) { 821 if (ddi_dma_unbind_handle( 822 tx_msg_p->dma_handle)) 823 cmn_err(CE_WARN, "!nxge_start: " 824 "ddi_dma_unbind_handle failed"); 825 } 826 tx_msg_p->flags.dma_type = USE_NONE; 827 cur_index = TXDMA_DESC_NEXT_INDEX(cur_index, 1, 828 
tx_ring_p->tx_wrap_mask); 829 830 } 831 832 nxgep->resched_needed = B_TRUE; 833 } 834 835 MUTEX_EXIT(&tx_ring_p->lock); 836 837 nxge_start_fail1: 838 /* Add FMA to check the access handle nxge_hregh */ 839 840 NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_start")); 841 842 return (status); 843 } 844 845 boolean_t 846 nxge_send(p_nxge_t nxgep, mblk_t *mp, p_mac_tx_hint_t hp) 847 { 848 p_tx_ring_t *tx_rings; 849 uint8_t ring_index; 850 851 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_send")); 852 853 ASSERT(mp->b_next == NULL); 854 855 ring_index = nxge_tx_lb_ring_1(mp, nxgep->max_tdcs, hp); 856 tx_rings = nxgep->tx_rings->rings; 857 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_tx_msg: tx_rings $%p", 858 tx_rings)); 859 NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_tx_msg: max_tdcs %d " 860 "ring_index %d", nxgep->max_tdcs, ring_index)); 861 862 if (nxge_start(nxgep, tx_rings[ring_index], mp)) { 863 NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_send: failed " 864 "ring index %d", ring_index)); 865 return (B_FALSE); 866 } 867 868 NXGE_DEBUG_MSG((nxgep, TX_CTL, "<== nxge_send: ring index %d", 869 ring_index)); 870 871 return (B_TRUE); 872 } 873 874 875 /* 876 * nxge_m_tx() - send a chain of packets 877 */ 878 mblk_t * 879 nxge_m_tx(void *arg, mblk_t *mp) 880 { 881 p_nxge_t nxgep = (p_nxge_t)arg; 882 mblk_t *next; 883 mac_tx_hint_t hint; 884 885 if (!(nxgep->drv_state & STATE_HW_INITIALIZED)) { 886 NXGE_DEBUG_MSG((nxgep, DDI_CTL, 887 "==> nxge_m_tx: hardware not initialized")); 888 NXGE_DEBUG_MSG((nxgep, DDI_CTL, 889 "<== nxge_m_tx")); 890 return (mp); 891 } 892 893 hint.hash = NULL; 894 hint.vid = 0; 895 hint.sap = 0; 896 897 while (mp != NULL) { 898 next = mp->b_next; 899 mp->b_next = NULL; 900 901 /* 902 * Until Nemo tx resource works, the mac driver 903 * does the load balancing based on TCP port, 904 * or CPU. For debugging, we use a system 905 * configurable parameter. 
         */
        if (!nxge_send(nxgep, mp, &hint)) {
            /* Ring busy: restore the chain and return the rest. */
            mp->b_next = next;
            break;
        }

        mp = next;
    }

    return (mp);
}

/*
 * nxge_tx_lb_ring_1() -- choose a TX DMA channel index for a packet.
 *
 * Policy is selected by the nxge_tx_lb_policy tunable:
 *   NXGE_TX_LB_TCPUDP  - hash on TCP/UDP port bytes of IPv4 packets
 *                        (falls back to b_band for non-IP/QoS traffic);
 *   NXGE_TX_LB_HASH    - use the MAC-supplied flow hash from the hint;
 *   NXGE_TX_LB_DEST_MAC- use the low byte of the destination MAC.
 * Returns a ring index in [0, maxtdcs).
 */
int
nxge_tx_lb_ring_1(p_mblk_t mp, uint32_t maxtdcs, p_mac_tx_hint_t hp)
{
    uint8_t ring_index = 0;
    uint8_t *tcp_port;
    p_mblk_t nmp;
    size_t mblk_len;
    size_t iph_len;
    size_t hdrs_size;
    uint8_t hdrs_buf[sizeof (struct ether_header) +
        IP_MAX_HDR_LENGTH + sizeof (uint32_t)];
    /*
     * allocate space big enough to cover
     * the max ip header length and the first
     * 4 bytes of the TCP/IP header.
     */

    boolean_t qos = B_FALSE;

    NXGE_DEBUG_MSG((NULL, TX_CTL, "==> nxge_tx_lb_ring"));

    /* A non-zero VLAN id is treated as QoS traffic: no port hashing. */
    if (hp->vid) {
        qos = B_TRUE;
    }
    switch (nxge_tx_lb_policy) {
    case NXGE_TX_LB_TCPUDP: /* default IPv4 TCP/UDP */
    default:
        tcp_port = mp->b_rptr;
        if (!nxge_no_tx_lb && !qos &&
            (ntohs(((p_ether_header_t)tcp_port)->ether_type)
            == ETHERTYPE_IP)) {
            nmp = mp;
            mblk_len = MBLKL(nmp);
            tcp_port = NULL;
            /*
             * Fast path: the first mblk already holds the full
             * IP header plus the first 4 bytes of L4.
             */
            if (mblk_len > sizeof (struct ether_header) +
                sizeof (uint8_t)) {
                tcp_port = nmp->b_rptr +
                    sizeof (struct ether_header);
                mblk_len -= sizeof (struct ether_header);
                /* IHL field (low nibble) in 32-bit words. */
                iph_len = ((*tcp_port) & 0x0f) << 2;
                if (mblk_len > (iph_len + sizeof (uint32_t))) {
                    tcp_port = nmp->b_rptr;
                } else {
                    tcp_port = NULL;
                }
            }
            /*
             * Slow path: headers span mblks; assemble them into
             * hdrs_buf before parsing.
             */
            if (tcp_port == NULL) {
                hdrs_size = 0;
                ((p_ether_header_t)hdrs_buf)->ether_type = 0;
                while ((nmp) && (hdrs_size <
                    sizeof (hdrs_buf))) {
                    mblk_len = MBLKL(nmp);
                    if (mblk_len >=
                        (sizeof (hdrs_buf) - hdrs_size))
                        mblk_len = sizeof (hdrs_buf) -
                            hdrs_size;
                    bcopy(nmp->b_rptr,
                        &hdrs_buf[hdrs_size], mblk_len);
                    hdrs_size += mblk_len;
                    nmp = nmp->b_cont;
                }
                tcp_port = hdrs_buf;
            }
            /* tcp_port now points at the start of the IP header. */
            tcp_port += sizeof (ether_header_t);
            /*
             * NOTE(review): bytes 6-7 of the IP header hold the
             * flags/fragment-offset field; this appears to test
             * "not a fragment" (offset 0, MF clear) — confirm the
             * 0x3f mask against the IP header layout.
             */
            if (!(tcp_port[6] & 0x3f) && !(tcp_port[7] & 0xff)) {
                if ((tcp_port[9] == IPPROTO_TCP) ||
                    (tcp_port[9] == IPPROTO_UDP)) {
                    /* Hash low bytes of src/dst ports. */
                    tcp_port += ((*tcp_port) & 0x0f) << 2;
                    ring_index =
                        ((tcp_port[1] ^ tcp_port[3])
                        % maxtdcs);
                } else {
                    /* Non-TCP/UDP: hash on dst address byte. */
                    ring_index = tcp_port[19] % maxtdcs;
                }
            } else { /* fragmented packet */
                ring_index = tcp_port[19] % maxtdcs;
            }
        } else {
            ring_index = mp->b_band % maxtdcs;
        }
        break;

    case NXGE_TX_LB_HASH:
        if (hp->hash) {
            ring_index = ((uint64_t)(hp->hash) % maxtdcs);
        } else {
            ring_index = mp->b_band % maxtdcs;
        }
        break;

    case NXGE_TX_LB_DEST_MAC: /* Use destination MAC address */
        tcp_port = mp->b_rptr;
        ring_index = tcp_port[5] % maxtdcs;
        break;
    }

    NXGE_DEBUG_MSG((NULL, TX_CTL, "<== nxge_tx_lb_ring"));

    return (ring_index);
}

/*
 * nxge_reschedule() -- soft interrupt handler used to resume the MAC
 * layer's transmit path after the driver reported a full ring.
 *
 * Triggered via ddi_trigger_softintr() from nxge_start(); calls
 * mac_tx_update() once the MAC is started and a reschedule is pending.
 * Always returns DDI_INTR_CLAIMED.
 */
uint_t
nxge_reschedule(caddr_t arg)
{
    p_nxge_t nxgep;

    nxgep = (p_nxge_t)arg;

    NXGE_DEBUG_MSG((nxgep, TX_CTL, "==> nxge_reschedule"));

    if (nxgep->nxge_mac_state == NXGE_MAC_STARTED &&
        nxgep->resched_needed) {
        mac_tx_update(nxgep->mach);
        nxgep->resched_needed = B_FALSE;
        nxgep->resched_running = B_FALSE;
    }

    NXGE_DEBUG_MSG((NULL, TX_CTL, "<== nxge_reschedule"));
    return (DDI_INTR_CLAIMED);
}