1*b494511aSVenki Rajagopalan /*
2*b494511aSVenki Rajagopalan * CDDL HEADER START
3*b494511aSVenki Rajagopalan *
4*b494511aSVenki Rajagopalan * The contents of this file are subject to the terms of the
5*b494511aSVenki Rajagopalan * Common Development and Distribution License (the "License").
6*b494511aSVenki Rajagopalan * You may not use this file except in compliance with the License.
7*b494511aSVenki Rajagopalan *
8*b494511aSVenki Rajagopalan * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*b494511aSVenki Rajagopalan * or http://www.opensolaris.org/os/licensing.
10*b494511aSVenki Rajagopalan * See the License for the specific language governing permissions
11*b494511aSVenki Rajagopalan * and limitations under the License.
12*b494511aSVenki Rajagopalan *
13*b494511aSVenki Rajagopalan * When distributing Covered Code, include this CDDL HEADER in each
14*b494511aSVenki Rajagopalan * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*b494511aSVenki Rajagopalan * If applicable, add the following below this CDDL HEADER, with the
16*b494511aSVenki Rajagopalan * fields enclosed by brackets "[]" replaced with your own identifying
17*b494511aSVenki Rajagopalan * information: Portions Copyright [yyyy] [name of copyright owner]
18*b494511aSVenki Rajagopalan *
19*b494511aSVenki Rajagopalan * CDDL HEADER END
20*b494511aSVenki Rajagopalan */
21*b494511aSVenki Rajagopalan
22*b494511aSVenki Rajagopalan /*
23*b494511aSVenki Rajagopalan * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24*b494511aSVenki Rajagopalan */
25*b494511aSVenki Rajagopalan
26*b494511aSVenki Rajagopalan #include <sys/types.h>
27*b494511aSVenki Rajagopalan #include <sys/kmem.h>
28*b494511aSVenki Rajagopalan #include <sys/conf.h>
29*b494511aSVenki Rajagopalan #include <sys/ddi.h>
30*b494511aSVenki Rajagopalan #include <sys/sunddi.h>
31*b494511aSVenki Rajagopalan #include <sys/ksynch.h>
32*b494511aSVenki Rajagopalan #include <sys/pattr.h> /* HCK_* */
33*b494511aSVenki Rajagopalan #include <inet/ip.h> /* ipha_t */
34*b494511aSVenki Rajagopalan #include <inet/tcp.h> /* tcph_t */
35*b494511aSVenki Rajagopalan #include <sys/mac_provider.h> /* mac_* */
36*b494511aSVenki Rajagopalan #include <sys/strsun.h> /* MBLKL */
37*b494511aSVenki Rajagopalan
38*b494511aSVenki Rajagopalan #include <sys/ib/clients/eoib/eib_impl.h>
39*b494511aSVenki Rajagopalan
40*b494511aSVenki Rajagopalan /*
41*b494511aSVenki Rajagopalan * Declarations private to this file
42*b494511aSVenki Rajagopalan */
43*b494511aSVenki Rajagopalan static int eib_data_setup_cqs(eib_t *, eib_vnic_t *);
44*b494511aSVenki Rajagopalan static int eib_data_setup_ud_channel(eib_t *, eib_vnic_t *);
45*b494511aSVenki Rajagopalan static void eib_data_setup_lso(eib_wqe_t *, mblk_t *, uint32_t,
46*b494511aSVenki Rajagopalan eib_ether_hdr_t *);
47*b494511aSVenki Rajagopalan static int eib_data_prepare_sgl(eib_vnic_t *, eib_wqe_t *, mblk_t *);
48*b494511aSVenki Rajagopalan static int eib_data_is_mcast_pkt_ok(eib_vnic_t *, uint8_t *, uint64_t *,
49*b494511aSVenki Rajagopalan uint64_t *);
50*b494511aSVenki Rajagopalan static void eib_data_rx_comp_intr(ibt_cq_hdl_t, void *);
51*b494511aSVenki Rajagopalan static void eib_data_tx_comp_intr(ibt_cq_hdl_t, void *);
52*b494511aSVenki Rajagopalan static mblk_t *eib_data_rx_comp(eib_vnic_t *, eib_wqe_t *, ibt_wc_t *);
53*b494511aSVenki Rajagopalan static void eib_data_tx_comp(eib_vnic_t *, eib_wqe_t *, eib_chan_t *);
54*b494511aSVenki Rajagopalan static void eib_data_err_comp(eib_vnic_t *, eib_wqe_t *, ibt_wc_t *);
55*b494511aSVenki Rajagopalan static void eib_rb_data_setup_cqs(eib_t *, eib_vnic_t *);
56*b494511aSVenki Rajagopalan static void eib_rb_data_setup_ud_channel(eib_t *, eib_vnic_t *);
57*b494511aSVenki Rajagopalan
58*b494511aSVenki Rajagopalan
59*b494511aSVenki Rajagopalan int
eib_data_create_qp(eib_t * ss,eib_vnic_t * vnic,int * err)60*b494511aSVenki Rajagopalan eib_data_create_qp(eib_t *ss, eib_vnic_t *vnic, int *err)
61*b494511aSVenki Rajagopalan {
62*b494511aSVenki Rajagopalan eib_chan_t *chan = NULL;
63*b494511aSVenki Rajagopalan
64*b494511aSVenki Rajagopalan /*
65*b494511aSVenki Rajagopalan * Allocate a eib_chan_t to store stuff about this vnic's data qp
66*b494511aSVenki Rajagopalan * and initialize it with default admin qp pkey parameters. We'll
67*b494511aSVenki Rajagopalan * re-associate this with the pkey we receive from the gw once we
68*b494511aSVenki Rajagopalan * receive the login ack.
69*b494511aSVenki Rajagopalan */
70*b494511aSVenki Rajagopalan vnic->vn_data_chan = eib_chan_init();
71*b494511aSVenki Rajagopalan
72*b494511aSVenki Rajagopalan chan = vnic->vn_data_chan;
73*b494511aSVenki Rajagopalan chan->ch_pkey = ss->ei_admin_chan->ch_pkey;
74*b494511aSVenki Rajagopalan chan->ch_pkey_ix = ss->ei_admin_chan->ch_pkey_ix;
75*b494511aSVenki Rajagopalan chan->ch_vnic_inst = vnic->vn_instance;
76*b494511aSVenki Rajagopalan
77*b494511aSVenki Rajagopalan /*
78*b494511aSVenki Rajagopalan * Setup tx/rx CQs and completion handlers
79*b494511aSVenki Rajagopalan */
80*b494511aSVenki Rajagopalan if (eib_data_setup_cqs(ss, vnic) != EIB_E_SUCCESS) {
81*b494511aSVenki Rajagopalan EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_create_qp: "
82*b494511aSVenki Rajagopalan "eib_data_setup_cqs(vn_inst=0x%x) failed",
83*b494511aSVenki Rajagopalan vnic->vn_instance);
84*b494511aSVenki Rajagopalan *err = ENOMEM;
85*b494511aSVenki Rajagopalan goto data_create_qp_fail;
86*b494511aSVenki Rajagopalan }
87*b494511aSVenki Rajagopalan
88*b494511aSVenki Rajagopalan /*
89*b494511aSVenki Rajagopalan * Setup UD channel
90*b494511aSVenki Rajagopalan */
91*b494511aSVenki Rajagopalan if (eib_data_setup_ud_channel(ss, vnic) != EIB_E_SUCCESS) {
92*b494511aSVenki Rajagopalan EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_create_qp: "
93*b494511aSVenki Rajagopalan "eib_data_setup_ud_channel(vn_inst=0x%x) failed",
94*b494511aSVenki Rajagopalan vnic->vn_instance);
95*b494511aSVenki Rajagopalan *err = ENOMEM;
96*b494511aSVenki Rajagopalan goto data_create_qp_fail;
97*b494511aSVenki Rajagopalan }
98*b494511aSVenki Rajagopalan
99*b494511aSVenki Rajagopalan return (EIB_E_SUCCESS);
100*b494511aSVenki Rajagopalan
101*b494511aSVenki Rajagopalan data_create_qp_fail:
102*b494511aSVenki Rajagopalan eib_rb_data_create_qp(ss, vnic);
103*b494511aSVenki Rajagopalan return (EIB_E_FAILURE);
104*b494511aSVenki Rajagopalan }
105*b494511aSVenki Rajagopalan
106*b494511aSVenki Rajagopalan /*ARGSUSED*/
107*b494511aSVenki Rajagopalan uint_t
eib_data_rx_comp_handler(caddr_t arg1,caddr_t arg2)108*b494511aSVenki Rajagopalan eib_data_rx_comp_handler(caddr_t arg1, caddr_t arg2)
109*b494511aSVenki Rajagopalan {
110*b494511aSVenki Rajagopalan eib_vnic_t *vnic = (eib_vnic_t *)(void *)arg1;
111*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
112*b494511aSVenki Rajagopalan eib_chan_t *chan = vnic->vn_data_chan;
113*b494511aSVenki Rajagopalan eib_stats_t *stats = ss->ei_stats;
114*b494511aSVenki Rajagopalan ibt_wc_t *wc;
115*b494511aSVenki Rajagopalan eib_wqe_t *wqe;
116*b494511aSVenki Rajagopalan mblk_t *mp;
117*b494511aSVenki Rajagopalan mblk_t *head = NULL;
118*b494511aSVenki Rajagopalan mblk_t *tail = NULL;
119*b494511aSVenki Rajagopalan ibt_status_t ret;
120*b494511aSVenki Rajagopalan uint_t pkts_per_call = 0;
121*b494511aSVenki Rajagopalan uint_t polled;
122*b494511aSVenki Rajagopalan uint_t rbytes;
123*b494511aSVenki Rajagopalan uint_t ipkts;
124*b494511aSVenki Rajagopalan uint_t num_wc;
125*b494511aSVenki Rajagopalan int i;
126*b494511aSVenki Rajagopalan
127*b494511aSVenki Rajagopalan /*
128*b494511aSVenki Rajagopalan * Re-arm the rx notification callback before we start polling
129*b494511aSVenki Rajagopalan * the completion queue. There's nothing much we can do if the
130*b494511aSVenki Rajagopalan * enable_cq_notify fails - we issue a warning and move on.
131*b494511aSVenki Rajagopalan */
132*b494511aSVenki Rajagopalan ret = ibt_enable_cq_notify(chan->ch_rcv_cq_hdl, IBT_NEXT_COMPLETION);
133*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
134*b494511aSVenki Rajagopalan EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp_handler: "
135*b494511aSVenki Rajagopalan "ibt_enable_cq_notify() failed, ret=%d", ret);
136*b494511aSVenki Rajagopalan }
137*b494511aSVenki Rajagopalan
138*b494511aSVenki Rajagopalan /*
139*b494511aSVenki Rajagopalan * We don't want to be stuck in receive processing for too long without
140*b494511aSVenki Rajagopalan * giving others a chance.
141*b494511aSVenki Rajagopalan */
142*b494511aSVenki Rajagopalan num_wc = (chan->ch_rcv_cq_sz < EIB_MAX_RX_PKTS_ONINTR) ?
143*b494511aSVenki Rajagopalan chan->ch_rcv_cq_sz : EIB_MAX_RX_PKTS_ONINTR;
144*b494511aSVenki Rajagopalan
145*b494511aSVenki Rajagopalan /*
146*b494511aSVenki Rajagopalan * Handle rx completions
147*b494511aSVenki Rajagopalan */
148*b494511aSVenki Rajagopalan while ((ret = ibt_poll_cq(chan->ch_rcv_cq_hdl, chan->ch_rcv_wc,
149*b494511aSVenki Rajagopalan num_wc, &polled)) == IBT_SUCCESS) {
150*b494511aSVenki Rajagopalan
151*b494511aSVenki Rajagopalan rbytes = ipkts = 0;
152*b494511aSVenki Rajagopalan head = tail = NULL;
153*b494511aSVenki Rajagopalan
154*b494511aSVenki Rajagopalan for (wc = chan->ch_rcv_wc, i = 0; i < polled; i++, wc++) {
155*b494511aSVenki Rajagopalan wqe = (eib_wqe_t *)(uintptr_t)wc->wc_id;
156*b494511aSVenki Rajagopalan
157*b494511aSVenki Rajagopalan ASSERT(EIB_WQE_TYPE(wqe->qe_info) == EIB_WQE_RX);
158*b494511aSVenki Rajagopalan
159*b494511aSVenki Rajagopalan /*
160*b494511aSVenki Rajagopalan * Clear the posted-to-hca flag and reduce the number
161*b494511aSVenki Rajagopalan * of posted-rwqes count
162*b494511aSVenki Rajagopalan */
163*b494511aSVenki Rajagopalan wqe->qe_info &= (~EIB_WQE_FLG_POSTED_TO_HCA);
164*b494511aSVenki Rajagopalan eib_rsrc_decr_posted_rwqe(ss, chan);
165*b494511aSVenki Rajagopalan
166*b494511aSVenki Rajagopalan rbytes += wc->wc_bytes_xfer;
167*b494511aSVenki Rajagopalan if (wc->wc_status != IBT_WC_SUCCESS) {
168*b494511aSVenki Rajagopalan EIB_INCR_COUNTER(&stats->st_ierrors);
169*b494511aSVenki Rajagopalan eib_data_err_comp(vnic, wqe, wc);
170*b494511aSVenki Rajagopalan } else {
171*b494511aSVenki Rajagopalan ipkts++;
172*b494511aSVenki Rajagopalan mp = eib_data_rx_comp(vnic, wqe, wc);
173*b494511aSVenki Rajagopalan if (mp == NULL) {
174*b494511aSVenki Rajagopalan continue;
175*b494511aSVenki Rajagopalan } else {
176*b494511aSVenki Rajagopalan /*
177*b494511aSVenki Rajagopalan * Add this mp to the list to
178*b494511aSVenki Rajagopalan * send it to the nw layer. Note
179*b494511aSVenki Rajagopalan * that the wqe could've been
180*b494511aSVenki Rajagopalan * returned to the pool if we're
181*b494511aSVenki Rajagopalan * running low, so don't process
182*b494511aSVenki Rajagopalan * wqe after this point.
183*b494511aSVenki Rajagopalan */
184*b494511aSVenki Rajagopalan if (head)
185*b494511aSVenki Rajagopalan tail->b_next = mp;
186*b494511aSVenki Rajagopalan else
187*b494511aSVenki Rajagopalan head = mp;
188*b494511aSVenki Rajagopalan tail = mp;
189*b494511aSVenki Rajagopalan }
190*b494511aSVenki Rajagopalan }
191*b494511aSVenki Rajagopalan }
192*b494511aSVenki Rajagopalan
193*b494511aSVenki Rajagopalan /*
194*b494511aSVenki Rajagopalan * We reduce the number of atomic updates to key statistics
195*b494511aSVenki Rajagopalan * by pooling them here, once per ibt_poll_cq(). The accuracy
196*b494511aSVenki Rajagopalan * and consistency of the published statistics within a cq
197*b494511aSVenki Rajagopalan * polling cycle will be compromised a little bit, but that
198*b494511aSVenki Rajagopalan * should be ok, given that we probably gain a little bit by
199*b494511aSVenki Rajagopalan * not having to do these atomic operations per packet.
200*b494511aSVenki Rajagopalan */
201*b494511aSVenki Rajagopalan EIB_UPDATE_COUNTER(&stats->st_rbytes, rbytes);
202*b494511aSVenki Rajagopalan EIB_UPDATE_COUNTER(&stats->st_ipkts, ipkts);
203*b494511aSVenki Rajagopalan
204*b494511aSVenki Rajagopalan pkts_per_call += ipkts;
205*b494511aSVenki Rajagopalan
206*b494511aSVenki Rajagopalan if (head) {
207*b494511aSVenki Rajagopalan mac_rx(ss->ei_mac_hdl, NULL, head);
208*b494511aSVenki Rajagopalan }
209*b494511aSVenki Rajagopalan
210*b494511aSVenki Rajagopalan /*
211*b494511aSVenki Rajagopalan * If we have processed too many packets in one attempt, we'll
212*b494511aSVenki Rajagopalan * have to come back here later.
213*b494511aSVenki Rajagopalan */
214*b494511aSVenki Rajagopalan if (pkts_per_call >= EIB_MAX_RX_PKTS_ONINTR) {
215*b494511aSVenki Rajagopalan (void) ddi_intr_trigger_softint(vnic->vn_data_rx_si_hdl,
216*b494511aSVenki Rajagopalan NULL);
217*b494511aSVenki Rajagopalan break;
218*b494511aSVenki Rajagopalan }
219*b494511aSVenki Rajagopalan
220*b494511aSVenki Rajagopalan num_wc -= polled;
221*b494511aSVenki Rajagopalan }
222*b494511aSVenki Rajagopalan
223*b494511aSVenki Rajagopalan return (DDI_INTR_CLAIMED);
224*b494511aSVenki Rajagopalan }
225*b494511aSVenki Rajagopalan
226*b494511aSVenki Rajagopalan /*ARGSUSED*/
227*b494511aSVenki Rajagopalan uint_t
eib_data_tx_comp_handler(caddr_t arg1,caddr_t arg2)228*b494511aSVenki Rajagopalan eib_data_tx_comp_handler(caddr_t arg1, caddr_t arg2)
229*b494511aSVenki Rajagopalan {
230*b494511aSVenki Rajagopalan eib_vnic_t *vnic = (eib_vnic_t *)(void *)arg1;
231*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
232*b494511aSVenki Rajagopalan eib_chan_t *chan = vnic->vn_data_chan;
233*b494511aSVenki Rajagopalan eib_stats_t *stats = ss->ei_stats;
234*b494511aSVenki Rajagopalan ibt_wc_t *wc;
235*b494511aSVenki Rajagopalan eib_wqe_t *wqe;
236*b494511aSVenki Rajagopalan ibt_status_t ret;
237*b494511aSVenki Rajagopalan uint_t polled;
238*b494511aSVenki Rajagopalan int i;
239*b494511aSVenki Rajagopalan
240*b494511aSVenki Rajagopalan /*
241*b494511aSVenki Rajagopalan * Re-arm the tx notification callback before we start polling
242*b494511aSVenki Rajagopalan * the completion queue. There's nothing much we can do if the
243*b494511aSVenki Rajagopalan * enable_cq_notify fails - we issue a warning and move on.
244*b494511aSVenki Rajagopalan */
245*b494511aSVenki Rajagopalan ret = ibt_enable_cq_notify(chan->ch_cq_hdl, IBT_NEXT_COMPLETION);
246*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
247*b494511aSVenki Rajagopalan EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_tx_comp_handler: "
248*b494511aSVenki Rajagopalan "ibt_enable_cq_notify() failed, ret=%d", ret);
249*b494511aSVenki Rajagopalan }
250*b494511aSVenki Rajagopalan
251*b494511aSVenki Rajagopalan /*
252*b494511aSVenki Rajagopalan * Handle tx completions
253*b494511aSVenki Rajagopalan */
254*b494511aSVenki Rajagopalan while ((ret = ibt_poll_cq(chan->ch_cq_hdl, chan->ch_wc, chan->ch_cq_sz,
255*b494511aSVenki Rajagopalan &polled)) == IBT_SUCCESS) {
256*b494511aSVenki Rajagopalan for (wc = chan->ch_wc, i = 0; i < polled; i++, wc++) {
257*b494511aSVenki Rajagopalan wqe = (eib_wqe_t *)(uintptr_t)wc->wc_id;
258*b494511aSVenki Rajagopalan
259*b494511aSVenki Rajagopalan ASSERT(EIB_WQE_TYPE(wqe->qe_info) == EIB_WQE_TX);
260*b494511aSVenki Rajagopalan
261*b494511aSVenki Rajagopalan if (wc->wc_status != IBT_WC_SUCCESS) {
262*b494511aSVenki Rajagopalan EIB_INCR_COUNTER(&stats->st_oerrors);
263*b494511aSVenki Rajagopalan eib_data_err_comp(vnic, wqe, wc);
264*b494511aSVenki Rajagopalan } else {
265*b494511aSVenki Rajagopalan eib_data_tx_comp(vnic, wqe, vnic->vn_data_chan);
266*b494511aSVenki Rajagopalan }
267*b494511aSVenki Rajagopalan }
268*b494511aSVenki Rajagopalan }
269*b494511aSVenki Rajagopalan
270*b494511aSVenki Rajagopalan return (DDI_INTR_CLAIMED);
271*b494511aSVenki Rajagopalan }
272*b494511aSVenki Rajagopalan
273*b494511aSVenki Rajagopalan void
eib_data_rx_recycle(caddr_t arg)274*b494511aSVenki Rajagopalan eib_data_rx_recycle(caddr_t arg)
275*b494511aSVenki Rajagopalan {
276*b494511aSVenki Rajagopalan eib_wqe_t *rwqe = (eib_wqe_t *)(void *)arg;
277*b494511aSVenki Rajagopalan eib_t *ss = rwqe->qe_pool->wp_ss;
278*b494511aSVenki Rajagopalan eib_chan_t *vn_chan;
279*b494511aSVenki Rajagopalan uint_t nic_state;
280*b494511aSVenki Rajagopalan int ret;
281*b494511aSVenki Rajagopalan
282*b494511aSVenki Rajagopalan /*
283*b494511aSVenki Rajagopalan * We come here from three places - (a) from the nw layer if the
284*b494511aSVenki Rajagopalan * rx mblk we handed to it has been done with and the nw layer is
285*b494511aSVenki Rajagopalan * calling the freemsg() (b) from eib_data_rx_comp() if the rx
286*b494511aSVenki Rajagopalan * completion processing discovers that the received EoIB packet
287*b494511aSVenki Rajagopalan * has a problem and (c) from eib_data_err_comp() if we're tearing
288*b494511aSVenki Rajagopalan * down this channel. We only need to repost the rwqe if we're
289*b494511aSVenki Rajagopalan * being called back from the nw layer. For the other two cases,
290*b494511aSVenki Rajagopalan * we'll simply return the rwqe to the pool. Also, since we would've
291*b494511aSVenki Rajagopalan * already updated the ch_rx_posted counters in the rx completion
292*b494511aSVenki Rajagopalan * handler, we don't pass the chan pointer to eib_rsrc_return_rwqe
293*b494511aSVenki Rajagopalan * from within this routine.
294*b494511aSVenki Rajagopalan */
295*b494511aSVenki Rajagopalan rwqe->qe_mp = NULL;
296*b494511aSVenki Rajagopalan if ((rwqe->qe_info & EIB_WQE_FLG_WITH_NW) == 0) {
297*b494511aSVenki Rajagopalan eib_rsrc_return_rwqe(ss, rwqe, NULL);
298*b494511aSVenki Rajagopalan return;
299*b494511aSVenki Rajagopalan }
300*b494511aSVenki Rajagopalan
301*b494511aSVenki Rajagopalan rwqe->qe_info &= (~EIB_WQE_FLG_WITH_NW);
302*b494511aSVenki Rajagopalan
303*b494511aSVenki Rajagopalan /*
304*b494511aSVenki Rajagopalan * If the buffers are being returned by nw layer after a long
305*b494511aSVenki Rajagopalan * time, this eoib instance could've even been stopped by now.
306*b494511aSVenki Rajagopalan * If so, simply return the rwqe to the pool.
307*b494511aSVenki Rajagopalan */
308*b494511aSVenki Rajagopalan nic_state = eib_mac_get_nic_state(ss);
309*b494511aSVenki Rajagopalan if ((nic_state & EIB_NIC_STARTED) != EIB_NIC_STARTED) {
310*b494511aSVenki Rajagopalan eib_rsrc_return_rwqe(ss, rwqe, NULL);
311*b494511aSVenki Rajagopalan return;
312*b494511aSVenki Rajagopalan }
313*b494511aSVenki Rajagopalan
314*b494511aSVenki Rajagopalan /*
315*b494511aSVenki Rajagopalan * Or it could've taken even longer, and the nic has even been
316*b494511aSVenki Rajagopalan * restarted. Only thing we can do is to make sure that the
317*b494511aSVenki Rajagopalan * original channel pointer we passed corresponds to what's in
318*b494511aSVenki Rajagopalan * the instance of the vnic currently.
319*b494511aSVenki Rajagopalan */
320*b494511aSVenki Rajagopalan vn_chan = eib_vnic_get_data_chan(ss, rwqe->qe_vnic_inst);
321*b494511aSVenki Rajagopalan if (vn_chan == NULL || vn_chan != rwqe->qe_chan) {
322*b494511aSVenki Rajagopalan eib_rsrc_return_rwqe(ss, rwqe, NULL);
323*b494511aSVenki Rajagopalan return;
324*b494511aSVenki Rajagopalan }
325*b494511aSVenki Rajagopalan
326*b494511aSVenki Rajagopalan /*
327*b494511aSVenki Rajagopalan * Try to repost the rwqe if we're not tearing down this channel
328*b494511aSVenki Rajagopalan */
329*b494511aSVenki Rajagopalan if (vn_chan->ch_tear_down) {
330*b494511aSVenki Rajagopalan eib_rsrc_return_rwqe(ss, rwqe, NULL);
331*b494511aSVenki Rajagopalan } else {
332*b494511aSVenki Rajagopalan ret = eib_chan_post_recv(ss, vn_chan, rwqe);
333*b494511aSVenki Rajagopalan if (ret != EIB_E_SUCCESS) {
334*b494511aSVenki Rajagopalan if (rwqe->qe_mp)
335*b494511aSVenki Rajagopalan freemsg(rwqe->qe_mp);
336*b494511aSVenki Rajagopalan else
337*b494511aSVenki Rajagopalan eib_rsrc_return_rwqe(ss, rwqe, NULL);
338*b494511aSVenki Rajagopalan }
339*b494511aSVenki Rajagopalan }
340*b494511aSVenki Rajagopalan }
341*b494511aSVenki Rajagopalan
342*b494511aSVenki Rajagopalan void
eib_data_post_tx(eib_vnic_t * vnic,eib_wqe_t * swqe)343*b494511aSVenki Rajagopalan eib_data_post_tx(eib_vnic_t *vnic, eib_wqe_t *swqe)
344*b494511aSVenki Rajagopalan {
345*b494511aSVenki Rajagopalan eib_chan_t *chan = vnic->vn_data_chan;
346*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
347*b494511aSVenki Rajagopalan eib_stats_t *stats = vnic->vn_ss->ei_stats;
348*b494511aSVenki Rajagopalan ibt_send_wr_t wrs[EIB_MAX_POST_MULTIPLE];
349*b494511aSVenki Rajagopalan eib_wqe_t *wqes[EIB_MAX_POST_MULTIPLE];
350*b494511aSVenki Rajagopalan eib_wqe_t *elem;
351*b494511aSVenki Rajagopalan ibt_status_t ret;
352*b494511aSVenki Rajagopalan uint_t n_wrs;
353*b494511aSVenki Rajagopalan uint_t n_posted;
354*b494511aSVenki Rajagopalan uint_t total_failed = 0;
355*b494511aSVenki Rajagopalan uint_t n_failed = 0;
356*b494511aSVenki Rajagopalan uint_t i;
357*b494511aSVenki Rajagopalan
358*b494511aSVenki Rajagopalan /*
359*b494511aSVenki Rajagopalan * See if we have room for this wqe and then add it to the
360*b494511aSVenki Rajagopalan * list of tx wrs to post in this channel.
361*b494511aSVenki Rajagopalan */
362*b494511aSVenki Rajagopalan mutex_enter(&chan->ch_tx_lock);
363*b494511aSVenki Rajagopalan
364*b494511aSVenki Rajagopalan if ((chan->ch_tx_posted + 1) >= (chan->ch_max_swqes - 1)) {
365*b494511aSVenki Rajagopalan EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_post_tx: "
366*b494511aSVenki Rajagopalan "too many swqes posted already, posted=0x%lx, "
367*b494511aSVenki Rajagopalan "max=0x%lx", chan->ch_tx_posted, chan->ch_max_swqes);
368*b494511aSVenki Rajagopalan mutex_exit(&chan->ch_tx_lock);
369*b494511aSVenki Rajagopalan return;
370*b494511aSVenki Rajagopalan }
371*b494511aSVenki Rajagopalan
372*b494511aSVenki Rajagopalan swqe->qe_nxt_post = NULL;
373*b494511aSVenki Rajagopalan if (chan->ch_tx) {
374*b494511aSVenki Rajagopalan chan->ch_tx_tail->qe_nxt_post = swqe;
375*b494511aSVenki Rajagopalan } else {
376*b494511aSVenki Rajagopalan chan->ch_tx = swqe;
377*b494511aSVenki Rajagopalan }
378*b494511aSVenki Rajagopalan chan->ch_tx_tail = swqe;
379*b494511aSVenki Rajagopalan chan->ch_tx_posted++; /* pre-increment */
380*b494511aSVenki Rajagopalan
381*b494511aSVenki Rajagopalan /*
382*b494511aSVenki Rajagopalan * If someone's already posting tx wqes in this channel, let
383*b494511aSVenki Rajagopalan * them post ours as well.
384*b494511aSVenki Rajagopalan */
385*b494511aSVenki Rajagopalan if (chan->ch_tx_busy == B_TRUE) {
386*b494511aSVenki Rajagopalan mutex_exit(&chan->ch_tx_lock);
387*b494511aSVenki Rajagopalan return;
388*b494511aSVenki Rajagopalan }
389*b494511aSVenki Rajagopalan chan->ch_tx_busy = B_TRUE;
390*b494511aSVenki Rajagopalan
391*b494511aSVenki Rajagopalan while (chan->ch_tx) {
392*b494511aSVenki Rajagopalan /*
393*b494511aSVenki Rajagopalan * Post EIB_MAX_POST_MULTIPLE wrs at a time
394*b494511aSVenki Rajagopalan */
395*b494511aSVenki Rajagopalan for (n_wrs = 0, elem = chan->ch_tx;
396*b494511aSVenki Rajagopalan (elem) && (n_wrs < EIB_MAX_POST_MULTIPLE);
397*b494511aSVenki Rajagopalan elem = elem->qe_nxt_post, n_wrs++) {
398*b494511aSVenki Rajagopalan wqes[n_wrs] = elem;
399*b494511aSVenki Rajagopalan wrs[n_wrs] = (elem->qe_wr).send;
400*b494511aSVenki Rajagopalan }
401*b494511aSVenki Rajagopalan chan->ch_tx = elem;
402*b494511aSVenki Rajagopalan if (elem == NULL) {
403*b494511aSVenki Rajagopalan chan->ch_tx_tail = NULL;
404*b494511aSVenki Rajagopalan }
405*b494511aSVenki Rajagopalan mutex_exit(&chan->ch_tx_lock);
406*b494511aSVenki Rajagopalan
407*b494511aSVenki Rajagopalan ASSERT(n_wrs != 0);
408*b494511aSVenki Rajagopalan
409*b494511aSVenki Rajagopalan /*
410*b494511aSVenki Rajagopalan * If multiple wrs posting fails for some reason, we'll try
411*b494511aSVenki Rajagopalan * posting the unposted ones one by one. If even that fails,
412*b494511aSVenki Rajagopalan * we'll release any mappings/buffers/mblks associated with
413*b494511aSVenki Rajagopalan * this wqe and return it to the pool.
414*b494511aSVenki Rajagopalan */
415*b494511aSVenki Rajagopalan n_posted = n_failed = 0;
416*b494511aSVenki Rajagopalan ret = ibt_post_send(chan->ch_chan, wrs, n_wrs, &n_posted);
417*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
418*b494511aSVenki Rajagopalan EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_post_tx: "
419*b494511aSVenki Rajagopalan "ibt_post_send(n_wrs=0x%lx, n_posted=0x%lx) "
420*b494511aSVenki Rajagopalan "failed, ret=%d", n_wrs, n_posted, ret);
421*b494511aSVenki Rajagopalan
422*b494511aSVenki Rajagopalan for (i = n_posted; i < n_wrs; i++) {
423*b494511aSVenki Rajagopalan ret = ibt_post_send(chan->ch_chan, &wrs[i],
424*b494511aSVenki Rajagopalan 1, NULL);
425*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
426*b494511aSVenki Rajagopalan n_failed++;
427*b494511aSVenki Rajagopalan eib_data_tx_comp(vnic, wqes[i], chan);
428*b494511aSVenki Rajagopalan
429*b494511aSVenki Rajagopalan EIB_DPRINTF_WARN(ss->ei_instance,
430*b494511aSVenki Rajagopalan "eib_data_post_tx: "
431*b494511aSVenki Rajagopalan "ibt_post_send(n_wrs=1) failed, "
432*b494511aSVenki Rajagopalan "ret=%d", ret);
433*b494511aSVenki Rajagopalan }
434*b494511aSVenki Rajagopalan }
435*b494511aSVenki Rajagopalan }
436*b494511aSVenki Rajagopalan total_failed += n_failed;
437*b494511aSVenki Rajagopalan
438*b494511aSVenki Rajagopalan mutex_enter(&chan->ch_tx_lock);
439*b494511aSVenki Rajagopalan }
440*b494511aSVenki Rajagopalan
441*b494511aSVenki Rajagopalan chan->ch_tx_busy = B_FALSE;
442*b494511aSVenki Rajagopalan mutex_exit(&chan->ch_tx_lock);
443*b494511aSVenki Rajagopalan
444*b494511aSVenki Rajagopalan /*
445*b494511aSVenki Rajagopalan * If we failed to post something, update error stats
446*b494511aSVenki Rajagopalan */
447*b494511aSVenki Rajagopalan if (total_failed) {
448*b494511aSVenki Rajagopalan EIB_UPDATE_COUNTER(&stats->st_oerrors, total_failed);
449*b494511aSVenki Rajagopalan }
450*b494511aSVenki Rajagopalan }
451*b494511aSVenki Rajagopalan
452*b494511aSVenki Rajagopalan void
eib_data_parse_ether_hdr(mblk_t * mp,eib_ether_hdr_t * evh)453*b494511aSVenki Rajagopalan eib_data_parse_ether_hdr(mblk_t *mp, eib_ether_hdr_t *evh)
454*b494511aSVenki Rajagopalan {
455*b494511aSVenki Rajagopalan struct ether_vlan_header *vl_hdr;
456*b494511aSVenki Rajagopalan struct ether_header *hdr;
457*b494511aSVenki Rajagopalan
458*b494511aSVenki Rajagopalan /*
459*b494511aSVenki Rajagopalan * Assume that the ether header (with or without vlan tag) is
460*b494511aSVenki Rajagopalan * contained in one fragment
461*b494511aSVenki Rajagopalan */
462*b494511aSVenki Rajagopalan hdr = (struct ether_header *)(void *)mp->b_rptr;
463*b494511aSVenki Rajagopalan vl_hdr = (struct ether_vlan_header *)(void *)mp->b_rptr;
464*b494511aSVenki Rajagopalan
465*b494511aSVenki Rajagopalan evh->eh_ether_type = ntohs(hdr->ether_type);
466*b494511aSVenki Rajagopalan if (evh->eh_ether_type != ETHERTYPE_VLAN) {
467*b494511aSVenki Rajagopalan evh->eh_tagless = 1;
468*b494511aSVenki Rajagopalan evh->eh_vlan = 0;
469*b494511aSVenki Rajagopalan ether_copy((void *)hdr->ether_dhost.ether_addr_octet,
470*b494511aSVenki Rajagopalan (void *)evh->eh_dmac);
471*b494511aSVenki Rajagopalan ether_copy((void *)hdr->ether_shost.ether_addr_octet,
472*b494511aSVenki Rajagopalan (void *)evh->eh_smac);
473*b494511aSVenki Rajagopalan } else {
474*b494511aSVenki Rajagopalan evh->eh_ether_type = ntohs(vl_hdr->ether_type);
475*b494511aSVenki Rajagopalan evh->eh_tagless = 0;
476*b494511aSVenki Rajagopalan evh->eh_vlan = VLAN_ID(ntohs(vl_hdr->ether_tci));
477*b494511aSVenki Rajagopalan ether_copy((void *)vl_hdr->ether_dhost.ether_addr_octet,
478*b494511aSVenki Rajagopalan (void *)evh->eh_dmac);
479*b494511aSVenki Rajagopalan ether_copy((void *)vl_hdr->ether_shost.ether_addr_octet,
480*b494511aSVenki Rajagopalan (void *)evh->eh_smac);
481*b494511aSVenki Rajagopalan }
482*b494511aSVenki Rajagopalan }
483*b494511aSVenki Rajagopalan
484*b494511aSVenki Rajagopalan int
eib_data_lookup_vnic(eib_t * ss,uint8_t * mac,uint16_t vlan,eib_vnic_t ** vnicp,boolean_t * failed)485*b494511aSVenki Rajagopalan eib_data_lookup_vnic(eib_t *ss, uint8_t *mac, uint16_t vlan, eib_vnic_t **vnicp,
486*b494511aSVenki Rajagopalan boolean_t *failed)
487*b494511aSVenki Rajagopalan {
488*b494511aSVenki Rajagopalan eib_vnic_t *vnic;
489*b494511aSVenki Rajagopalan eib_vnic_req_t *vrq;
490*b494511aSVenki Rajagopalan uint8_t *vn_mac;
491*b494511aSVenki Rajagopalan uint16_t vn_vlan;
492*b494511aSVenki Rajagopalan uint64_t av;
493*b494511aSVenki Rajagopalan int inst = 0;
494*b494511aSVenki Rajagopalan
495*b494511aSVenki Rajagopalan if (mac == NULL)
496*b494511aSVenki Rajagopalan return (EIB_E_FAILURE);
497*b494511aSVenki Rajagopalan
498*b494511aSVenki Rajagopalan /*
499*b494511aSVenki Rajagopalan * For now, a simple search (but only what we've allocated). Note that
500*b494511aSVenki Rajagopalan * if we're in the process of creating a vnic, the instance might've
501*b494511aSVenki Rajagopalan * been allocated, but the vnic entry would be NULL.
502*b494511aSVenki Rajagopalan */
503*b494511aSVenki Rajagopalan mutex_enter(&ss->ei_vnic_lock);
504*b494511aSVenki Rajagopalan av = ss->ei_active_vnics;
505*b494511aSVenki Rajagopalan while ((inst = EIB_FIND_LSB_SET(av)) != -1) {
506*b494511aSVenki Rajagopalan if ((vnic = ss->ei_vnic[inst]) != NULL) {
507*b494511aSVenki Rajagopalan vn_mac = vnic->vn_login_data.ld_assigned_mac;
508*b494511aSVenki Rajagopalan vn_vlan = vnic->vn_login_data.ld_assigned_vlan;
509*b494511aSVenki Rajagopalan
510*b494511aSVenki Rajagopalan if ((vn_vlan == vlan) &&
511*b494511aSVenki Rajagopalan (bcmp(vn_mac, mac, ETHERADDRL) == 0)) {
512*b494511aSVenki Rajagopalan if (vnicp) {
513*b494511aSVenki Rajagopalan *vnicp = vnic;
514*b494511aSVenki Rajagopalan }
515*b494511aSVenki Rajagopalan mutex_exit(&ss->ei_vnic_lock);
516*b494511aSVenki Rajagopalan return (EIB_E_SUCCESS);
517*b494511aSVenki Rajagopalan }
518*b494511aSVenki Rajagopalan }
519*b494511aSVenki Rajagopalan
520*b494511aSVenki Rajagopalan av &= (~((uint64_t)1 << inst));
521*b494511aSVenki Rajagopalan }
522*b494511aSVenki Rajagopalan mutex_exit(&ss->ei_vnic_lock);
523*b494511aSVenki Rajagopalan
524*b494511aSVenki Rajagopalan /*
525*b494511aSVenki Rajagopalan * If we haven't been able to locate a vnic for this {mac,vlan} tuple,
526*b494511aSVenki Rajagopalan * see if we've already failed a creation request for this vnic, and
527*b494511aSVenki Rajagopalan * return that information.
528*b494511aSVenki Rajagopalan */
529*b494511aSVenki Rajagopalan if (failed) {
530*b494511aSVenki Rajagopalan mutex_enter(&ss->ei_vnic_req_lock);
531*b494511aSVenki Rajagopalan *failed = B_FALSE;
532*b494511aSVenki Rajagopalan for (vrq = ss->ei_failed_vnic_req; vrq; vrq = vrq->vr_next) {
533*b494511aSVenki Rajagopalan if ((vrq->vr_vlan == vlan) &&
534*b494511aSVenki Rajagopalan (bcmp(vrq->vr_mac, mac, ETHERADDRL) == 0)) {
535*b494511aSVenki Rajagopalan *failed = B_TRUE;
536*b494511aSVenki Rajagopalan }
537*b494511aSVenki Rajagopalan }
538*b494511aSVenki Rajagopalan mutex_exit(&ss->ei_vnic_req_lock);
539*b494511aSVenki Rajagopalan }
540*b494511aSVenki Rajagopalan
541*b494511aSVenki Rajagopalan return (EIB_E_FAILURE);
542*b494511aSVenki Rajagopalan }
543*b494511aSVenki Rajagopalan
544*b494511aSVenki Rajagopalan int
eib_data_prepare_frame(eib_vnic_t * vnic,eib_wqe_t * swqe,mblk_t * mp,eib_ether_hdr_t * evh)545*b494511aSVenki Rajagopalan eib_data_prepare_frame(eib_vnic_t *vnic, eib_wqe_t *swqe, mblk_t *mp,
546*b494511aSVenki Rajagopalan eib_ether_hdr_t *evh)
547*b494511aSVenki Rajagopalan {
548*b494511aSVenki Rajagopalan uint32_t mss;
549*b494511aSVenki Rajagopalan uint32_t lsoflags;
550*b494511aSVenki Rajagopalan uint32_t hckflags;
551*b494511aSVenki Rajagopalan
552*b494511aSVenki Rajagopalan /*
553*b494511aSVenki Rajagopalan * The swqe defaults are set to use the regular ud work request
554*b494511aSVenki Rajagopalan * member and the IBT_WRC_SEND opcode, so we don't need to do
555*b494511aSVenki Rajagopalan * anything here if this isn't an LSO packet.
556*b494511aSVenki Rajagopalan */
557*b494511aSVenki Rajagopalan mac_lso_get(mp, &mss, &lsoflags);
558*b494511aSVenki Rajagopalan if ((lsoflags & HW_LSO) == HW_LSO)
559*b494511aSVenki Rajagopalan eib_data_setup_lso(swqe, mp, mss, evh);
560*b494511aSVenki Rajagopalan
561*b494511aSVenki Rajagopalan mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &hckflags);
562*b494511aSVenki Rajagopalan if ((hckflags & HCK_FULLCKSUM) == HCK_FULLCKSUM) {
563*b494511aSVenki Rajagopalan swqe->qe_wr.send.wr_flags |= IBT_WR_SEND_CKSUM;
564*b494511aSVenki Rajagopalan } else {
565*b494511aSVenki Rajagopalan swqe->qe_wr.send.wr_flags &= (~IBT_WR_SEND_CKSUM);
566*b494511aSVenki Rajagopalan }
567*b494511aSVenki Rajagopalan
568*b494511aSVenki Rajagopalan if (eib_data_prepare_sgl(vnic, swqe, mp) != 0)
569*b494511aSVenki Rajagopalan return (EIB_E_FAILURE);
570*b494511aSVenki Rajagopalan
571*b494511aSVenki Rajagopalan swqe->qe_mp = mp;
572*b494511aSVenki Rajagopalan
573*b494511aSVenki Rajagopalan return (EIB_E_SUCCESS);
574*b494511aSVenki Rajagopalan }
575*b494511aSVenki Rajagopalan
void
eib_rb_data_create_qp(eib_t *ss, eib_vnic_t *vnic)
{
	/*
	 * Rollback for the data qp creation path: undo the setup steps in
	 * reverse order of construction -- first the ud channel, then the
	 * completion queues -- before releasing the channel state itself.
	 * The order matters; don't reorder these calls.
	 */
	eib_rb_data_setup_ud_channel(ss, vnic);

	eib_rb_data_setup_cqs(ss, vnic);

	/* Free the eib_chan_t and clear the vnic's reference to it */
	eib_chan_fini(vnic->vn_data_chan);
	vnic->vn_data_chan = NULL;
}
586*b494511aSVenki Rajagopalan
/*
 * Allocate and arm the send and receive completion queues for a vnic's
 * data channel, allocate the work-completion arrays used to poll them,
 * and attach softint-based completion handlers for both.  On any failure,
 * everything set up so far is rolled back via eib_rb_data_setup_cqs().
 * Returns EIB_E_SUCCESS or EIB_E_FAILURE.
 */
static int
eib_data_setup_cqs(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	ibt_cq_attr_t cq_attr;
	ibt_status_t ret;
	uint_t snd_sz;		/* actual size of the allocated send cq */
	uint_t rcv_sz;		/* actual size of the allocated recv cq */
	int rv;

	/*
	 * Allocate send completion queue. Note that we've already verified
	 * that cp_max_swqe and cp_max_rwqe meet the max cq size requirements
	 * of the hca.
	 */
	cq_attr.cq_sched = NULL;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
	cq_attr.cq_size = ss->ei_caps->cp_max_swqe + 1;

	/* NOTE(review): cq_attr.cq_size is uint_t but printed with %lx
	 * below -- verify the EIB_DPRINTF format/argument match on LP64 */
	ret = ibt_alloc_cq(ss->ei_hca_hdl, &cq_attr, &chan->ch_cq_hdl, &snd_sz);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
		    "ibt_alloc_cq(snd_cq_sz=0x%lx) failed, ret=%d",
		    cq_attr.cq_size, ret);
		goto setup_data_cqs_fail;
	}
	/*
	 * Tune tx interrupt moderation; a failure here only costs
	 * performance, so it is logged as a warning and ignored.
	 */
	ret = ibt_modify_cq(chan->ch_cq_hdl, EIB_TX_COMP_COUNT,
	    EIB_TX_COMP_USEC, 0);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_setup_cqs: "
		    "ibt_modify_cq(snd_comp_count=0x%lx, snd_comp_usec=0x%lx) "
		    "failed, ret=%d",
		    EIB_TX_COMP_COUNT, EIB_TX_COMP_USEC, ret);
	}

	/*
	 * Allocate receive completion queue
	 */
	cq_attr.cq_sched = NULL;
	cq_attr.cq_flags = IBT_CQ_NO_FLAGS;
	cq_attr.cq_size = ss->ei_caps->cp_max_rwqe + 1;

	ret = ibt_alloc_cq(ss->ei_hca_hdl, &cq_attr, &chan->ch_rcv_cq_hdl,
	    &rcv_sz);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
		    "ibt_alloc_cq(rcv_cq_sz=0x%lx) failed, ret=%d",
		    cq_attr.cq_size, ret);
		goto setup_data_cqs_fail;
	}
	/* As above, rx interrupt moderation failure is non-fatal */
	ret = ibt_modify_cq(chan->ch_rcv_cq_hdl, EIB_RX_COMP_COUNT,
	    EIB_RX_COMP_USEC, 0);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_setup_cqs: "
		    "ibt_modify_cq(rcv_comp_count=0x%lx, rcv_comp_usec=0x%lx) "
		    "failed, ret=%d",
		    EIB_RX_COMP_COUNT, EIB_RX_COMP_USEC, ret);
	}

	/*
	 * Set up parameters for collecting tx and rx completion information
	 * (sized to the actual cq sizes the hca gave us, which may exceed
	 * what we asked for)
	 */
	chan->ch_cq_sz = snd_sz;
	chan->ch_wc = kmem_zalloc(sizeof (ibt_wc_t) * snd_sz, KM_SLEEP);
	chan->ch_rcv_cq_sz = rcv_sz;
	chan->ch_rcv_wc = kmem_zalloc(sizeof (ibt_wc_t) * rcv_sz, KM_SLEEP);

	/*
	 * Set up the vnic's data tx completion queue handler and allocate
	 * a softint for it as well.
	 */
	if ((rv = ddi_intr_add_softint(ss->ei_dip, &vnic->vn_data_tx_si_hdl,
	    EIB_SOFTPRI_DATA, eib_data_tx_comp_handler, vnic)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
		    "ddi_intr_add_softint() failed for data tx qp, ret=%d", rv);
		goto setup_data_cqs_fail;
	}
	ibt_set_cq_handler(chan->ch_cq_hdl, eib_data_tx_comp_intr, vnic);
	ret = ibt_enable_cq_notify(chan->ch_cq_hdl, IBT_NEXT_COMPLETION);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
		    "ibt_enable_cq_notify() failed for tx cq, ret=%d", ret);
		goto setup_data_cqs_fail;
	}

	/*
	 * And then the data rx completion queue handler
	 */
	if ((rv = ddi_intr_add_softint(ss->ei_dip, &vnic->vn_data_rx_si_hdl,
	    EIB_SOFTPRI_DATA, eib_data_rx_comp_handler, vnic)) != DDI_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
		    "ddi_intr_add_softint() failed for data rx qp, ret=%d", rv);
		goto setup_data_cqs_fail;
	}
	ibt_set_cq_handler(chan->ch_rcv_cq_hdl, eib_data_rx_comp_intr, vnic);
	ret = ibt_enable_cq_notify(chan->ch_rcv_cq_hdl, IBT_NEXT_COMPLETION);
	if (ret != IBT_SUCCESS) {
		EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_cqs: "
		    "ibt_enable_cq_notify() failed for rx cq, ret=%d", ret);
		goto setup_data_cqs_fail;
	}

	return (EIB_E_SUCCESS);

setup_data_cqs_fail:
	/* Undo whatever portion of the setup completed before the failure */
	eib_rb_data_setup_cqs(ss, vnic);
	return (EIB_E_FAILURE);
}
695*b494511aSVenki Rajagopalan
696*b494511aSVenki Rajagopalan static int
eib_data_setup_ud_channel(eib_t * ss,eib_vnic_t * vnic)697*b494511aSVenki Rajagopalan eib_data_setup_ud_channel(eib_t *ss, eib_vnic_t *vnic)
698*b494511aSVenki Rajagopalan {
699*b494511aSVenki Rajagopalan eib_chan_t *chan = vnic->vn_data_chan;
700*b494511aSVenki Rajagopalan ibt_ud_chan_alloc_args_t alloc_attr;
701*b494511aSVenki Rajagopalan ibt_ud_chan_query_attr_t query_attr;
702*b494511aSVenki Rajagopalan ibt_status_t ret;
703*b494511aSVenki Rajagopalan
704*b494511aSVenki Rajagopalan bzero(&alloc_attr, sizeof (ibt_ud_chan_alloc_args_t));
705*b494511aSVenki Rajagopalan bzero(&query_attr, sizeof (ibt_ud_chan_query_attr_t));
706*b494511aSVenki Rajagopalan
707*b494511aSVenki Rajagopalan alloc_attr.ud_flags = IBT_ALL_SIGNALED;
708*b494511aSVenki Rajagopalan if (ss->ei_caps->cp_resv_lkey_capab)
709*b494511aSVenki Rajagopalan alloc_attr.ud_flags |= IBT_FAST_REG_RES_LKEY;
710*b494511aSVenki Rajagopalan if (ss->ei_caps->cp_lso_maxlen)
711*b494511aSVenki Rajagopalan alloc_attr.ud_flags |= IBT_USES_LSO;
712*b494511aSVenki Rajagopalan
713*b494511aSVenki Rajagopalan alloc_attr.ud_hca_port_num = ss->ei_props->ep_port_num;
714*b494511aSVenki Rajagopalan alloc_attr.ud_pkey_ix = chan->ch_pkey_ix;
715*b494511aSVenki Rajagopalan alloc_attr.ud_sizes.cs_sq = ss->ei_caps->cp_max_swqe;
716*b494511aSVenki Rajagopalan alloc_attr.ud_sizes.cs_rq = ss->ei_caps->cp_max_rwqe;
717*b494511aSVenki Rajagopalan alloc_attr.ud_sizes.cs_sq_sgl = ss->ei_caps->cp_max_sgl;
718*b494511aSVenki Rajagopalan alloc_attr.ud_sizes.cs_rq_sgl = 1;
719*b494511aSVenki Rajagopalan alloc_attr.ud_sizes.cs_inline = 0;
720*b494511aSVenki Rajagopalan
721*b494511aSVenki Rajagopalan alloc_attr.ud_qkey = EIB_DATA_QKEY;
722*b494511aSVenki Rajagopalan alloc_attr.ud_scq = chan->ch_cq_hdl;
723*b494511aSVenki Rajagopalan alloc_attr.ud_rcq = chan->ch_rcv_cq_hdl;
724*b494511aSVenki Rajagopalan alloc_attr.ud_pd = ss->ei_pd_hdl;
725*b494511aSVenki Rajagopalan
726*b494511aSVenki Rajagopalan ret = ibt_alloc_ud_channel(ss->ei_hca_hdl, IBT_ACHAN_NO_FLAGS,
727*b494511aSVenki Rajagopalan &alloc_attr, &chan->ch_chan, NULL);
728*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
729*b494511aSVenki Rajagopalan EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_ud_channel: "
730*b494511aSVenki Rajagopalan "ibt_alloc_ud_channel(port=0x%x, pkey_ix=0x%x, "
731*b494511aSVenki Rajagopalan "cs_sq=0x%lx, cs_rq=0x%lx, sq_sgl=0x%lx) failed, ret=%d",
732*b494511aSVenki Rajagopalan alloc_attr.ud_hca_port_num, chan->ch_pkey_ix,
733*b494511aSVenki Rajagopalan alloc_attr.ud_sizes.cs_sq, alloc_attr.ud_sizes.cs_rq,
734*b494511aSVenki Rajagopalan alloc_attr.ud_sizes.cs_sq_sgl, ret);
735*b494511aSVenki Rajagopalan
736*b494511aSVenki Rajagopalan goto setup_data_ud_channel_fail;
737*b494511aSVenki Rajagopalan }
738*b494511aSVenki Rajagopalan
739*b494511aSVenki Rajagopalan ret = ibt_query_ud_channel(chan->ch_chan, &query_attr);
740*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
741*b494511aSVenki Rajagopalan EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_setup_ud_channel: "
742*b494511aSVenki Rajagopalan "ibt_query_ud_channel() failed, ret=%d", ret);
743*b494511aSVenki Rajagopalan goto setup_data_ud_channel_fail;
744*b494511aSVenki Rajagopalan }
745*b494511aSVenki Rajagopalan
746*b494511aSVenki Rajagopalan chan->ch_qpn = query_attr.ud_qpn;
747*b494511aSVenki Rajagopalan chan->ch_max_swqes = query_attr.ud_chan_sizes.cs_sq;
748*b494511aSVenki Rajagopalan chan->ch_max_rwqes = query_attr.ud_chan_sizes.cs_rq;
749*b494511aSVenki Rajagopalan chan->ch_lwm_rwqes = chan->ch_max_rwqes >> 2;
750*b494511aSVenki Rajagopalan chan->ch_rwqe_bktsz = (chan->ch_max_rwqes < EIB_DATA_RWQE_BKT) ?
751*b494511aSVenki Rajagopalan chan->ch_max_rwqes : EIB_DATA_RWQE_BKT;
752*b494511aSVenki Rajagopalan chan->ch_ip_hdr_align = EIB_IP_HDR_ALIGN;
753*b494511aSVenki Rajagopalan chan->ch_alloc_mp = B_TRUE;
754*b494511aSVenki Rajagopalan chan->ch_tear_down = B_FALSE;
755*b494511aSVenki Rajagopalan
756*b494511aSVenki Rajagopalan return (EIB_E_SUCCESS);
757*b494511aSVenki Rajagopalan
758*b494511aSVenki Rajagopalan setup_data_ud_channel_fail:
759*b494511aSVenki Rajagopalan eib_rb_data_setup_ud_channel(ss, vnic);
760*b494511aSVenki Rajagopalan return (EIB_E_FAILURE);
761*b494511aSVenki Rajagopalan }
762*b494511aSVenki Rajagopalan
/*
 * Convert the swqe into an LSO send: switch the work request to the
 * IBT_WRC_SEND_LSO opcode, compute the total header length (EoIB
 * encapsulation + ethernet + IP + TCP) from the mblk chain, and copy
 * those headers into the swqe's pre-allocated lso header buffer.
 */
static void
eib_data_setup_lso(eib_wqe_t *swqe, mblk_t *mp, uint32_t mss,
    eib_ether_hdr_t *evh)
{
	ibt_wr_lso_t *lso;
	mblk_t *nmp;
	uint8_t *dst;
	uintptr_t ip_start;	/* address of the IP header in the chain */
	uintptr_t tcp_start;	/* address of the TCP header in the chain */
	uint_t pending;		/* header bytes still to be copied */
	uint_t mblen;
	uint_t eth_hdr_len;
	uint_t ip_hdr_len;
	uint_t tcp_hdr_len;

	/*
	 * When the swqe was grabbed, it would've had its wr_opcode and
	 * wr.ud.udwr_dest set to default values. Since we're now going
	 * to use LSO, we need to change these.
	 */
	swqe->qe_wr.send.wr_opcode = IBT_WRC_SEND_LSO;
	lso = &(swqe->qe_wr.send.wr.ud_lso);
	lso->lso_ud_dest = swqe->qe_dest;
	lso->lso_mss = mss;

	/*
	 * Details on the ethernet header in the mp is already known to us
	 */
	eth_hdr_len = (evh->eh_tagless) ? (sizeof (struct ether_header)) :
	    (sizeof (struct ether_vlan_header));

	/*
	 * Calculate the LSO header size and set it in the UD LSO structure.
	 * Note that the only assumption we make is that each of the Ethernet,
	 * IP and TCP headers will be contained in a single mblk fragment;
	 * together, the headers may span multiple mblk fragments. Note also
	 * that since the EoIB encapsulation header is not part of the message
	 * block we receive, we'll need to account space for inserting it later.
	 */
	nmp = mp;
	ip_start = (uintptr_t)(nmp->b_rptr) + eth_hdr_len;
	/* If the IP header starts past this fragment, hop to the next one */
	if (ip_start >= (uintptr_t)(nmp->b_wptr)) {
		ip_start = (uintptr_t)nmp->b_cont->b_rptr
		    + (ip_start - (uintptr_t)(nmp->b_wptr));
		nmp = nmp->b_cont;
	}
	ip_hdr_len = IPH_HDR_LENGTH((ipha_t *)ip_start);

	tcp_start = ip_start + ip_hdr_len;
	/* Likewise for the TCP header */
	if (tcp_start >= (uintptr_t)(nmp->b_wptr)) {
		tcp_start = (uintptr_t)nmp->b_cont->b_rptr
		    + (tcp_start - (uintptr_t)(nmp->b_wptr));
		nmp = nmp->b_cont;
	}
	tcp_hdr_len = TCP_HDR_LENGTH((tcph_t *)tcp_start);

	/*
	 * Since the passed mp fragment never contains the EoIB encapsulation
	 * header, we always have to copy the lso header. Sigh.
	 */
	lso->lso_hdr = swqe->qe_payload_hdr;
	lso->lso_hdr_sz = EIB_ENCAP_HDR_SZ + eth_hdr_len +
	    ip_hdr_len + tcp_hdr_len;

	/*
	 * We already have the EoIB encapsulation header written at the
	 * start of wqe->qe_payload_hdr during swqe acquisition. Only
	 * copy the remaining headers.
	 */
	dst = lso->lso_hdr + EIB_ENCAP_HDR_SZ;
	pending = lso->lso_hdr_sz - EIB_ENCAP_HDR_SZ;

	/* Gather the eth/ip/tcp header bytes across mblk fragments */
	for (nmp = mp; nmp && pending; nmp = nmp->b_cont) {
		mblen = MBLKL(nmp);
		if (pending > mblen) {
			bcopy(nmp->b_rptr, dst, mblen);
			dst += mblen;
			pending -= mblen;
		} else {
			bcopy(nmp->b_rptr, dst, pending);
			break;
		}
	}
}
847*b494511aSVenki Rajagopalan
848*b494511aSVenki Rajagopalan static int
eib_data_prepare_sgl(eib_vnic_t * vnic,eib_wqe_t * swqe,mblk_t * mp)849*b494511aSVenki Rajagopalan eib_data_prepare_sgl(eib_vnic_t *vnic, eib_wqe_t *swqe, mblk_t *mp)
850*b494511aSVenki Rajagopalan {
851*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
852*b494511aSVenki Rajagopalan eib_stats_t *stats = vnic->vn_ss->ei_stats;
853*b494511aSVenki Rajagopalan ibt_iov_t iov_arr[EIB_MAX_SGL];
854*b494511aSVenki Rajagopalan ibt_iov_attr_t iov_attr;
855*b494511aSVenki Rajagopalan ibt_wr_ds_t *sgl;
856*b494511aSVenki Rajagopalan ibt_status_t ret;
857*b494511aSVenki Rajagopalan mblk_t *nmp;
858*b494511aSVenki Rajagopalan mblk_t *data_mp;
859*b494511aSVenki Rajagopalan uchar_t *bufp;
860*b494511aSVenki Rajagopalan size_t blksize;
861*b494511aSVenki Rajagopalan size_t skip;
862*b494511aSVenki Rajagopalan size_t avail;
863*b494511aSVenki Rajagopalan uint_t lsohdr_sz;
864*b494511aSVenki Rajagopalan uint_t pktsz;
865*b494511aSVenki Rajagopalan ptrdiff_t frag_len;
866*b494511aSVenki Rajagopalan uint_t pending_hdr;
867*b494511aSVenki Rajagopalan uint_t nblks;
868*b494511aSVenki Rajagopalan uint_t i;
869*b494511aSVenki Rajagopalan
870*b494511aSVenki Rajagopalan /*
871*b494511aSVenki Rajagopalan * Let's skip ahead to the TCP data if this is LSO. Note that while
872*b494511aSVenki Rajagopalan * the lso header size in the swqe includes the EoIB encapsulation
873*b494511aSVenki Rajagopalan * header size, that encapsulation header itself won't be found in
874*b494511aSVenki Rajagopalan * the mblk.
875*b494511aSVenki Rajagopalan */
876*b494511aSVenki Rajagopalan lsohdr_sz = (swqe->qe_wr.send.wr_opcode == IBT_WRC_SEND) ? 0 :
877*b494511aSVenki Rajagopalan swqe->qe_wr.send.wr.ud_lso.lso_hdr_sz;
878*b494511aSVenki Rajagopalan
879*b494511aSVenki Rajagopalan data_mp = mp;
880*b494511aSVenki Rajagopalan pending_hdr = 0;
881*b494511aSVenki Rajagopalan if (lsohdr_sz) {
882*b494511aSVenki Rajagopalan pending_hdr = lsohdr_sz - EIB_ENCAP_HDR_SZ;
883*b494511aSVenki Rajagopalan for (nmp = mp; nmp; nmp = nmp->b_cont) {
884*b494511aSVenki Rajagopalan frag_len =
885*b494511aSVenki Rajagopalan (uintptr_t)nmp->b_wptr - (uintptr_t)nmp->b_rptr;
886*b494511aSVenki Rajagopalan if (frag_len > pending_hdr)
887*b494511aSVenki Rajagopalan break;
888*b494511aSVenki Rajagopalan pending_hdr -= frag_len;
889*b494511aSVenki Rajagopalan }
890*b494511aSVenki Rajagopalan data_mp = nmp; /* start of data past lso header */
891*b494511aSVenki Rajagopalan ASSERT(data_mp != NULL);
892*b494511aSVenki Rajagopalan }
893*b494511aSVenki Rajagopalan
894*b494511aSVenki Rajagopalan /*
895*b494511aSVenki Rajagopalan * If this is an LSO packet, we want pktsz to hold the size of the
896*b494511aSVenki Rajagopalan * data following the eoib/ethernet/tcp/ip headers. If this is a
897*b494511aSVenki Rajagopalan * non-LSO packet, we want pktsz to refer to the size of the entire
898*b494511aSVenki Rajagopalan * packet with all the headers, and nblks to hold the number of
899*b494511aSVenki Rajagopalan * mappings we'll need to iov map this (for reserved lkey request).
900*b494511aSVenki Rajagopalan */
901*b494511aSVenki Rajagopalan if (lsohdr_sz == 0) {
902*b494511aSVenki Rajagopalan nblks = 1;
903*b494511aSVenki Rajagopalan pktsz = EIB_ENCAP_HDR_SZ;
904*b494511aSVenki Rajagopalan } else {
905*b494511aSVenki Rajagopalan nblks = 0;
906*b494511aSVenki Rajagopalan pktsz = 0;
907*b494511aSVenki Rajagopalan }
908*b494511aSVenki Rajagopalan for (nmp = data_mp; nmp != NULL; nmp = nmp->b_cont) {
909*b494511aSVenki Rajagopalan pktsz += MBLKL(nmp);
910*b494511aSVenki Rajagopalan nblks++;
911*b494511aSVenki Rajagopalan }
912*b494511aSVenki Rajagopalan pktsz -= pending_hdr;
913*b494511aSVenki Rajagopalan
914*b494511aSVenki Rajagopalan EIB_UPDATE_COUNTER(&stats->st_obytes, pktsz);
915*b494511aSVenki Rajagopalan EIB_INCR_COUNTER(&stats->st_opkts);
916*b494511aSVenki Rajagopalan
917*b494511aSVenki Rajagopalan /*
918*b494511aSVenki Rajagopalan * We only do ibt_map_mem_iov() if the pktsz is above the tx copy
919*b494511aSVenki Rajagopalan * threshold and if the number of mp fragments is less than the
920*b494511aSVenki Rajagopalan * maximum acceptable.
921*b494511aSVenki Rajagopalan */
922*b494511aSVenki Rajagopalan if ((ss->ei_caps->cp_resv_lkey_capab) && (pktsz > EIB_TX_COPY_THRESH) &&
923*b494511aSVenki Rajagopalan (nblks < ss->ei_caps->cp_hiwm_sgl)) {
924*b494511aSVenki Rajagopalan
925*b494511aSVenki Rajagopalan iov_attr.iov_as = NULL;
926*b494511aSVenki Rajagopalan iov_attr.iov = iov_arr;
927*b494511aSVenki Rajagopalan iov_attr.iov_buf = NULL;
928*b494511aSVenki Rajagopalan iov_attr.iov_list_len = nblks;
929*b494511aSVenki Rajagopalan iov_attr.iov_wr_nds = ss->ei_caps->cp_max_sgl;
930*b494511aSVenki Rajagopalan iov_attr.iov_lso_hdr_sz = lsohdr_sz;
931*b494511aSVenki Rajagopalan iov_attr.iov_flags = IBT_IOV_SLEEP;
932*b494511aSVenki Rajagopalan
933*b494511aSVenki Rajagopalan i = 0;
934*b494511aSVenki Rajagopalan if (lsohdr_sz == 0) {
935*b494511aSVenki Rajagopalan iov_arr[i].iov_addr = (caddr_t)swqe->qe_payload_hdr;
936*b494511aSVenki Rajagopalan iov_arr[i].iov_len = EIB_ENCAP_HDR_SZ;
937*b494511aSVenki Rajagopalan i++;
938*b494511aSVenki Rajagopalan }
939*b494511aSVenki Rajagopalan for (nmp = data_mp; i < nblks; i++, nmp = nmp->b_cont) {
940*b494511aSVenki Rajagopalan iov_arr[i].iov_addr = (caddr_t)(void *)nmp->b_rptr;
941*b494511aSVenki Rajagopalan iov_arr[i].iov_len = MBLKL(nmp);
942*b494511aSVenki Rajagopalan if (nmp == data_mp) {
943*b494511aSVenki Rajagopalan iov_arr[i].iov_addr += pending_hdr;
944*b494511aSVenki Rajagopalan iov_arr[i].iov_len -= pending_hdr;
945*b494511aSVenki Rajagopalan }
946*b494511aSVenki Rajagopalan }
947*b494511aSVenki Rajagopalan swqe->qe_info |= EIB_WQE_FLG_BUFTYPE_MAPPED;
948*b494511aSVenki Rajagopalan swqe->qe_wr.send.wr_sgl = swqe->qe_big_sgl;
949*b494511aSVenki Rajagopalan
950*b494511aSVenki Rajagopalan ret = ibt_map_mem_iov(ss->ei_hca_hdl, &iov_attr,
951*b494511aSVenki Rajagopalan &swqe->qe_wr, &swqe->qe_iov_hdl);
952*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
953*b494511aSVenki Rajagopalan EIB_DPRINTF_WARN(ss->ei_instance,
954*b494511aSVenki Rajagopalan "eib_data_prepare_sgl: "
955*b494511aSVenki Rajagopalan "ibt_map_mem_iov(nblks=0x%lx) failed, ret=%d ",
956*b494511aSVenki Rajagopalan "attempting to use copy path", nblks, ret);
957*b494511aSVenki Rajagopalan goto prepare_sgl_copy_path;
958*b494511aSVenki Rajagopalan }
959*b494511aSVenki Rajagopalan
960*b494511aSVenki Rajagopalan return (EIB_E_SUCCESS);
961*b494511aSVenki Rajagopalan }
962*b494511aSVenki Rajagopalan
963*b494511aSVenki Rajagopalan prepare_sgl_copy_path:
964*b494511aSVenki Rajagopalan if (pktsz <= swqe->qe_bufsz) {
965*b494511aSVenki Rajagopalan swqe->qe_wr.send.wr_nds = 1;
966*b494511aSVenki Rajagopalan swqe->qe_wr.send.wr_sgl = &swqe->qe_sgl;
967*b494511aSVenki Rajagopalan swqe->qe_sgl.ds_len = pktsz;
968*b494511aSVenki Rajagopalan
969*b494511aSVenki Rajagopalan /*
970*b494511aSVenki Rajagopalan * Even though this is the copy path for transfers less than
971*b494511aSVenki Rajagopalan * qe_bufsz, it could still be an LSO packet. If so, we only
972*b494511aSVenki Rajagopalan * have to write the data following all the headers into the
973*b494511aSVenki Rajagopalan * work request buffer, since we'll be sending the lso header
974*b494511aSVenki Rajagopalan * itself separately. If this is not an LSO send (but pkt size
975*b494511aSVenki Rajagopalan * greater than mtu, say for a jumbo frame), then we need
976*b494511aSVenki Rajagopalan * to write all the headers including EoIB encapsulation,
977*b494511aSVenki Rajagopalan * into the work request buffer.
978*b494511aSVenki Rajagopalan */
979*b494511aSVenki Rajagopalan bufp = (uchar_t *)(uintptr_t)swqe->qe_sgl.ds_va;
980*b494511aSVenki Rajagopalan if (lsohdr_sz == 0) {
981*b494511aSVenki Rajagopalan *(uint32_t *)((void *)bufp) = htonl(EIB_TX_ENCAP_HDR);
982*b494511aSVenki Rajagopalan bufp += EIB_ENCAP_HDR_SZ;
983*b494511aSVenki Rajagopalan }
984*b494511aSVenki Rajagopalan for (nmp = data_mp; nmp != NULL; nmp = nmp->b_cont) {
985*b494511aSVenki Rajagopalan blksize = MBLKL(nmp) - pending_hdr;
986*b494511aSVenki Rajagopalan bcopy(nmp->b_rptr + pending_hdr, bufp, blksize);
987*b494511aSVenki Rajagopalan bufp += blksize;
988*b494511aSVenki Rajagopalan pending_hdr = 0;
989*b494511aSVenki Rajagopalan }
990*b494511aSVenki Rajagopalan
991*b494511aSVenki Rajagopalan /*
992*b494511aSVenki Rajagopalan * If the ethernet frame we're going to send is less than
993*b494511aSVenki Rajagopalan * ETHERMIN, pad up the buffer to ETHERMIN (with zeros)
994*b494511aSVenki Rajagopalan */
995*b494511aSVenki Rajagopalan if ((pktsz + lsohdr_sz) < (ETHERMIN + EIB_ENCAP_HDR_SZ)) {
996*b494511aSVenki Rajagopalan bzero(bufp, (ETHERMIN + EIB_ENCAP_HDR_SZ) -
997*b494511aSVenki Rajagopalan (pktsz + lsohdr_sz));
998*b494511aSVenki Rajagopalan swqe->qe_sgl.ds_len = ETHERMIN + EIB_ENCAP_HDR_SZ;
999*b494511aSVenki Rajagopalan }
1000*b494511aSVenki Rajagopalan return (EIB_E_SUCCESS);
1001*b494511aSVenki Rajagopalan }
1002*b494511aSVenki Rajagopalan
1003*b494511aSVenki Rajagopalan /*
1004*b494511aSVenki Rajagopalan * Copy path for transfers greater than swqe->qe_bufsz
1005*b494511aSVenki Rajagopalan */
1006*b494511aSVenki Rajagopalan swqe->qe_wr.send.wr_sgl = swqe->qe_big_sgl;
1007*b494511aSVenki Rajagopalan if (eib_rsrc_grab_lsobufs(ss, pktsz, swqe->qe_wr.send.wr_sgl,
1008*b494511aSVenki Rajagopalan &(swqe->qe_wr.send.wr_nds)) != EIB_E_SUCCESS) {
1009*b494511aSVenki Rajagopalan EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_prepare_sgl: "
1010*b494511aSVenki Rajagopalan "eib_rsrc_grab_lsobufs() failed");
1011*b494511aSVenki Rajagopalan return (EIB_E_FAILURE);
1012*b494511aSVenki Rajagopalan }
1013*b494511aSVenki Rajagopalan swqe->qe_info |= EIB_WQE_FLG_BUFTYPE_LSO;
1014*b494511aSVenki Rajagopalan
1015*b494511aSVenki Rajagopalan /*
1016*b494511aSVenki Rajagopalan * Copy the larger-than-qe_buf_sz packet into a set of fixed-sized,
1017*b494511aSVenki Rajagopalan * pre-mapped LSO buffers. Note that we might need to skip part of
1018*b494511aSVenki Rajagopalan * the LSO header in the first fragment as before.
1019*b494511aSVenki Rajagopalan */
1020*b494511aSVenki Rajagopalan nmp = data_mp;
1021*b494511aSVenki Rajagopalan skip = pending_hdr;
1022*b494511aSVenki Rajagopalan for (i = 0; i < swqe->qe_wr.send.wr_nds; i++) {
1023*b494511aSVenki Rajagopalan sgl = swqe->qe_wr.send.wr_sgl + i;
1024*b494511aSVenki Rajagopalan bufp = (uchar_t *)(uintptr_t)sgl->ds_va;
1025*b494511aSVenki Rajagopalan avail = EIB_LSO_BUFSZ;
1026*b494511aSVenki Rajagopalan
1027*b494511aSVenki Rajagopalan /*
1028*b494511aSVenki Rajagopalan * If this is a non-LSO packet (perhaps a jumbo frame?)
1029*b494511aSVenki Rajagopalan * we may still need to prefix the EoIB header in the
1030*b494511aSVenki Rajagopalan * wr buffer.
1031*b494511aSVenki Rajagopalan */
1032*b494511aSVenki Rajagopalan if ((i == 0) && (lsohdr_sz == 0)) {
1033*b494511aSVenki Rajagopalan *(uint32_t *)((void *)bufp) = htonl(EIB_TX_ENCAP_HDR);
1034*b494511aSVenki Rajagopalan bufp += EIB_ENCAP_HDR_SZ;
1035*b494511aSVenki Rajagopalan avail -= EIB_ENCAP_HDR_SZ;
1036*b494511aSVenki Rajagopalan }
1037*b494511aSVenki Rajagopalan
1038*b494511aSVenki Rajagopalan while (nmp && avail) {
1039*b494511aSVenki Rajagopalan blksize = MBLKL(nmp) - skip;
1040*b494511aSVenki Rajagopalan if (blksize > avail) {
1041*b494511aSVenki Rajagopalan bcopy(nmp->b_rptr + skip, bufp, avail);
1042*b494511aSVenki Rajagopalan skip += avail;
1043*b494511aSVenki Rajagopalan avail = 0;
1044*b494511aSVenki Rajagopalan } else {
1045*b494511aSVenki Rajagopalan bcopy(nmp->b_rptr + skip, bufp, blksize);
1046*b494511aSVenki Rajagopalan skip = 0;
1047*b494511aSVenki Rajagopalan bufp += blksize;
1048*b494511aSVenki Rajagopalan avail -= blksize;
1049*b494511aSVenki Rajagopalan nmp = nmp->b_cont;
1050*b494511aSVenki Rajagopalan }
1051*b494511aSVenki Rajagopalan }
1052*b494511aSVenki Rajagopalan }
1053*b494511aSVenki Rajagopalan
1054*b494511aSVenki Rajagopalan return (EIB_E_SUCCESS);
1055*b494511aSVenki Rajagopalan }
1056*b494511aSVenki Rajagopalan
1057*b494511aSVenki Rajagopalan /*ARGSUSED*/
1058*b494511aSVenki Rajagopalan static int
eib_data_is_mcast_pkt_ok(eib_vnic_t * vnic,uint8_t * macaddr,uint64_t * brdcst,uint64_t * multicst)1059*b494511aSVenki Rajagopalan eib_data_is_mcast_pkt_ok(eib_vnic_t *vnic, uint8_t *macaddr, uint64_t *brdcst,
1060*b494511aSVenki Rajagopalan uint64_t *multicst)
1061*b494511aSVenki Rajagopalan {
1062*b494511aSVenki Rajagopalan /*
1063*b494511aSVenki Rajagopalan * If the dmac is a broadcast packet, let it through. Otherwise, either
1064*b494511aSVenki Rajagopalan * we should be in promiscuous mode or the dmac should be in our list of
1065*b494511aSVenki Rajagopalan * joined multicast addresses. Currently we only update the stat
1066*b494511aSVenki Rajagopalan * counters and always let things through.
1067*b494511aSVenki Rajagopalan */
1068*b494511aSVenki Rajagopalan if (bcmp(macaddr, eib_broadcast_mac, ETHERADDRL) == 0)
1069*b494511aSVenki Rajagopalan EIB_INCR_COUNTER(brdcst);
1070*b494511aSVenki Rajagopalan else
1071*b494511aSVenki Rajagopalan EIB_INCR_COUNTER(multicst);
1072*b494511aSVenki Rajagopalan
1073*b494511aSVenki Rajagopalan return (1);
1074*b494511aSVenki Rajagopalan }
1075*b494511aSVenki Rajagopalan
1076*b494511aSVenki Rajagopalan static void
eib_data_rx_comp_intr(ibt_cq_hdl_t cq_hdl,void * arg)1077*b494511aSVenki Rajagopalan eib_data_rx_comp_intr(ibt_cq_hdl_t cq_hdl, void *arg)
1078*b494511aSVenki Rajagopalan {
1079*b494511aSVenki Rajagopalan eib_vnic_t *vnic = arg;
1080*b494511aSVenki Rajagopalan eib_chan_t *chan = vnic->vn_data_chan;
1081*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
1082*b494511aSVenki Rajagopalan
1083*b494511aSVenki Rajagopalan if (cq_hdl != chan->ch_rcv_cq_hdl) {
1084*b494511aSVenki Rajagopalan EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_data_rx_comp_intr: "
1085*b494511aSVenki Rajagopalan "cq_hdl(0x%llx) != chan->ch_cq_hdl(0x%llx), "
1086*b494511aSVenki Rajagopalan "ignoring completion", cq_hdl, chan->ch_cq_hdl);
1087*b494511aSVenki Rajagopalan return;
1088*b494511aSVenki Rajagopalan }
1089*b494511aSVenki Rajagopalan
1090*b494511aSVenki Rajagopalan ASSERT(vnic->vn_data_rx_si_hdl != NULL);
1091*b494511aSVenki Rajagopalan
1092*b494511aSVenki Rajagopalan (void) ddi_intr_trigger_softint(vnic->vn_data_rx_si_hdl, NULL);
1093*b494511aSVenki Rajagopalan }
1094*b494511aSVenki Rajagopalan
1095*b494511aSVenki Rajagopalan static void
eib_data_tx_comp_intr(ibt_cq_hdl_t cq_hdl,void * arg)1096*b494511aSVenki Rajagopalan eib_data_tx_comp_intr(ibt_cq_hdl_t cq_hdl, void *arg)
1097*b494511aSVenki Rajagopalan {
1098*b494511aSVenki Rajagopalan eib_vnic_t *vnic = arg;
1099*b494511aSVenki Rajagopalan eib_chan_t *chan = vnic->vn_data_chan;
1100*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
1101*b494511aSVenki Rajagopalan
1102*b494511aSVenki Rajagopalan if (cq_hdl != chan->ch_cq_hdl) {
1103*b494511aSVenki Rajagopalan EIB_DPRINTF_DEBUG(ss->ei_instance, "eib_data_tx_comp_intr: "
1104*b494511aSVenki Rajagopalan "cq_hdl(0x%llx) != chan->ch_cq_hdl(0x%llx), "
1105*b494511aSVenki Rajagopalan "ignoring completion", cq_hdl, chan->ch_cq_hdl);
1106*b494511aSVenki Rajagopalan return;
1107*b494511aSVenki Rajagopalan }
1108*b494511aSVenki Rajagopalan
1109*b494511aSVenki Rajagopalan ASSERT(vnic->vn_data_tx_si_hdl != NULL);
1110*b494511aSVenki Rajagopalan
1111*b494511aSVenki Rajagopalan (void) ddi_intr_trigger_softint(vnic->vn_data_tx_si_hdl, NULL);
1112*b494511aSVenki Rajagopalan }
1113*b494511aSVenki Rajagopalan
/*
 * Process one successful receive completion: validate the EoIB
 * encapsulation and ethernet headers in the rwqe's mblk and prepare the
 * mblk for delivery to the network layer.
 *
 * Returns the mblk to pass up on success, or NULL if the packet was
 * dropped (loopback, bad signature/version, vlan mismatch, dmac
 * mismatch, or no memory for a copy when the rx pool is low).  On the
 * NULL paths the mblk has been freed; freeing a wqe-attached mblk
 * returns the rwqe to the pool via the driver's free routine.
 */
static mblk_t *
eib_data_rx_comp(eib_vnic_t *vnic, eib_wqe_t *wqe, ibt_wc_t *wc)
{
	eib_t *ss = vnic->vn_ss;
	eib_chan_t *chan = vnic->vn_data_chan;
	eib_login_data_t *ld = &vnic->vn_login_data;
	eib_stats_t *stats = ss->ei_stats;
	eib_ether_hdr_t evh;
	mblk_t *mp;
	boolean_t allocd_mp = B_FALSE;	/* B_TRUE if we copied to a fresh mblk */
	uint_t ec_hdr;			/* EoIB encapsulation header (host order) */
	uint_t ec_sign;
	uint_t ec_ver;
	uint_t ec_tu_cs;		/* tcp/udp checksum status bits */
	uint_t ec_ip_cs;		/* ip checksum status bits */

	/*
	 * Before we process this mblk and send it up to network layer, see
	 * if we're running low on rwqes in the wqe pool. If so, allocate a
	 * new mblk, copy the received data into it and send it up (and return
	 * the current rwqe back to the pool immediately by calling freemsg()
	 * on the original mblk).
	 */
	if (!eib_rsrc_rxpool_low(wqe)) {
		mp = wqe->qe_mp;
	} else {
		if ((mp = allocb(wc->wc_bytes_xfer, BPRI_HI)) != NULL) {
			bcopy(wqe->qe_mp->b_rptr, mp->b_rptr,
			    wc->wc_bytes_xfer);
			freemsg(wqe->qe_mp);
			allocd_mp = B_TRUE;
		} else {
			/* Can't copy and can't keep the rwqe: drop the pkt */
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
			    "wqe level below watermark, dropping rx pkt");
			EIB_INCR_COUNTER(&stats->st_norcvbuf);
			freemsg(wqe->qe_mp);
			return (NULL);
		}
	}

	/*
	 * Adjust write pointer depending on how much data came in. Note that
	 * since the nw layer will expect us to hand over the mp with the
	 * ethernet header starting at mp->b_rptr, update the b_rptr as well.
	 */
	mp->b_wptr = mp->b_rptr + wc->wc_bytes_xfer;

	/*
	 * We have a problem if this really happens!
	 */
	if (mp->b_next != NULL) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
		    "received packet's b_next not NULL, possible dup from cq");
		mp->b_next = NULL;
	}

	/*
	 * Drop loopback packets ?
	 * NOTE(review): a packet whose source lid/qpn match our own channel
	 * is presumably one we sent ourselves — confirm ep_blid is the base
	 * lid of this port.
	 */
	if ((wc->wc_slid == ss->ei_props->ep_blid) &&
	    (wc->wc_qpn == chan->ch_qpn)) {
		goto data_rx_comp_fail;
	}

	/* Skip past the GRH; the EoIB encapsulation header follows it */
	mp->b_rptr += EIB_GRH_SZ;

	/*
	 * Since the recv buffer has been aligned for IP header to start on
	 * a word boundary, it is safe to say that the EoIB and ethernet
	 * headers won't start on a word boundary.  Hence the bcopy() rather
	 * than a direct (possibly misaligned) load.
	 */
	bcopy(mp->b_rptr, &ec_hdr, EIB_ENCAP_HDR_SZ);

	/*
	 * Check EoIB signature and version
	 */
	ec_hdr = ntohl(ec_hdr);

	ec_sign = (ec_hdr >> EIB_ENCAP_SIGN_SHIFT) & EIB_ENCAP_SIGN_MASK;
	if (ec_sign != EIB_EH_SIGNATURE) {
		/* NOTE(review): ec_sign is uint_t but printed with 0x%lx —
		 * verify the format specifier matches on all platforms */
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
		    "EoIB encapsulation header signature (0x%lx) unknown",
		    ec_sign);
		goto data_rx_comp_fail;
	}

	ec_ver = (ec_hdr >> EIB_ENCAP_VER_SHIFT) & EIB_ENCAP_VER_MASK;
	if (ec_ver != EIB_EH_VERSION) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
		    "EoIB encapsulation header version (0x%lx) unknown",
		    ec_ver);
		goto data_rx_comp_fail;
	}

	/*
	 * Check TCP/UDP and IP checksum
	 */
	ec_tu_cs = (ec_hdr >> EIB_ENCAP_TCPCHK_SHIFT) & EIB_ENCAP_TCPCHK_MASK;
	ec_ip_cs = (ec_hdr >> EIB_ENCAP_IPCHK_SHIFT) & EIB_ENCAP_IPCHK_MASK;

	if ((ec_tu_cs == EIB_EH_UDPCSUM_OK || ec_tu_cs == EIB_EH_TCPCSUM_OK) &&
	    (ec_ip_cs == EIB_EH_IPCSUM_OK)) {
		/* Gateway verified both checksums; tell the stack so */
		mac_hcksum_set(mp, 0, 0, 0, 0, HCK_FULLCKSUM_OK);
	} else if (ec_tu_cs == EIB_EH_CSUM_BAD || ec_ip_cs == EIB_EH_CSUM_BAD) {
		/* Bad checksum is only reported; the packet still goes up */
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
		    "EoIB encapsulation header tcp/udp checksum (0x%lx) or"
		    "ip checksum (0x%lx) is bad", ec_tu_cs, ec_ip_cs);
	}

	/*
	 * Update the message block's b_rptr to the start of ethernet header
	 * and parse the header information
	 */
	mp->b_rptr += EIB_ENCAP_HDR_SZ;
	eib_data_parse_ether_hdr(mp, &evh);

	/*
	 * If the incoming packet is vlan-tagged, but the tag doesn't match
	 * this vnic's vlan, drop it.
	 */
	if ((evh.eh_tagless == 0) && (evh.eh_vlan != ld->ld_assigned_vlan)) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
		    "received packet's vlan unknown, expected=0x%x, got=0x%x",
		    ld->ld_assigned_vlan, evh.eh_vlan);
		goto data_rx_comp_fail;
	}

	/*
	 * Final checks to see if the unicast destination is indeed correct
	 * and to see if the multicast address is ok for us.
	 */
	if (EIB_UNICAST_MAC(evh.eh_dmac)) {
		if (bcmp(evh.eh_dmac, ld->ld_assigned_mac, ETHERADDRL) != 0) {
			uint8_t *exp;
			uint8_t *got;

			exp = ld->ld_assigned_mac;
			got = evh.eh_dmac;

			EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
			    "received packet's macaddr mismatch, "
			    "expected=%x:%x:%x:%x:%x:%x, got=%x:%x:%x:%x:%x:%x",
			    exp[0], exp[1], exp[2], exp[3], exp[4], exp[5],
			    got[0], got[1], got[2], got[3], got[4], got[5]);

			goto data_rx_comp_fail;
		}
	} else {
		/*
		 * Currently eib_data_is_mcast_pkt_ok() always returns
		 * true and only updates the bcast/mcast stat counters,
		 * so this failure path is effectively unreachable today.
		 */
		if (!eib_data_is_mcast_pkt_ok(vnic, evh.eh_dmac,
		    &stats->st_brdcstrcv, &stats->st_multircv)) {
			EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
			    "multicast packet not ok");
			goto data_rx_comp_fail;
		}
	}

	/*
	 * Strip ethernet FCS if present in the packet. ConnectX-2 doesn't
	 * support ethernet FCS, so this shouldn't happen anyway.
	 */
	if ((ec_hdr >> EIB_ENCAP_FCS_B_SHIFT) & 0x1) {
		EIB_DPRINTF_WARN(ss->ei_instance, "eib_data_rx_comp: "
		    "ethernet FCS present (ec_hdr=0%lx), ignoring",
		    ec_hdr);

		mp->b_wptr -= ETHERFCSL;
	}

	/*
	 * If this is the same mp as was in the original rwqe (i.e. we didn't
	 * do any allocb()), then mark the rwqe flag so we know that its mblk
	 * is with the network layer.
	 */
	if (!allocd_mp) {
		wqe->qe_info |= EIB_WQE_FLG_WITH_NW;
	}

	return (mp);

data_rx_comp_fail:
	freemsg(mp);
	return (NULL);
}
1297*b494511aSVenki Rajagopalan
1298*b494511aSVenki Rajagopalan static void
eib_data_tx_comp(eib_vnic_t * vnic,eib_wqe_t * wqe,eib_chan_t * chan)1299*b494511aSVenki Rajagopalan eib_data_tx_comp(eib_vnic_t *vnic, eib_wqe_t *wqe, eib_chan_t *chan)
1300*b494511aSVenki Rajagopalan {
1301*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
1302*b494511aSVenki Rajagopalan ibt_status_t ret;
1303*b494511aSVenki Rajagopalan
1304*b494511aSVenki Rajagopalan if (wqe->qe_mp) {
1305*b494511aSVenki Rajagopalan if (wqe->qe_info & EIB_WQE_FLG_BUFTYPE_MAPPED) {
1306*b494511aSVenki Rajagopalan ret = ibt_unmap_mem_iov(ss->ei_hca_hdl,
1307*b494511aSVenki Rajagopalan wqe->qe_iov_hdl);
1308*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
1309*b494511aSVenki Rajagopalan EIB_DPRINTF_WARN(ss->ei_instance,
1310*b494511aSVenki Rajagopalan "eib_data_tx_comp: "
1311*b494511aSVenki Rajagopalan "ibt_unmap_mem_iov() failed, ret=%d", ret);
1312*b494511aSVenki Rajagopalan }
1313*b494511aSVenki Rajagopalan wqe->qe_iov_hdl = NULL;
1314*b494511aSVenki Rajagopalan } else if (wqe->qe_info & EIB_WQE_FLG_BUFTYPE_LSO) {
1315*b494511aSVenki Rajagopalan eib_rsrc_return_lsobufs(ss, wqe->qe_big_sgl,
1316*b494511aSVenki Rajagopalan wqe->qe_wr.send.wr_nds);
1317*b494511aSVenki Rajagopalan }
1318*b494511aSVenki Rajagopalan freemsg(wqe->qe_mp);
1319*b494511aSVenki Rajagopalan wqe->qe_mp = NULL;
1320*b494511aSVenki Rajagopalan }
1321*b494511aSVenki Rajagopalan
1322*b494511aSVenki Rajagopalan eib_rsrc_return_swqe(ss, wqe, chan);
1323*b494511aSVenki Rajagopalan }
1324*b494511aSVenki Rajagopalan
1325*b494511aSVenki Rajagopalan static void
eib_data_err_comp(eib_vnic_t * vnic,eib_wqe_t * wqe,ibt_wc_t * wc)1326*b494511aSVenki Rajagopalan eib_data_err_comp(eib_vnic_t *vnic, eib_wqe_t *wqe, ibt_wc_t *wc)
1327*b494511aSVenki Rajagopalan {
1328*b494511aSVenki Rajagopalan eib_t *ss = vnic->vn_ss;
1329*b494511aSVenki Rajagopalan
1330*b494511aSVenki Rajagopalan /*
1331*b494511aSVenki Rajagopalan * Currently, all we do is report
1332*b494511aSVenki Rajagopalan */
1333*b494511aSVenki Rajagopalan switch (wc->wc_status) {
1334*b494511aSVenki Rajagopalan case IBT_WC_WR_FLUSHED_ERR:
1335*b494511aSVenki Rajagopalan break;
1336*b494511aSVenki Rajagopalan
1337*b494511aSVenki Rajagopalan case IBT_WC_LOCAL_CHAN_OP_ERR:
1338*b494511aSVenki Rajagopalan EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_err_comp: "
1339*b494511aSVenki Rajagopalan "IBT_WC_LOCAL_CHAN_OP_ERR seen, wqe_info=0x%lx ",
1340*b494511aSVenki Rajagopalan wqe->qe_info);
1341*b494511aSVenki Rajagopalan break;
1342*b494511aSVenki Rajagopalan
1343*b494511aSVenki Rajagopalan case IBT_WC_LOCAL_PROTECT_ERR:
1344*b494511aSVenki Rajagopalan EIB_DPRINTF_ERR(ss->ei_instance, "eib_data_err_comp: "
1345*b494511aSVenki Rajagopalan "IBT_WC_LOCAL_PROTECT_ERR seen, wqe_info=0x%lx ",
1346*b494511aSVenki Rajagopalan wqe->qe_info);
1347*b494511aSVenki Rajagopalan break;
1348*b494511aSVenki Rajagopalan }
1349*b494511aSVenki Rajagopalan
1350*b494511aSVenki Rajagopalan /*
1351*b494511aSVenki Rajagopalan * When a wc indicates error, we do not attempt to repost the
1352*b494511aSVenki Rajagopalan * rwqe but simply return it to the wqe pool. Also for rwqes,
1353*b494511aSVenki Rajagopalan * attempting to free the mblk in the wqe invokes the
1354*b494511aSVenki Rajagopalan * eib_data_rx_recycle() callback. For tx wqes, error handling
1355*b494511aSVenki Rajagopalan * is the same as successful completion handling. We still
1356*b494511aSVenki Rajagopalan * have to unmap iov/free lsobufs/free mblk and then return the
1357*b494511aSVenki Rajagopalan * swqe to the pool.
1358*b494511aSVenki Rajagopalan */
1359*b494511aSVenki Rajagopalan if (EIB_WQE_TYPE(wqe->qe_info) == EIB_WQE_RX) {
1360*b494511aSVenki Rajagopalan ASSERT(wqe->qe_mp != NULL);
1361*b494511aSVenki Rajagopalan freemsg(wqe->qe_mp);
1362*b494511aSVenki Rajagopalan } else {
1363*b494511aSVenki Rajagopalan eib_data_tx_comp(vnic, wqe, vnic->vn_data_chan);
1364*b494511aSVenki Rajagopalan }
1365*b494511aSVenki Rajagopalan }
1366*b494511aSVenki Rajagopalan
/*ARGSUSED*/
/*
 * Rollback for eib_data_setup_cqs(): undo CQ setup for the vnic's data
 * channel.  Teardown is in the reverse order of setup — quiesce the CQ
 * handlers first, then remove the softints they trigger, then free the
 * work-completion arrays, and finally free the CQs themselves.  Every
 * step is guarded so this is safe to call on a partially set-up channel.
 */
static void
eib_rb_data_setup_cqs(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	ibt_status_t ret;

	if (chan == NULL)
		return;

	/*
	 * Reset any completion handlers we may have set up
	 */
	if (chan->ch_rcv_cq_hdl) {
		ibt_set_cq_handler(chan->ch_rcv_cq_hdl, NULL, NULL);
	}
	if (chan->ch_cq_hdl) {
		ibt_set_cq_handler(chan->ch_cq_hdl, NULL, NULL);
	}

	/*
	 * Remove any softints that were added
	 */
	if (vnic->vn_data_rx_si_hdl) {
		(void) ddi_intr_remove_softint(vnic->vn_data_rx_si_hdl);
		vnic->vn_data_rx_si_hdl = NULL;
	}
	if (vnic->vn_data_tx_si_hdl) {
		(void) ddi_intr_remove_softint(vnic->vn_data_tx_si_hdl);
		vnic->vn_data_tx_si_hdl = NULL;
	}

	/*
	 * Release any work completion buffers we may have allocated
	 * (the size fields are cleared unconditionally so the state is
	 * consistent even if the buffers were never allocated)
	 */
	if (chan->ch_rcv_wc && chan->ch_rcv_cq_sz) {
		kmem_free(chan->ch_rcv_wc,
		    sizeof (ibt_wc_t) * chan->ch_rcv_cq_sz);
	}
	chan->ch_rcv_cq_sz = 0;
	chan->ch_rcv_wc = NULL;

	if (chan->ch_wc && chan->ch_cq_sz) {
		kmem_free(chan->ch_wc, sizeof (ibt_wc_t) * chan->ch_cq_sz);
	}
	chan->ch_cq_sz = 0;
	chan->ch_wc = NULL;

	/*
	 * Free any completion queues we may have allocated.  A failure
	 * here is only logged; we clear the handle regardless.
	 */
	if (chan->ch_rcv_cq_hdl) {
		ret = ibt_free_cq(chan->ch_rcv_cq_hdl);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_data_setup_cqs: "
			    "ibt_free_cq(rcv_cq) failed, ret=%d", ret);
		}
		chan->ch_rcv_cq_hdl = NULL;
	}
	if (chan->ch_cq_hdl) {
		ret = ibt_free_cq(chan->ch_cq_hdl);
		if (ret != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_data_setup_cqs: "
			    "ibt_free_cq(snd_cq) failed, ret=%d", ret);
		}
		chan->ch_cq_hdl = NULL;
	}
}
1437*b494511aSVenki Rajagopalan
/*ARGSUSED*/
/*
 * Rollback for eib_data_setup_ud_channel(): tear down the vnic's data
 * UD channel.  The channel is flushed first, then we block until every
 * posted tx and rx wqe has been returned to the wqe pool (the
 * completion path signals the cvs as the counters drain), and only then
 * is the channel freed and the channel state reset.
 */
static void
eib_rb_data_setup_ud_channel(eib_t *ss, eib_vnic_t *vnic)
{
	eib_chan_t *chan = vnic->vn_data_chan;
	ibt_status_t ret;

	if (chan == NULL)
		return;

	if (chan->ch_chan) {
		/*
		 * We're trying to tear down this UD channel. Make sure that
		 * we don't attempt to refill (repost) at any point from now on.
		 */
		chan->ch_tear_down = B_TRUE;
		if ((ret = ibt_flush_channel(chan->ch_chan)) != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_data_setup_ud_channel: "
			    "ibt_flush_channel() failed, ret=%d", ret);
		}

		/*
		 * Wait until all posted tx wqes on this channel are back with
		 * the wqe pool.
		 */
		mutex_enter(&chan->ch_tx_lock);
		while (chan->ch_tx_posted > 0)
			cv_wait(&chan->ch_tx_cv, &chan->ch_tx_lock);
		mutex_exit(&chan->ch_tx_lock);

		/*
		 * Wait until all posted rx wqes on this channel are back with
		 * the wqe pool.
		 */
		mutex_enter(&chan->ch_rx_lock);
		while (chan->ch_rx_posted > 0)
			cv_wait(&chan->ch_rx_cv, &chan->ch_rx_lock);
		mutex_exit(&chan->ch_rx_lock);

		/*
		 * Now we're ready to free this channel
		 */
		if ((ret = ibt_free_channel(chan->ch_chan)) != IBT_SUCCESS) {
			EIB_DPRINTF_WARN(ss->ei_instance,
			    "eib_rb_data_setup_ud_channel: "
			    "ibt_free_channel() failed, ret=%d", ret);
		}

		/* Reset per-channel configuration back to its unset state */
		chan->ch_alloc_mp = B_FALSE;
		chan->ch_ip_hdr_align = 0;
		chan->ch_rwqe_bktsz = 0;
		chan->ch_lwm_rwqes = 0;
		chan->ch_max_rwqes = 0;
		chan->ch_max_swqes = 0;
		chan->ch_qpn = 0;
		chan->ch_chan = NULL;
	}
}
1497