1*b494511aSVenki Rajagopalan /*
2*b494511aSVenki Rajagopalan * CDDL HEADER START
3*b494511aSVenki Rajagopalan *
4*b494511aSVenki Rajagopalan * The contents of this file are subject to the terms of the
5*b494511aSVenki Rajagopalan * Common Development and Distribution License (the "License").
6*b494511aSVenki Rajagopalan * You may not use this file except in compliance with the License.
7*b494511aSVenki Rajagopalan *
8*b494511aSVenki Rajagopalan * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*b494511aSVenki Rajagopalan * or http://www.opensolaris.org/os/licensing.
10*b494511aSVenki Rajagopalan * See the License for the specific language governing permissions
11*b494511aSVenki Rajagopalan * and limitations under the License.
12*b494511aSVenki Rajagopalan *
13*b494511aSVenki Rajagopalan * When distributing Covered Code, include this CDDL HEADER in each
14*b494511aSVenki Rajagopalan * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*b494511aSVenki Rajagopalan * If applicable, add the following below this CDDL HEADER, with the
16*b494511aSVenki Rajagopalan * fields enclosed by brackets "[]" replaced with your own identifying
17*b494511aSVenki Rajagopalan * information: Portions Copyright [yyyy] [name of copyright owner]
18*b494511aSVenki Rajagopalan *
19*b494511aSVenki Rajagopalan * CDDL HEADER END
20*b494511aSVenki Rajagopalan */
21*b494511aSVenki Rajagopalan
22*b494511aSVenki Rajagopalan /*
23*b494511aSVenki Rajagopalan * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24*b494511aSVenki Rajagopalan */
25*b494511aSVenki Rajagopalan
26*b494511aSVenki Rajagopalan #include <sys/types.h>
27*b494511aSVenki Rajagopalan #include <sys/ddi.h>
28*b494511aSVenki Rajagopalan #include <sys/sunddi.h>
29*b494511aSVenki Rajagopalan #include <sys/ksynch.h>
30*b494511aSVenki Rajagopalan #include <sys/byteorder.h>
31*b494511aSVenki Rajagopalan
32*b494511aSVenki Rajagopalan #include <sys/ib/clients/eoib/enx_impl.h>
33*b494511aSVenki Rajagopalan
/*
 * Vendor id placed in FIP descriptors: the ASCII bytes "Mellanox".
 * The array holds exactly these eight bytes and has no terminating
 * NUL; users in this file copy FIP_VENDOR_LEN bytes out of it.
 */
const char fip_vendor_mellanox[] = {
	'M', 'e', 'l', 'l', 'a', 'n', 'o', 'x'
};
37*b494511aSVenki Rajagopalan
/*
 * HW/FW workaround
 *
 * While this tunable is non-zero, the descriptor list length found in
 * received packets is not verified.  Experimentation shows that BX
 * does not set the desc list length correctly, so the check is off
 * by default.
 */
int enx_wa_no_desc_list_len = 1;
46*b494511aSVenki Rajagopalan
47*b494511aSVenki Rajagopalan /*
48*b494511aSVenki Rajagopalan * Static function declarations
49*b494511aSVenki Rajagopalan */
50*b494511aSVenki Rajagopalan static int eibnx_fip_make_solicit_pkt(eibnx_thr_info_t *, eibnx_wqe_t *);
51*b494511aSVenki Rajagopalan static int eibnx_fip_send_solicit_pkt(eibnx_thr_info_t *, eibnx_wqe_t *,
52*b494511aSVenki Rajagopalan eibnx_gw_addr_t *);
53*b494511aSVenki Rajagopalan static int eibnx_fip_parse_advt_pkt(uint8_t *, eibnx_gw_msg_t *);
54*b494511aSVenki Rajagopalan static void eibnx_rb_fip_make_solicit_pkt(eibnx_wqe_t *);
55*b494511aSVenki Rajagopalan
56*b494511aSVenki Rajagopalan /*
57*b494511aSVenki Rajagopalan * Prepare and send a solicit multicast packet to the All-EoIB-GWs-GID
58*b494511aSVenki Rajagopalan */
59*b494511aSVenki Rajagopalan int
eibnx_fip_solicit_mcast(eibnx_thr_info_t * info)60*b494511aSVenki Rajagopalan eibnx_fip_solicit_mcast(eibnx_thr_info_t *info)
61*b494511aSVenki Rajagopalan {
62*b494511aSVenki Rajagopalan eibnx_wqe_t *swqe;
63*b494511aSVenki Rajagopalan int ret;
64*b494511aSVenki Rajagopalan
65*b494511aSVenki Rajagopalan if ((swqe = eibnx_acquire_swqe(info, KM_SLEEP)) == NULL)
66*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
67*b494511aSVenki Rajagopalan
68*b494511aSVenki Rajagopalan ret = eibnx_fip_make_solicit_pkt(info, swqe);
69*b494511aSVenki Rajagopalan if (ret != ENX_E_SUCCESS) {
70*b494511aSVenki Rajagopalan eibnx_release_swqe(swqe);
71*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
72*b494511aSVenki Rajagopalan }
73*b494511aSVenki Rajagopalan
74*b494511aSVenki Rajagopalan ret = eibnx_fip_send_solicit_pkt(info, swqe, NULL);
75*b494511aSVenki Rajagopalan if (ret != ENX_E_SUCCESS) {
76*b494511aSVenki Rajagopalan eibnx_rb_fip_make_solicit_pkt(swqe);
77*b494511aSVenki Rajagopalan eibnx_release_swqe(swqe);
78*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
79*b494511aSVenki Rajagopalan }
80*b494511aSVenki Rajagopalan
81*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
82*b494511aSVenki Rajagopalan }
83*b494511aSVenki Rajagopalan
84*b494511aSVenki Rajagopalan /*
85*b494511aSVenki Rajagopalan * Go through the list of already discovered gateways and send
86*b494511aSVenki Rajagopalan * a unicast solicitation to each gateway. This is required by
87*b494511aSVenki Rajagopalan * the EoIB specification ostensibly to receive updated
88*b494511aSVenki Rajagopalan * advertisements.
89*b494511aSVenki Rajagopalan */
90*b494511aSVenki Rajagopalan int
eibnx_fip_solicit_ucast(eibnx_thr_info_t * info,clock_t * solicit_period_ticks)91*b494511aSVenki Rajagopalan eibnx_fip_solicit_ucast(eibnx_thr_info_t *info, clock_t *solicit_period_ticks)
92*b494511aSVenki Rajagopalan {
93*b494511aSVenki Rajagopalan eibnx_gw_info_t *gw;
94*b494511aSVenki Rajagopalan eibnx_wqe_t *swqe;
95*b494511aSVenki Rajagopalan clock_t min_solicit_period_msec;
96*b494511aSVenki Rajagopalan int ret;
97*b494511aSVenki Rajagopalan
98*b494511aSVenki Rajagopalan /*
99*b494511aSVenki Rajagopalan * We want to read the gwlist and send a unicast to each
100*b494511aSVenki Rajagopalan * destination. Now, the only places where the gw list pointers
101*b494511aSVenki Rajagopalan * are updated are when we're adding a new gw item to the list
102*b494511aSVenki Rajagopalan * and when the list is being torn down and freed.
103*b494511aSVenki Rajagopalan *
104*b494511aSVenki Rajagopalan * Since new GWs are always inserted at the head of the list,
105*b494511aSVenki Rajagopalan * we're guaranteed that any tail subchain of the list will
106*b494511aSVenki Rajagopalan * not change by the addition of a new gw item coming into
107*b494511aSVenki Rajagopalan * the list.
108*b494511aSVenki Rajagopalan *
109*b494511aSVenki Rajagopalan * Also, since the gw list is torn down only by the port-monitor
110*b494511aSVenki Rajagopalan * thread (i.e. ourselves), we are also protected against the
111*b494511aSVenki Rajagopalan * list itself going away while we're here.
112*b494511aSVenki Rajagopalan *
113*b494511aSVenki Rajagopalan * Given these two constraints, we can safely read the list
114*b494511aSVenki Rajagopalan * of gateways without the gw list lock in this routine.
115*b494511aSVenki Rajagopalan */
116*b494511aSVenki Rajagopalan min_solicit_period_msec = drv_hztousec(*solicit_period_ticks) / 1000;
117*b494511aSVenki Rajagopalan for (gw = info->ti_gw; gw; gw = gw->gw_next) {
118*b494511aSVenki Rajagopalan
119*b494511aSVenki Rajagopalan if (eibnx_is_gw_dead(gw))
120*b494511aSVenki Rajagopalan continue;
121*b494511aSVenki Rajagopalan
122*b494511aSVenki Rajagopalan swqe = gw->gw_swqe;
123*b494511aSVenki Rajagopalan ASSERT(swqe != NULL);
124*b494511aSVenki Rajagopalan
125*b494511aSVenki Rajagopalan mutex_enter(&swqe->qe_lock);
126*b494511aSVenki Rajagopalan if (swqe->qe_type != ENX_QETYP_SWQE) {
127*b494511aSVenki Rajagopalan ENX_DPRINTF_DEBUG("eibnx_fip_solicit_ucast: "
128*b494511aSVenki Rajagopalan "gw wqe type (0x%lx) indicates this is not an "
129*b494511aSVenki Rajagopalan "swqe!, cannot send solicitation to gw",
130*b494511aSVenki Rajagopalan swqe->qe_type);
131*b494511aSVenki Rajagopalan mutex_exit(&swqe->qe_lock);
132*b494511aSVenki Rajagopalan continue;
133*b494511aSVenki Rajagopalan } else if ((swqe->qe_flags & ENX_QEFL_INUSE) !=
134*b494511aSVenki Rajagopalan ENX_QEFL_INUSE) {
135*b494511aSVenki Rajagopalan ENX_DPRINTF_DEBUG("eibnx_fip_solicit_ucast: "
136*b494511aSVenki Rajagopalan "gw swqe flags (0x%lx) indicate swqe is free!, "
137*b494511aSVenki Rajagopalan "cannot send solicitation to gw", swqe->qe_flags);
138*b494511aSVenki Rajagopalan mutex_exit(&swqe->qe_lock);
139*b494511aSVenki Rajagopalan continue;
140*b494511aSVenki Rajagopalan } else if ((swqe->qe_flags & ENX_QEFL_POSTED) ==
141*b494511aSVenki Rajagopalan ENX_QEFL_POSTED) {
142*b494511aSVenki Rajagopalan ENX_DPRINTF_DEBUG("eibnx_fip_solicit_ucast: gw swqe "
143*b494511aSVenki Rajagopalan "flags (0x%lx) indicate swqe is still with HCA!, "
144*b494511aSVenki Rajagopalan "cannot send solicitation to gw", swqe->qe_flags);
145*b494511aSVenki Rajagopalan mutex_exit(&swqe->qe_lock);
146*b494511aSVenki Rajagopalan continue;
147*b494511aSVenki Rajagopalan }
148*b494511aSVenki Rajagopalan mutex_exit(&swqe->qe_lock);
149*b494511aSVenki Rajagopalan
150*b494511aSVenki Rajagopalan /*
151*b494511aSVenki Rajagopalan * EoIB spec requires that each host send solicitation
152*b494511aSVenki Rajagopalan * to discovered gateways atleast every 4 * GW_ADV_PERIOD.
153*b494511aSVenki Rajagopalan * We make sure we send a solicitation to all gateways
154*b494511aSVenki Rajagopalan * every 4 * GW_ADV_PERIOD of the smallest value of
155*b494511aSVenki Rajagopalan * GW_ADV_PERIOD that we have in our gw list.
156*b494511aSVenki Rajagopalan */
157*b494511aSVenki Rajagopalan if ((gw->gw_adv_period * 4) < min_solicit_period_msec)
158*b494511aSVenki Rajagopalan min_solicit_period_msec = gw->gw_adv_period * 4;
159*b494511aSVenki Rajagopalan
160*b494511aSVenki Rajagopalan ret = eibnx_fip_make_solicit_pkt(info, swqe);
161*b494511aSVenki Rajagopalan if (ret != ENX_E_SUCCESS)
162*b494511aSVenki Rajagopalan continue;
163*b494511aSVenki Rajagopalan
164*b494511aSVenki Rajagopalan ret = eibnx_fip_send_solicit_pkt(info, swqe, &gw->gw_addr);
165*b494511aSVenki Rajagopalan if (ret != ENX_E_SUCCESS)
166*b494511aSVenki Rajagopalan eibnx_rb_fip_make_solicit_pkt(swqe);
167*b494511aSVenki Rajagopalan }
168*b494511aSVenki Rajagopalan
169*b494511aSVenki Rajagopalan *solicit_period_ticks = drv_usectohz(min_solicit_period_msec * 1000);
170*b494511aSVenki Rajagopalan
171*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
172*b494511aSVenki Rajagopalan }
173*b494511aSVenki Rajagopalan
174*b494511aSVenki Rajagopalan /*
175*b494511aSVenki Rajagopalan * Given a send wqe and an eibnx_thr_info_t pointer, fill in the
176*b494511aSVenki Rajagopalan * send buffer with a solicit packet in the network byte order.
177*b494511aSVenki Rajagopalan */
178*b494511aSVenki Rajagopalan static int
eibnx_fip_make_solicit_pkt(eibnx_thr_info_t * info,eibnx_wqe_t * swqe)179*b494511aSVenki Rajagopalan eibnx_fip_make_solicit_pkt(eibnx_thr_info_t *info, eibnx_wqe_t *swqe)
180*b494511aSVenki Rajagopalan {
181*b494511aSVenki Rajagopalan fip_solicit_t *solicit;
182*b494511aSVenki Rajagopalan fip_proto_t *proto;
183*b494511aSVenki Rajagopalan fip_basic_hdr_t *hdr;
184*b494511aSVenki Rajagopalan fip_desc_iba_t *iba;
185*b494511aSVenki Rajagopalan ib_gid_t port_gid;
186*b494511aSVenki Rajagopalan ib_guid_t port_guid;
187*b494511aSVenki Rajagopalan
188*b494511aSVenki Rajagopalan uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
189*b494511aSVenki Rajagopalan uint_t pktsz = swqe->qe_sgl.ds_len;
190*b494511aSVenki Rajagopalan uint_t solicit_sz = sizeof (fip_solicit_t);
191*b494511aSVenki Rajagopalan
192*b494511aSVenki Rajagopalan if (pktsz < solicit_sz) {
193*b494511aSVenki Rajagopalan ENX_DPRINTF_ERR("swqe bufsize too small for pkt, "
194*b494511aSVenki Rajagopalan "pktsz=%x < expsz=%x", pktsz, solicit_sz);
195*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
196*b494511aSVenki Rajagopalan }
197*b494511aSVenki Rajagopalan
198*b494511aSVenki Rajagopalan /*
199*b494511aSVenki Rajagopalan * Lint complains that there may be an alignment issue here,
200*b494511aSVenki Rajagopalan * but we know that the "pkt" is atleast double-word aligned,
201*b494511aSVenki Rajagopalan * so it's ok.
202*b494511aSVenki Rajagopalan */
203*b494511aSVenki Rajagopalan solicit = (fip_solicit_t *)pkt;
204*b494511aSVenki Rajagopalan
205*b494511aSVenki Rajagopalan /*
206*b494511aSVenki Rajagopalan * Fill in the FIP protocol version
207*b494511aSVenki Rajagopalan */
208*b494511aSVenki Rajagopalan proto = &solicit->sl_proto_version;
209*b494511aSVenki Rajagopalan proto->pr_version = FIP_PROTO_VERSION;
210*b494511aSVenki Rajagopalan
211*b494511aSVenki Rajagopalan /*
212*b494511aSVenki Rajagopalan * Fill in the basic header
213*b494511aSVenki Rajagopalan */
214*b494511aSVenki Rajagopalan hdr = &solicit->sl_fip_hdr;
215*b494511aSVenki Rajagopalan hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
216*b494511aSVenki Rajagopalan hdr->hd_subcode = FIP_SUBCODE_H_SOLICIT;
217*b494511aSVenki Rajagopalan hdr->hd_desc_list_len = htons((solicit_sz >> 2) - 2);
218*b494511aSVenki Rajagopalan hdr->hd_flags = 0;
219*b494511aSVenki Rajagopalan hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
220*b494511aSVenki Rajagopalan hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
221*b494511aSVenki Rajagopalan bcopy(fip_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
222*b494511aSVenki Rajagopalan
223*b494511aSVenki Rajagopalan /*
224*b494511aSVenki Rajagopalan * Fill in the Infiniband Address descriptor
225*b494511aSVenki Rajagopalan */
226*b494511aSVenki Rajagopalan iba = &solicit->sl_iba;
227*b494511aSVenki Rajagopalan iba->ia_type = FIP_DESC_TYPE_IBA;
228*b494511aSVenki Rajagopalan iba->ia_len = FIP_DESC_LEN_IBA;
229*b494511aSVenki Rajagopalan bcopy(fip_vendor_mellanox, iba->ia_vendor_id, FIP_VENDOR_LEN);
230*b494511aSVenki Rajagopalan iba->ia_qpn = htonl(info->ti_qpn);
231*b494511aSVenki Rajagopalan iba->ia_sl_portid = 0;
232*b494511aSVenki Rajagopalan iba->ia_lid = htons(info->ti_pi->p_base_lid);
233*b494511aSVenki Rajagopalan port_gid = info->ti_pi->p_sgid_tbl[0];
234*b494511aSVenki Rajagopalan port_guid = htonll(port_gid.gid_guid);
235*b494511aSVenki Rajagopalan bcopy(&port_guid, iba->ia_guid, FIP_GUID_LEN);
236*b494511aSVenki Rajagopalan
237*b494511aSVenki Rajagopalan /*
238*b494511aSVenki Rajagopalan * Adjust the ds_len in the sgl to indicate the size of the
239*b494511aSVenki Rajagopalan * solicit pkt before returning
240*b494511aSVenki Rajagopalan */
241*b494511aSVenki Rajagopalan swqe->qe_sgl.ds_len = solicit_sz;
242*b494511aSVenki Rajagopalan
243*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
244*b494511aSVenki Rajagopalan }
245*b494511aSVenki Rajagopalan
246*b494511aSVenki Rajagopalan static int
eibnx_setup_ud_dest(eibnx_thr_info_t * info,eibnx_wqe_t * swqe,eibnx_gw_addr_t * gw_addr)247*b494511aSVenki Rajagopalan eibnx_setup_ud_dest(eibnx_thr_info_t *info, eibnx_wqe_t *swqe,
248*b494511aSVenki Rajagopalan eibnx_gw_addr_t *gw_addr)
249*b494511aSVenki Rajagopalan {
250*b494511aSVenki Rajagopalan eibnx_t *ss = enx_global_ss;
251*b494511aSVenki Rajagopalan ibt_path_attr_t attr;
252*b494511aSVenki Rajagopalan ibt_path_info_t path;
253*b494511aSVenki Rajagopalan ibt_status_t ret;
254*b494511aSVenki Rajagopalan
255*b494511aSVenki Rajagopalan /*
256*b494511aSVenki Rajagopalan * If this a multicast send, we'll have the gateway address NULL,
257*b494511aSVenki Rajagopalan * and we'll need to modify the UD destination to send to the
258*b494511aSVenki Rajagopalan * solicit mcg.
259*b494511aSVenki Rajagopalan */
260*b494511aSVenki Rajagopalan if (gw_addr == NULL) {
261*b494511aSVenki Rajagopalan ret = ibt_modify_ud_dest(swqe->qe_wr.send.wr.ud.udwr_dest,
262*b494511aSVenki Rajagopalan info->ti_solicit_mcg->mc_qkey, IB_MC_QPN,
263*b494511aSVenki Rajagopalan &info->ti_solicit_mcg->mc_adds_vect);
264*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
265*b494511aSVenki Rajagopalan ENX_DPRINTF_ERR("ibt_modify_ud_dest() failed with "
266*b494511aSVenki Rajagopalan "ret=%d, qkey=%x, qpn=%x", ret,
267*b494511aSVenki Rajagopalan info->ti_solicit_mcg->mc_qkey, IB_MC_QPN);
268*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
269*b494511aSVenki Rajagopalan }
270*b494511aSVenki Rajagopalan
271*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
272*b494511aSVenki Rajagopalan }
273*b494511aSVenki Rajagopalan
274*b494511aSVenki Rajagopalan /*
275*b494511aSVenki Rajagopalan * If this is a unicast send, but we already have the gw address
276*b494511aSVenki Rajagopalan * vector, the ud destination handle has already been set up for
277*b494511aSVenki Rajagopalan * this gateway, so we can return.
278*b494511aSVenki Rajagopalan */
279*b494511aSVenki Rajagopalan if (gw_addr->ga_vect)
280*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
281*b494511aSVenki Rajagopalan
282*b494511aSVenki Rajagopalan /*
283*b494511aSVenki Rajagopalan * Get the reversible path information for this gateway
284*b494511aSVenki Rajagopalan */
285*b494511aSVenki Rajagopalan bzero(&attr, sizeof (ibt_path_info_t));
286*b494511aSVenki Rajagopalan attr.pa_dgids = &gw_addr->ga_gid;
287*b494511aSVenki Rajagopalan attr.pa_num_dgids = 1;
288*b494511aSVenki Rajagopalan attr.pa_sgid = info->ti_pi->p_sgid_tbl[0];
289*b494511aSVenki Rajagopalan attr.pa_pkey = gw_addr->ga_pkey;
290*b494511aSVenki Rajagopalan
291*b494511aSVenki Rajagopalan bzero(&path, sizeof (ibt_path_info_t));
292*b494511aSVenki Rajagopalan ret = ibt_get_paths(ss->nx_ibt_hdl, IBT_PATH_PKEY,
293*b494511aSVenki Rajagopalan &attr, 1, &path, NULL);
294*b494511aSVenki Rajagopalan if ((ret != IBT_SUCCESS) || (path.pi_hca_guid == 0)) {
295*b494511aSVenki Rajagopalan ENX_DPRINTF_ERR("ibt_get_paths() failed with "
296*b494511aSVenki Rajagopalan "ret=%d, gid_prefix=%llx, gid_guid=%llx", ret,
297*b494511aSVenki Rajagopalan gw_addr->ga_gid.gid_prefix, gw_addr->ga_gid.gid_guid);
298*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
299*b494511aSVenki Rajagopalan }
300*b494511aSVenki Rajagopalan
301*b494511aSVenki Rajagopalan /*
302*b494511aSVenki Rajagopalan * And save the address vector
303*b494511aSVenki Rajagopalan */
304*b494511aSVenki Rajagopalan gw_addr->ga_vect = kmem_zalloc(sizeof (ibt_adds_vect_t), KM_SLEEP);
305*b494511aSVenki Rajagopalan bcopy(&path.pi_prim_cep_path.cep_adds_vect, gw_addr->ga_vect,
306*b494511aSVenki Rajagopalan sizeof (ibt_adds_vect_t));
307*b494511aSVenki Rajagopalan
308*b494511aSVenki Rajagopalan /*
309*b494511aSVenki Rajagopalan * Modify the UD destination handle on this swqe entry to address
310*b494511aSVenki Rajagopalan * this gateway
311*b494511aSVenki Rajagopalan */
312*b494511aSVenki Rajagopalan ret = ibt_modify_ud_dest(swqe->qe_wr.send.wr.ud.udwr_dest,
313*b494511aSVenki Rajagopalan gw_addr->ga_qkey, gw_addr->ga_qpn, gw_addr->ga_vect);
314*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
315*b494511aSVenki Rajagopalan ENX_DPRINTF_ERR("ibt_modify_ud_dest() failed with "
316*b494511aSVenki Rajagopalan "ret=%d, qkey=%x, qpn=%x", ret, gw_addr->ga_qkey,
317*b494511aSVenki Rajagopalan gw_addr->ga_qpn);
318*b494511aSVenki Rajagopalan kmem_free(gw_addr->ga_vect, sizeof (ibt_adds_vect_t));
319*b494511aSVenki Rajagopalan gw_addr->ga_vect = NULL;
320*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
321*b494511aSVenki Rajagopalan }
322*b494511aSVenki Rajagopalan
323*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
324*b494511aSVenki Rajagopalan }
325*b494511aSVenki Rajagopalan
326*b494511aSVenki Rajagopalan /*
327*b494511aSVenki Rajagopalan * Send a solicit packet to the appropriate destination: if the
328*b494511aSVenki Rajagopalan * destination gw addr is specified, send a unicast message to it;
329*b494511aSVenki Rajagopalan * if not, send a multicast using the solicit mcg address.
330*b494511aSVenki Rajagopalan */
331*b494511aSVenki Rajagopalan static int
eibnx_fip_send_solicit_pkt(eibnx_thr_info_t * info,eibnx_wqe_t * swqe,eibnx_gw_addr_t * gw_addr)332*b494511aSVenki Rajagopalan eibnx_fip_send_solicit_pkt(eibnx_thr_info_t *info, eibnx_wqe_t *swqe,
333*b494511aSVenki Rajagopalan eibnx_gw_addr_t *gw_addr)
334*b494511aSVenki Rajagopalan {
335*b494511aSVenki Rajagopalan ibt_status_t ret;
336*b494511aSVenki Rajagopalan
337*b494511aSVenki Rajagopalan if (eibnx_setup_ud_dest(info, swqe, gw_addr) != ENX_E_SUCCESS)
338*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
339*b494511aSVenki Rajagopalan
340*b494511aSVenki Rajagopalan mutex_enter(&swqe->qe_lock);
341*b494511aSVenki Rajagopalan
342*b494511aSVenki Rajagopalan /*
343*b494511aSVenki Rajagopalan * Note that if the post send fails, we don't really need to undo
344*b494511aSVenki Rajagopalan * anything we did in setting up the ud destination; we can always
345*b494511aSVenki Rajagopalan * use it for the next time.
346*b494511aSVenki Rajagopalan */
347*b494511aSVenki Rajagopalan ret = ibt_post_send(info->ti_chan, &(swqe->qe_wr.send), 1, NULL);
348*b494511aSVenki Rajagopalan if (ret != IBT_SUCCESS) {
349*b494511aSVenki Rajagopalan mutex_exit(&swqe->qe_lock);
350*b494511aSVenki Rajagopalan ENX_DPRINTF_ERR("ibt_post_send() failed for solicit, "
351*b494511aSVenki Rajagopalan "ret=%d", ret);
352*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
353*b494511aSVenki Rajagopalan }
354*b494511aSVenki Rajagopalan
355*b494511aSVenki Rajagopalan /*
356*b494511aSVenki Rajagopalan * Set the 'posted' flag for the send wqe. If this is an unicast
357*b494511aSVenki Rajagopalan * send, the wqe is attached to a specific gw entry and we should
358*b494511aSVenki Rajagopalan * not release the wqe back to the pool on the send completion.
359*b494511aSVenki Rajagopalan */
360*b494511aSVenki Rajagopalan swqe->qe_flags |= ENX_QEFL_POSTED;
361*b494511aSVenki Rajagopalan if (gw_addr == NULL) {
362*b494511aSVenki Rajagopalan swqe->qe_flags |= ENX_QEFL_RELONCOMP;
363*b494511aSVenki Rajagopalan info->ti_mcast_done = 1;
364*b494511aSVenki Rajagopalan }
365*b494511aSVenki Rajagopalan
366*b494511aSVenki Rajagopalan mutex_exit(&swqe->qe_lock);
367*b494511aSVenki Rajagopalan
368*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
369*b494511aSVenki Rajagopalan }
370*b494511aSVenki Rajagopalan
371*b494511aSVenki Rajagopalan /*
372*b494511aSVenki Rajagopalan * Parse a received packet from the gateway into the
373*b494511aSVenki Rajagopalan * eibnx_gw_msg_t argument. Note that at this point, this
374*b494511aSVenki Rajagopalan * driver only expects to receive advertisements from the
375*b494511aSVenki Rajagopalan * GW, nothing else.
376*b494511aSVenki Rajagopalan */
377*b494511aSVenki Rajagopalan int
eibnx_fip_parse_pkt(uint8_t * pkt,eibnx_gw_msg_t * msg)378*b494511aSVenki Rajagopalan eibnx_fip_parse_pkt(uint8_t *pkt, eibnx_gw_msg_t *msg)
379*b494511aSVenki Rajagopalan {
380*b494511aSVenki Rajagopalan fip_basic_hdr_t *hdr;
381*b494511aSVenki Rajagopalan uint16_t opcode;
382*b494511aSVenki Rajagopalan uint8_t subcode;
383*b494511aSVenki Rajagopalan int ret = ENX_E_FAILURE;
384*b494511aSVenki Rajagopalan
385*b494511aSVenki Rajagopalan /*
386*b494511aSVenki Rajagopalan * Lint complains about potential alignment problem here,
387*b494511aSVenki Rajagopalan * but the fip_* structures are all packed and each of them
388*b494511aSVenki Rajagopalan * is aligned on a word boundary, so we're ok.
389*b494511aSVenki Rajagopalan */
390*b494511aSVenki Rajagopalan hdr = (fip_basic_hdr_t *)(pkt + sizeof (fip_proto_t));
391*b494511aSVenki Rajagopalan
392*b494511aSVenki Rajagopalan /*
393*b494511aSVenki Rajagopalan * Verify that the opcode is EoIB
394*b494511aSVenki Rajagopalan */
395*b494511aSVenki Rajagopalan if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
396*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("unsupported opcode (%x) found in "
397*b494511aSVenki Rajagopalan "gw advertisement, ignoring", opcode);
398*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
399*b494511aSVenki Rajagopalan }
400*b494511aSVenki Rajagopalan
401*b494511aSVenki Rajagopalan /*
402*b494511aSVenki Rajagopalan * We only handle GW advertisements in the eibnx driver code. However,
403*b494511aSVenki Rajagopalan * the BridgeX gateway software currently sends login acknowledgements
404*b494511aSVenki Rajagopalan * to the one who did the solicitation instead of the one who actually
405*b494511aSVenki Rajagopalan * made the login request, so we need to do something about this as
406*b494511aSVenki Rajagopalan * well.
407*b494511aSVenki Rajagopalan */
408*b494511aSVenki Rajagopalan subcode = hdr->hd_subcode;
409*b494511aSVenki Rajagopalan switch (subcode) {
410*b494511aSVenki Rajagopalan case FIP_SUBCODE_G_ADVERTISE:
411*b494511aSVenki Rajagopalan ret = eibnx_fip_parse_advt_pkt(pkt, msg);
412*b494511aSVenki Rajagopalan break;
413*b494511aSVenki Rajagopalan
414*b494511aSVenki Rajagopalan case FIP_SUBCODE_G_VNIC_LOGIN_ACK:
415*b494511aSVenki Rajagopalan msg->gm_type = FIP_VNIC_LOGIN_ACK;
416*b494511aSVenki Rajagopalan ret = ENX_E_SUCCESS;
417*b494511aSVenki Rajagopalan break;
418*b494511aSVenki Rajagopalan
419*b494511aSVenki Rajagopalan default:
420*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("unsupported subcode (%x) found in "
421*b494511aSVenki Rajagopalan "gw advertisement, ignoring", subcode);
422*b494511aSVenki Rajagopalan ret = ENX_E_FAILURE;
423*b494511aSVenki Rajagopalan break;
424*b494511aSVenki Rajagopalan }
425*b494511aSVenki Rajagopalan
426*b494511aSVenki Rajagopalan return (ret);
427*b494511aSVenki Rajagopalan }
428*b494511aSVenki Rajagopalan
429*b494511aSVenki Rajagopalan /*
430*b494511aSVenki Rajagopalan * Parse and validate a packet known to be an advertisement from
431*b494511aSVenki Rajagopalan * the GW.
432*b494511aSVenki Rajagopalan */
433*b494511aSVenki Rajagopalan static int
eibnx_fip_parse_advt_pkt(uint8_t * pkt,eibnx_gw_msg_t * msg)434*b494511aSVenki Rajagopalan eibnx_fip_parse_advt_pkt(uint8_t *pkt, eibnx_gw_msg_t *msg)
435*b494511aSVenki Rajagopalan {
436*b494511aSVenki Rajagopalan fip_advertise_t *advertise;
437*b494511aSVenki Rajagopalan fip_basic_hdr_t *hdr;
438*b494511aSVenki Rajagopalan fip_desc_iba_t *desc_iba;
439*b494511aSVenki Rajagopalan fip_desc_gwinfo_t *desc_gwinfo;
440*b494511aSVenki Rajagopalan fip_desc_gwid_t *desc_gwid;
441*b494511aSVenki Rajagopalan fip_desc_keepalive_t *desc_ka;
442*b494511aSVenki Rajagopalan eibnx_gw_info_t *gwi;
443*b494511aSVenki Rajagopalan ib_guid_t guid;
444*b494511aSVenki Rajagopalan uint16_t rss_qpn_num_net_vnics;
445*b494511aSVenki Rajagopalan uint16_t sl_portid;
446*b494511aSVenki Rajagopalan uint16_t flags;
447*b494511aSVenki Rajagopalan
448*b494511aSVenki Rajagopalan /*
449*b494511aSVenki Rajagopalan * Lint complains about potential alignment problem here,
450*b494511aSVenki Rajagopalan * but we know that "pkt" is always atleast double-word
451*b494511aSVenki Rajagopalan * aligned when it's passed to us, so we're ok.
452*b494511aSVenki Rajagopalan */
453*b494511aSVenki Rajagopalan advertise = (fip_advertise_t *)pkt;
454*b494511aSVenki Rajagopalan
455*b494511aSVenki Rajagopalan /*
456*b494511aSVenki Rajagopalan * Verify if the descriptor list length in the received
457*b494511aSVenki Rajagopalan * packet is valid. Currently disabled.
458*b494511aSVenki Rajagopalan *
459*b494511aSVenki Rajagopalan * Experimentation shows that BX doesn't set the desc list
460*b494511aSVenki Rajagopalan * length correctly, so we also simply ignore it and move
461*b494511aSVenki Rajagopalan * on. If and when BX fixes this problem, we'll need to
462*b494511aSVenki Rajagopalan * enable the warning+failure below.
463*b494511aSVenki Rajagopalan */
464*b494511aSVenki Rajagopalan hdr = &(advertise->ad_fip_header);
465*b494511aSVenki Rajagopalan if (!enx_wa_no_desc_list_len) {
466*b494511aSVenki Rajagopalan uint_t pkt_data_sz;
467*b494511aSVenki Rajagopalan
468*b494511aSVenki Rajagopalan pkt_data_sz = (ntohs(hdr->hd_desc_list_len) + 2) << 2;
469*b494511aSVenki Rajagopalan if (pkt_data_sz < sizeof (fip_advertise_t)) {
470*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("advertisement from gw too small; "
471*b494511aSVenki Rajagopalan "expected %x, got %x", sizeof (fip_advertise_t),
472*b494511aSVenki Rajagopalan pkt_data_sz);
473*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
474*b494511aSVenki Rajagopalan }
475*b494511aSVenki Rajagopalan }
476*b494511aSVenki Rajagopalan
477*b494511aSVenki Rajagopalan /*
478*b494511aSVenki Rajagopalan * Validate all the header and descriptor types and lengths
479*b494511aSVenki Rajagopalan */
480*b494511aSVenki Rajagopalan
481*b494511aSVenki Rajagopalan if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
482*b494511aSVenki Rajagopalan hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
483*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("invalid type/len in fip basic header; "
484*b494511aSVenki Rajagopalan "expected (%x,%x), got (%x,%x)", FIP_DESC_TYPE_VENDOR_ID,
485*b494511aSVenki Rajagopalan FIP_DESC_LEN_VENDOR_ID, hdr->hd_type, hdr->hd_len);
486*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
487*b494511aSVenki Rajagopalan }
488*b494511aSVenki Rajagopalan
489*b494511aSVenki Rajagopalan desc_iba = &(advertise->ad_iba);
490*b494511aSVenki Rajagopalan if (desc_iba->ia_type != FIP_DESC_TYPE_IBA ||
491*b494511aSVenki Rajagopalan desc_iba->ia_len != FIP_DESC_LEN_IBA) {
492*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("invalid type/len in fip iba desc; "
493*b494511aSVenki Rajagopalan "expected (%x,%x), got (%x,%x)", FIP_DESC_TYPE_IBA,
494*b494511aSVenki Rajagopalan FIP_DESC_LEN_IBA, desc_iba->ia_type, desc_iba->ia_len);
495*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
496*b494511aSVenki Rajagopalan }
497*b494511aSVenki Rajagopalan
498*b494511aSVenki Rajagopalan desc_gwinfo = &(advertise->ad_gwinfo);
499*b494511aSVenki Rajagopalan if (desc_gwinfo->gi_type != FIP_DESC_TYPE_EOIB_GW_INFO ||
500*b494511aSVenki Rajagopalan desc_gwinfo->gi_len != FIP_DESC_LEN_EOIB_GW_INFO) {
501*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("invalid type/len in fip gwinfo desc; "
502*b494511aSVenki Rajagopalan "expected (%x,%x), got (%x,%x)",
503*b494511aSVenki Rajagopalan FIP_DESC_TYPE_EOIB_GW_INFO, FIP_DESC_LEN_EOIB_GW_INFO,
504*b494511aSVenki Rajagopalan desc_gwinfo->gi_type, desc_gwinfo->gi_len);
505*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
506*b494511aSVenki Rajagopalan }
507*b494511aSVenki Rajagopalan
508*b494511aSVenki Rajagopalan desc_gwid = &(advertise->ad_gwid);
509*b494511aSVenki Rajagopalan if (desc_gwid->id_type != FIP_DESC_TYPE_GW_ID ||
510*b494511aSVenki Rajagopalan desc_gwid->id_len != FIP_DESC_LEN_GW_ID) {
511*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("invalid type/len in fip gwid desc; "
512*b494511aSVenki Rajagopalan "expected (%x,%x), got (%x,%x)",
513*b494511aSVenki Rajagopalan FIP_DESC_TYPE_GW_ID, FIP_DESC_LEN_GW_ID,
514*b494511aSVenki Rajagopalan desc_gwid->id_type, desc_gwid->id_len);
515*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
516*b494511aSVenki Rajagopalan }
517*b494511aSVenki Rajagopalan
518*b494511aSVenki Rajagopalan desc_ka = &(advertise->ad_keep_alive);
519*b494511aSVenki Rajagopalan if (desc_ka->ka_type != FIP_DESC_TYPE_KEEP_ALIVE ||
520*b494511aSVenki Rajagopalan desc_ka->ka_len != FIP_DESC_LEN_KEEP_ALIVE) {
521*b494511aSVenki Rajagopalan ENX_DPRINTF_WARN("invalid type/len in fip ka desc; "
522*b494511aSVenki Rajagopalan "expected (%x,%x), got (%x,%x)",
523*b494511aSVenki Rajagopalan FIP_DESC_TYPE_KEEP_ALIVE, FIP_DESC_LEN_KEEP_ALIVE,
524*b494511aSVenki Rajagopalan desc_ka->ka_type, desc_ka->ka_len);
525*b494511aSVenki Rajagopalan return (ENX_E_FAILURE);
526*b494511aSVenki Rajagopalan }
527*b494511aSVenki Rajagopalan
528*b494511aSVenki Rajagopalan /*
529*b494511aSVenki Rajagopalan * Record if the gw is available for login ('A' bit in the header)
530*b494511aSVenki Rajagopalan */
531*b494511aSVenki Rajagopalan flags = ntohs(hdr->hd_flags);
532*b494511aSVenki Rajagopalan gwi = &(msg->u.gm_info);
533*b494511aSVenki Rajagopalan gwi->gw_flag_available = (flags & FIP_BHFLAG_GWAVAIL) ? 1 : 0;
534*b494511aSVenki Rajagopalan
535*b494511aSVenki Rajagopalan /*
536*b494511aSVenki Rajagopalan * Record if this was in response to a solicit request (unicast
537*b494511aSVenki Rajagopalan * advertisement) or not ('S' bit in the header)
538*b494511aSVenki Rajagopalan */
539*b494511aSVenki Rajagopalan gwi->gw_flag_ucast_advt = (flags & FIP_BHFLAG_SLCTMSG) ? 1 : 0;
540*b494511aSVenki Rajagopalan msg->gm_type = (gwi->gw_flag_ucast_advt) ?
541*b494511aSVenki Rajagopalan FIP_GW_ADVERTISE_UCAST : FIP_GW_ADVERTISE_MCAST;
542*b494511aSVenki Rajagopalan
543*b494511aSVenki Rajagopalan /*
544*b494511aSVenki Rajagopalan * Record all info from the Infiniband Address descriptor
545*b494511aSVenki Rajagopalan */
546*b494511aSVenki Rajagopalan gwi->gw_ctrl_qpn = (ntohl(desc_iba->ia_qpn) & FIP_IBA_QPN_MASK);
547*b494511aSVenki Rajagopalan
548*b494511aSVenki Rajagopalan sl_portid = ntohs(desc_iba->ia_sl_portid);
549*b494511aSVenki Rajagopalan gwi->gw_portid = (sl_portid & FIP_IBA_PORTID_MASK);
550*b494511aSVenki Rajagopalan gwi->gw_sl = ((sl_portid & FIP_IBA_SL_MASK) >> FIP_IBA_SL_SHIFT);
551*b494511aSVenki Rajagopalan
552*b494511aSVenki Rajagopalan gwi->gw_lid = ntohs(desc_iba->ia_lid);
553*b494511aSVenki Rajagopalan
554*b494511aSVenki Rajagopalan bcopy(desc_iba->ia_guid, &guid, sizeof (ib_guid_t));
555*b494511aSVenki Rajagopalan gwi->gw_guid = ntohll(guid);
556*b494511aSVenki Rajagopalan
557*b494511aSVenki Rajagopalan /*
558*b494511aSVenki Rajagopalan * Record all info from the EoIB GW Information descriptor
559*b494511aSVenki Rajagopalan */
560*b494511aSVenki Rajagopalan if (desc_gwinfo->gi_flags & FIP_GWI_HOST_ADMIND_VNICS_MASK)
561*b494511aSVenki Rajagopalan gwi->gw_is_host_adm_vnics = 1;
562*b494511aSVenki Rajagopalan else
563*b494511aSVenki Rajagopalan gwi->gw_is_host_adm_vnics = 0;
564*b494511aSVenki Rajagopalan
565*b494511aSVenki Rajagopalan rss_qpn_num_net_vnics = ntohs(desc_gwinfo->gi_rss_qpn_num_net_vnics);
566*b494511aSVenki Rajagopalan gwi->gw_num_net_vnics = (rss_qpn_num_net_vnics &
567*b494511aSVenki Rajagopalan FIP_GWI_NUM_NET_VNICS_MASK);
568*b494511aSVenki Rajagopalan gwi->gw_n_rss_qpn = ((rss_qpn_num_net_vnics &
569*b494511aSVenki Rajagopalan FIP_GWI_RSS_QPN_MASK) >> FIP_GWI_RSS_QPN_SHIFT);
570*b494511aSVenki Rajagopalan bcopy(desc_gwinfo->gi_vendor_id, gwi->gw_vendor_id, FIP_VENDOR_LEN);
571*b494511aSVenki Rajagopalan (gwi->gw_vendor_id)[FIP_VENDOR_LEN] = '\0';
572*b494511aSVenki Rajagopalan
573*b494511aSVenki Rajagopalan /*
574*b494511aSVenki Rajagopalan * Record all info from the Gateway Identifier descriptor
575*b494511aSVenki Rajagopalan */
576*b494511aSVenki Rajagopalan bcopy(desc_gwid->id_guid, &guid, sizeof (ib_guid_t));
577*b494511aSVenki Rajagopalan gwi->gw_system_guid = ntohll(guid);
578*b494511aSVenki Rajagopalan bcopy(desc_gwid->id_sysname, gwi->gw_system_name, FIP_SYSNAME_LEN);
579*b494511aSVenki Rajagopalan (gwi->gw_system_name)[FIP_SYSNAME_LEN] = '\0';
580*b494511aSVenki Rajagopalan bcopy(desc_gwid->id_portname, gwi->gw_port_name, FIP_PORTNAME_LEN);
581*b494511aSVenki Rajagopalan (gwi->gw_port_name)[FIP_PORTNAME_LEN] = '\0';
582*b494511aSVenki Rajagopalan
583*b494511aSVenki Rajagopalan /*
584*b494511aSVenki Rajagopalan * Record all info from the Keep Alive descriptor
585*b494511aSVenki Rajagopalan */
586*b494511aSVenki Rajagopalan gwi->gw_adv_period = ntohl(desc_ka->ka_gw_adv_period);
587*b494511aSVenki Rajagopalan gwi->gw_ka_period = ntohl(desc_ka->ka_gw_ka_period);
588*b494511aSVenki Rajagopalan gwi->gw_vnic_ka_period = ntohl(desc_ka->ka_vnic_ka_period);
589*b494511aSVenki Rajagopalan
590*b494511aSVenki Rajagopalan gwi->gw_next = NULL;
591*b494511aSVenki Rajagopalan
592*b494511aSVenki Rajagopalan return (ENX_E_SUCCESS);
593*b494511aSVenki Rajagopalan }
594*b494511aSVenki Rajagopalan
595*b494511aSVenki Rajagopalan /*
596*b494511aSVenki Rajagopalan * Rollback whatever we did for making a solicit packet
597*b494511aSVenki Rajagopalan */
598*b494511aSVenki Rajagopalan static void
eibnx_rb_fip_make_solicit_pkt(eibnx_wqe_t * swqe)599*b494511aSVenki Rajagopalan eibnx_rb_fip_make_solicit_pkt(eibnx_wqe_t *swqe)
600*b494511aSVenki Rajagopalan {
601*b494511aSVenki Rajagopalan uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
602*b494511aSVenki Rajagopalan
603*b494511aSVenki Rajagopalan bzero(pkt, sizeof (fip_solicit_t));
604*b494511aSVenki Rajagopalan swqe->qe_sgl.ds_len = swqe->qe_bufsz;
605*b494511aSVenki Rajagopalan }
606