1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26#include <sys/types.h>
27#include <sys/ddi.h>
28#include <sys/sunddi.h>
29#include <sys/ksynch.h>
30#include <sys/byteorder.h>
31
32#include <sys/ib/clients/eoib/eib_impl.h>
33
34/*
35 * Declarations private to this file
36 */
/* Builders: construct a FIP request in the send wqe's buffer */
static int eib_fip_make_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_update(eib_t *, eib_vnic_t *, eib_wqe_t *, int, int *);
static int eib_fip_make_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_make_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);

/* Senders: post an already-built request and advance the vnic state */
static int eib_fip_send_login(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_update(eib_t *, eib_vnic_t *, eib_wqe_t *,
    uint_t, int *);
static int eib_fip_send_table(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_ka(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);
static int eib_fip_send_logout(eib_t *, eib_vnic_t *, eib_wqe_t *, int *);

/* Parsers and helpers for received vHUB table/update messages */
static int eib_fip_parse_vhub_table(uint8_t *, eib_vnic_t *);
static int eib_fip_parse_vhub_update(uint8_t *, eib_vnic_t *);
static void eib_fip_update_eport_state(eib_t *, eib_vhub_table_t *,
    eib_vhub_update_t *, boolean_t, uint8_t);
static void eib_fip_queue_tbl_entry(eib_vhub_table_t *, eib_vhub_map_t *,
    uint32_t, uint8_t);
static void eib_fip_queue_upd_entry(eib_vhub_update_t *, eib_vhub_map_t *,
    uint32_t, uint8_t);
static void eib_fip_queue_gw_entry(eib_vnic_t *, eib_vhub_table_t *, uint32_t,
    uint8_t);
static int eib_fip_apply_updates(eib_t *, eib_vhub_table_t *,
    eib_vhub_update_t *);
static void eib_fip_dequeue_tbl_entry(eib_vhub_table_t *, uint8_t *, uint32_t,
    uint8_t);
static eib_vhub_map_t *eib_fip_get_vhub_map(void);
65
66/*
67 * Definitions private to this file
68 */
/*
 * Vendor id bytes copied into every FIP descriptor built in this file:
 * the eight ASCII characters "Mellanox", deliberately without a
 * terminating NUL.
 */
const char eib_vendor_mellanox[] = {
	'M', 'e', 'l', 'l', 'a', 'n', 'o', 'x'
};
72
73/*
74 * The three requests to the gateway - request a vHUB table, request a
75 * vHUB update (aka keepalive) and vNIC logout - all need the same
76 * vnic identity descriptor to be sent with different flag settings.
77 *
78 *      vHUB table: R=1, U=0, TUSN=last, subcode=KEEPALIVE
79 *      keepalive/vHUB update: R=0, U=1, TUSN=last, subcode=KEEPALIVE
80 *      vNIC logout: R=0, U=0, TUSN=0, subcode=LOGOUT
81 */
/* Request the vHUB table from the gateway (R flag set) */
#define	EIB_UPD_REQ_TABLE	1
/* Keepalive, i.e. vHUB update request (U flag set) */
#define	EIB_UPD_REQ_KA		2
/* vNIC logout (neither flag set, TUSN sent as 0, LOGOUT subcode) */
#define	EIB_UPD_REQ_LOGOUT	3
85
86int
87eib_fip_login(eib_t *ss, eib_vnic_t *vnic, int *err)
88{
89	eib_wqe_t *swqe;
90	int ret;
91	int ntries = 0;
92
93	do {
94		if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
95			EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_login: "
96			    "no swqe available, not sending "
97			    "vnic login request");
98			*err = ENOMEM;
99			return (EIB_E_FAILURE);
100		}
101
102		ret = eib_fip_make_login(ss, vnic, swqe, err);
103		if (ret != EIB_E_SUCCESS) {
104			eib_rsrc_return_swqe(ss, swqe, NULL);
105			return (EIB_E_FAILURE);
106		}
107
108		ret = eib_fip_send_login(ss, vnic, swqe, err);
109		if (ret != EIB_E_SUCCESS) {
110			eib_rsrc_return_swqe(ss, swqe, NULL);
111			return (EIB_E_FAILURE);
112		}
113
114		ret = eib_vnic_wait_for_login_ack(ss, vnic, err);
115		if (ret == EIB_E_SUCCESS)
116			break;
117
118	} while ((*err == ETIME) && (ntries++ < EIB_MAX_LOGIN_ATTEMPTS));
119
120	return (ret);
121}
122
123int
124eib_fip_vhub_table(eib_t *ss, eib_vnic_t *vnic, int *err)
125{
126	eib_wqe_t *swqe;
127	int ret;
128	int ntries = 0;
129
130	do {
131		if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
132			EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_vhub_table: "
133			    "no swqe available, not sending "
134			    "vhub table request");
135			*err = ENOMEM;
136			return (EIB_E_FAILURE);
137		}
138
139		ret = eib_fip_make_table(ss, vnic, swqe, err);
140		if (ret != EIB_E_SUCCESS) {
141			eib_rsrc_return_swqe(ss, swqe, NULL);
142			return (EIB_E_FAILURE);
143		}
144
145		ret = eib_fip_send_table(ss, vnic, swqe, err);
146		if (ret != EIB_E_SUCCESS) {
147			eib_rsrc_return_swqe(ss, swqe, NULL);
148			return (EIB_E_FAILURE);
149		}
150
151		ret = eib_vnic_wait_for_table(ss, vnic, err);
152		if (ret == EIB_E_SUCCESS) {
153			return (EIB_E_SUCCESS);
154		}
155
156		/*
157		 * If we'd failed in constructing a proper vhub table above,
158		 * the vnic login state would be set to EIB_LOGIN_TBL_FAILED.
159		 * We need to clean up any pending entries from the vhub
160		 * table and vhub update structures and reset the vnic state
161		 * to EIB_LOGIN_ACK_RCVD before we can try again.
162		 */
163		eib_vnic_fini_tables(ss, vnic, B_FALSE);
164		mutex_enter(&vnic->vn_lock);
165		vnic->vn_state = EIB_LOGIN_ACK_RCVD;
166		mutex_exit(&vnic->vn_lock);
167
168	} while ((*err == ETIME) && (ntries++ < EIB_MAX_VHUB_TBL_ATTEMPTS));
169
170	return (EIB_E_FAILURE);
171}
172
173int
174eib_fip_heartbeat(eib_t *ss, eib_vnic_t *vnic, int *err)
175{
176	eib_wqe_t *swqe;
177	int ntries = 0;
178	int ret;
179
180	/*
181	 * Even if we're running low on the wqe resource, we want to be
182	 * able to grab a wqe to send the keepalive, to avoid getting
183	 * logged out by the gateway, so we use EIB_WPRI_HI.
184	 */
185	if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_HI)) == NULL) {
186		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_heartbeat: "
187		    "no swqe available, not sending heartbeat");
188		return (EIB_E_FAILURE);
189	}
190
191	while (ntries++ < EIB_MAX_KA_ATTEMPTS) {
192		ret = eib_fip_make_ka(ss, vnic, swqe, err);
193		if (ret != EIB_E_SUCCESS)
194			continue;
195
196		ret = eib_fip_send_ka(ss, vnic, swqe, err);
197		if (ret == EIB_E_SUCCESS)
198			break;
199	}
200
201	if (ret != EIB_E_SUCCESS)
202		eib_rsrc_return_swqe(ss, swqe, NULL);
203
204	return (ret);
205}
206
207int
208eib_fip_logout(eib_t *ss, eib_vnic_t *vnic, int *err)
209{
210	eib_wqe_t *swqe;
211	int ret;
212
213	/*
214	 * This routine is only called after the vnic has successfully
215	 * logged in to the gateway. If that's really the case, there
216	 * is nothing in terms of resources we need to release: the swqe
217	 * that was acquired during login has already been posted, the
218	 * work has been completed and the swqe has also been reaped back
219	 * into the free pool. The only thing we need to rollback is the
220	 * fact that we're logged in to the gateway at all -- and the way
221	 * to do this is to send a logout request.
222	 */
223	if ((swqe = eib_rsrc_grab_swqe(ss, EIB_WPRI_LO)) == NULL) {
224		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_logout: "
225		    "no swqe available, not sending logout");
226		return (EIB_E_FAILURE);
227	}
228
229	ret = eib_fip_make_logout(ss, vnic, swqe, err);
230	if (ret != EIB_E_SUCCESS) {
231		eib_rsrc_return_swqe(ss, swqe, NULL);
232		return (EIB_E_FAILURE);
233	}
234
235	ret = eib_fip_send_logout(ss, vnic, swqe, err);
236	if (ret != EIB_E_SUCCESS) {
237		eib_rsrc_return_swqe(ss, swqe, NULL);
238		return (EIB_E_FAILURE);
239	}
240
241	return (EIB_E_SUCCESS);
242}
243
244int
245eib_fip_parse_login_ack(eib_t *ss, uint8_t *pkt, eib_login_data_t *ld)
246{
247	fip_login_ack_t *ack;
248	fip_basic_hdr_t *hdr;
249	fip_desc_iba_t *iba;
250	fip_desc_vnic_login_t *login;
251	fip_desc_partition_t *partition;
252	ib_guid_t guid;
253	uint32_t syn_ctl_qpn;
254	uint16_t sl_portid;
255	uint16_t flags_vlan;
256	uint16_t opcode;
257	uint8_t subcode;
258
259	/*
260	 * Note that 'pkt' is always atleast double-word aligned
261	 * when it is passed to us, so we can cast it without any
262	 * problems.
263	 */
264	ack = (fip_login_ack_t *)(void *)pkt;
265	hdr = &(ack->ak_fip_header);
266
267	/*
268	 * Verify that the opcode is EoIB
269	 */
270	if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
271		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
272		    "unsupported opcode 0x%x in login ack, ignoring",
273		    opcode);
274		return (EIB_E_FAILURE);
275	}
276
277	/*
278	 * The admin qp in the EoIB driver should receive only the login
279	 * acknowledgements
280	 */
281	subcode = hdr->hd_subcode;
282	if (subcode != FIP_SUBCODE_G_VNIC_LOGIN_ACK) {
283		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
284		    "unexpected subcode 0x%x received by adm qp, ignoring",
285		    subcode);
286		return (EIB_E_FAILURE);
287	}
288
289	/*
290	 * Verify if the descriptor list length in the received packet is
291	 * valid if the workaround to disable it explicitly is absent.
292	 */
293	if (!eib_wa_no_desc_list_len) {
294		uint_t pkt_data_sz;
295
296		pkt_data_sz = (ntohs(hdr->hd_desc_list_len) + 2) << 2;
297		if (pkt_data_sz < sizeof (fip_login_ack_t)) {
298			EIB_DPRINTF_WARN(ss->ei_instance,
299			    "eib_fip_parse_login_ack: "
300			    "login ack desc list len (0x%lx) too small "
301			    "(min 0x%lx)",
302			    pkt_data_sz, sizeof (fip_login_ack_t));
303			return (EIB_E_FAILURE);
304		}
305	}
306
307	/*
308	 * Validate all the header and descriptor types and lengths
309	 */
310	if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
311	    hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
312		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
313		    "invalid type/len in basic hdr: expected (0x%x,0x%x), "
314		    "got (0x%x,0x%x)", FIP_DESC_TYPE_VENDOR_ID,
315		    FIP_DESC_LEN_VENDOR_ID, hdr->hd_type, hdr->hd_len);
316		return (EIB_E_FAILURE);
317	}
318	iba = &(ack->ak_iba);
319	if (iba->ia_type != FIP_DESC_TYPE_IBA ||
320	    iba->ia_len != FIP_DESC_LEN_IBA) {
321		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
322		    "invalid type/len in iba desc: expected (0x%x,0x%x), "
323		    "got (0x%x,0x%x)", FIP_DESC_TYPE_IBA, FIP_DESC_LEN_IBA,
324		    iba->ia_type, iba->ia_len);
325		return (EIB_E_FAILURE);
326	}
327	login = &(ack->ak_vnic_login);
328	if (login->vl_type != FIP_DESC_TYPE_VNIC_LOGIN ||
329	    login->vl_len != FIP_DESC_LEN_VNIC_LOGIN) {
330		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
331		    "invalid type/len in login desc: expected (0x%x,0x%x), "
332		    "got (0x%x,0x%x)", FIP_DESC_TYPE_VNIC_LOGIN,
333		    FIP_DESC_LEN_VNIC_LOGIN, login->vl_type, login->vl_len);
334		return (EIB_E_FAILURE);
335	}
336	partition = &(ack->ak_vhub_partition);
337	if (partition->pn_type != FIP_DESC_TYPE_PARTITION ||
338	    partition->pn_len != FIP_DESC_LEN_PARTITION) {
339		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_login_ack: "
340		    "invalid type/len in partition desc: expected (0x%x,0x%x), "
341		    "got (0x%x,0x%x)", FIP_DESC_TYPE_PARTITION,
342		    FIP_DESC_LEN_PARTITION, partition->pn_type,
343		    partition->pn_len);
344		return (EIB_E_FAILURE);
345	}
346
347	/*
348	 * Note that we'll return the vnic id as-is.  The msb is not actually
349	 * part of the vnic id in our internal records, so we'll mask it out
350	 * later before we do our searches.
351	 */
352	ld->ld_vnic_id = ntohs(login->vl_vnic_id);
353
354	syn_ctl_qpn = ntohl(login->vl_syndrome_ctl_qpn);
355
356	/*
357	 * If the syndrome indicates a nack, we're done.  No need to collect
358	 * any more information
359	 */
360	ld->ld_syndrome = (uint8_t)((syn_ctl_qpn & FIP_VL_SYN_MASK) >>
361	    FIP_VL_SYN_SHIFT);
362	if (ld->ld_syndrome) {
363		return (EIB_E_SUCCESS);
364	}
365
366	/*
367	 * Let's get the rest of the information out of the login ack
368	 */
369	sl_portid = ntohs(iba->ia_sl_portid);
370	ld->ld_gw_port_id = sl_portid & FIP_IBA_PORTID_MASK;
371	ld->ld_gw_sl = (sl_portid & FIP_IBA_SL_MASK) >> FIP_IBA_SL_SHIFT;
372
373	ld->ld_gw_data_qpn = ntohl(iba->ia_qpn) & FIP_IBA_QPN_MASK;
374	ld->ld_gw_lid = ntohs(iba->ia_lid);
375
376	bcopy(iba->ia_guid, &guid, sizeof (ib_guid_t));
377	ld->ld_gw_guid = ntohll(guid);
378	ld->ld_vhub_mtu = ntohs(login->vl_mtu);
379	bcopy(login->vl_mac, ld->ld_assigned_mac, ETHERADDRL);
380	bcopy(login->vl_gw_mgid_prefix, ld->ld_gw_mgid_prefix,
381	    FIP_MGID_PREFIX_LEN);
382	ld->ld_n_rss_mcgid = login->vl_flags_rss & FIP_VL_N_RSS_MCGID_MASK;
383	ld->ld_n_mac_mcgid = login->vl_n_mac_mcgid & FIP_VL_N_MAC_MCGID_MASK;
384	ld->ld_gw_ctl_qpn = (syn_ctl_qpn & FIP_VL_CTL_QPN_MASK);
385
386	flags_vlan = ntohs(login->vl_flags_vlan);
387	ld->ld_assigned_vlan = flags_vlan & FIP_VL_VLAN_MASK;
388	ld->ld_vlan_in_packets = (flags_vlan & FIP_VL_FLAGS_VP) ? 1 : 0;
389	bcopy(login->vl_vnic_name, ld->ld_vnic_name, FIP_VNIC_NAME_LEN);
390
391	ld->ld_vhub_pkey = ntohs(partition->pn_pkey);
392
393	return (EIB_E_SUCCESS);
394}
395
396int
397eib_fip_parse_ctl_pkt(uint8_t *pkt, eib_vnic_t *vnic)
398{
399	eib_t *ss = vnic->vn_ss;
400	fip_vhub_pkt_t *vhb;
401	fip_basic_hdr_t *hdr;
402	uint16_t opcode;
403	uint8_t subcode;
404	uint_t vnic_state;
405	int ret = EIB_E_FAILURE;
406
407	/*
408	 * Note that 'pkt' is always atleast double-word aligned when it is
409	 * passed to us, so we can cast it without any problems.
410	 */
411	vhb = (fip_vhub_pkt_t *)(void *)pkt;
412	hdr = &(vhb->hb_fip_header);
413
414	/*
415	 * Verify that the opcode is EoIB
416	 */
417	if ((opcode = ntohs(hdr->hd_opcode)) != FIP_OPCODE_EOIB) {
418		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
419		    "unsupported opcode 0x%x in ctl pkt, ignoring",
420		    opcode);
421		return (EIB_E_FAILURE);
422	}
423
424	mutex_enter(&vnic->vn_lock);
425	vnic_state = vnic->vn_state;
426	mutex_exit(&vnic->vn_lock);
427
428	/*
429	 * The ctl qp in the EoIB driver should receive only vHUB messages
430	 */
431	subcode = hdr->hd_subcode;
432	if (subcode == FIP_SUBCODE_G_VHUB_UPDATE) {
433		if (vnic_state != EIB_LOGIN_TBL_WAIT &&
434		    vnic_state != EIB_LOGIN_TBL_INPROG &&
435		    vnic_state != EIB_LOGIN_TBL_DONE &&
436		    vnic_state != EIB_LOGIN_DONE) {
437
438			EIB_DPRINTF_WARN(ss->ei_instance,
439			    "eib_fip_parse_ctl_pkt: unexpected vnic state "
440			    "(0x%lx) for subcode (VHUB_UPDATE 0x%x)",
441			    vnic_state, subcode);
442			return (EIB_E_FAILURE);
443		}
444
445		ret = eib_fip_parse_vhub_update(pkt, vnic);
446
447	} else if (subcode == FIP_SUBCODE_G_VHUB_TABLE) {
448		if ((vnic_state != EIB_LOGIN_TBL_WAIT) &&
449		    (vnic_state != EIB_LOGIN_TBL_INPROG)) {
450
451			EIB_DPRINTF_WARN(ss->ei_instance,
452			    "eib_fip_parse_ctl_pkt: unexpected vnic state "
453			    "(0x%lx) for subcode (VHUB_TABLE 0x%x)",
454			    vnic_state, subcode);
455			return (EIB_E_FAILURE);
456		}
457
458		ret = eib_fip_parse_vhub_table(pkt, vnic);
459
460	} else {
461		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_ctl_pkt: "
462		    "unexpected subcode 0x%x for ctl pkt", subcode);
463	}
464
465	if (ret == EIB_E_SUCCESS) {
466		/*
467		 * Update last gateway heartbeat received time and
468		 * gateway eport state.  The eport state should only
469		 * be updated if the vnic's vhub table has been fully
470		 * constructed.
471		 */
472		mutex_enter(&ss->ei_vnic_lock);
473		ss->ei_gw_last_heartbeat = ddi_get_lbolt64();
474		if (vnic_state == EIB_LOGIN_TBL_DONE ||
475		    vnic_state == EIB_LOGIN_DONE) {
476			ss->ei_gw_eport_state =
477			    vnic->vn_vhub_table->tb_eport_state;
478		}
479		mutex_exit(&ss->ei_vnic_lock);
480	}
481
482	return (ret);
483}
484
485static int
486eib_fip_make_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
487{
488	fip_login_t *login;
489	fip_proto_t *proto;
490	fip_basic_hdr_t *hdr;
491	fip_desc_iba_t *iba;
492	fip_desc_vnic_login_t *vlg;
493	ib_gid_t port_gid;
494	ib_guid_t port_guid;
495	uint16_t sl_portid;
496	uint16_t flags_vlan;
497
498	uint16_t gw_portid = ss->ei_gw_props->pp_gw_portid;
499	uint16_t sl = ss->ei_gw_props->pp_gw_sl;
500	uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
501	uint_t pktsz = swqe->qe_sgl.ds_len;
502	uint_t login_sz = sizeof (fip_login_t);
503
504	if (pktsz < login_sz) {
505		*err = EINVAL;
506
507		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_login: "
508		    "send buffer size (0x%lx) too small to send"
509		    "login request (min 0x%lx)",
510		    pktsz, login_sz);
511		return (EIB_E_FAILURE);
512	}
513
514	/*
515	 * Lint complains that there may be an alignment issue here,
516	 * but we know that the "pkt" is atleast double-word aligned,
517	 * so it's ok.
518	 */
519	login = (fip_login_t *)(void *)pkt;
520	bzero(pkt, login_sz);
521
522	/*
523	 * Fill in the FIP protocol version
524	 */
525	proto = &login->lg_proto_version;
526	proto->pr_version = FIP_PROTO_VERSION;
527
528	/*
529	 * Fill in the basic header
530	 */
531	hdr = &login->lg_fip_header;
532	hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
533	hdr->hd_subcode = FIP_SUBCODE_H_VNIC_LOGIN;
534	hdr->hd_desc_list_len = htons((login_sz >> 2) - 2);
535	hdr->hd_flags = 0;
536	hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
537	hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
538	bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
539
540	/*
541	 * Fill in the Infiniband Address descriptor
542	 */
543	iba = &login->lg_iba;
544	iba->ia_type = FIP_DESC_TYPE_IBA;
545	iba->ia_len = FIP_DESC_LEN_IBA;
546	bcopy(eib_vendor_mellanox, iba->ia_vendor_id, FIP_VENDOR_LEN);
547	iba->ia_qpn = htonl(vnic->vn_data_chan->ch_qpn);
548
549	sl_portid = (gw_portid & FIP_IBA_PORTID_MASK) |
550	    ((sl << FIP_IBA_SL_SHIFT) & FIP_IBA_SL_MASK);
551	iba->ia_sl_portid = htons(sl_portid);
552
553	iba->ia_lid = htons(ss->ei_props->ep_blid);
554
555	port_gid = ss->ei_props->ep_sgid;
556	port_guid = htonll(port_gid.gid_guid);
557	bcopy(&port_guid, iba->ia_guid, FIP_GUID_LEN);
558
559	/*
560	 * Now, fill in the vNIC Login descriptor
561	 */
562
563	vlg = &login->lg_vnic_login;
564	vlg->vl_type = FIP_DESC_TYPE_VNIC_LOGIN;
565	vlg->vl_len = FIP_DESC_LEN_VNIC_LOGIN;
566	bcopy(eib_vendor_mellanox, vlg->vl_vendor_id, FIP_VENDOR_LEN);
567
568	/*
569	 * Only for the physlink instance 0, we ask the gateway to assign
570	 * the mac address and a VLAN (tagless, actually).  For this vnic
571	 * only, we do not set the H bit. All other vnics are created by
572	 * Solaris admin and will have the H bit set. Note also that we
573	 * need to clear the vnic id's most significant bit for those that
574	 * are administered by the gateway, so vnic0's vnic_id's msb should
575	 * be 0 as well.
576	 */
577	if (vnic->vn_instance == 0) {
578		vlg->vl_vnic_id = htons(vnic->vn_id);
579		flags_vlan = vnic->vn_vlan & FIP_VL_VLAN_MASK;
580	} else {
581		vlg->vl_vnic_id = htons(vnic->vn_id | FIP_VL_VNIC_ID_MSBIT);
582		flags_vlan = (vnic->vn_vlan & FIP_VL_VLAN_MASK) |
583		    FIP_VL_FLAGS_H | FIP_VL_FLAGS_M;
584
585		if (vnic->vn_vlan & FIP_VL_VLAN_MASK)
586			flags_vlan |= (FIP_VL_FLAGS_V | FIP_VL_FLAGS_VP);
587	}
588
589	vlg->vl_flags_vlan = htons(flags_vlan);
590	bcopy(vnic->vn_macaddr, vlg->vl_mac, ETHERADDRL);
591
592	/*
593	 * We aren't ready to enable rss, so we set the RSS bit and
594	 * the n_rss_mcgid field to 0.  Set the mac mcgid to 0 as well.
595	 */
596	vlg->vl_flags_rss = 0;
597	vlg->vl_n_mac_mcgid = 0;
598
599	/*
600	 * Set the syndrome to 0 and pass the control qpn
601	 */
602	vlg->vl_syndrome_ctl_qpn =
603	    htonl(vnic->vn_ctl_chan->ch_qpn & FIP_VL_CTL_QPN_MASK);
604
605	/*
606	 * Try to set as unique a name as possible for this vnic
607	 */
608	(void) snprintf((char *)(vlg->vl_vnic_name), FIP_VNIC_NAME_LEN,
609	    "eoib_%02x_%02x", ss->ei_instance, vnic->vn_instance);
610
611	/*
612	 * Adjust the ds_len in the sgl to indicate the size of this
613	 * request before returning
614	 */
615	swqe->qe_sgl.ds_len = login_sz;
616
617	return (EIB_E_SUCCESS);
618}
619
620static int
621eib_fip_make_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int req,
622    int *err)
623{
624	fip_keep_alive_t *ka;
625	fip_proto_t *proto;
626	fip_basic_hdr_t *hdr;
627	fip_desc_vnic_identity_t *vid;
628	ib_gid_t port_gid;
629	ib_guid_t port_guid;
630	uint32_t flags_vhub_id;
631
632	uint8_t *pkt = (uint8_t *)(uintptr_t)(swqe->qe_sgl.ds_va);
633	uint_t pktsz = swqe->qe_sgl.ds_len;
634	uint_t ka_sz = sizeof (fip_keep_alive_t);
635
636	if (pktsz < ka_sz) {
637		*err = EINVAL;
638
639		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_make_update: "
640		    "send buffer size (0x%lx) too small to send"
641		    "keepalive/update request (min 0x%lx)",
642		    pktsz, ka_sz);
643		return (EIB_E_FAILURE);
644	}
645
646	/*
647	 * Lint complains that there may be an alignment issue here,
648	 * but we know that the "pkt" is atleast double-word aligned,
649	 * so it's ok.
650	 */
651	ka = (fip_keep_alive_t *)(void *)pkt;
652	bzero(pkt, ka_sz);
653
654	/*
655	 * Fill in the FIP protocol version
656	 */
657	proto = &ka->ka_proto_version;
658	proto->pr_version = FIP_PROTO_VERSION;
659
660	/*
661	 * Fill in the basic header
662	 */
663	hdr = &ka->ka_fip_header;
664	hdr->hd_opcode = htons(FIP_OPCODE_EOIB);
665	hdr->hd_subcode = (req == EIB_UPD_REQ_LOGOUT) ?
666	    FIP_SUBCODE_H_VNIC_LOGOUT : FIP_SUBCODE_H_KEEP_ALIVE;
667	hdr->hd_desc_list_len = htons((ka_sz >> 2) - 2);
668	hdr->hd_flags = 0;
669	hdr->hd_type = FIP_DESC_TYPE_VENDOR_ID;
670	hdr->hd_len = FIP_DESC_LEN_VENDOR_ID;
671	bcopy(eib_vendor_mellanox, hdr->hd_vendor_id, FIP_VENDOR_LEN);
672
673	/*
674	 * Fill in the vNIC Identity descriptor
675	 */
676	vid = &ka->ka_vnic_identity;
677
678	vid->vi_type = FIP_DESC_TYPE_VNIC_IDENTITY;
679	vid->vi_len = FIP_DESC_LEN_VNIC_IDENTITY;
680	bcopy(eib_vendor_mellanox, vid->vi_vendor_id, FIP_VENDOR_LEN);
681
682	flags_vhub_id = vnic->vn_login_data.ld_vhub_id;
683	if (vnic->vn_login_data.ld_vlan_in_packets) {
684		flags_vhub_id |= FIP_VI_FLAG_VP;
685	}
686	if (req == EIB_UPD_REQ_TABLE) {
687		flags_vhub_id |= FIP_VI_FLAG_R;
688	} else if (req == EIB_UPD_REQ_KA) {
689		flags_vhub_id |= FIP_VI_FLAG_U;
690	}
691	vid->vi_flags_vhub_id = htonl(flags_vhub_id);
692
693	vid->vi_tusn = (req != EIB_UPD_REQ_LOGOUT) ?
694	    htonl(vnic->vn_vhub_table->tb_tusn) : 0;
695
696	vid->vi_vnic_id = htons(vnic->vn_login_data.ld_vnic_id);
697	bcopy(vnic->vn_login_data.ld_assigned_mac, vid->vi_mac, ETHERADDRL);
698
699	port_gid = ss->ei_props->ep_sgid;
700	port_guid = htonll(port_gid.gid_guid);
701	bcopy(&port_guid, vid->vi_port_guid, FIP_GUID_LEN);
702	bcopy(vnic->vn_login_data.ld_vnic_name, vid->vi_vnic_name,
703	    FIP_VNIC_NAME_LEN);
704
705	/*
706	 * Adjust the ds_len in the sgl to indicate the size of this
707	 * request before returning
708	 */
709	swqe->qe_sgl.ds_len = ka_sz;
710
711	return (EIB_E_SUCCESS);
712}
713
714static int
715eib_fip_make_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
716{
717	return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_TABLE, err));
718}
719
720static int
721eib_fip_make_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
722{
723	return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_KA, err));
724}
725
726static int
727eib_fip_make_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
728{
729	return (eib_fip_make_update(ss, vnic, swqe, EIB_UPD_REQ_LOGOUT, err));
730}
731
732static int
733eib_fip_send_login(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
734{
735	eib_avect_t *av;
736	eib_chan_t *chan = ss->ei_admin_chan;
737	ibt_status_t ret;
738
739	/*
740	 * Get an address vector for this destination
741	 */
742	if ((av = eib_ibt_hold_avect(ss, ss->ei_gw_props->pp_gw_lid,
743	    ss->ei_gw_props->pp_gw_sl)) == NULL) {
744		*err = ENOMEM;
745		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
746		    "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
747		    ss->ei_gw_props->pp_gw_lid, ss->ei_gw_props->pp_gw_sl);
748		return (EIB_E_FAILURE);
749	}
750
751	/*
752	 * Modify the UD destination handle to the gateway
753	 */
754	ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
755	    ss->ei_gw_props->pp_gw_ctrl_qpn, &av->av_vect);
756
757	eib_ibt_release_avect(ss, av);
758	if (ret != IBT_SUCCESS) {
759		*err = EINVAL;
760
761		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
762		    "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
763		    "ret=%d", ss->ei_gw_props->pp_gw_ctrl_qpn,
764		    EIB_FIP_QKEY, ret);
765		return (EIB_E_FAILURE);
766	}
767
768	/*
769	 * Send the login packet to the destination gateway. Posting
770	 * the login and setting the login state to wait-for-ack should
771	 * ideally be atomic to avoid race.
772	 */
773	mutex_enter(&vnic->vn_lock);
774	ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
775	if (ret != IBT_SUCCESS) {
776		mutex_exit(&vnic->vn_lock);
777		*err = EINVAL;
778		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_login: "
779		    "ibt_post_send() failed for vnic id 0x%x, ret=%d",
780		    vnic->vn_id, ret);
781		return (EIB_E_FAILURE);
782	}
783	vnic->vn_state = EIB_LOGIN_ACK_WAIT;
784
785	mutex_enter(&chan->ch_tx_lock);
786	chan->ch_tx_posted++;
787	mutex_exit(&chan->ch_tx_lock);
788
789	mutex_exit(&vnic->vn_lock);
790
791	return (EIB_E_SUCCESS);
792}
793
794static int
795eib_fip_send_update(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe,
796    uint_t nxt_state, int *err)
797{
798	eib_login_data_t *ld = &vnic->vn_login_data;
799	eib_chan_t *chan = vnic->vn_ctl_chan;
800	eib_avect_t *av;
801	ibt_status_t ret;
802
803	/*
804	 * Get an address vector for this destination
805	 */
806	if ((av = eib_ibt_hold_avect(ss, ld->ld_gw_lid,
807	    ld->ld_gw_sl)) == NULL) {
808		*err = ENOMEM;
809		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
810		    "eib_ibt_hold_avect(gw_lid=0x%x, sl=0x%x) failed",
811		    ld->ld_gw_lid, ld->ld_gw_sl);
812		return (EIB_E_FAILURE);
813	}
814
815	/*
816	 * Modify the UD destination handle to the destination appropriately
817	 */
818	ret = ibt_modify_ud_dest(swqe->qe_dest, EIB_FIP_QKEY,
819	    ld->ld_gw_ctl_qpn, &av->av_vect);
820
821	eib_ibt_release_avect(ss, av);
822	if (ret != IBT_SUCCESS) {
823		*err = EINVAL;
824		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
825		    "ibt_modify_ud_dest(gw_ctl_qpn=0x%lx, qkey=0x%lx) failed, "
826		    "ret=%d", ld->ld_gw_ctl_qpn, EIB_FIP_QKEY, ret);
827		return (EIB_E_FAILURE);
828	}
829
830	/*
831	 * Send the update packet to the destination. Posting the update request
832	 * and setting the login state to wait-for-vhub_table needs to be atomic
833	 * to avoid race.
834	 */
835	mutex_enter(&vnic->vn_lock);
836	ret = ibt_post_send(chan->ch_chan, &(swqe->qe_wr.send), 1, NULL);
837	if (ret != IBT_SUCCESS) {
838		mutex_exit(&vnic->vn_lock);
839		*err = EINVAL;
840		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_send_update: "
841		    "ibt_post_send() failed for vnic id 0x%x, ret=%d",
842		    vnic->vn_id, ret);
843		return (EIB_E_FAILURE);
844	}
845	vnic->vn_state = nxt_state;
846
847	mutex_enter(&chan->ch_tx_lock);
848	chan->ch_tx_posted++;
849	mutex_exit(&chan->ch_tx_lock);
850
851	mutex_exit(&vnic->vn_lock);
852
853	return (EIB_E_SUCCESS);
854}
855
856static int
857eib_fip_send_table(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
858{
859	return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_TBL_WAIT, err));
860}
861
862static int
863eib_fip_send_ka(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
864{
865	return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGIN_DONE, err));
866}
867
868static int
869eib_fip_send_logout(eib_t *ss, eib_vnic_t *vnic, eib_wqe_t *swqe, int *err)
870{
871	return (eib_fip_send_update(ss, vnic, swqe, EIB_LOGOUT_DONE, err));
872}
873
874static int
875eib_fip_parse_vhub_table(uint8_t *pkt, eib_vnic_t *vnic)
876{
877	fip_vhub_table_t *tbl;
878	fip_desc_vhub_table_t *desc_tbl;
879	fip_vhub_table_entry_t *entry;
880	fip_basic_hdr_t *hdr;
881	eib_t *ss = vnic->vn_ss;
882	eib_login_data_t *ld = &vnic->vn_login_data;
883	eib_vhub_table_t *etbl = vnic->vn_vhub_table;
884	eib_vhub_update_t *eupd = vnic->vn_vhub_update;
885	eib_vhub_map_t *newmap;
886
887	uint32_t *ipkt;
888	uint32_t init_checksum = 0;
889	uint32_t tusn;
890	uint32_t vhub_id;
891	uint_t entries_in_pkt;
892	uint_t ndx;
893	uint_t i;
894
895	/*
896	 * If we're here receiving vhub table messages, we certainly should
897	 * have the vhub table structure allocated and present at this point.
898	 */
899	if (etbl == NULL) {
900		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
901		    "vhub table missing for vnic id 0x%x", vnic->vn_id);
902		return (EIB_E_FAILURE);
903	}
904
905	/*
906	 * Note that 'pkt' is always atleast double-word aligned when it is
907	 * passed to us, so we can cast it without any problems.
908	 */
909	ipkt = (uint32_t *)(void *)pkt;
910	tbl = (fip_vhub_table_t *)(void *)pkt;
911	hdr = &(tbl->vt_fip_header);
912
913	/*
914	 * Validate all the header and descriptor types and lengths
915	 */
916	if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
917	    hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
918		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
919		    "invalid type/len in fip basic header, "
920		    "exp (0x%x,0x%x), got (0x%x,0x%x)",
921		    FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
922		    hdr->hd_type, hdr->hd_len);
923		return (EIB_E_FAILURE);
924	}
925	desc_tbl = &(tbl->vt_vhub_table);
926	if (desc_tbl->tb_type != FIP_DESC_TYPE_VHUB_TABLE) {
927		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
928		    "invalid type in vhub desc, exp 0x%x, got 0x%x",
929		    FIP_DESC_TYPE_VHUB_TABLE, desc_tbl->tb_type);
930		return (EIB_E_FAILURE);
931	}
932
933	/*
934	 * Verify that the vhub id is ok for this vnic
935	 */
936	vhub_id = ntohl(desc_tbl->tb_flags_vhub_id) & FIP_TB_VHUB_ID_MASK;
937	if (vhub_id != ld->ld_vhub_id) {
938		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
939		    "invalid vhub id in vhub table pkt: exp 0x%x, got 0x%x",
940		    ld->ld_vhub_id, vhub_id);
941		return (EIB_E_FAILURE);
942	}
943
944	/*
945	 * Count the number of vhub table entries in this packet
946	 */
947	entries_in_pkt = (desc_tbl->tb_len - FIP_DESC_VHUB_TABLE_WORDS) /
948	    FIP_VHUB_TABLE_ENTRY_WORDS;
949
950	/*
951	 * While we're here, also compute the 32-bit 2's complement carry-
952	 * discarded checksum of the vHUB table descriptor in this packet
953	 * till the first vhub table entry.
954	 */
955	for (i = 0; i < FIP_DESC_VHUB_TABLE_WORDS; i++)
956		init_checksum += ipkt[i];
957
958	/*
959	 * Initialize the vhub's Table Update Sequence Number (tusn),
960	 * checksum and record the total number of entries in in the table
961	 * if this is the first pkt of the table.
962	 */
963	tusn = ntohl(desc_tbl->tb_tusn);
964	if (desc_tbl->tb_hdr & FIP_TB_HDR_FIRST) {
965		etbl->tb_entries_in_table = ntohs(desc_tbl->tb_table_size);
966		etbl->tb_tusn = tusn;
967		etbl->tb_checksum = 0;
968
969		mutex_enter(&vnic->vn_lock);
970		vnic->vn_state = EIB_LOGIN_TBL_INPROG;
971		mutex_exit(&vnic->vn_lock);
972	}
973
974	/*
975	 * First, middle or last, the current table TUSN we have must match this
976	 * packet's TUSN.
977	 */
978	if (etbl->tb_tusn != tusn) {
979		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
980		    "unexpected TUSN (0x%lx) during vhub table construction, "
981		    "expected 0x%lx", etbl->tb_tusn, tusn);
982		goto vhub_table_fail;
983	}
984
985	/*
986	 * See if we've overrun/underrun our original entries count
987	 */
988	if ((etbl->tb_entries_seen + entries_in_pkt) >
989	    etbl->tb_entries_in_table) {
990		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
991		    "vhub table overrun, total_exp=%d, so_far=%d, this_pkt=%d",
992		    etbl->tb_entries_in_table, etbl->tb_entries_seen,
993		    entries_in_pkt);
994		goto vhub_table_fail;
995	} else if (((etbl->tb_entries_seen + entries_in_pkt) <
996	    etbl->tb_entries_in_table) &&
997	    (desc_tbl->tb_hdr & FIP_TB_HDR_LAST)) {
998		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_table: "
999		    "vhub table underrun, total_exp=%d, so_far=%d, last_pkt=%d",
1000		    etbl->tb_entries_in_table, etbl->tb_entries_seen,
1001		    entries_in_pkt);
1002		goto vhub_table_fail;
1003	}
1004
1005	/*
1006	 * Process and add the entries we have in this packet
1007	 */
1008	etbl->tb_checksum += init_checksum;
1009	entry = (fip_vhub_table_entry_t *)(void *)
1010	    ((uint8_t *)desc_tbl + FIP_DESC_VHUB_TABLE_SZ);
1011
1012	for (ndx = 0; ndx < entries_in_pkt; ndx++, entry++) {
1013		/*
1014		 * Allocate a eib_vhub_map_t, copy the current entry details
1015		 * and chain it to the appropriate queue.
1016		 */
1017		if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1018			EIB_DPRINTF_WARN(ss->ei_instance,
1019			    "eib_fip_parse_vhub_table: no memory for vhub "
1020			    "table entry, ignoring this vhub table packet");
1021			goto vhub_table_fail;
1022		}
1023
1024		ASSERT((entry->te_v_rss_type & FIP_TE_VALID) == FIP_TE_VALID);
1025		newmap->mp_v_rss_type = entry->te_v_rss_type;
1026		bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
1027		newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
1028		newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
1029		newmap->mp_lid = ntohs(entry->te_lid);
1030		newmap->mp_tusn = tusn;
1031		newmap->mp_next = NULL;
1032
1033		/*
1034		 * The vhub table messages do not provide status on eport
1035		 * state, so we'll simply assume that the eport is up.
1036		 */
1037		eib_fip_queue_tbl_entry(etbl, newmap, tusn, FIP_EPORT_UP);
1038
1039		/*
1040		 * Update table checksum with this entry's computed checksum
1041		 */
1042		ipkt = (uint32_t *)entry;
1043		for (i = 0; i < FIP_VHUB_TABLE_ENTRY_WORDS; i++)
1044			etbl->tb_checksum += ipkt[i];
1045	}
1046	etbl->tb_entries_seen += entries_in_pkt;
1047
1048	/*
1049	 * If this is the last packet of this vhub table, complete vhub
1050	 * table by verifying checksum and applying all the vhub updates
1051	 * that may have come in while we were constructing this table.
1052	 */
1053	if (desc_tbl->tb_hdr & FIP_TB_HDR_LAST) {
1054
1055		ipkt = (uint32_t *)entry;
1056		if (!eib_wa_no_good_vhub_cksum) {
1057			if (*ipkt != etbl->tb_checksum) {
1058				EIB_DPRINTF_VERBOSE(ss->ei_instance,
1059				    "eib_fip_parse_vhub_table: "
1060				    "vhub table checksum invalid, "
1061				    "computed=0x%lx, found=0x%lx",
1062				    etbl->tb_checksum, *ipkt);
1063			}
1064		}
1065
1066		/*
1067		 * Per the EoIB specification, the gateway is supposed to
1068		 * include its address information for data messages in the
1069		 * vhub table.  But we've observed that it doesn't do this
1070		 * (with the current version). If this is the case, we'll
1071		 * hand-create and add a vhub map for the gateway from the
1072		 * information we got in login ack.
1073		 */
1074		if (etbl->tb_gateway == NULL)
1075			eib_fip_queue_gw_entry(vnic, etbl, tusn, FIP_EPORT_UP);
1076
1077		/*
1078		 * Apply pending vhub updates and reset table counters needed
1079		 * during table construction.
1080		 */
1081		if (eib_fip_apply_updates(ss, etbl, eupd) != EIB_E_SUCCESS)
1082			goto vhub_table_fail;
1083
1084		etbl->tb_entries_seen = 0;
1085		etbl->tb_entries_in_table = 0;
1086
1087		eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_DONE);
1088	}
1089
1090	return (EIB_E_SUCCESS);
1091
1092vhub_table_fail:
1093	eib_vnic_vhub_table_done(vnic, EIB_LOGIN_TBL_FAILED);
1094	return (EIB_E_FAILURE);
1095}
1096
1097static int
1098eib_fip_parse_vhub_update(uint8_t *pkt, eib_vnic_t *vnic)
1099{
1100	fip_vhub_update_t *upd;
1101	fip_desc_vhub_update_t *desc_upd;
1102	fip_vhub_table_entry_t *entry;
1103	fip_basic_hdr_t *hdr;
1104	eib_t *ss = vnic->vn_ss;
1105	eib_login_data_t *ld = &vnic->vn_login_data;
1106	eib_vhub_table_t *etbl = vnic->vn_vhub_table;
1107	eib_vhub_update_t *eupd = vnic->vn_vhub_update;
1108	eib_vhub_map_t *newmap;
1109	boolean_t vhub_tbl_done;
1110	uint32_t eport_vp_vhub_id;
1111	uint32_t vhub_id;
1112	uint32_t tusn;
1113	uint32_t prev_tusn;
1114	uint8_t eport_state;
1115
1116	/*
1117	 * We should have the vhub table allocated as long as we're receiving
1118	 * vhub control messages.
1119	 */
1120	if (etbl == NULL) {
1121		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1122		    "vhub table missing for vnic id 0x%x", vnic->vn_id);
1123		return (EIB_E_FAILURE);
1124	}
1125
1126	mutex_enter(&vnic->vn_lock);
1127	vhub_tbl_done = ((vnic->vn_state == EIB_LOGIN_TBL_DONE) ||
1128	    (vnic->vn_state == EIB_LOGIN_DONE)) ? B_TRUE : B_FALSE;
1129	mutex_exit(&vnic->vn_lock);
1130
1131	/*
1132	 * Note that 'pkt' is always atleast double-word aligned when it is
1133	 * passed to us, so we can cast it without any problems.
1134	 */
1135	upd = (fip_vhub_update_t *)(void *)pkt;
1136	hdr = &(upd->vu_fip_header);
1137
1138	/*
1139	 * Validate all the header and descriptor types and lengths
1140	 */
1141	if (hdr->hd_type != FIP_DESC_TYPE_VENDOR_ID ||
1142	    hdr->hd_len != FIP_DESC_LEN_VENDOR_ID) {
1143		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1144		    "invalid type/len in fip basic header, "
1145		    "exp (0x%x,0x%x), got (0x%x,0x%x)",
1146		    FIP_DESC_TYPE_VENDOR_ID, FIP_DESC_LEN_VENDOR_ID,
1147		    hdr->hd_type, hdr->hd_len);
1148		return (EIB_E_FAILURE);
1149	}
1150	desc_upd = &(upd->vu_vhub_update);
1151	if (desc_upd->up_type != FIP_DESC_TYPE_VHUB_UPDATE ||
1152	    desc_upd->up_len != FIP_DESC_LEN_VHUB_UPDATE) {
1153		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1154		    "invalid type/len in vhub update desc: "
1155		    "exp (0x%x,0x%x), got (0x%x,0x%x)",
1156		    FIP_DESC_TYPE_VHUB_UPDATE, FIP_DESC_LEN_VHUB_UPDATE,
1157		    desc_upd->up_type, desc_upd->up_len);
1158		return (EIB_E_FAILURE);
1159	}
1160
1161	/*
1162	 * Verify that the vhub id is ok for this vnic and save the eport state
1163	 */
1164	eport_vp_vhub_id = ntohl(desc_upd->up_eport_vp_vhub_id);
1165
1166	vhub_id = eport_vp_vhub_id & FIP_UP_VHUB_ID_MASK;
1167	if (vhub_id != ld->ld_vhub_id) {
1168		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1169		    "invalid vhub id in vhub update pkt: exp 0x%x, got 0x%x",
1170		    ld->ld_vhub_id, vhub_id);
1171		return (EIB_E_FAILURE);
1172	}
1173	eport_state = (uint8_t)((eport_vp_vhub_id >> FIP_UP_EPORT_STATE_SHIFT) &
1174	    FIP_UP_EPORT_STATE_MASK);
1175
1176	/*
1177	 * If this is the first update we receive, any tusn is ok.  Otherwise,
1178	 * make sure the tusn we see in the packet is appropriate.
1179	 */
1180	tusn = ntohl(desc_upd->up_tusn);
1181	prev_tusn = vhub_tbl_done ? etbl->tb_tusn : eupd->up_tusn;
1182
1183	if (prev_tusn != 0) {
1184		if (tusn == prev_tusn) {
1185			eib_fip_update_eport_state(ss, etbl, eupd,
1186			    vhub_tbl_done, eport_state);
1187			return (EIB_E_SUCCESS);
1188		}
1189		if (tusn != (prev_tusn + 1)) {
1190			EIB_DPRINTF_WARN(ss->ei_instance,
1191			    "eib_fip_parse_vhub_update: "
1192			    "out of order TUSN received (exp 0x%lx, "
1193			    "got 0x%lx), dropping pkt", prev_tusn + 1, tusn);
1194			return (EIB_E_FAILURE);
1195		}
1196	}
1197
1198	/*
1199	 * EoIB expects only type 0 (vnic address) entries to maintain the
1200	 * context table
1201	 */
1202	entry = &(desc_upd->up_tbl_entry);
1203	ASSERT((entry->te_v_rss_type & FIP_TE_TYPE_MASK) == FIP_TE_TYPE_VNIC);
1204
1205	/*
1206	 * If the vHUB table has already been fully constructed and if we've
1207	 * now received a notice to remove a vnic entry from it, do it.
1208	 */
1209	if ((vhub_tbl_done) &&
1210	    ((entry->te_v_rss_type & FIP_TE_VALID) == 0)) {
1211		eib_fip_dequeue_tbl_entry(etbl, entry->te_mac,
1212		    tusn, eport_state);
1213
1214		if (bcmp(entry->te_mac, ld->ld_assigned_mac, ETHERADDRL) == 0) {
1215			uint8_t *mymac;
1216
1217			mymac = entry->te_mac;
1218			EIB_DPRINTF_WARN(ss->ei_instance,
1219			    "eib_fip_parse_vhub_update: "
1220			    "vhub update pkt received to kill self "
1221			    "(%x:%x:%x:%x:%x:%x)", mymac[0], mymac[1], mymac[2],
1222			    mymac[3], mymac[4], mymac[5]);
1223
1224			return (EIB_E_FAILURE);
1225		}
1226		return (EIB_E_SUCCESS);
1227	}
1228
1229	/*
1230	 * Otherwise, allocate a new eib_vhub_map_t and fill it in with
1231	 * the details of the new entry
1232	 */
1233	if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1234		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_parse_vhub_update: "
1235		    "no memory for vhub update entry, will be ignoring"
1236		    "this vhub update packet");
1237		return (EIB_E_FAILURE);
1238	}
1239
1240	newmap->mp_v_rss_type = entry->te_v_rss_type;
1241	bcopy(entry->te_mac, newmap->mp_mac, ETHERADDRL);
1242	newmap->mp_qpn = (ntohl(entry->te_qpn) & FIP_TE_QPN_MASK);
1243	newmap->mp_sl = (entry->te_sl & FIP_TE_SL_MASK);
1244	newmap->mp_lid = ntohs(entry->te_lid);
1245	newmap->mp_tusn = tusn;
1246	newmap->mp_next = NULL;
1247
1248	/*
1249	 * Update the full vhub table or chain it to the list of pending
1250	 * updates depending on if the vhub table construction is over
1251	 * or not.
1252	 */
1253	if (vhub_tbl_done) {
1254		eib_fip_queue_tbl_entry(etbl, newmap, tusn, eport_state);
1255	} else {
1256		eib_fip_queue_upd_entry(eupd, newmap, tusn, eport_state);
1257	}
1258
1259	return (EIB_E_SUCCESS);
1260}
1261
1262static void
1263eib_fip_update_eport_state(eib_t *ss, eib_vhub_table_t *tbl,
1264    eib_vhub_update_t *upd, boolean_t tbl_done, uint8_t eport_state)
1265{
1266	if (tbl_done) {
1267		mutex_enter(&tbl->tb_lock);
1268		if (tbl->tb_eport_state != eport_state) {
1269			EIB_DPRINTF_DEBUG(ss->ei_instance,
1270			    "eib_fip_update_eport_state: "
1271			    "eport state changing from %d to %d",
1272			    tbl->tb_eport_state, eport_state);
1273			tbl->tb_eport_state = eport_state;
1274		}
1275		mutex_exit(&tbl->tb_lock);
1276	} else {
1277		mutex_enter(&upd->up_lock);
1278		if (upd->up_eport_state != eport_state) {
1279			EIB_DPRINTF_DEBUG(ss->ei_instance,
1280			    "eib_fip_update_eport_state: "
1281			    "eport state changing from %d to %d",
1282			    upd->up_eport_state, eport_state);
1283			upd->up_eport_state = eport_state;
1284		}
1285		mutex_exit(&upd->up_lock);
1286	}
1287}
1288
1289static void
1290eib_fip_queue_tbl_entry(eib_vhub_table_t *tbl, eib_vhub_map_t *map,
1291    uint32_t tusn, uint8_t eport_state)
1292{
1293	uint8_t bkt;
1294
1295	mutex_enter(&tbl->tb_lock);
1296
1297	switch (map->mp_v_rss_type & FIP_TE_TYPE_MASK) {
1298	case FIP_TE_TYPE_GATEWAY:
1299		if (tbl->tb_gateway) {
1300			kmem_free(tbl->tb_gateway,
1301			    sizeof (eib_vhub_map_t));
1302		}
1303		tbl->tb_gateway = map;
1304		break;
1305
1306	case FIP_TE_TYPE_UNICAST_MISS:
1307		if (tbl->tb_unicast_miss) {
1308			kmem_free(tbl->tb_unicast_miss,
1309			    sizeof (eib_vhub_map_t));
1310		}
1311		tbl->tb_unicast_miss = map;
1312		break;
1313
1314	case FIP_TE_TYPE_VHUB_MULTICAST:
1315		if (tbl->tb_vhub_multicast) {
1316			kmem_free(tbl->tb_vhub_multicast,
1317			    sizeof (eib_vhub_map_t));
1318		}
1319		tbl->tb_vhub_multicast = map;
1320		break;
1321
1322	case FIP_TE_TYPE_MULTICAST_ENTRY:
1323		/*
1324		 * If multicast entry types are not to be specially
1325		 * processed, treat them like regular vnic addresses.
1326		 */
1327		if (!eib_wa_no_mcast_entries) {
1328			bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1329			map->mp_next = tbl->tb_mcast_entry[bkt];
1330			tbl->tb_mcast_entry[bkt] = map;
1331			break;
1332		}
1333		/*FALLTHROUGH*/
1334
1335	case FIP_TE_TYPE_VNIC:
1336		bkt = (map->mp_mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1337		map->mp_next = tbl->tb_vnic_entry[bkt];
1338		tbl->tb_vnic_entry[bkt] = map;
1339		break;
1340	}
1341
1342	tbl->tb_tusn = tusn;
1343	tbl->tb_eport_state = eport_state;
1344
1345	mutex_exit(&tbl->tb_lock);
1346}
1347
1348static void
1349eib_fip_queue_upd_entry(eib_vhub_update_t *upd, eib_vhub_map_t *map,
1350    uint32_t tusn, uint8_t eport_state)
1351{
1352	eib_vhub_map_t *tail;
1353
1354	/*
1355	 * The eib_vhub_update_t list is only touched/traversed when the
1356	 * control cq handler is parsing either update or table message,
1357	 * or by the table cleanup routine when we aren't attached to any
1358	 * control mcgs.  Bottom line is that this list traversal is always
1359	 * single-threaded and we could probably do away with the lock.
1360	 */
1361	mutex_enter(&upd->up_lock);
1362	for (tail = upd->up_vnic_entry;  tail != NULL; tail = tail->mp_next) {
1363		if (tail->mp_next == NULL)
1364			break;
1365	}
1366	if (tail) {
1367		tail->mp_next = map;
1368	} else {
1369		upd->up_vnic_entry = map;
1370	}
1371
1372	upd->up_tusn = tusn;
1373	upd->up_eport_state = eport_state;
1374
1375	mutex_exit(&upd->up_lock);
1376}
1377
1378static void
1379eib_fip_queue_gw_entry(eib_vnic_t *vnic, eib_vhub_table_t *tbl, uint32_t tusn,
1380    uint8_t eport_state)
1381{
1382	eib_t *ss = vnic->vn_ss;
1383	eib_vhub_map_t *newmap;
1384	eib_login_data_t *ld = &vnic->vn_login_data;
1385
1386	if ((newmap = eib_fip_get_vhub_map()) == NULL) {
1387		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_queue_gw_entry: "
1388		    "no memory to queue gw entry, transactions could fail");
1389		return;
1390	}
1391
1392	newmap->mp_v_rss_type = FIP_TE_VALID | FIP_TE_TYPE_GATEWAY;
1393	bcopy(eib_zero_mac, newmap->mp_mac, ETHERADDRL);
1394	newmap->mp_qpn = ld->ld_gw_data_qpn;
1395	newmap->mp_sl = ld->ld_gw_sl;
1396	newmap->mp_lid = ld->ld_gw_lid;
1397	newmap->mp_tusn = tusn;
1398	newmap->mp_next = NULL;
1399
1400	eib_fip_queue_tbl_entry(tbl, newmap, tusn, eport_state);
1401}
1402
1403static int
1404eib_fip_apply_updates(eib_t *ss, eib_vhub_table_t *tbl, eib_vhub_update_t *upd)
1405{
1406	eib_vhub_map_t *list;
1407	eib_vhub_map_t *map;
1408	eib_vhub_map_t *nxt;
1409	uint32_t tbl_tusn = tbl->tb_tusn;
1410
1411	/*
1412	 * Take the update list out
1413	 */
1414	mutex_enter(&upd->up_lock);
1415	list = upd->up_vnic_entry;
1416	upd->up_vnic_entry = NULL;
1417	mutex_exit(&upd->up_lock);
1418
1419	/*
1420	 * Skip any updates with older/same tusn as our vhub table
1421	 */
1422	nxt = NULL;
1423	for (map = list; (map) && (map->mp_tusn <= tbl_tusn); map = nxt) {
1424		nxt = map->mp_next;
1425		kmem_free(map, sizeof (eib_vhub_map_t));
1426	}
1427
1428	if (map == NULL)
1429		return (EIB_E_SUCCESS);
1430
1431	/*
1432	 * If we missed any updates between table tusn and the first
1433	 * update tusn we got, we need to fail.
1434	 */
1435	if (map->mp_tusn > (tbl_tusn + 1)) {
1436		EIB_DPRINTF_WARN(ss->ei_instance, "eib_fip_apply_updates: "
1437		    "vhub update missed tusn(s), expected=0x%lx, got=0x%lx",
1438		    (tbl_tusn + 1), map->mp_tusn);
1439		for (; map != NULL; map = nxt) {
1440			nxt = map->mp_next;
1441			kmem_free(map, sizeof (eib_vhub_map_t));
1442		}
1443		return (EIB_E_FAILURE);
1444	}
1445
1446	/*
1447	 * If everything is fine, apply all the updates we received
1448	 */
1449	for (; map != NULL; map = nxt) {
1450		nxt = map->mp_next;
1451		map->mp_next = NULL;
1452
1453		if (map->mp_v_rss_type & FIP_TE_VALID) {
1454			eib_fip_queue_tbl_entry(tbl, map, upd->up_tusn,
1455			    upd->up_eport_state);
1456		} else {
1457			eib_fip_dequeue_tbl_entry(tbl, map->mp_mac,
1458			    upd->up_tusn, upd->up_eport_state);
1459			kmem_free(map, sizeof (eib_vhub_map_t));
1460		}
1461	}
1462
1463	return (EIB_E_SUCCESS);
1464}
1465
1466static void
1467eib_fip_dequeue_tbl_entry(eib_vhub_table_t *tbl, uint8_t *mac, uint32_t tusn,
1468    uint8_t eport_state)
1469{
1470	uint8_t bkt;
1471	eib_vhub_map_t *prev;
1472	eib_vhub_map_t *elem;
1473
1474	bkt = (mac[ETHERADDRL-1]) % EIB_TB_NBUCKETS;
1475
1476	mutex_enter(&tbl->tb_lock);
1477
1478	/*
1479	 * Note that for EoIB, the vhub table is maintained using only
1480	 * vnic entry updates
1481	 */
1482	prev = NULL;
1483	for (elem = tbl->tb_vnic_entry[bkt]; elem; elem = elem->mp_next) {
1484		if (bcmp(elem->mp_mac, mac, ETHERADDRL) == 0)
1485			break;
1486		prev = elem;
1487	}
1488
1489	if (prev && elem) {
1490		prev->mp_next = elem->mp_next;
1491		kmem_free(elem, sizeof (eib_vhub_map_t));
1492	}
1493
1494	tbl->tb_tusn = tusn;
1495	tbl->tb_eport_state = eport_state;
1496
1497	mutex_exit(&tbl->tb_lock);
1498}
1499
1500static eib_vhub_map_t *
1501eib_fip_get_vhub_map(void)
1502{
1503	return (kmem_zalloc(sizeof (eib_vhub_map_t), KM_NOSLEEP));
1504}
1505