1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2019, Joyent, Inc.
28  */
29 
30 #ifndef _SYS_IB_EOIB_ENX_IMPL_H
31 #define	_SYS_IB_EOIB_ENX_IMPL_H
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/varargs.h>
40 #include <sys/ib/ibtl/ibti.h>
41 #include <sys/ib/ibtl/ibvti.h>
42 #include <sys/ib/ib_pkt_hdrs.h>
43 #include <sys/ib/ibtl/impl/ibtl_ibnex.h>
44 #include <sys/ib/mgt/sm_attr.h>
45 
46 #include <sys/ib/clients/eoib/fip.h>
47 #include <sys/ib/clients/eoib/eib.h>
48 
49 /*
50  * Driver specific constants
51  */
52 #define	ENX_E_SUCCESS		0	/* success status value */
53 #define	ENX_E_FAILURE		-1	/* failure status; NOTE(review): negative literal is not parenthesized */
54 #define	ENX_MAX_LINE		128	/* presumably a line/message buffer length; confirm against its users */
55 #define	ENX_GRH_SZ		(sizeof (ib_grh_t))	/* size in bytes of an ib_grh_t */
56 
57 /*
58  * Debug messages
59  */
60 #define	ENX_MSGS_CRIT		0x01	/* each ENX_MSGS_* level is a distinct bit */
61 #define	ENX_MSGS_ERR		0x02
62 #define	ENX_MSGS_WARN		0x04
63 #define	ENX_MSGS_DEBUG		0x08
64 #define	ENX_MSGS_ARGS		0x10
65 #define	ENX_MSGS_VERBOSE	0x20
66 #define	ENX_MSGS_DEFAULT	(ENX_MSGS_CRIT | ENX_MSGS_ERR | ENX_MSGS_WARN)	/* evaluates to 0x07 */
67 
68 #define	ENX_LOGSZ_DEFAULT	0x20000	/* 131072; presumably the debug log size in bytes -- confirm */
69 
70 #define	ENX_DPRINTF_CRIT	eibnx_dprintf_crit	/* crit/err/warn always map to real functions */
71 #define	ENX_DPRINTF_ERR		eibnx_dprintf_err
72 #define	ENX_DPRINTF_WARN	eibnx_dprintf_warn
73 #ifdef ENX_DEBUG	/* debug/args/verbose printfs map to functions only in ENX_DEBUG builds */
74 #define	ENX_DPRINTF_DEBUG	eibnx_dprintf_debug
75 #define	ENX_DPRINTF_ARGS	eibnx_dprintf_args
76 #define	ENX_DPRINTF_VERBOSE	eibnx_dprintf_verbose
77 #else	/* otherwise they expand to a no-op; the arguments are discarded, not evaluated */
78 #define	ENX_DPRINTF_DEBUG(...)	(void)(0)
79 #define	ENX_DPRINTF_ARGS(...)	(void)(0)
80 #define	ENX_DPRINTF_VERBOSE(...) (void)(0)
81 #endif
82 
83 /*
84  *  EoIB Nexus service threads
85  */
86 #define	ENX_PORT_MONITOR	"eibnx_port_%d_monitor"	/* format string; %d is presumably the port number -- confirm */
87 #define	ENX_NODE_CREATOR	"eibnx_node_creator"	/* fixed name, no format arguments */
88 
89 /*
90  * Default period (us) for unicast solicitations to discovered gateways.
91  * EoIB specification requires that hosts send solicitation at least every
92  * 4 * GW_ADV_PERIOD.
93  */
94 #define	ENX_DFL_SOLICIT_PERIOD_USEC	32000000	/* 32,000,000 us = 32 seconds */
95 
96 /*
97  * Portinfo list per HCA
98  */
99 typedef struct eibnx_port_s {
100 	struct eibnx_port_s	*po_next;	/* next entry in the per-HCA port list */
101 	ibt_hca_portinfo_t	*po_pi;	/* IBT port info for this port */
102 	uint_t			po_pi_size;	/* presumably the size of the po_pi buffer -- confirm with the allocator */
103 } eibnx_port_t;
104 
105 /*
106  * HCA details
107  */
108 typedef struct eibnx_hca_s {
109 	struct eibnx_hca_s	*hc_next;	/* next HCA in the list (eibnx_t.nx_hca has the same type) */
110 	ib_guid_t		hc_guid;	/* GUID of this HCA */
111 	ibt_hca_hdl_t		hc_hdl;	/* IBT handle for this HCA */
112 	ibt_pd_hdl_t		hc_pd;	/* IBT protection domain handle */
113 	eibnx_port_t		*hc_port;	/* port list for this HCA (linked via po_next) */
114 } eibnx_hca_t;
115 
116 /*
117  * The port_monitor thread in EoIB nexus driver only sends two types of
118  * packets: multicast solicitation the first time around, and periodic
119  * unicast solicitations later to gateways that have been discovered. So
120  * we need a couple of send wqes for the multicast solicitation and
121  * probably as many send wqes as the number of gateways that may be
122  * discovered from each port, for sending the unicast solicitations.
123  * For unicast solicitations though, the UD destination needs to be set
124  * up at the time we receive the advertisement from the gateway, using
125  * ibt_modify_reply_ud_dest(), so we'll assign one send wqe for each
126  * gateway that we discover.  This means that we need to acquire these
127  * send wqe entries during rx processing in the completion handler, which
128  * means we must avoid sleeping in trying to acquire the swqe. Therefore,
129  * we'll pre-allocate these unicast solicitation send wqes to be at least
130  * twice the number of recv wqes.
131  *
132  * The receive packets expected by the EoIB nexus driver are the multicast
133  * and unicast messages on the SOLICIT and ADVERTISE groups. These
134  * shouldn't be too many, and should be tuned as we gain experience on
135  * the traffic pattern.  We'll start with 16.
136  */
137 #define	ENX_NUM_SWQE			46	/* send wqes; exceeds 2 * ENX_NUM_RWQE (32) */
138 #define	ENX_NUM_RWQE			16	/* recv wqes */
139 #define	ENX_CQ_SIZE			(ENX_NUM_SWQE + ENX_NUM_RWQE + 2)	/* 64; NOTE(review): the reason for the extra 2 is not stated here */
140 
141 /*
142  * qe_type values
143  */
144 #define	ENX_QETYP_RWQE			0x1	/* qe_type value for a recv workq entry */
145 #define	ENX_QETYP_SWQE			0x2	/* qe_type value for a send workq entry */
146 
147 /*
148  * qe_flags bitmasks (protected by qe_lock). None of the
149  * flag values may be zero.
150  */
151 #define	ENX_QEFL_INUSE			0x01	/* distinct bits, so the flags can be OR-ed together */
152 #define	ENX_QEFL_POSTED			0x02	/* presumably set while the wqe is posted to the channel -- confirm */
153 #define	ENX_QEFL_RELONCOMP		0x04	/* presumably "release on completion" -- confirm */
154 
155 /*
156  * Recv and send workq entries
157  */
158 typedef struct eibnx_wqe_s {
159 	uint_t			qe_type;	/* ENX_QETYP_RWQE or ENX_QETYP_SWQE */
160 	uint_t			qe_bufsz;	/* presumably the size of the buffer described by qe_sgl -- confirm */
161 	ibt_wr_ds_t		qe_sgl;	/* a single IBT data segment (not an array) */
162 	ibt_all_wr_t		qe_wr;	/* IBT work request storage */
163 	kmutex_t		qe_lock;	/* protects qe_flags */
164 	uint_t			qe_flags;	/* ENX_QEFL_* bitmask */
165 } eibnx_wqe_t;
166 
167 /*
168  * Tx descriptor
169  */
170 typedef struct eibnx_tx_s {
171 	ib_vaddr_t		tx_vaddr;	/* presumably the base address of the registered tx buffer area -- confirm */
172 	ibt_mr_hdl_t		tx_mr;	/* IBT memory region handle */
173 	ibt_lkey_t		tx_lkey;	/* presumably the local key of tx_mr -- confirm */
174 	eibnx_wqe_t		tx_wqe[ENX_NUM_SWQE];	/* fixed pool of ENX_NUM_SWQE send wqes, embedded inline */
175 } eibnx_tx_t;
176 
177 /*
178  * Rx descriptor
179  */
180 typedef struct eibnx_rx_s {
181 	ib_vaddr_t		rx_vaddr;	/* presumably the base address of the registered rx buffer area -- confirm */
182 	ibt_mr_hdl_t		rx_mr;	/* IBT memory region handle */
183 	ibt_lkey_t		rx_lkey;	/* presumably the local key of rx_mr -- confirm */
184 	eibnx_wqe_t		rx_wqe[ENX_NUM_RWQE];	/* fixed pool of ENX_NUM_RWQE recv wqes, embedded inline */
185 } eibnx_rx_t;
186 
187 /*
188  * Details about the address of each gateway we discover.
189  */
190 typedef struct eibnx_gw_addr_s {
191 	ibt_adds_vect_t		*ga_vect;	/* pointer to an IBT address vector; NOTE(review): ownership not visible here */
192 	ib_gid_t		ga_gid;	/* gateway GID */
193 	ib_qpn_t		ga_qpn;	/* gateway QP number */
194 	ib_qkey_t		ga_qkey;	/* Q_Key */
195 	ib_pkey_t		ga_pkey;	/* P_Key */
196 } eibnx_gw_addr_t;
197 
198 /*
199  * States for each GW
200  */
201 #define	ENX_GW_STATE_UNAVAILABLE	1	/* GW nackd availability */
202 #define	ENX_GW_STATE_AVAILABLE		2	/* GW mcasted availability */
203 #define	ENX_GW_STATE_READY_TO_LOGIN	3	/* GW ucasted availability */
204 
205 typedef struct eibnx_gw_info_s {
206 	struct eibnx_gw_info_s	*gw_next;	/* next gateway in a singly linked list */
207 	eibnx_wqe_t		*gw_swqe;	/* send wqe assigned to this gateway for unicast solicitations */
208 	uint_t			gw_state;	/* one of ENX_GW_STATE_* */
209 
210 	kmutex_t		gw_adv_lock;	/* presumably guards the gw_adv_* fields below -- confirm */
211 	uint_t			gw_adv_flag;	/* ENX_GW_DEAD, ENX_GW_ALIVE or ENX_GW_AWARE */
212 	int64_t			gw_adv_last_lbolt;	/* presumably lbolt when the last advertisement was seen -- confirm */
213 	int64_t			gw_adv_timeout_ticks;	/* presumably the advertisement timeout in clock ticks -- confirm */
214 
215 	eibnx_gw_addr_t		gw_addr;	/* address details of this gateway, embedded inline */
216 
217 	ib_guid_t		gw_system_guid;
218 	ib_guid_t		gw_guid;
219 
220 	uint32_t		gw_adv_period;	/* NOTE(review): units of the *_period fields are not visible here */
221 	uint32_t		gw_ka_period;
222 	uint32_t		gw_vnic_ka_period;
223 	ib_qpn_t		gw_ctrl_qpn;
224 
225 	ib_lid_t		gw_lid;
226 	uint16_t		gw_portid;
227 	uint16_t		gw_num_net_vnics;
228 
229 	uint8_t			gw_is_host_adm_vnics;
230 	uint8_t			gw_sl;
231 	uint8_t			gw_n_rss_qpn;
232 	uint8_t			gw_flag_ucast_advt;
233 	uint8_t			gw_flag_available;
234 
235 	uint8_t			gw_system_name[EIB_GW_SYSNAME_LEN];	/* fixed-size byte arrays; NOTE(review): NUL termination not guaranteed by the type */
236 	uint8_t			gw_port_name[EIB_GW_PORTNAME_LEN];
237 	uint8_t			gw_vendor_id[EIB_GW_VENDOR_LEN];
238 } eibnx_gw_info_t;
239 
240 /*
241  * Values for gw_adv_flag (non-zero only)
242  */
243 #define	ENX_GW_DEAD		1	/* sequential values (1, 2, 3), not a bitmask */
244 #define	ENX_GW_ALIVE		2
245 #define	ENX_GW_AWARE		3	/* NOTE(review): meaning of "aware" is not explained in this header */
246 
247 /*
248  * Currently, we only expect the advertisement type of packets
249  * from the gw. But we do get login acks from the gateway also
250  * here in the nexus, so we'll need an identifier for that.
251  */
252 typedef enum {
253 	FIP_GW_ADVERTISE_MCAST = 0,	/* multicast advertisement from the gw */
254 	FIP_GW_ADVERTISE_UCAST,	/* unicast advertisement from the gw (value 1) */
255 	FIP_VNIC_LOGIN_ACK	/* login ack from the gw (value 2) */
256 } eibnx_gw_pkt_type_t;
257 
258 /*
259  * Currently, the only gw responses handled by the eibnx driver
260  * are the ucast/mcast advertisements.  Information collected from
261  * both these responses may be packed into an eibnx_gw_info_t.
262  * In the future, if we decide to handle other types of responses
263  * from the gw, we could simply add the new types to the union.
264  */
265 typedef struct eibnx_gw_msg_s {
266 	eibnx_gw_pkt_type_t	gm_type;	/* which kind of gw packet this message describes */
267 	union {
268 		eibnx_gw_info_t	gm_info;	/* currently the only member of the union */
269 	} u;
270 } eibnx_gw_msg_t;
271 
272 /*
273  * List to hold the devinfo nodes of eoib instances
274  */
275 typedef struct eibnx_child_s {
276 	struct eibnx_child_s	*ch_next;	/* next entry in the child list */
277 	dev_info_t		*ch_dip;	/* devinfo node of the eoib instance */
278 	eibnx_gw_info_t		*ch_gwi;	/* gateway info associated with this child */
279 	char			*ch_node_name;	/* NOTE(review): ownership/lifetime of this string is not visible here */
280 } eibnx_child_t;
281 
282 /*
283  * Event bitmasks for the port-monitor to wait on. None of these flags
284  * may be zero.
285  */
286 #define	ENX_EVENT_LINK_UP		0x01	/* one bit per event, so several may be pending at once */
287 #define	ENX_EVENT_MCGS_AVAILABLE	0x02
288 #define	ENX_EVENT_TIMED_OUT		0x04
289 #define	ENX_EVENT_DIE			0x08	/* presumably asks the port-monitor to exit -- confirm */
290 #define	ENX_EVENT_COMPLETION		0x10
291 
292 /*
293  * MCG Query/Join status
294  */
295 #define	ENX_MCGS_FOUND			0x1	/* distinct bits; presumably stored in ti_mcg_status -- confirm */
296 #define	ENX_MCGS_JOINED			0x2
297 
298 /*
299  * Information that each port-monitor thread cares about
300  */
301 typedef struct eibnx_thr_info_s {
302 	struct eibnx_thr_info_s	*ti_next;	/* next port-monitor entry in the list */
303 	uint_t			ti_progress;	/* NOTE(review): encoding of the progress value is not visible here */
304 
305 	/*
306 	 * Our kernel thread id
307 	 */
308 	kt_did_t		ti_kt_did;
309 
310 	/*
311 	 * HCA, port and protection domain information
312 	 */
313 	ib_guid_t		ti_hca_guid;
314 	ibt_hca_hdl_t		ti_hca;
315 	ibt_pd_hdl_t		ti_pd;
316 	ibt_hca_portinfo_t	*ti_pi;
317 	char			*ti_ident;	/* NOTE(review): contents and ownership of this string are not visible here */
318 
319 	/*
320 	 * Well-known multicast groups for solicitations
321 	 * and advertisements.
322 	 */
323 	kmutex_t		ti_mcg_lock;	/* presumably guards the ti_mcg_* / mcg pointer fields below -- confirm */
324 	uint_t			ti_mcg_status;	/* presumably ENX_MCGS_* bits -- confirm */
325 	ibt_mcg_info_t		*ti_advertise_mcg;
326 	ibt_mcg_info_t		*ti_solicit_mcg;
327 	uint_t			ti_mcast_done;
328 
329 	/*
330 	 * Completion queue stuff
331 	 */
332 	ibt_cq_hdl_t		ti_cq_hdl;
333 	uint_t			ti_cq_sz;
334 	ibt_wc_t		*ti_wc;	/* work completion storage; presumably ti_cq_sz entries -- confirm */
335 	ddi_softint_handle_t    ti_softint_hdl;
336 
337 	/*
338 	 * Channel related
339 	 */
340 	ibt_channel_hdl_t	ti_chan;
341 	ib_qpn_t		ti_qpn;
342 
343 	/*
344 	 * Transmit/Receive stuff
345 	 */
346 	eibnx_tx_t		ti_snd;	/* embedded inline; holds ENX_NUM_SWQE send wqes */
347 	eibnx_rx_t		ti_rcv;	/* embedded inline; holds ENX_NUM_RWQE recv wqes */
348 
349 	/*
350 	 * GW related stuff
351 	 */
352 	kmutex_t		ti_gw_lock;	/* presumably guards the ti_gw list -- confirm */
353 	eibnx_gw_info_t		*ti_gw;	/* gateway list (entries link via gw_next) */
354 
355 	/*
356 	 * Devinfo nodes for the eoib children
357 	 */
358 	kmutex_t		ti_child_lock;	/* presumably guards the ti_child list -- confirm */
359 	eibnx_child_t		*ti_child;	/* child list (entries link via ch_next) */
360 
361 	/*
362 	 * Events that we wait on and/or handle
363 	 */
364 	kmutex_t		ti_event_lock;
365 	kcondvar_t		ti_event_cv;
366 	uint_t			ti_event;	/* ENX_EVENT_* bitmask */
367 } eibnx_thr_info_t;
368 
369 /*
370  * Workq entry for creation of eoib nodes
371  */
372 typedef struct eibnx_nodeq_s {
373 	struct eibnx_nodeq_s	*nc_next;	/* next pending work item */
374 	eibnx_thr_info_t	*nc_info;	/* port-monitor state the node belongs to */
375 	eibnx_gw_info_t		*nc_gwi;	/* gateway the node is to be created for */
376 } eibnx_nodeq_t;
377 
378 /*
379  * Bus config status flags.  The in-prog is protected by
380  * nx_lock, and the rest of the flags (currently only
381  * buscfg-complete) is protected by the in-prog bit itself.
382  */
383 #define	NX_FL_BUSOP_INPROG		0x1	/* the "in-prog" bit */
384 #define	NX_FL_BUSCFG_COMPLETE		0x2	/* the "buscfg-complete" bit */
385 #define	NX_FL_BUSOP_MASK		0x3	/* equals NX_FL_BUSOP_INPROG | NX_FL_BUSCFG_COMPLETE */
386 
387 /*
388  * EoIB nexus per-instance state
389  */
390 typedef struct eibnx_s {
391 	dev_info_t		*nx_dip;	/* devinfo node of the nexus instance */
392 	ibt_clnt_hdl_t		nx_ibt_hdl;	/* IBT client handle */
393 
394 	kmutex_t		nx_lock;	/* presumably guards the HCA/monitor fields below -- confirm */
395 	eibnx_hca_t		*nx_hca;	/* HCA list (entries link via hc_next) */
396 	eibnx_thr_info_t	*nx_thr_info;	/* port-monitor list (entries link via ti_next) */
397 	boolean_t		nx_monitors_up;
398 
399 	kmutex_t		nx_nodeq_lock;	/* presumably guards the node-creation queue fields below -- confirm */
400 	kcondvar_t		nx_nodeq_cv;
401 	eibnx_nodeq_t		*nx_nodeq;	/* work queue of eoib nodes awaiting creation */
402 	kt_did_t		nx_nodeq_kt_did;	/* kernel thread id; presumably of the node creator thread -- confirm */
403 	uint_t			nx_nodeq_thr_die;
404 
405 	kmutex_t		nx_busop_lock;
406 	kcondvar_t		nx_busop_cv;
407 	uint_t			nx_busop_flags;	/* NX_FL_* bits; NOTE(review): the NX_FL_* comment names nx_lock as the in-prog guard, yet nx_busop_lock sits here -- confirm which lock applies */
408 } eibnx_t;
409 
410 
411 /*
412  * Event tags for EoIB Nexus events delivered to EoIB instances
413  */
414 #define	ENX_EVENT_TAG_GW_INFO_UPDATE		0	/* tags are sequential values, not bits; note that 0 is a valid tag */
415 #define	ENX_EVENT_TAG_GW_AVAILABLE		1
416 #define	ENX_EVENT_TAG_LOGIN_ACK			2
417 
418 /*
419  * FUNCTION PROTOTYPES FOR CROSS-FILE LINKAGE
420  */
421 
422 /*
423  * Threads and Event Handlers
424  */
425 void eibnx_port_monitor(eibnx_thr_info_t *);	/* argument is the per-port-monitor thread state */
426 void eibnx_subnet_notices_handler(void *, ib_gid_t, ibt_subnet_event_code_t,
427     ibt_subnet_event_t *);
428 void eibnx_async_handler(void *, ibt_hca_hdl_t, ibt_async_code_t,
429     ibt_async_event_t *);
430 boolean_t eibnx_is_gw_dead(eibnx_gw_info_t *);
431 void eibnx_create_eoib_node(void);	/* no arguments; presumably works off enx_global_ss -- confirm */
432 void eibnx_comp_intr(ibt_cq_hdl_t, void *);
433 uint_t eibnx_comp_handler(caddr_t, caddr_t);	/* presumably the soft interrupt handler behind ti_softint_hdl -- confirm */
434 
435 /*
436  * IBT related functions
437  */
438 int eibnx_ibt_init(eibnx_t *);	/* int results here are presumably ENX_E_SUCCESS/ENX_E_FAILURE -- confirm */
439 int eibnx_find_mgroups(eibnx_thr_info_t *);
440 int eibnx_setup_cq(eibnx_thr_info_t *);
441 int eibnx_setup_ud_channel(eibnx_thr_info_t *);
442 int eibnx_setup_bufs(eibnx_thr_info_t *);
443 int eibnx_setup_cq_handler(eibnx_thr_info_t *);
444 int eibnx_join_mcgs(eibnx_thr_info_t *);
445 int eibnx_rejoin_mcgs(eibnx_thr_info_t *);	/* NOTE(review): no eibnx_rb_* counterpart is declared for this one */
446 int eibnx_ibt_fini(eibnx_t *);
447 
448 void eibnx_rb_find_mgroups(eibnx_thr_info_t *);	/* each eibnx_rb_*() has a same-named step above; presumably its rollback -- confirm */
449 void eibnx_rb_setup_cq(eibnx_thr_info_t *);
450 void eibnx_rb_setup_ud_channel(eibnx_thr_info_t *);
451 void eibnx_rb_setup_bufs(eibnx_thr_info_t *);
452 void eibnx_rb_setup_cq_handler(eibnx_thr_info_t *);
453 void eibnx_rb_join_mcgs(eibnx_thr_info_t *);
454 
455 eibnx_hca_t *eibnx_prepare_hca(ib_guid_t);	/* NOTE(review): failure return (NULL?) is not visible here */
456 int eibnx_cleanup_hca(eibnx_hca_t *);
457 
458 /*
459  * FIP packetizing related functions
460  */
461 int eibnx_fip_solicit_mcast(eibnx_thr_info_t *);
462 int eibnx_fip_solicit_ucast(eibnx_thr_info_t *, clock_t *);	/* NOTE(review): direction (in/out) of the clock_t pointer is not visible here */
463 int eibnx_fip_parse_pkt(uint8_t *, eibnx_gw_msg_t *);	/* presumably parses the raw packet bytes into the gw msg -- confirm */
464 
465 /*
466  * Queue and List related routines
467  */
468 eibnx_wqe_t *eibnx_acquire_swqe(eibnx_thr_info_t *, int);	/* NOTE(review): meaning of the int is not visible here; acquisition during rx processing must not sleep */
469 void eibnx_return_swqe(eibnx_wqe_t *);
470 void eibnx_return_rwqe(eibnx_thr_info_t *, eibnx_wqe_t *);
471 void eibnx_release_swqe(eibnx_wqe_t *);	/* NOTE(review): how "release" differs from "return" is not visible here */
472 
473 void eibnx_enqueue_child(eibnx_thr_info_t *, eibnx_gw_info_t *, char *,
474     dev_info_t *);
475 int eibnx_update_child(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t *);
476 dev_info_t *eibnx_find_child_dip_by_inst(eibnx_thr_info_t *, int);
477 dev_info_t *eibnx_find_child_dip_by_gw(eibnx_thr_info_t *, uint16_t);	/* uint16_t matches the type of gw_portid; presumably that id -- confirm */
478 
479 eibnx_gw_info_t *eibnx_find_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *);
480 eibnx_gw_info_t *eibnx_add_gw_to_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
481     ibt_wc_t *, uint8_t *);
482 void eibnx_replace_gw_in_gwlist(eibnx_thr_info_t *, eibnx_gw_info_t *,
483     eibnx_gw_info_t *, ibt_wc_t *, uint8_t *, boolean_t *);
484 void eibnx_queue_for_creation(eibnx_thr_info_t *, eibnx_gw_info_t *);	/* arguments match the nc_info/nc_gwi fields of eibnx_nodeq_t */
485 
486 /*
487  * Logging and Error reporting routines
488  */
489 void eibnx_debug_init(void);
490 void eibnx_debug_fini(void);
491 void eibnx_dprintf_crit(const char *fmt, ...);	/* printf-style; reached through ENX_DPRINTF_CRIT */
492 void eibnx_dprintf_err(const char *fmt, ...);	/* reached through ENX_DPRINTF_ERR */
493 void eibnx_dprintf_warn(const char *fmt, ...);	/* reached through ENX_DPRINTF_WARN */
494 #ifdef ENX_DEBUG	/* declared only in ENX_DEBUG builds, matching the ENX_DPRINTF_* macro mapping */
495 void eibnx_dprintf_debug(const char *fmt, ...);
496 void eibnx_dprintf_args(const char *fmt, ...);
497 void eibnx_dprintf_verbose(const char *fmt, ...);
498 #endif
499 
500 /*
501  * Miscellaneous
502  */
503 void eibnx_cleanup_port_nodes(eibnx_thr_info_t *);
504 void eibnx_create_node_props(dev_info_t *, eibnx_thr_info_t *,
505     eibnx_gw_info_t *);
506 int eibnx_name_child(dev_info_t *, char *, size_t);	/* presumably writes the name into the (buffer, size) pair -- confirm */
507 void eibnx_busop_inprog_enter(eibnx_t *);	/* enter/exit pair; presumably set and clear NX_FL_BUSOP_INPROG -- confirm */
508 void eibnx_busop_inprog_exit(eibnx_t *);
509 eibnx_thr_info_t *eibnx_start_port_monitor(eibnx_hca_t *, eibnx_port_t *);	/* returns the state later passed to eibnx_stop_port_monitor() */
510 void eibnx_stop_port_monitor(eibnx_thr_info_t *);
511 void eibnx_terminate_monitors(void);	/* no arguments; presumably works off enx_global_ss -- confirm */
512 int eibnx_configure_node(eibnx_thr_info_t *, eibnx_gw_info_t *, dev_info_t **);	/* dev_info_t ** is presumably an out parameter -- confirm */
513 int eibnx_unconfigure_node(eibnx_thr_info_t *, eibnx_gw_info_t *);
514 int eibnx_locate_node_name(char *, eibnx_thr_info_t **, eibnx_gw_info_t **);	/* double pointers are presumably out parameters -- confirm */
515 int eibnx_locate_unconfigured_node(eibnx_thr_info_t **, eibnx_gw_info_t **);
516 
517 /*
518  * Devctl cbops (currently dummy)
519  */
520 int eibnx_devctl_open(dev_t *, int, int, cred_t *);	/* argument list matches the open(9E) entry point */
521 int eibnx_devctl_close(dev_t, int, int, cred_t *);	/* argument list matches the close(9E) entry point */
522 int eibnx_devctl_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);	/* argument list matches the ioctl(9E) entry point */
523 
524 /*
525  * External variable references
526  */
527 extern pri_t minclsyspri;	/* not an eibnx symbol; presumably the kernel's minimum system-class priority -- confirm */
528 extern eibnx_t *enx_global_ss;	/* pointer to the nexus per-instance state; defined outside this header */
529 extern ib_gid_t enx_solicit_mgid;	/* presumably the MGID of the well-known SOLICIT group -- confirm */
530 extern ib_gid_t enx_advertise_mgid;	/* presumably the MGID of the well-known ADVERTISE group -- confirm */
531 
532 #ifdef __cplusplus
533 }
534 #endif
535 
536 #endif	/* _SYS_IB_EOIB_ENX_IMPL_H */
537