1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2005 SilverStorm Technologies, Inc. All rights reserved.
27  *
28  * This software is available to you under a choice of one of two
29  * licenses.  You may choose to be licensed under the terms of the GNU
30  * General Public License (GPL) Version 2, available from the file
31  * COPYING in the main directory of this source tree, or the
32  * OpenIB.org BSD license below:
33  *
34  *     Redistribution and use in source and binary forms, with or
35  *     without modification, are permitted provided that the following
36  *     conditions are met:
37  *
38  *	- Redistributions of source code must retain the above
39  *	  copyright notice, this list of conditions and the following
40  *	  disclaimer.
41  *
42  *	- Redistributions in binary form must reproduce the above
43  *	  copyright notice, this list of conditions and the following
44  *	  disclaimer in the documentation and/or other materials
45  *	  provided with the distribution.
46  *
47  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
51  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
52  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
53  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
54  * SOFTWARE.
55  *
56  */
57 /*
58  * Sun elects to include this software in Sun product
59  * under the OpenIB BSD license.
60  *
61  *
62  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
63  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
64  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
65  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
66  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
67  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
68  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
69  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
70  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
71  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
72  * POSSIBILITY OF SUCH DAMAGE.
73  */
74 
75 #ifndef _RDSIB_EP_H
76 #define	_RDSIB_EP_H
77 
78 #pragma ident	"%Z%%M%	%I%	%E% SMI"
79 
80 #ifdef __cplusplus
81 extern "C" {
82 #endif
83 
84 #include <netinet/in.h>
85 
/*
 * rds_ep_type_t - whether an endpoint is the control channel or the
 * data channel.
 */
typedef enum rds_ep_type_s {
	RDS_EP_TYPE_CTRL = 1,	/* control channel */
	RDS_EP_TYPE_DATA = 2	/* data channel */
} rds_ep_type_t;
93 
/*
 * Channel States
 *
 * Each member is described where it is declared.  An earlier version of
 * this comment listed RDS_EP_STATE_DESTROY_TIMEWAIT ("Channel is closed");
 * no such member exists in this enum.
 */
typedef enum rds_ep_state_s {
	/* Initial state when rds_ep_t is created */
	RDS_EP_STATE_UNCONNECTED = 0,
	/* Active side connection in progress */
	RDS_EP_STATE_ACTIVE_PENDING = 1,
	/* Passive side connection in progress */
	RDS_EP_STATE_PASSIVE_PENDING = 2,
	/* Channel is connected */
	RDS_EP_STATE_CONNECTED = 3,
	/* NOTE(review): presumably channel close in progress - confirm */
	RDS_EP_STATE_CLOSING = 4,
	/* Channel is closed */
	RDS_EP_STATE_CLOSED = 5,
	/* NOTE(review): presumably channel hit an error - confirm */
	RDS_EP_STATE_ERROR = 6
} rds_ep_state_t;
112 
113 /*
114  * Session State Machine Diagram
115  *
116  *                     -----------------
117  *                    |       (6)       |
118  *                    |                 |
119  *                    v                 |
120  *             --> (Created)-------->(Failed)
121  *            |     |         (5)       ^
122  *            |     |(1)                |
123  *            |     |                   |(9)
124  *            |     v                   |
125  *            |    (Init)<--------------|
126  *            |     | |       (8)       |
127  *            |     | |                 |
128  *            |  (2)|  --------------   |
129  *        (11)|     |         (7)    |  |
130  *            |     v                v  |
131  *            |    (Connected)------>(Error)
132  *            |     |         (10)
133  *            |     |(3)
134  *            |     |
135  *            |     v
136  *            |    (Closed)
137  *            |     |
138  *            |     |(4)
139  *            |     |
140  *            |     v
141  *             --- (Fini) ------->(Destroy)
142  *                         (12)
143  *
144  *	(1) rds_session_init()
145  *	(2) rds_session_open()
146  *	(3) rds_session_close()
147  *	(4) rds_session_fini()
148  *	(4) rds_passive_session_fini()
149  *	(5) Failure in rds_session_init()
150  *	(6) rds_sendmsg(3SOCKET)/Incoming CM REQ
151  *	(7) Failure in rds_session_open()
152  *	(8) rds_session_close(), rds_get_ibaddr() and rds_session_reinit()
153  *	(9) rds_session_close() and rds_session_fini()
154  *	(9) rds_cleanup_passive_session() and rds_passive_session_fini()
155  *	(10) Connection Error/Incoming REQ
156  *	(11) rds_sendmsg(3SOCKET)/Incoming REQ
157  *
158  *
159  * Created   - Session is allocated and inserted into the sessionlist but
160  *             not all members are initialized.
161  * Init      - All members are initialized, send buffer pool is allocated.
162  * Connected - Data and ctrl RC channels are opened.
163  * Closed    - Data and ctrl RC channels are closed.
164  * Fini      - Send buffer pool and buffers in the receive pool are freed.
165  * Destroy   - Session is removed from the session list and is ready to be
166  *             freed.
167  * Failed    - Session initialization has failed (send buffer pool allocation).
168  * Error     - (1) Failed to open the RC channels.
169  *             (2) An error occurred on the RC channels while sending.
170  *             (3) Received a new CM REQ message on the existing connection.
171  */
typedef enum rds_session_state_s {
	/* Allocated and in the session list; not all members initialized */
	RDS_SESSION_STATE_CREATED = 0,
	/* Session initialization failed (send buffer pool allocation) */
	RDS_SESSION_STATE_FAILED = 1,
	/* All members initialized, send buffer pool allocated */
	RDS_SESSION_STATE_INIT = 2,
	/* Data and ctrl RC channels are opened */
	RDS_SESSION_STATE_CONNECTED = 3,
	/* RC channel open failure, send error, or new CM REQ received */
	RDS_SESSION_STATE_ERROR = 4,
	/*
	 * NOTE(review): the two CLOSING states do not appear in the state
	 * machine diagram; presumably they mark a close in progress on the
	 * active and passive side respectively - confirm.
	 */
	RDS_SESSION_STATE_ACTIVE_CLOSING = 5,
	RDS_SESSION_STATE_PASSIVE_CLOSING = 6,
	/* Data and ctrl RC channels are closed */
	RDS_SESSION_STATE_CLOSED = 7,
	/* Send buffer pool and buffers in the receive pool are freed */
	RDS_SESSION_STATE_FINI = 8,
	/* Removed from the session list and ready to be freed */
	RDS_SESSION_STATE_DESTROY = 9
} rds_session_state_t;
184 
/*
 * RDS_SESSION_TRANSITION - store 'state' in sp->session_state while holding
 * sp->session_lock as writer.
 *
 * sp    - Pointer to the session.  It is evaluated more than once, so the
 *         argument must not have side effects.
 * state - New value for session_state.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement.  Without the wrapper, using the macro as the body of an
 * unbraced if/else or loop would guard only the rw_enter() call; the state
 * store and rw_exit() would run unconditionally.  Both arguments are
 * parenthesized so that expressions such as '&sess' expand correctly.
 */
#define	RDS_SESSION_TRANSITION(sp, state)			\
	do {							\
		rw_enter(&(sp)->session_lock, RW_WRITER);	\
		(sp)->session_state = (state);			\
		rw_exit(&(sp)->session_lock);			\
	} while (0)
189 
/*
 * Session end identifiers: active or passive.
 * NOTE(review): presumably the values stored in rds_session_t:session_type
 * - confirm.
 */
#define	RDS_SESSION_ACTIVE	1	/* active end */
#define	RDS_SESSION_PASSIVE	2	/* passive end */
193 
/*
 * RDS QP Information
 *
 * Work request (WR) accounting for a queue pair.  rds_ep_t embeds one
 * instance of this structure as ep_recvqp.
 *
 * qp_lock         - Synchronize access
 * qp_depth        - Max number of WRs that can be posted.
 * qp_level        - Number of outstanding WRs in the QP
 * qp_lwm          - Low water mark at which to post more receive WRs.
 * qp_taskqpending - Indicates if a taskq thread is dispatched to post
 *                   receive WRs in the RQ
 */
typedef struct rds_qp_s {
	kmutex_t		qp_lock;
	uint32_t		qp_depth;
	uint32_t		qp_level;
	uint32_t		qp_lwm;
	boolean_t		qp_taskqpending;
} rds_qp_t;
211 
/*
 * RDS EndPoint (one end of an RC connection)
 *
 * Field names below are given without the "ep_" prefix.
 *
 * sp        - Parent Session
 * type      - Control or Data Channel
 * remip     - Same as session_remip
 * myip      - Same as session_myip
 * snd_lkey  - LKey for the send buffer pool
 * hca_guid  - HCA guid
 * snd_mrhdl - Memory handle for the send buffer pool
 * lock      - Protects the members
 * state     - See rds_ep_state_t
 * chanhdl   - RC channel handle
 * sendcq    - Send CQ handle
 * recvcq    - Recv CQ handle
 * sndpool   - Send buffer Pool
 * rcvpool   - Recv buffer Pool
 * recvqp    - Receive QP information, see rds_qp_t
 * rdmacnt   - NOTE(review): not described in the original comment;
 *             presumably a count related to RDMA operations - confirm.
 * segfbp    - First packet of a segmented message.
 * seglbp    - Last packet of a segmented message.
 * lbufid    - Last successful buffer that was received by the remote.
 *             Valid only during session failover/reconnect.
 * rbufid    - Last buffer (remote buffer) that was received successfully
 *             from the remote node.
 * ackds     - SGL used for send acknowledgement.
 * ackwr     - WR to send acknowledgement.
 * ackhdl    - Memory handle for 'ack_addr'.
 * ack_rkey  - RKey for 'ack_addr'.
 * ack_addr  - Memory region to receive RDMA acknowledgement from remote.
 */
typedef struct rds_ep_s {
	struct rds_session_s	*ep_sp;
	rds_ep_type_t		ep_type;
	ipaddr_t		ep_remip;
	ipaddr_t		ep_myip;
	ibt_lkey_t		ep_snd_lkey;
	ib_guid_t		ep_hca_guid;
	ibt_mr_hdl_t		ep_snd_mrhdl;
	kmutex_t		ep_lock;
	rds_ep_state_t		ep_state;
	ibt_channel_hdl_t	ep_chanhdl;
	ibt_cq_hdl_t		ep_sendcq;
	ibt_cq_hdl_t		ep_recvcq;
	rds_bufpool_t		ep_sndpool;
	rds_bufpool_t		ep_rcvpool;
	rds_qp_t		ep_recvqp;
	uint_t			ep_rdmacnt;
	rds_buf_t		*ep_segfbp;
	rds_buf_t		*ep_seglbp;
	uintptr_t		ep_lbufid;
	uintptr_t		ep_rbufid;
	ibt_wr_ds_t		ep_ackds;
	ibt_send_wr_t		ep_ackwr;
	ibt_mr_hdl_t		ep_ackhdl;
	ibt_rkey_t		ep_ack_rkey;
	uintptr_t		ep_ack_addr;
} rds_ep_t;
268 
/*
 * One end of an RDS session
 *
 * Field names below are given without the "session_" prefix.
 *
 * nextp   - Pointer to the next session in the session list.
 *           This is protected by rds_state_t:rds_sessionlock.
 * remip   - IP address of the node having the remote end of the session.
 * myip    - IP address of this end of the session.
 * lgid    - IB local (source) gid, hosting "myip".
 * rgid    - IB remote (destination) gid, hosting "remip".
 * lock    - Provides read/write access to members of the session.
 *           RDS_SESSION_TRANSITION() takes it as writer to update 'state'.
 * type    - Identifies which end of session (active or passive).
 * state   - State of session (rds_session_state_t), stored in a uint8_t.
 * dataep  - Data endpoint
 * ctrlep  - Control endpoint
 * failover- Flag to indicate that an error occurred and the session is
 *           re-connecting.
 * portmap_lock - To serialize access to portmap.
 * portmap - Bitmap of sockets.
 *           The maximum number of sockets seems to be 65536; the portmap
 *           has 1 bit for each remote socket. A set bit indicates that the
 *           corresponding remote socket is stalled and vice versa.
 */
typedef struct rds_session_s {
	struct rds_session_s	*session_nextp;
	ipaddr_t		session_remip;
	ipaddr_t		session_myip;
	ib_gid_t		session_lgid;
	ib_gid_t		session_rgid;
	krwlock_t		session_lock;
	uint8_t			session_type;
	uint8_t			session_state;
	struct rds_ep_s		session_dataep;
	struct rds_ep_s		session_ctrlep;
	uint_t			session_failover;
	krwlock_t		session_portmap_lock;
	uint8_t			session_portmap[RDS_PORT_MAP_SIZE];
} rds_session_t;
306 
/*
 * defined in rds_ep.c
 *
 * Numbers in parentheses refer to the transitions listed under the
 * "Session State Machine Diagram" comment in this file.
 */
int rds_ep_init(rds_ep_t *ep);
rds_session_t *rds_session_create(rds_state_t *statep, ipaddr_t destip,
    ipaddr_t srcip, ibt_cm_req_rcv_t *reqp, uint8_t type);
/* (1) Created -> Init; a failure here is (5) Created -> Failed */
int rds_session_init(rds_session_t *sp);
/* Listed under (8) Error -> Init */
int rds_session_reinit(rds_session_t *sp, ib_gid_t lgid);
/* (2) Init -> Connected; a failure here is (7) Init -> Error */
void rds_session_open(rds_session_t *sp);
/* (3) Connected -> Closed; also listed under (8) and (9) */
void rds_session_close(rds_session_t *sp, ibt_execution_mode_t mode,
    uint_t wait);
rds_session_t *rds_session_lkup(rds_state_t *statep, ipaddr_t destip,
    ib_guid_t node_guid);
void rds_recycle_session(rds_session_t *sp);
void rds_session_active(rds_session_t *sp);
void rds_close_sessions(void *arg);
void rds_received_msg(rds_ep_t *ep, rds_buf_t *bp);
void rds_handle_control_message(rds_session_t *sp, rds_ctrl_pkt_t *cp);
void rds_handle_send_error(rds_ep_t *ep);
/* (4) Closed -> Fini; also listed under (9) */
void rds_session_fini(rds_session_t *sp);
/* (4) Closed -> Fini; also listed under (9) */
void rds_passive_session_fini(rds_session_t *sp);
/* Listed under (9) together with rds_passive_session_fini() */
void rds_cleanup_passive_session(void *arg);
327 
/* defined in rds_ib.c */
ibt_channel_hdl_t rds_ep_alloc_rc_channel(rds_ep_t *ep, uint8_t hca_port);
void rds_ep_free_rc_channel(rds_ep_t *ep);
/*
 * NOTE(review): takes an untyped argument; presumably the taskq callback
 * referred to by rds_qp_t:qp_taskqpending - confirm against rds_ib.c.
 */
void rds_post_recv_buf(void *arg);
/* 'struct rds_ep_s *' is the same type as 'rds_ep_t *' used elsewhere */
void rds_poll_send_completions(ibt_cq_hdl_t cq, struct rds_ep_s *ep,
    boolean_t lock);
334 
/* defined in rds_cm.c */
int rds_open_rc_channel(rds_ep_t *ep, ibt_path_info_t *pinfo,
    ibt_execution_mode_t mode, ibt_channel_hdl_t *chanhdl);
int rds_close_rc_channel(ibt_channel_hdl_t chanhdl, ibt_execution_mode_t mode);

/*
 * NOTE(review): this prototype is set apart from the rds_cm.c group above
 * and has no "defined in" heading of its own; the defining file is not
 * stated in this header.
 */
int rds_deliver_new_msg(mblk_t *mp, ipaddr_t local_addr, ipaddr_t rem_addr,
    in_port_t local_port, in_port_t rem_port, zoneid_t zoneid);
342 
/*
 * defined in rds_sc.c
 *
 * Both addresses are passed by pointer.
 * NOTE(review): whether they are inputs, outputs or both is not visible
 * from this header - confirm against rds_sc.c.
 */
int rds_sc_path_lookup(ipaddr_t *localip, ipaddr_t *remip);
345 
346 #ifdef __cplusplus
347 }
348 #endif
349 
350 #endif	/* _RDSIB_EP_H */
351