1c0dd49bdSEiji Ota /*
216e76cddSagiri  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3c0dd49bdSEiji Ota  */
416e76cddSagiri 
5c0dd49bdSEiji Ota /*
616e76cddSagiri  * This file contains code imported from the OFED rds source file
716e76cddSagiri  * rdma_transport.c * Oracle elects to have and use the contents of
816e76cddSagiri  * rdma_transport.c under and governed by the OpenIB.org BSD license
916e76cddSagiri  * (see below for full license text). However, the following notice
1016e76cddSagiri  * accompanied the original version of this file:
11c0dd49bdSEiji Ota  */
12c0dd49bdSEiji Ota 
13c0dd49bdSEiji Ota /*
14c0dd49bdSEiji Ota  * Copyright (c) 2009 Oracle.  All rights reserved.
15c0dd49bdSEiji Ota  *
16c0dd49bdSEiji Ota  * This software is available to you under a choice of one of two
17c0dd49bdSEiji Ota  * licenses.  You may choose to be licensed under the terms of the GNU
18c0dd49bdSEiji Ota  * General Public License (GPL) Version 2, available from the file
19c0dd49bdSEiji Ota  * COPYING in the main directory of this source tree, or the
20c0dd49bdSEiji Ota  * OpenIB.org BSD license below:
21c0dd49bdSEiji Ota  *
22c0dd49bdSEiji Ota  *     Redistribution and use in source and binary forms, with or
23c0dd49bdSEiji Ota  *     without modification, are permitted provided that the following
24c0dd49bdSEiji Ota  *     conditions are met:
25c0dd49bdSEiji Ota  *
26c0dd49bdSEiji Ota  *      - Redistributions of source code must retain the above
27c0dd49bdSEiji Ota  *        copyright notice, this list of conditions and the following
28c0dd49bdSEiji Ota  *        disclaimer.
29c0dd49bdSEiji Ota  *
30c0dd49bdSEiji Ota  *      - Redistributions in binary form must reproduce the above
31c0dd49bdSEiji Ota  *        copyright notice, this list of conditions and the following
32c0dd49bdSEiji Ota  *        disclaimer in the documentation and/or other materials
33c0dd49bdSEiji Ota  *        provided with the distribution.
34c0dd49bdSEiji Ota  *
35c0dd49bdSEiji Ota  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
36c0dd49bdSEiji Ota  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
37c0dd49bdSEiji Ota  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
38c0dd49bdSEiji Ota  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
39c0dd49bdSEiji Ota  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
40c0dd49bdSEiji Ota  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
41c0dd49bdSEiji Ota  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
42c0dd49bdSEiji Ota  * SOFTWARE.
43c0dd49bdSEiji Ota  *
44c0dd49bdSEiji Ota  */
45c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_verbs.h>
46c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/ib_addr.h>
47c0dd49bdSEiji Ota #include <sys/ib/clients/of/rdma/rdma_cm.h>
48c0dd49bdSEiji Ota 
49c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/ib.h>
50c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdma_transport.h>
51c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
52c0dd49bdSEiji Ota 
53c0dd49bdSEiji Ota kmutex_t rdsv3_rdma_listen_id_lock;
54c0dd49bdSEiji Ota struct rdma_cm_id *rdsv3_rdma_listen_id = NULL;
55c0dd49bdSEiji Ota 
56c0dd49bdSEiji Ota int
rdsv3_rdma_cm_event_handler(struct rdma_cm_id * cm_id,struct rdma_cm_event * event)57c0dd49bdSEiji Ota rdsv3_rdma_cm_event_handler(struct rdma_cm_id *cm_id,
58c0dd49bdSEiji Ota     struct rdma_cm_event *event)
59c0dd49bdSEiji Ota {
60c0dd49bdSEiji Ota 	/* this can be null in the listening path */
61c0dd49bdSEiji Ota 	struct rdsv3_connection *conn = cm_id->context;
62c0dd49bdSEiji Ota 	struct rdsv3_transport *trans;
63c0dd49bdSEiji Ota 	int ret = 0;
64c0dd49bdSEiji Ota 
65c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_cm_event_handler",
66c0dd49bdSEiji Ota 	    "conn %p id %p handling event %u", conn, cm_id, event->event);
67c0dd49bdSEiji Ota 
68c0dd49bdSEiji Ota 	trans = &rdsv3_ib_transport;
69c0dd49bdSEiji Ota 
70c0dd49bdSEiji Ota 	/*
71c0dd49bdSEiji Ota 	 * Prevent shutdown from tearing down the connection
72c0dd49bdSEiji Ota 	 * while we're executing.
73c0dd49bdSEiji Ota 	 */
74c0dd49bdSEiji Ota 	if (conn) {
75c0dd49bdSEiji Ota 		mutex_enter(&conn->c_cm_lock);
76c0dd49bdSEiji Ota 
77c0dd49bdSEiji Ota 		/*
78c0dd49bdSEiji Ota 		 * If the connection is being shut down, bail out
79c0dd49bdSEiji Ota 		 * right away. We return 0 so cm_id doesn't get
80c0dd49bdSEiji Ota 		 * destroyed prematurely
81c0dd49bdSEiji Ota 		 */
82c0dd49bdSEiji Ota 		if (rdsv3_conn_state(conn) == RDSV3_CONN_DISCONNECTING) {
83c0dd49bdSEiji Ota 			/*
84c0dd49bdSEiji Ota 			 * Reject incoming connections while we're tearing
85c0dd49bdSEiji Ota 			 * down an existing one.
86c0dd49bdSEiji Ota 			 */
87c0dd49bdSEiji Ota 			if (event->event == RDMA_CM_EVENT_CONNECT_REQUEST)
88c0dd49bdSEiji Ota 				ret = 1;
89c0dd49bdSEiji Ota 			RDSV3_DPRINTF2("rdsv3_rdma_cm_event_handler",
90c0dd49bdSEiji Ota 			    "conn %p id %p incoming event %u when "
91c0dd49bdSEiji Ota 			    "disconnecting", conn, cm_id, event->event);
92c0dd49bdSEiji Ota 			goto out;
93c0dd49bdSEiji Ota 		}
94c0dd49bdSEiji Ota 	}
95c0dd49bdSEiji Ota 
96c0dd49bdSEiji Ota 	switch (event->event) {
97c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_CONNECT_REQUEST:
98c0dd49bdSEiji Ota 		ret = trans->cm_handle_connect(cm_id, event);
99c0dd49bdSEiji Ota 		break;
100c0dd49bdSEiji Ota 
101c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_ADDR_RESOLVED:
102c0dd49bdSEiji Ota 		/* XXX do we need to clean up if this fails? */
103c0dd49bdSEiji Ota 		ret = rdma_resolve_route(cm_id,
104c0dd49bdSEiji Ota 		    RDSV3_RDMA_RESOLVE_TIMEOUT_MS);
105c0dd49bdSEiji Ota 		break;
106c0dd49bdSEiji Ota 
107c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_ROUTE_RESOLVED:
108c0dd49bdSEiji Ota 		/* XXX worry about racing with listen acceptance */
109c0dd49bdSEiji Ota 		ret = trans->cm_initiate_connect(cm_id);
110c0dd49bdSEiji Ota 		break;
111c0dd49bdSEiji Ota 
112c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_ESTABLISHED:
113c0dd49bdSEiji Ota 		trans->cm_connect_complete(conn, event);
114c0dd49bdSEiji Ota 		break;
115c0dd49bdSEiji Ota 
116c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_ADDR_ERROR:
117c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_ROUTE_ERROR:
118c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_CONNECT_ERROR:
119c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_UNREACHABLE:
120c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_REJECTED:
121c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_DEVICE_REMOVAL:
122c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_ADDR_CHANGE:
123c0dd49bdSEiji Ota 		if (conn)
124c0dd49bdSEiji Ota 			rdsv3_conn_drop(conn);
125c0dd49bdSEiji Ota 		break;
126c0dd49bdSEiji Ota 
127c0dd49bdSEiji Ota 	case RDMA_CM_EVENT_DISCONNECTED:
128c0dd49bdSEiji Ota 		RDSV3_DPRINTF2("rdsv3_rdma_cm_event_handler",
129c0dd49bdSEiji Ota 		    "RDS/RDMA: DISCONNECT event - dropping connection "
130c0dd49bdSEiji Ota 		    "cm_id: %p", cm_id);
131c0dd49bdSEiji Ota 		if (conn) {
1326e18d381Sagiri 			RDSV3_DPRINTF2("rdsv3_rdma_cm_event_handler",
133c0dd49bdSEiji Ota 			    "RDS/RDMA: DISCONNECT event - dropping connection "
134cadbfdc3SEiji Ota 			    "%u.%u.%u.%u->%u.%u.%u.%u", NIPQUAD(conn->c_laddr),
135c0dd49bdSEiji Ota 			    NIPQUAD(conn->c_faddr));
136c0dd49bdSEiji Ota 			rdsv3_conn_drop(conn);
137c0dd49bdSEiji Ota 		}
138c0dd49bdSEiji Ota 		break;
139c0dd49bdSEiji Ota 
140c0dd49bdSEiji Ota 	default:
141c0dd49bdSEiji Ota 		/* things like device disconnect? */
1426e18d381Sagiri 		RDSV3_DPRINTF2("rdsv3_rdma_cm_event_handler",
143cadbfdc3SEiji Ota 		    "unknown event %u!\n", event->event);
144c0dd49bdSEiji Ota 		RDSV3_PANIC();
145c0dd49bdSEiji Ota 		break;
146c0dd49bdSEiji Ota 	}
147c0dd49bdSEiji Ota 
148c0dd49bdSEiji Ota out:
149cadbfdc3SEiji Ota 	if (conn)
150c0dd49bdSEiji Ota 		mutex_exit(&conn->c_cm_lock);
151c0dd49bdSEiji Ota 
152c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_cm_event_handler",
153c0dd49bdSEiji Ota 	    "id %p event %u handling ret %d", cm_id, event->event, ret);
154c0dd49bdSEiji Ota 
155c0dd49bdSEiji Ota 	return (ret);
156c0dd49bdSEiji Ota }
157c0dd49bdSEiji Ota 
158c0dd49bdSEiji Ota static int
rdsv3_rdma_listen_init(void)159c0dd49bdSEiji Ota rdsv3_rdma_listen_init(void)
160c0dd49bdSEiji Ota {
161c0dd49bdSEiji Ota 	struct sockaddr_in sin;
162c0dd49bdSEiji Ota 	struct rdma_cm_id *cm_id;
163c0dd49bdSEiji Ota 	int ret;
164c0dd49bdSEiji Ota 
165c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_listen_init", "Enter");
166c0dd49bdSEiji Ota 
167c0dd49bdSEiji Ota 	cm_id = rdma_create_id(rdsv3_rdma_cm_event_handler, NULL, RDMA_PS_TCP);
168c0dd49bdSEiji Ota 	if (IS_ERR(cm_id)) {
169c0dd49bdSEiji Ota 		ret = PTR_ERR(cm_id);
1706e18d381Sagiri 		RDSV3_DPRINTF2("rdsv3_rdma_listen_init",
171c0dd49bdSEiji Ota 		    "RDS/RDMA: failed to setup listener, "
172c0dd49bdSEiji Ota 		    "rdma_create_id() returned %d", ret);
173c0dd49bdSEiji Ota 		goto out;
174c0dd49bdSEiji Ota 	}
175c0dd49bdSEiji Ota 
176c0dd49bdSEiji Ota 	sin.sin_family = PF_INET;
177c0dd49bdSEiji Ota 	sin.sin_addr.s_addr = (uint32_t)htonl(INADDR_ANY);
178c0dd49bdSEiji Ota 	sin.sin_port = (uint16_t)htons(RDSV3_PORT);
179c0dd49bdSEiji Ota 
180c0dd49bdSEiji Ota 	/*
181c0dd49bdSEiji Ota 	 * XXX I bet this binds the cm_id to a device.  If we want to support
182c0dd49bdSEiji Ota 	 * fail-over we'll have to take this into consideration.
183c0dd49bdSEiji Ota 	 */
184c0dd49bdSEiji Ota 	ret = rdma_bind_addr(cm_id, (struct sockaddr *)&sin);
185c0dd49bdSEiji Ota 	if (ret) {
1866e18d381Sagiri 		RDSV3_DPRINTF2("rdsv3_rdma_listen_init",
187c0dd49bdSEiji Ota 		    "RDS/RDMA: failed to setup listener, "
188c0dd49bdSEiji Ota 		    "rdma_bind_addr() returned %d", ret);
189c0dd49bdSEiji Ota 		goto out;
190c0dd49bdSEiji Ota 	}
191c0dd49bdSEiji Ota 
192c0dd49bdSEiji Ota 	ret = rdma_listen(cm_id, 128);
193c0dd49bdSEiji Ota 	if (ret) {
1946e18d381Sagiri 		RDSV3_DPRINTF2("rdsv3_rdma_listen_init",
195c0dd49bdSEiji Ota 		    "RDS/RDMA: failed to setup listener, "
196c0dd49bdSEiji Ota 		    "rdma_listen() returned %d", ret);
197c0dd49bdSEiji Ota 		goto out;
198c0dd49bdSEiji Ota 	}
199c0dd49bdSEiji Ota 
200c0dd49bdSEiji Ota 	RDSV3_DPRINTF5("rdsv3_rdma_listen_init",
201c0dd49bdSEiji Ota 	    "cm %p listening on port %u", cm_id, RDSV3_PORT);
202c0dd49bdSEiji Ota 
203c0dd49bdSEiji Ota 	rdsv3_rdma_listen_id = cm_id;
204c0dd49bdSEiji Ota 	cm_id = NULL;
205c0dd49bdSEiji Ota 
206c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_listen_init",
207c0dd49bdSEiji Ota 	    "Return: rdsv3_rdma_listen_id: %p", rdsv3_rdma_listen_id);
208c0dd49bdSEiji Ota out:
209c0dd49bdSEiji Ota 	if (cm_id)
210c0dd49bdSEiji Ota 		rdma_destroy_id(cm_id);
211c0dd49bdSEiji Ota 	return (ret);
212c0dd49bdSEiji Ota }
213c0dd49bdSEiji Ota 
rdsv3_rdma_listen_stop(void)214c0dd49bdSEiji Ota static void rdsv3_rdma_listen_stop(void)
215c0dd49bdSEiji Ota {
216c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_listen_stop", "cm %p", rdsv3_rdma_listen_id);
217c0dd49bdSEiji Ota 	rdma_destroy_id(rdsv3_rdma_listen_id);
218c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_listen_stop", "Return");
219c0dd49bdSEiji Ota }
220c0dd49bdSEiji Ota 
221c0dd49bdSEiji Ota /*
222c0dd49bdSEiji Ota  * This function can be called via two routes.
223c0dd49bdSEiji Ota  * 	1. During attach on a worker thread.
224c0dd49bdSEiji Ota  *	2. From rdsv3_create() for 1st socket.
225c0dd49bdSEiji Ota  */
226c0dd49bdSEiji Ota void
rdsv3_rdma_init()227c0dd49bdSEiji Ota rdsv3_rdma_init()
228c0dd49bdSEiji Ota {
229c0dd49bdSEiji Ota 	int ret;
230c0dd49bdSEiji Ota 
231c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_init", "Enter");
232c0dd49bdSEiji Ota 
233c0dd49bdSEiji Ota 	mutex_enter(&rdsv3_rdma_listen_id_lock);
234c0dd49bdSEiji Ota 	if (rdsv3_rdma_listen_id != NULL) {
235*3f756f37Sagiri 		RDSV3_DPRINTF5("rdsv3_rdma_init",
236c0dd49bdSEiji Ota 		    "rdsv3_rdma_listen_id is already initialized: %p",
237c0dd49bdSEiji Ota 		    rdsv3_rdma_listen_id);
238c0dd49bdSEiji Ota 		mutex_exit(&rdsv3_rdma_listen_id_lock);
239c0dd49bdSEiji Ota 		return;
240c0dd49bdSEiji Ota 	}
241c0dd49bdSEiji Ota 
242c0dd49bdSEiji Ota 	ret = rdsv3_rdma_listen_init();
243c0dd49bdSEiji Ota 	if (ret) {
244c0dd49bdSEiji Ota 		mutex_exit(&rdsv3_rdma_listen_id_lock);
245c0dd49bdSEiji Ota 		return;
246c0dd49bdSEiji Ota 	}
247c0dd49bdSEiji Ota 
248c0dd49bdSEiji Ota 	ret = rdsv3_ib_init();
249c0dd49bdSEiji Ota 	if (ret) {
250c0dd49bdSEiji Ota 		rdsv3_rdma_listen_stop();
251c0dd49bdSEiji Ota 	}
252c0dd49bdSEiji Ota 	mutex_exit(&rdsv3_rdma_listen_id_lock);
253c0dd49bdSEiji Ota 
254c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_init", "Return");
255c0dd49bdSEiji Ota }
256c0dd49bdSEiji Ota 
257c0dd49bdSEiji Ota /*ARGSUSED*/
258c0dd49bdSEiji Ota void
rdsv3_rdma_exit(void * arg)259c0dd49bdSEiji Ota rdsv3_rdma_exit(void *arg)
260c0dd49bdSEiji Ota {
261c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_exit", "Enter");
262c0dd49bdSEiji Ota 
263c0dd49bdSEiji Ota 	/* stop listening first to ensure no new connections are attempted */
264c0dd49bdSEiji Ota 	if (rdsv3_rdma_listen_id) {
265c0dd49bdSEiji Ota 		rdsv3_rdma_listen_stop();
266c0dd49bdSEiji Ota 		rdsv3_ib_exit();
267c0dd49bdSEiji Ota 		rdsv3_rdma_listen_id = NULL;
268c0dd49bdSEiji Ota 	}
269c0dd49bdSEiji Ota 
270c0dd49bdSEiji Ota 	RDSV3_DPRINTF2("rdsv3_rdma_exit", "Return");
271c0dd49bdSEiji Ota }
272