1 /*
2  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*
6  * This file contains code imported from the OFED rds source file bind.c
7  * Oracle elects to have and use the contents of bind.c under and governed
8  * by the OpenIB.org BSD license (see below for full license text). However,
9  * the following notice accompanied the original version of this file:
10  */
11 
12 /*
13  * Copyright (c) 2006 Oracle.  All rights reserved.
14  *
15  * This software is available to you under a choice of one of two
16  * licenses.  You may choose to be licensed under the terms of the GNU
17  * General Public License (GPL) Version 2, available from the file
18  * COPYING in the main directory of this source tree, or the
19  * OpenIB.org BSD license below:
20  *
21  *     Redistribution and use in source and binary forms, with or
22  *     without modification, are permitted provided that the following
23  *     conditions are met:
24  *
25  *      - Redistributions of source code must retain the above
26  *        copyright notice, this list of conditions and the following
27  *        disclaimer.
28  *
29  *      - Redistributions in binary form must reproduce the above
30  *        copyright notice, this list of conditions and the following
31  *        disclaimer in the documentation and/or other materials
32  *        provided with the distribution.
33  *
34  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41  * SOFTWARE.
42  *
43  */
44 #include <sys/types.h>
45 #include <sys/sysmacros.h>
46 #include <sys/random.h>
47 #include <sys/rds.h>
48 
49 #include <sys/ib/clients/rdsv3/rdsv3.h>
50 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
51 
52 kmutex_t	rdsv3_bind_lock;
53 avl_tree_t	rdsv3_bind_tree;
54 
55 /*
56  * Each node in the rdsv3_bind_tree is of this type.
57  */
58 struct rdsv3_ip_bucket {
59 	ipaddr_t		ip;
60 	zoneid_t		zone;
61 	avl_node_t		ip_avl_node;
62 	krwlock_t		rwlock;
63 	uint_t			nsockets;
64 	struct rdsv3_sock	*port[65536];
65 };
66 
67 static int
rdsv3_bind_node_compare(const void * a,const void * b)68 rdsv3_bind_node_compare(const void *a, const void *b)
69 {
70 	struct rdsv3_ip_bucket *bp = (struct rdsv3_ip_bucket *)b;
71 
72 	if (*(uint64_t *)a > (((uint64_t)bp->ip << 32) | bp->zone))
73 		return (+1);
74 	else if (*(uint64_t *)a < (((uint64_t)bp->ip << 32) | bp->zone))
75 		return (-1);
76 
77 	return (0);
78 }
79 
80 void
rdsv3_bind_init()81 rdsv3_bind_init()
82 {
83 	RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Enter");
84 
85 	mutex_init(&rdsv3_bind_lock, NULL, MUTEX_DRIVER, NULL);
86 	avl_create(&rdsv3_bind_tree, rdsv3_bind_node_compare,
87 	    sizeof (struct rdsv3_ip_bucket),
88 	    offsetof(struct rdsv3_ip_bucket, ip_avl_node));
89 
90 	RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Return");
91 }
92 
93 /* called on detach */
94 void
rdsv3_bind_exit()95 rdsv3_bind_exit()
96 {
97 	struct rdsv3_ip_bucket	*bucketp;
98 	void			*cookie = NULL;
99 
100 	RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Enter");
101 
102 	while ((bucketp =
103 	    avl_destroy_nodes(&rdsv3_bind_tree, &cookie)) != NULL) {
104 		rw_destroy(&bucketp->rwlock);
105 		kmem_free(bucketp, sizeof (struct rdsv3_ip_bucket));
106 	}
107 
108 	avl_destroy(&rdsv3_bind_tree);
109 	mutex_destroy(&rdsv3_bind_lock);
110 
111 	RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Return");
112 }
113 
114 struct rdsv3_ip_bucket *
rdsv3_find_ip_bucket(ipaddr_t ipaddr,zoneid_t zoneid)115 rdsv3_find_ip_bucket(ipaddr_t ipaddr, zoneid_t zoneid)
116 {
117 	struct rdsv3_ip_bucket	*bucketp;
118 	avl_index_t		where;
119 	uint64_t		needle = ((uint64_t)ipaddr << 32) | zoneid;
120 
121 	mutex_enter(&rdsv3_bind_lock);
122 	bucketp = avl_find(&rdsv3_bind_tree, &needle, &where);
123 	if (bucketp == NULL) {
124 		/* allocate a new bucket for this IP & zone */
125 		bucketp =
126 		    kmem_zalloc(sizeof (struct rdsv3_ip_bucket), KM_SLEEP);
127 		rw_init(&bucketp->rwlock, NULL, RW_DRIVER, NULL);
128 		bucketp->ip = ipaddr;
129 		bucketp->zone = zoneid;
130 		avl_insert(&rdsv3_bind_tree, bucketp, where);
131 	}
132 	mutex_exit(&rdsv3_bind_lock);
133 
134 	return (bucketp);
135 }
136 
137 /*
138  * Return the rdsv3_sock bound at the given local address.
139  *
140  * The rx path can race with rdsv3_release.  We notice if rdsv3_release() has
141  * marked this socket and don't return a rs ref to the rx path.
142  */
143 struct rdsv3_sock *
rdsv3_find_bound(struct rdsv3_connection * conn,uint16_be_t port)144 rdsv3_find_bound(struct rdsv3_connection *conn, uint16_be_t port)
145 {
146 	struct rdsv3_sock *rs;
147 
148 	RDSV3_DPRINTF4("rdsv3_find_bound", "Enter(ip:port: %u.%u.%u.%u:%d)",
149 	    NIPQUAD(conn->c_laddr), ntohs(port));
150 
151 	rw_enter(&conn->c_bucketp->rwlock, RW_READER);
152 	ASSERT(ntohl(conn->c_laddr) == conn->c_bucketp->ip);
153 	rs = conn->c_bucketp->port[ntohs(port)];
154 	if (rs && !rdsv3_sk_sock_flag(rdsv3_rs_to_sk(rs), SOCK_DEAD))
155 		rdsv3_sk_sock_hold(rdsv3_rs_to_sk(rs));
156 	else
157 		rs = NULL;
158 	rw_exit(&conn->c_bucketp->rwlock);
159 
160 	RDSV3_DPRINTF5("rdsv3_find_bound", "returning rs %p for %u.%u.%u.%u:%d",
161 	    rs, NIPQUAD(conn->c_laddr), ntohs(port));
162 
163 	return (rs);
164 }
165 
166 /* returns -ve errno or +ve port */
167 static int
rdsv3_add_bound(struct rdsv3_sock * rs,uint32_be_t addr,uint16_be_t * port)168 rdsv3_add_bound(struct rdsv3_sock *rs, uint32_be_t addr, uint16_be_t *port)
169 {
170 	int ret = -EADDRINUSE;
171 	uint16_t rover, last;
172 	struct rdsv3_ip_bucket *bucketp;
173 
174 	RDSV3_DPRINTF4("rdsv3_add_bound", "Enter(addr:port: %x:%x)",
175 	    ntohl(addr), ntohs(*port));
176 
177 	if (*port != 0) {
178 		rover = ntohs(*port);
179 		last = rover;
180 	} else {
181 		(void) random_get_pseudo_bytes((uint8_t *)&rover,
182 		    sizeof (uint16_t));
183 		rover = MAX(rover, 2);
184 		last = rover - 1;
185 	}
186 
187 	bucketp = rdsv3_find_ip_bucket(ntohl(addr), rs->rs_zoneid);
188 
189 	/* leave the bind lock and get the bucket lock */
190 	rw_enter(&bucketp->rwlock, RW_WRITER);
191 
192 	do {
193 		if (rover == 0)
194 			rover++;
195 
196 		if (bucketp->port[rover] == NULL) {
197 			*port = htons(rover);
198 			ret = 0;
199 			break;
200 		}
201 	} while (rover++ != last);
202 
203 	if (ret == 0)  {
204 		rs->rs_bound_addr = addr;
205 		rs->rs_bound_port = *port;
206 		bucketp->port[rover] = rs;
207 		bucketp->nsockets++;
208 		rdsv3_sock_addref(rs);
209 
210 		RDSV3_DPRINTF5("rdsv3_add_bound",
211 		    "rs %p binding to %u.%u.%u.%u:%d",
212 		    rs, NIPQUAD(addr), rover);
213 	}
214 
215 	rw_exit(&bucketp->rwlock);
216 
217 	RDSV3_DPRINTF4("rdsv3_add_bound", "Return(ret: %d port: %d)",
218 	    ret, rover);
219 
220 
221 	return (ret);
222 }
223 
224 void
rdsv3_remove_bound(struct rdsv3_sock * rs)225 rdsv3_remove_bound(struct rdsv3_sock *rs)
226 {
227 	RDSV3_DPRINTF4("rdsv3_remove_bound", "Enter(rs: %p)", rs);
228 
229 	if (rs->rs_bound_addr) {
230 		struct rdsv3_ip_bucket *bucketp;
231 
232 		RDSV3_DPRINTF5("rdsv3_remove_bound",
233 		    "rs %p unbinding from %u.%u.%u.%u:%x",
234 		    rs, NIPQUAD(htonl(rs->rs_bound_addr)), rs->rs_bound_port);
235 
236 		bucketp = rdsv3_find_ip_bucket(ntohl(rs->rs_bound_addr),
237 		    rs->rs_zoneid);
238 
239 		rw_enter(&bucketp->rwlock, RW_WRITER);
240 		bucketp->port[ntohs(rs->rs_bound_port)] = NULL;
241 		bucketp->nsockets--;
242 		rs->rs_bound_addr = 0;
243 		rw_exit(&bucketp->rwlock);
244 
245 		rdsv3_sock_put(rs);
246 	}
247 
248 	RDSV3_DPRINTF4("rdsv3_remove_bound", "Return(rs: %p)", rs);
249 }
250 
251 /* ARGSUSED */
252 int
rdsv3_bind(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t len,cred_t * cr)253 rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
254     socklen_t len, cred_t *cr)
255 {
256 	struct rsock	*sk = (struct rsock *)proto_handle;
257 	sin_t		*sin = (sin_t *)sa;
258 	struct rdsv3_sock	*rs = rdsv3_sk_to_rs(sk);
259 	int		ret;
260 
261 	if (len != sizeof (sin_t) || (sin == NULL) ||
262 	    !OK_32PTR((char *)sin)) {
263 		RDSV3_DPRINTF2("rdsv3_bind", "address to bind not specified");
264 		return (EINVAL);
265 	}
266 
267 	RDSV3_DPRINTF4("rdsv3_bind", "Enter(rs: %p, addr: 0x%x, port: %x)",
268 	    rs, ntohl(sin->sin_addr.s_addr), htons(sin->sin_port));
269 
270 	if (sin->sin_addr.s_addr == INADDR_ANY) {
271 		RDSV3_DPRINTF2("rdsv3_bind", "Invalid address");
272 		return (EINVAL);
273 	}
274 
275 	/* We don't allow multiple binds */
276 	if (rs->rs_bound_addr) {
277 		RDSV3_DPRINTF2("rdsv3_bind", "Multiple binds not allowed");
278 		return (EINVAL);
279 	}
280 
281 	ret = rdsv3_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
282 	if (ret) {
283 		return (ret);
284 	}
285 
286 	rs->rs_transport = rdsv3_trans_get_preferred(sin->sin_addr.s_addr);
287 	if (!rs->rs_transport) {
288 		rdsv3_remove_bound(rs);
289 		if (rdsv3_printk_ratelimit()) {
290 			RDSV3_DPRINTF1("rdsv3_bind",
291 			    "RDS: rdsv3_bind() could not find a transport.\n");
292 		}
293 		return (EADDRNOTAVAIL);
294 	}
295 
296 	RDSV3_DPRINTF4("rdsv3_bind", "Return: Assigned port: %x to sock: %p",
297 	    sin->sin_port, rs);
298 
299 	return (0);
300 }
301