/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */
416e76cddSagiri
/*
 * This file contains code imported from the OFED rds source file bind.c
 * Oracle elects to have and use the contents of bind.c under and governed
 * by the OpenIB.org BSD license (see below for full license text). However,
 * the following notice accompanied the original version of this file:
 */
1116e76cddSagiri
/*
 * Copyright (c) 2006 Oracle. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
44c0dd49bdSEiji Ota #include <sys/types.h>
45c0dd49bdSEiji Ota #include <sys/sysmacros.h>
46c0dd49bdSEiji Ota #include <sys/random.h>
47c0dd49bdSEiji Ota #include <sys/rds.h>
48c0dd49bdSEiji Ota
49c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3.h>
50c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
51c0dd49bdSEiji Ota
52c0dd49bdSEiji Ota kmutex_t rdsv3_bind_lock;
53c0dd49bdSEiji Ota avl_tree_t rdsv3_bind_tree;
54c0dd49bdSEiji Ota
55*80166370Sagiri /*
56*80166370Sagiri * Each node in the rdsv3_bind_tree is of this type.
57*80166370Sagiri */
58*80166370Sagiri struct rdsv3_ip_bucket {
59*80166370Sagiri ipaddr_t ip;
60*80166370Sagiri zoneid_t zone;
61*80166370Sagiri avl_node_t ip_avl_node;
62*80166370Sagiri krwlock_t rwlock;
63*80166370Sagiri uint_t nsockets;
64*80166370Sagiri struct rdsv3_sock *port[65536];
65*80166370Sagiri };
66*80166370Sagiri
67*80166370Sagiri static int
rdsv3_bind_node_compare(const void * a,const void * b)68*80166370Sagiri rdsv3_bind_node_compare(const void *a, const void *b)
69c0dd49bdSEiji Ota {
70*80166370Sagiri struct rdsv3_ip_bucket *bp = (struct rdsv3_ip_bucket *)b;
71*80166370Sagiri
72*80166370Sagiri if (*(uint64_t *)a > (((uint64_t)bp->ip << 32) | bp->zone))
73*80166370Sagiri return (+1);
74*80166370Sagiri else if (*(uint64_t *)a < (((uint64_t)bp->ip << 32) | bp->zone))
75*80166370Sagiri return (-1);
76*80166370Sagiri
77*80166370Sagiri return (0);
78*80166370Sagiri }
79*80166370Sagiri
80*80166370Sagiri void
rdsv3_bind_init()81*80166370Sagiri rdsv3_bind_init()
82*80166370Sagiri {
83*80166370Sagiri RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Enter");
84*80166370Sagiri
85*80166370Sagiri mutex_init(&rdsv3_bind_lock, NULL, MUTEX_DRIVER, NULL);
86*80166370Sagiri avl_create(&rdsv3_bind_tree, rdsv3_bind_node_compare,
87*80166370Sagiri sizeof (struct rdsv3_ip_bucket),
88*80166370Sagiri offsetof(struct rdsv3_ip_bucket, ip_avl_node));
89*80166370Sagiri
90*80166370Sagiri RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Return");
91*80166370Sagiri }
92*80166370Sagiri
93*80166370Sagiri /* called on detach */
94*80166370Sagiri void
rdsv3_bind_exit()95*80166370Sagiri rdsv3_bind_exit()
96*80166370Sagiri {
97*80166370Sagiri struct rdsv3_ip_bucket *bucketp;
98*80166370Sagiri void *cookie = NULL;
99*80166370Sagiri
100*80166370Sagiri RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Enter");
101*80166370Sagiri
102*80166370Sagiri while ((bucketp =
103*80166370Sagiri avl_destroy_nodes(&rdsv3_bind_tree, &cookie)) != NULL) {
104*80166370Sagiri rw_destroy(&bucketp->rwlock);
105*80166370Sagiri kmem_free(bucketp, sizeof (struct rdsv3_ip_bucket));
106c0dd49bdSEiji Ota }
107c0dd49bdSEiji Ota
108*80166370Sagiri avl_destroy(&rdsv3_bind_tree);
109*80166370Sagiri mutex_destroy(&rdsv3_bind_lock);
110*80166370Sagiri
111*80166370Sagiri RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Return");
112*80166370Sagiri }
113*80166370Sagiri
114*80166370Sagiri struct rdsv3_ip_bucket *
rdsv3_find_ip_bucket(ipaddr_t ipaddr,zoneid_t zoneid)115*80166370Sagiri rdsv3_find_ip_bucket(ipaddr_t ipaddr, zoneid_t zoneid)
116*80166370Sagiri {
117*80166370Sagiri struct rdsv3_ip_bucket *bucketp;
118*80166370Sagiri avl_index_t where;
119*80166370Sagiri uint64_t needle = ((uint64_t)ipaddr << 32) | zoneid;
120*80166370Sagiri
121*80166370Sagiri mutex_enter(&rdsv3_bind_lock);
122*80166370Sagiri bucketp = avl_find(&rdsv3_bind_tree, &needle, &where);
123*80166370Sagiri if (bucketp == NULL) {
124*80166370Sagiri /* allocate a new bucket for this IP & zone */
125*80166370Sagiri bucketp =
126*80166370Sagiri kmem_zalloc(sizeof (struct rdsv3_ip_bucket), KM_SLEEP);
127*80166370Sagiri rw_init(&bucketp->rwlock, NULL, RW_DRIVER, NULL);
128*80166370Sagiri bucketp->ip = ipaddr;
129*80166370Sagiri bucketp->zone = zoneid;
130*80166370Sagiri avl_insert(&rdsv3_bind_tree, bucketp, where);
131*80166370Sagiri }
132*80166370Sagiri mutex_exit(&rdsv3_bind_lock);
133*80166370Sagiri
134*80166370Sagiri return (bucketp);
135c0dd49bdSEiji Ota }
136c0dd49bdSEiji Ota
137c0dd49bdSEiji Ota /*
138c0dd49bdSEiji Ota * Return the rdsv3_sock bound at the given local address.
139c0dd49bdSEiji Ota *
140c0dd49bdSEiji Ota * The rx path can race with rdsv3_release. We notice if rdsv3_release() has
141c0dd49bdSEiji Ota * marked this socket and don't return a rs ref to the rx path.
142c0dd49bdSEiji Ota */
143c0dd49bdSEiji Ota struct rdsv3_sock *
rdsv3_find_bound(struct rdsv3_connection * conn,uint16_be_t port)144*80166370Sagiri rdsv3_find_bound(struct rdsv3_connection *conn, uint16_be_t port)
145c0dd49bdSEiji Ota {
146c0dd49bdSEiji Ota struct rdsv3_sock *rs;
147c0dd49bdSEiji Ota
148*80166370Sagiri RDSV3_DPRINTF4("rdsv3_find_bound", "Enter(ip:port: %u.%u.%u.%u:%d)",
149*80166370Sagiri NIPQUAD(conn->c_laddr), ntohs(port));
150c0dd49bdSEiji Ota
151*80166370Sagiri rw_enter(&conn->c_bucketp->rwlock, RW_READER);
152*80166370Sagiri ASSERT(ntohl(conn->c_laddr) == conn->c_bucketp->ip);
153*80166370Sagiri rs = conn->c_bucketp->port[ntohs(port)];
154c0dd49bdSEiji Ota if (rs && !rdsv3_sk_sock_flag(rdsv3_rs_to_sk(rs), SOCK_DEAD))
155*80166370Sagiri rdsv3_sk_sock_hold(rdsv3_rs_to_sk(rs));
156c0dd49bdSEiji Ota else
157c0dd49bdSEiji Ota rs = NULL;
158*80166370Sagiri rw_exit(&conn->c_bucketp->rwlock);
159c0dd49bdSEiji Ota
160*80166370Sagiri RDSV3_DPRINTF5("rdsv3_find_bound", "returning rs %p for %u.%u.%u.%u:%d",
161*80166370Sagiri rs, NIPQUAD(conn->c_laddr), ntohs(port));
162cadbfdc3SEiji Ota
163c0dd49bdSEiji Ota return (rs);
164c0dd49bdSEiji Ota }
165c0dd49bdSEiji Ota
166c0dd49bdSEiji Ota /* returns -ve errno or +ve port */
167c0dd49bdSEiji Ota static int
rdsv3_add_bound(struct rdsv3_sock * rs,uint32_be_t addr,uint16_be_t * port)168c0dd49bdSEiji Ota rdsv3_add_bound(struct rdsv3_sock *rs, uint32_be_t addr, uint16_be_t *port)
169c0dd49bdSEiji Ota {
170c0dd49bdSEiji Ota int ret = -EADDRINUSE;
171c0dd49bdSEiji Ota uint16_t rover, last;
172*80166370Sagiri struct rdsv3_ip_bucket *bucketp;
173c0dd49bdSEiji Ota
174*80166370Sagiri RDSV3_DPRINTF4("rdsv3_add_bound", "Enter(addr:port: %x:%x)",
175*80166370Sagiri ntohl(addr), ntohs(*port));
176c0dd49bdSEiji Ota
177c0dd49bdSEiji Ota if (*port != 0) {
178c0dd49bdSEiji Ota rover = ntohs(*port);
179c0dd49bdSEiji Ota last = rover;
180c0dd49bdSEiji Ota } else {
181c0dd49bdSEiji Ota (void) random_get_pseudo_bytes((uint8_t *)&rover,
182c0dd49bdSEiji Ota sizeof (uint16_t));
183c0dd49bdSEiji Ota rover = MAX(rover, 2);
184c0dd49bdSEiji Ota last = rover - 1;
185c0dd49bdSEiji Ota }
186c0dd49bdSEiji Ota
187*80166370Sagiri bucketp = rdsv3_find_ip_bucket(ntohl(addr), rs->rs_zoneid);
188*80166370Sagiri
189*80166370Sagiri /* leave the bind lock and get the bucket lock */
190*80166370Sagiri rw_enter(&bucketp->rwlock, RW_WRITER);
191c0dd49bdSEiji Ota
192c0dd49bdSEiji Ota do {
193c0dd49bdSEiji Ota if (rover == 0)
194c0dd49bdSEiji Ota rover++;
1955d5562f5SEiji Ota
196*80166370Sagiri if (bucketp->port[rover] == NULL) {
197c0dd49bdSEiji Ota *port = htons(rover);
198c0dd49bdSEiji Ota ret = 0;
199c0dd49bdSEiji Ota break;
200c0dd49bdSEiji Ota }
201c0dd49bdSEiji Ota } while (rover++ != last);
202c0dd49bdSEiji Ota
203c0dd49bdSEiji Ota if (ret == 0) {
204c0dd49bdSEiji Ota rs->rs_bound_addr = addr;
205c0dd49bdSEiji Ota rs->rs_bound_port = *port;
206*80166370Sagiri bucketp->port[rover] = rs;
207*80166370Sagiri bucketp->nsockets++;
208c0dd49bdSEiji Ota rdsv3_sock_addref(rs);
209c0dd49bdSEiji Ota
210c0dd49bdSEiji Ota RDSV3_DPRINTF5("rdsv3_add_bound",
211*80166370Sagiri "rs %p binding to %u.%u.%u.%u:%d",
212*80166370Sagiri rs, NIPQUAD(addr), rover);
213c0dd49bdSEiji Ota }
214c0dd49bdSEiji Ota
215*80166370Sagiri rw_exit(&bucketp->rwlock);
216*80166370Sagiri
217*80166370Sagiri RDSV3_DPRINTF4("rdsv3_add_bound", "Return(ret: %d port: %d)",
218*80166370Sagiri ret, rover);
219c0dd49bdSEiji Ota
220c0dd49bdSEiji Ota
221c0dd49bdSEiji Ota return (ret);
222c0dd49bdSEiji Ota }
223c0dd49bdSEiji Ota
224c0dd49bdSEiji Ota void
rdsv3_remove_bound(struct rdsv3_sock * rs)225c0dd49bdSEiji Ota rdsv3_remove_bound(struct rdsv3_sock *rs)
226c0dd49bdSEiji Ota {
227c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_remove_bound", "Enter(rs: %p)", rs);
228c0dd49bdSEiji Ota
229c0dd49bdSEiji Ota if (rs->rs_bound_addr) {
230*80166370Sagiri struct rdsv3_ip_bucket *bucketp;
231*80166370Sagiri
232c0dd49bdSEiji Ota RDSV3_DPRINTF5("rdsv3_remove_bound",
233c0dd49bdSEiji Ota "rs %p unbinding from %u.%u.%u.%u:%x",
234*80166370Sagiri rs, NIPQUAD(htonl(rs->rs_bound_addr)), rs->rs_bound_port);
235*80166370Sagiri
236*80166370Sagiri bucketp = rdsv3_find_ip_bucket(ntohl(rs->rs_bound_addr),
237*80166370Sagiri rs->rs_zoneid);
238*80166370Sagiri
239*80166370Sagiri rw_enter(&bucketp->rwlock, RW_WRITER);
240*80166370Sagiri bucketp->port[ntohs(rs->rs_bound_port)] = NULL;
241*80166370Sagiri bucketp->nsockets--;
242c0dd49bdSEiji Ota rs->rs_bound_addr = 0;
243*80166370Sagiri rw_exit(&bucketp->rwlock);
244c0dd49bdSEiji Ota
245*80166370Sagiri rdsv3_sock_put(rs);
246*80166370Sagiri }
247c0dd49bdSEiji Ota
248c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_remove_bound", "Return(rs: %p)", rs);
249c0dd49bdSEiji Ota }
250c0dd49bdSEiji Ota
251c0dd49bdSEiji Ota /* ARGSUSED */
252c0dd49bdSEiji Ota int
rdsv3_bind(sock_lower_handle_t proto_handle,struct sockaddr * sa,socklen_t len,cred_t * cr)253c0dd49bdSEiji Ota rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa,
254c0dd49bdSEiji Ota socklen_t len, cred_t *cr)
255c0dd49bdSEiji Ota {
256c0dd49bdSEiji Ota struct rsock *sk = (struct rsock *)proto_handle;
257c0dd49bdSEiji Ota sin_t *sin = (sin_t *)sa;
258c0dd49bdSEiji Ota struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk);
259c0dd49bdSEiji Ota int ret;
260c0dd49bdSEiji Ota
261c0dd49bdSEiji Ota if (len != sizeof (sin_t) || (sin == NULL) ||
262c0dd49bdSEiji Ota !OK_32PTR((char *)sin)) {
263c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_bind", "address to bind not specified");
264c0dd49bdSEiji Ota return (EINVAL);
265c0dd49bdSEiji Ota }
266c0dd49bdSEiji Ota
267c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_bind", "Enter(rs: %p, addr: 0x%x, port: %x)",
268c0dd49bdSEiji Ota rs, ntohl(sin->sin_addr.s_addr), htons(sin->sin_port));
269c0dd49bdSEiji Ota
270c0dd49bdSEiji Ota if (sin->sin_addr.s_addr == INADDR_ANY) {
271c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_bind", "Invalid address");
272c0dd49bdSEiji Ota return (EINVAL);
273c0dd49bdSEiji Ota }
274c0dd49bdSEiji Ota
275c0dd49bdSEiji Ota /* We don't allow multiple binds */
276c0dd49bdSEiji Ota if (rs->rs_bound_addr) {
277c0dd49bdSEiji Ota RDSV3_DPRINTF2("rdsv3_bind", "Multiple binds not allowed");
278c0dd49bdSEiji Ota return (EINVAL);
279c0dd49bdSEiji Ota }
280c0dd49bdSEiji Ota
281c0dd49bdSEiji Ota ret = rdsv3_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port);
282c0dd49bdSEiji Ota if (ret) {
283c0dd49bdSEiji Ota return (ret);
284c0dd49bdSEiji Ota }
285c0dd49bdSEiji Ota
286c0dd49bdSEiji Ota rs->rs_transport = rdsv3_trans_get_preferred(sin->sin_addr.s_addr);
2875d5562f5SEiji Ota if (!rs->rs_transport) {
288c0dd49bdSEiji Ota rdsv3_remove_bound(rs);
289cadbfdc3SEiji Ota if (rdsv3_printk_ratelimit()) {
290cadbfdc3SEiji Ota RDSV3_DPRINTF1("rdsv3_bind",
291cadbfdc3SEiji Ota "RDS: rdsv3_bind() could not find a transport.\n");
292cadbfdc3SEiji Ota }
293c0dd49bdSEiji Ota return (EADDRNOTAVAIL);
294c0dd49bdSEiji Ota }
295c0dd49bdSEiji Ota
296c0dd49bdSEiji Ota RDSV3_DPRINTF4("rdsv3_bind", "Return: Assigned port: %x to sock: %p",
297c0dd49bdSEiji Ota sin->sin_port, rs);
298c0dd49bdSEiji Ota
299c0dd49bdSEiji Ota return (0);
300c0dd49bdSEiji Ota }
301