/* * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. */ /* * This file contains code imported from the OFED rds source file bind.c * Oracle elects to have and use the contents of bind.c under and governed * by the OpenIB.org BSD license (see below for full license text). However, * the following notice accompanied the original version of this file: */ /* * Copyright (c) 2006 Oracle. All rights reserved. * * This software is available to you under a choice of one of two * licenses. You may choose to be licensed under the terms of the GNU * General Public License (GPL) Version 2, available from the file * COPYING in the main directory of this source tree, or the * OpenIB.org BSD license below: * * Redistribution and use in source and binary forms, with or * without modification, are permitted provided that the following * conditions are met: * * - Redistributions of source code must retain the above * copyright notice, this list of conditions and the following * disclaimer. * * - Redistributions in binary form must reproduce the above * copyright notice, this list of conditions and the following * disclaimer in the documentation and/or other materials * provided with the distribution. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * */ #include #include #include #include #include #include kmutex_t rdsv3_bind_lock; avl_tree_t rdsv3_bind_tree; /* * Each node in the rdsv3_bind_tree is of this type. */ struct rdsv3_ip_bucket { ipaddr_t ip; zoneid_t zone; avl_node_t ip_avl_node; krwlock_t rwlock; uint_t nsockets; struct rdsv3_sock *port[65536]; }; static int rdsv3_bind_node_compare(const void *a, const void *b) { struct rdsv3_ip_bucket *bp = (struct rdsv3_ip_bucket *)b; if (*(uint64_t *)a > (((uint64_t)bp->ip << 32) | bp->zone)) return (+1); else if (*(uint64_t *)a < (((uint64_t)bp->ip << 32) | bp->zone)) return (-1); return (0); } void rdsv3_bind_init() { RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Enter"); mutex_init(&rdsv3_bind_lock, NULL, MUTEX_DRIVER, NULL); avl_create(&rdsv3_bind_tree, rdsv3_bind_node_compare, sizeof (struct rdsv3_ip_bucket), offsetof(struct rdsv3_ip_bucket, ip_avl_node)); RDSV3_DPRINTF4("rdsv3_bind_tree_init", "Return"); } /* called on detach */ void rdsv3_bind_exit() { struct rdsv3_ip_bucket *bucketp; void *cookie = NULL; RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Enter"); while ((bucketp = avl_destroy_nodes(&rdsv3_bind_tree, &cookie)) != NULL) { rw_destroy(&bucketp->rwlock); kmem_free(bucketp, sizeof (struct rdsv3_ip_bucket)); } avl_destroy(&rdsv3_bind_tree); mutex_destroy(&rdsv3_bind_lock); RDSV3_DPRINTF2("rdsv3_bind_tree_exit", "Return"); } struct rdsv3_ip_bucket * rdsv3_find_ip_bucket(ipaddr_t ipaddr, zoneid_t zoneid) { struct rdsv3_ip_bucket *bucketp; avl_index_t where; uint64_t needle = ((uint64_t)ipaddr << 32) | zoneid; mutex_enter(&rdsv3_bind_lock); bucketp = avl_find(&rdsv3_bind_tree, &needle, &where); if (bucketp == NULL) { /* allocate a new bucket for this IP & zone */ bucketp = kmem_zalloc(sizeof (struct rdsv3_ip_bucket), KM_SLEEP); rw_init(&bucketp->rwlock, NULL, RW_DRIVER, NULL); bucketp->ip = ipaddr; bucketp->zone = zoneid; avl_insert(&rdsv3_bind_tree, bucketp, where); } mutex_exit(&rdsv3_bind_lock); return (bucketp); } /* * Return the rdsv3_sock bound at the given local address. * * The rx path can race with rdsv3_release. We notice if rdsv3_release() has * marked this socket and don't return a rs ref to the rx path. */ struct rdsv3_sock * rdsv3_find_bound(struct rdsv3_connection *conn, uint16_be_t port) { struct rdsv3_sock *rs; RDSV3_DPRINTF4("rdsv3_find_bound", "Enter(ip:port: %u.%u.%u.%u:%d)", NIPQUAD(conn->c_laddr), ntohs(port)); rw_enter(&conn->c_bucketp->rwlock, RW_READER); ASSERT(ntohl(conn->c_laddr) == conn->c_bucketp->ip); rs = conn->c_bucketp->port[ntohs(port)]; if (rs && !rdsv3_sk_sock_flag(rdsv3_rs_to_sk(rs), SOCK_DEAD)) rdsv3_sk_sock_hold(rdsv3_rs_to_sk(rs)); else rs = NULL; rw_exit(&conn->c_bucketp->rwlock); RDSV3_DPRINTF5("rdsv3_find_bound", "returning rs %p for %u.%u.%u.%u:%d", rs, NIPQUAD(conn->c_laddr), ntohs(port)); return (rs); } /* returns -ve errno or +ve port */ static int rdsv3_add_bound(struct rdsv3_sock *rs, uint32_be_t addr, uint16_be_t *port) { int ret = -EADDRINUSE; uint16_t rover, last; struct rdsv3_ip_bucket *bucketp; RDSV3_DPRINTF4("rdsv3_add_bound", "Enter(addr:port: %x:%x)", ntohl(addr), ntohs(*port)); if (*port != 0) { rover = ntohs(*port); last = rover; } else { (void) random_get_pseudo_bytes((uint8_t *)&rover, sizeof (uint16_t)); rover = MAX(rover, 2); last = rover - 1; } bucketp = rdsv3_find_ip_bucket(ntohl(addr), rs->rs_zoneid); /* leave the bind lock and get the bucket lock */ rw_enter(&bucketp->rwlock, RW_WRITER); do { if (rover == 0) rover++; if (bucketp->port[rover] == NULL) { *port = htons(rover); ret = 0; break; } } while (rover++ != last); if (ret == 0) { rs->rs_bound_addr = addr; rs->rs_bound_port = *port; bucketp->port[rover] = rs; bucketp->nsockets++; rdsv3_sock_addref(rs); RDSV3_DPRINTF5("rdsv3_add_bound", "rs %p binding to %u.%u.%u.%u:%d", rs, NIPQUAD(addr), rover); } rw_exit(&bucketp->rwlock); RDSV3_DPRINTF4("rdsv3_add_bound", "Return(ret: %d port: %d)", ret, rover); return (ret); } void rdsv3_remove_bound(struct rdsv3_sock *rs) { RDSV3_DPRINTF4("rdsv3_remove_bound", "Enter(rs: %p)", rs); if (rs->rs_bound_addr) { struct rdsv3_ip_bucket *bucketp; RDSV3_DPRINTF5("rdsv3_remove_bound", "rs %p unbinding from %u.%u.%u.%u:%x", rs, NIPQUAD(htonl(rs->rs_bound_addr)), rs->rs_bound_port); bucketp = rdsv3_find_ip_bucket(ntohl(rs->rs_bound_addr), rs->rs_zoneid); rw_enter(&bucketp->rwlock, RW_WRITER); bucketp->port[ntohs(rs->rs_bound_port)] = NULL; bucketp->nsockets--; rs->rs_bound_addr = 0; rw_exit(&bucketp->rwlock); rdsv3_sock_put(rs); } RDSV3_DPRINTF4("rdsv3_remove_bound", "Return(rs: %p)", rs); } /* ARGSUSED */ int rdsv3_bind(sock_lower_handle_t proto_handle, struct sockaddr *sa, socklen_t len, cred_t *cr) { struct rsock *sk = (struct rsock *)proto_handle; sin_t *sin = (sin_t *)sa; struct rdsv3_sock *rs = rdsv3_sk_to_rs(sk); int ret; if (len != sizeof (sin_t) || (sin == NULL) || !OK_32PTR((char *)sin)) { RDSV3_DPRINTF2("rdsv3_bind", "address to bind not specified"); return (EINVAL); } RDSV3_DPRINTF4("rdsv3_bind", "Enter(rs: %p, addr: 0x%x, port: %x)", rs, ntohl(sin->sin_addr.s_addr), htons(sin->sin_port)); if (sin->sin_addr.s_addr == INADDR_ANY) { RDSV3_DPRINTF2("rdsv3_bind", "Invalid address"); return (EINVAL); } /* We don't allow multiple binds */ if (rs->rs_bound_addr) { RDSV3_DPRINTF2("rdsv3_bind", "Multiple binds not allowed"); return (EINVAL); } ret = rdsv3_add_bound(rs, sin->sin_addr.s_addr, &sin->sin_port); if (ret) { return (ret); } rs->rs_transport = rdsv3_trans_get_preferred(sin->sin_addr.s_addr); if (!rs->rs_transport) { rdsv3_remove_bound(rs); if (rdsv3_printk_ratelimit()) { RDSV3_DPRINTF1("rdsv3_bind", "RDS: rdsv3_bind() could not find a transport.\n"); } return (EADDRNOTAVAIL); } RDSV3_DPRINTF4("rdsv3_bind", "Return: Assigned port: %x to sock: %p", sin->sin_port, rs); return (0); }