/* * CDDL HEADER START * * The contents of this file are subject to the terms of the * Common Development and Distribution License (the "License"). * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE * or http://www.opensolaris.org/os/licensing. * See the License for the specific language governing permissions * and limitations under the License. * * When distributing Covered Code, include this CDDL HEADER in each * file and include the License file at usr/src/OPENSOLARIS.LICENSE. * If applicable, add the following below this CDDL HEADER, with the * fields enclosed by brackets "[]" replaced with your own identifying * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ /* * Copyright (c) 2008, The Ohio State University. All rights reserved. * * Portions of this source code is developed by the team members of * The Ohio State University's Network-Based Computing Laboratory (NBCL), * headed by Professor Dhabaleswar K. (DK) Panda. * * Acknowledgements to contributions from developors: * Ranjit Noronha: noronha@cse.ohio-state.edu * Lei Chai : chail@cse.ohio-state.edu * Weikuan Yu : yuw@cse.ohio-state.edu * */ #include #include #include #include #include #include uint_t rdma_minchunk = RDMA_MINCHUNK; /* * Globals */ int rdma_modloaded = 0; /* flag to load RDMA plugin modules */ int rdma_dev_available = 0; /* if any RDMA device is loaded */ kmutex_t rdma_modload_lock; /* protects rdma_modloaded flag */ rdma_svc_wait_t rdma_wait; rdma_registry_t *rdma_mod_head = NULL; /* head for RDMA modules */ krwlock_t rdma_lock; /* protects rdma_mod_head list */ ldi_ident_t rpcmod_li = NULL; /* identifies us with ldi_ framework */ kmem_cache_t *clist_cache = NULL; /* * Statics */ ldi_handle_t rpcib_handle = NULL; /* * Externs */ extern kstat_named_t *rdmarcstat_ptr; extern uint_t rdmarcstat_ndata; extern kstat_named_t *rdmarsstat_ptr; extern uint_t rdmarsstat_ndata; void rdma_kstat_init(); /* * RDMATF module registration routine. * This routine is expected to be called by the init routine in * the plugin modules. */ rdma_stat rdma_register_mod(rdma_mod_t *mod) { rdma_registry_t **mp, *m; if (mod->rdma_version != RDMATF_VERS) { return (RDMA_BADVERS); } rw_enter(&rdma_lock, RW_WRITER); /* * Ensure not already registered */ mp = &rdma_mod_head; while (*mp != NULL) { if (strncmp((*mp)->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE) == 0) { if ((*mp)->r_mod_state == RDMA_MOD_INACTIVE) { (*mp)->r_mod_state = RDMA_MOD_ACTIVE; (*mp)->r_mod->rdma_ops = mod->rdma_ops; (*mp)->r_mod->rdma_count = mod->rdma_count; goto announce_hca; } rw_exit(&rdma_lock); return (RDMA_REG_EXIST); } mp = &((*mp)->r_next); } /* * New one, create and add to registry */ m = kmem_alloc(sizeof (rdma_registry_t), KM_SLEEP); m->r_mod = kmem_alloc(sizeof (rdma_mod_t), KM_SLEEP); *m->r_mod = *mod; m->r_next = NULL; m->r_mod->rdma_api = kmem_zalloc(KNC_STRSIZE, KM_SLEEP); (void) strncpy(m->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE); m->r_mod->rdma_api[KNC_STRSIZE - 1] = '\0'; m->r_mod_state = RDMA_MOD_ACTIVE; *mp = m; announce_hca: rw_exit(&rdma_lock); /* * Start the nfs service on the rdma xprts. * (this notification mechanism will need to change when we support * multiple hcas and have support for multiple rdma plugins). */ mutex_enter(&rdma_wait.svc_lock); rdma_wait.svc_stat = RDMA_HCA_ATTACH; cv_signal(&rdma_wait.svc_cv); mutex_exit(&rdma_wait.svc_lock); return (RDMA_SUCCESS); } /* * RDMATF module unregistration routine. * This routine is expected to be called by the fini routine in * the plugin modules. */ rdma_stat rdma_unregister_mod(rdma_mod_t *mod) { rdma_registry_t **m, *mmod = NULL; rw_enter(&rdma_lock, RW_WRITER); m = &rdma_mod_head; while (*m != NULL) { if (strncmp((*m)->r_mod->rdma_api, mod->rdma_api, KNC_STRSIZE) != 0) { m = &((*m)->r_next); continue; } /* * Check if any device attached, if so return error */ if (mod->rdma_count != 0) { rw_exit(&rdma_lock); return (RDMA_FAILED); } /* * Found entry. Mark it inactive. */ mmod = *m; mmod->r_mod->rdma_count = 0; mmod->r_mod_state = RDMA_MOD_INACTIVE; break; } rdma_modloaded = 0; rdma_dev_available = 0; rw_exit(&rdma_lock); /* * Stop the nfs service running on the rdma xprts. * (this notification mechanism will need to change when we support * multiple hcas and have support for multiple rdma plugins). */ mutex_enter(&rdma_wait.svc_lock); rdma_wait.svc_stat = RDMA_HCA_DETACH; cv_signal(&rdma_wait.svc_cv); mutex_exit(&rdma_wait.svc_lock); /* * Not found. */ return (RDMA_SUCCESS); } struct clist * clist_alloc(void) { struct clist *clp; clp = kmem_cache_alloc(clist_cache, KM_SLEEP); bzero(clp, sizeof (*clp)); return (clp); } uint32_t clist_len(struct clist *cl) { uint32_t len = 0; while (cl) { len += cl->c_len; cl = cl->c_next; } return (len); } void clist_zero_len(struct clist *cl) { while (cl != NULL) { if (cl->c_dmemhandle.mrc_rmr == 0) break; cl->c_len = 0; cl = cl->c_next; } } /* * Creates a new chunk list entry, and * adds it to the end of a chunk list. */ void clist_add(struct clist **clp, uint32_t xdroff, int len, struct mrc *shandle, caddr_t saddr, struct mrc *dhandle, caddr_t daddr) { struct clist *cl; /* Find the end of the list */ while (*clp != NULL) clp = &((*clp)->c_next); cl = clist_alloc(); cl->c_xdroff = xdroff; cl->c_len = len; cl->w.c_saddr = (uint64_t)(uintptr_t)saddr; if (shandle) cl->c_smemhandle = *shandle; cl->u.c_daddr = (uint64_t)(uintptr_t)daddr; if (dhandle) cl->c_dmemhandle = *dhandle; cl->c_next = NULL; *clp = cl; } rdma_stat clist_register(CONN *conn, struct clist *cl, clist_dstsrc dstsrc) { struct clist *c; int status; for (c = cl; c; c = c->c_next) { if (c->c_len <= 0) continue; c->c_regtype = dstsrc; switch (dstsrc) { case CLIST_REG_SOURCE: status = RDMA_REGMEMSYNC(conn, (caddr_t)(struct as *)c->c_adspc, (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len, &c->c_smemhandle, (void **)&c->c_ssynchandle, (void *)c->rb_longbuf.rb_private); break; case CLIST_REG_DST: status = RDMA_REGMEMSYNC(conn, (caddr_t)(struct as *)c->c_adspc, (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len, &c->c_dmemhandle, (void **)&c->c_dsynchandle, (void *)c->rb_longbuf.rb_private); break; default: return (RDMA_INVAL); } if (status != RDMA_SUCCESS) { (void) clist_deregister(conn, cl); return (status); } } return (RDMA_SUCCESS); } rdma_stat clist_deregister(CONN *conn, struct clist *cl) { struct clist *c; for (c = cl; c; c = c->c_next) { switch (c->c_regtype) { case CLIST_REG_SOURCE: if (c->c_smemhandle.mrc_rmr != 0) { (void) RDMA_DEREGMEMSYNC(conn, (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_smemhandle, (void *)(uintptr_t)c->c_ssynchandle, (void *)c->rb_longbuf.rb_private); c->c_smemhandle.mrc_rmr = 0; c->c_ssynchandle = 0; } break; case CLIST_REG_DST: if (c->c_dmemhandle.mrc_rmr != 0) { (void) RDMA_DEREGMEMSYNC(conn, (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_dmemhandle, (void *)(uintptr_t)c->c_dsynchandle, (void *)c->rb_longbuf.rb_private); c->c_dmemhandle.mrc_rmr = 0; c->c_dsynchandle = 0; } break; default: /* clist unregistered. continue */ break; } } return (RDMA_SUCCESS); } rdma_stat clist_syncmem(CONN *conn, struct clist *cl, clist_dstsrc dstsrc) { struct clist *c; rdma_stat status; c = cl; switch (dstsrc) { case CLIST_REG_SOURCE: while (c != NULL) { if (c->c_ssynchandle) { status = RDMA_SYNCMEM(conn, (void *)(uintptr_t)c->c_ssynchandle, (caddr_t)(uintptr_t)c->w.c_saddr3, c->c_len, 0); if (status != RDMA_SUCCESS) return (status); } c = c->c_next; } break; case CLIST_REG_DST: while (c != NULL) { if (c->c_ssynchandle) { status = RDMA_SYNCMEM(conn, (void *)(uintptr_t)c->c_dsynchandle, (caddr_t)(uintptr_t)c->u.c_daddr3, c->c_len, 1); if (status != RDMA_SUCCESS) return (status); } c = c->c_next; } break; default: return (RDMA_INVAL); } return (RDMA_SUCCESS); } /* * Frees up entries in chunk list */ void clist_free(struct clist *cl) { struct clist *c = cl; while (c != NULL) { cl = cl->c_next; kmem_cache_free(clist_cache, c); c = cl; } } rdma_stat rdma_clnt_postrecv(CONN *conn, uint32_t xid) { struct clist *cl = NULL; rdma_stat retval; rdma_buf_t rbuf = {0}; rbuf.type = RECV_BUFFER; if (RDMA_BUF_ALLOC(conn, &rbuf)) { return (RDMA_NORESOURCE); } clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, NULL, NULL); retval = RDMA_CLNT_RECVBUF(conn, cl, xid); clist_free(cl); return (retval); } rdma_stat rdma_clnt_postrecv_remove(CONN *conn, uint32_t xid) { return (RDMA_CLNT_RECVBUF_REMOVE(conn, xid)); } rdma_stat rdma_svc_postrecv(CONN *conn) { struct clist *cl = NULL; rdma_stat retval; rdma_buf_t rbuf = {0}; rbuf.type = RECV_BUFFER; if (RDMA_BUF_ALLOC(conn, &rbuf)) { retval = RDMA_NORESOURCE; } else { clist_add(&cl, 0, rbuf.len, &rbuf.handle, rbuf.addr, NULL, NULL); retval = RDMA_SVC_RECVBUF(conn, cl); clist_free(cl); } return (retval); } rdma_stat rdma_buf_alloc(CONN *conn, rdma_buf_t *rbuf) { return (RDMA_BUF_ALLOC(conn, rbuf)); } void rdma_buf_free(CONN *conn, rdma_buf_t *rbuf) { if (!rbuf || rbuf->addr == NULL) { return; } RDMA_BUF_FREE(conn, rbuf); bzero(rbuf, sizeof (rdma_buf_t)); } /* * Caller is holding rdma_modload_lock mutex */ int rdma_modload() { int status; ASSERT(MUTEX_HELD(&rdma_modload_lock)); /* * Load all available RDMA plugins which right now is only IB plugin. * If no IB hardware is present, then quit right away. * ENODEV -- For no device on the system * EPROTONOSUPPORT -- For module not avilable either due to failure to * load or some other reason. */ rdma_modloaded = 1; if (ibt_hw_is_present() == 0) { rdma_dev_available = 0; return (ENODEV); } rdma_dev_available = 1; if (rpcmod_li == NULL) return (EPROTONOSUPPORT); status = ldi_open_by_name("/devices/ib/rpcib@0:rpcib", FREAD | FWRITE, kcred, &rpcib_handle, rpcmod_li); if (status != 0) return (EPROTONOSUPPORT); /* * We will need to reload the plugin module after it was unregistered * but the resources below need to allocated only the first time. */ if (!clist_cache) { clist_cache = kmem_cache_create("rdma_clist", sizeof (struct clist), _POINTER_ALIGNMENT, NULL, NULL, NULL, NULL, 0, 0); rdma_kstat_init(); } (void) ldi_close(rpcib_handle, FREAD|FWRITE, kcred); return (0); } void rdma_kstat_init(void) { kstat_t *ksp; /* * The RDMA framework doesn't know how to deal with Zones, and is * only available in the global zone. */ ASSERT(INGLOBALZONE(curproc)); ksp = kstat_create_zone("unix", 0, "rpc_rdma_client", "rpc", KSTAT_TYPE_NAMED, rdmarcstat_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); if (ksp) { ksp->ks_data = (void *) rdmarcstat_ptr; kstat_install(ksp); } ksp = kstat_create_zone("unix", 0, "rpc_rdma_server", "rpc", KSTAT_TYPE_NAMED, rdmarsstat_ndata, KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE, GLOBAL_ZONEID); if (ksp) { ksp->ks_data = (void *) rdmarsstat_ptr; kstat_install(ksp); } } rdma_stat rdma_kwait(void) { int ret; rdma_stat stat; mutex_enter(&rdma_wait.svc_lock); ret = cv_wait_sig(&rdma_wait.svc_cv, &rdma_wait.svc_lock); /* * If signalled by a hca attach/detach, pass the right * stat back. */ if (ret) stat = rdma_wait.svc_stat; else stat = RDMA_INTR; mutex_exit(&rdma_wait.svc_lock); return (stat); }