1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 2006 Oracle. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
#include <sys/rds.h>
#include <netinet/in.h>

#include <sys/ib/clients/rdsv3/rdsv3.h>
#include <sys/ib/clients/rdsv3/rdma.h>
#include <sys/ib/clients/rdsv3/ib.h>
#include <sys/ib/clients/rdsv3/rdsv3_debug.h>

/*
 * Per-registration state for an RDSv3 IB memory region (MR).
 * This is stored as mr->r_trans_private.
 */
struct rdsv3_ib_mr {
	struct rdsv3_ib_device *device;	/* HCA the MR was registered on */
	struct rdsv3_ib_mr_pool *pool;
	struct ib_fmr *fmr;
	struct list list;
	unsigned int remap_count;

	struct rdsv3_scatterlist *sg;
	unsigned int sg_len;
	uint64_t *dma;
	int sg_dma_len;

	/* DDI pinned memory */
	ddi_umem_cookie_t umem_cookie;	/* from umem_lockmemory() */
	/* IBTF type definitions */
	ibt_fmr_pool_hdl_t fmr_pool_hdl; /* pool the FMR was taken from */
	ibt_ma_hdl_t rc_ma_hdl;		/* from ibt_map_mem_area() */
	ibt_mr_hdl_t rc_fmr_hdl;	/* from ibt_register_physical_fmr() */
	ibt_pmr_desc_t rc_mem_desc;	/* carries the rkey (pmd_rkey) */
};

/*
 * Our own little FMR pool
 * NOTE(review): nothing in this file is seen to populate or consume this
 * structure -- the IBTF ibt_fmr_pool_hdl_t is used instead; confirm whether
 * this is residual from the Linux port.
 */
struct rdsv3_ib_mr_pool {
	struct mutex flush_lock;	/* serialize fmr invalidate */
	struct rdsv3_work_s flush_worker;	/* flush worker */

	kmutex_t list_lock;		/* protect variables below */
	atomic_t item_count;		/* total # of MRs */
	atomic_t dirty_count;		/* # dirty of MRs */
	/* MRs that have reached their max_maps limit */
	struct list drop_list;
	struct list free_list;		/* unused MRs */
	struct list clean_list;		/* unused & unmapped MRs */
	atomic_t free_pinned;		/* memory pinned by free MRs */
	unsigned long max_items;
	unsigned long max_items_soft;
	unsigned long max_free_pinned;
};

static int
rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device *rds_ibdev,
    ibt_fmr_pool_hdl_t pool_hdl, int free_all);
static void rdsv3_ib_teardown_mr(struct rdsv3_ib_mr *ibmr);
static void rdsv3_ib_mr_pool_flush_worker(struct rdsv3_work_s *work);
static struct rdsv3_ib_mr *rdsv3_ib_alloc_fmr(struct rdsv3_ib_device
    *rds_ibdev);
static int rdsv3_ib_map_fmr(struct rdsv3_ib_device *rds_ibdev,
    struct rdsv3_ib_mr *ibmr, struct buf *bp, unsigned int nents);

/*
 * Find the IB device that has 'ipaddr' on its ipaddr_list.
 *
 * Returns the matching rdsv3_ib_device, or NULL if no device claims the
 * address.  Each device's spinlock is held only while its own list is
 * scanned; the returned pointer is no longer protected by any lock held
 * here.
 *
 * NOTE(review): the global rdsv3_ib_devices list is walked without a list
 * lock -- presumably device attach/detach is serialized elsewhere; confirm.
 */
static struct rdsv3_ib_device *
rdsv3_ib_get_device(uint32_be_t ipaddr)
{
	struct rdsv3_ib_device *rds_ibdev;
	struct rdsv3_ib_ipaddr *i_ipaddr;

	RDSV3_DPRINTF4("rdsv3_ib_get_device", "Enter: ipaddr: 0x%x", ipaddr);

	RDSV3_FOR_EACH_LIST_NODE(rds_ibdev, &rdsv3_ib_devices, list) {
		mutex_enter(&rds_ibdev->spinlock);
		RDSV3_FOR_EACH_LIST_NODE(i_ipaddr, &rds_ibdev->ipaddr_list,
		    list) {
			if (i_ipaddr->ipaddr == ipaddr) {
				mutex_exit(&rds_ibdev->spinlock);
				return (rds_ibdev);
			}
		}
		mutex_exit(&rds_ibdev->spinlock);
	}

	/* not found on any device */
	RDSV3_DPRINTF4("rdsv3_ib_get_device", "Return: ipaddr: 0x%x", ipaddr);

	return (NULL);
}

/*
 * Record 'ipaddr' on rds_ibdev's ipaddr_list.
 *
 * Returns 0 on success or -ENOMEM if the tracking node cannot be
 * allocated (KM_NOSLEEP: may be called from a context that cannot block).
 * No duplicate check is performed here; callers go through
 * rdsv3_ib_update_ipaddr() which removes any prior owner first.
 */
static int
rdsv3_ib_add_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
{
	struct rdsv3_ib_ipaddr *i_ipaddr;

	RDSV3_DPRINTF4("rdsv3_ib_add_ipaddr", "rds_ibdev: %p ipaddr: %x",
	    rds_ibdev, ipaddr);

	i_ipaddr = kmem_alloc(sizeof (*i_ipaddr), KM_NOSLEEP);
	if (!i_ipaddr)
		return (-ENOMEM);

	i_ipaddr->ipaddr = ipaddr;

	mutex_enter(&rds_ibdev->spinlock);
	list_insert_tail(&rds_ibdev->ipaddr_list, i_ipaddr);
	mutex_exit(&rds_ibdev->spinlock);

	return (0);
}

/*
 * Remove the first entry matching 'ipaddr' from rds_ibdev's ipaddr_list
 * and free it.  A no-op if the address is not on the list.
 */
static void
rdsv3_ib_remove_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
{
	struct rdsv3_ib_ipaddr *i_ipaddr, *next;

	RDSV3_DPRINTF4("rdsv3_ib_remove_ipaddr", "rds_ibdev: %p, ipaddr: %x",
	    rds_ibdev, ipaddr);

	mutex_enter(&rds_ibdev->spinlock);
	RDSV3_FOR_EACH_LIST_NODE_SAFE(i_ipaddr, next, &rds_ibdev->ipaddr_list,
	    list) {
		if (i_ipaddr->ipaddr == ipaddr) {
			list_remove_node(&i_ipaddr->list);
			kmem_free(i_ipaddr, sizeof (*i_ipaddr));
			break;
		}
	}
	mutex_exit(&rds_ibdev->spinlock);

	RDSV3_DPRINTF4("rdsv3_ib_remove_ipaddr",
	    "Return: rds_ibdev: %p, ipaddr: %x", rds_ibdev, ipaddr);
}

/*
 * Move ownership of 'ipaddr' to rds_ibdev: strip it from whichever device
 * currently claims it (if any), then add it to rds_ibdev.
 *
 * Returns 0 on success or -ENOMEM from rdsv3_ib_add_ipaddr().
 */
int
rdsv3_ib_update_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
{
	struct rdsv3_ib_device *rds_ibdev_old;

	RDSV3_DPRINTF4("rdsv3_ib_update_ipaddr", "rds_ibdev: %p, ipaddr: %x",
	    rds_ibdev, ipaddr);

	rds_ibdev_old = rdsv3_ib_get_device(ipaddr);
	if (rds_ibdev_old)
		rdsv3_ib_remove_ipaddr(rds_ibdev_old, ipaddr);

	return (rdsv3_ib_add_ipaddr(rds_ibdev, ipaddr));
}

/*
 * Migrate 'conn' from the global no-device list onto rds_ibdev's conn_list.
 *
 * Lock order: ib_nodev_conns_lock, then rds_ibdev->spinlock -- the same
 * order rdsv3_ib_remove_conn() uses, so the two cannot deadlock against
 * each other.
 */
void
rdsv3_ib_add_conn(struct rdsv3_ib_device *rds_ibdev,
    struct rdsv3_connection *conn)
{
	struct rdsv3_ib_connection *ic = conn->c_transport_data;

	RDSV3_DPRINTF4("rdsv3_ib_add_conn", "rds_ibdev: %p, conn: %p",
	    rds_ibdev, conn);

	/* conn was previously on the nodev_conns_list */
	mutex_enter(&ib_nodev_conns_lock);
	ASSERT(!list_is_empty(&ib_nodev_conns));
	ASSERT(list_link_active(&ic->ib_node));
	list_remove_node(&ic->ib_node);

	mutex_enter(&rds_ibdev->spinlock);
	list_insert_tail(&rds_ibdev->conn_list, ic);
	ic->i_on_dev_list = B_TRUE;
	mutex_exit(&rds_ibdev->spinlock);
	mutex_exit(&ib_nodev_conns_lock);
}

/*
 * Inverse of rdsv3_ib_add_conn(): detach 'conn' from rds_ibdev's conn_list
 * and park it back on the global no-device list.  Same lock order as
 * rdsv3_ib_add_conn().
 */
void
rdsv3_ib_remove_conn(struct rdsv3_ib_device *rds_ibdev,
    struct rdsv3_connection *conn)
{
	struct rdsv3_ib_connection *ic = conn->c_transport_data;

	RDSV3_DPRINTF4("rdsv3_ib_remove_conn", "rds_ibdev: %p, conn: %p",
	    rds_ibdev, conn);

	/* place conn on nodev_conns_list */
	mutex_enter(&ib_nodev_conns_lock);

	mutex_enter(&rds_ibdev->spinlock);
	ASSERT(list_link_active(&ic->ib_node));
	list_remove_node(&ic->ib_node);
	ic->i_on_dev_list = B_FALSE;
	mutex_exit(&rds_ibdev->spinlock);

	list_insert_tail(&ib_nodev_conns, ic);

	mutex_exit(&ib_nodev_conns_lock);

	RDSV3_DPRINTF4("rdsv3_ib_remove_conn",
	    "Return: rds_ibdev: %p, conn: %p", rds_ibdev, conn);
}

/*
 * Destroy every connection on 'list' (either a device's conn_list or the
 * global ib_nodev_conns), which is protected by 'list_lock'.
 *
 * The list is first spliced onto a local list under the lock, then each
 * connection is destroyed with the lock dropped, so rdsv3_conn_destroy()
 * never runs with 'list_lock' held.
 */
void
__rdsv3_ib_destroy_conns(struct list *list, kmutex_t *list_lock)
{
	struct rdsv3_ib_connection *ic, *_ic;
	list_t tmp_list;

	RDSV3_DPRINTF4("__rdsv3_ib_destroy_conns", "Enter: list: %p", list);

	/* avoid calling conn_destroy with irqs off */
	mutex_enter(list_lock);
	list_splice(list, &tmp_list);
	mutex_exit(list_lock);

	RDSV3_FOR_EACH_LIST_NODE_SAFE(ic, _ic, &tmp_list, ib_node) {
		rdsv3_conn_destroy(ic->conn);
	}

	RDSV3_DPRINTF4("__rdsv3_ib_destroy_conns", "Return: list: %p", list);
}

/*
 * Tear down the device's IBTF FMR pool: flush all (free_all = 1) cached
 * mappings, then destroy the pool.  A no-op if the pool was never created.
 * Both IBTF return codes are deliberately ignored -- this is teardown.
 */
void
rdsv3_ib_destroy_mr_pool(struct rdsv3_ib_device *rds_ibdev)
{
	RDSV3_DPRINTF4("rdsv3_ib_destroy_mr_pool", "Enter: ibdev: %p",
	    rds_ibdev);

	if (rds_ibdev->fmr_pool_hdl == NULL)
		return;

	(void) rdsv3_ib_flush_mr_pool(rds_ibdev, rds_ibdev->fmr_pool_hdl, 1);
	(void) ibt_destroy_fmr_pool(ib_get_ibt_hca_hdl(rds_ibdev->dev),
	    rds_ibdev->fmr_pool_hdl);
}

#define	IB_FMR_MAX_BUF_SIZE	0x1000000	/* 16MB max buf */
/*
 * Create the per-HCA IBTF FMR pool used to register user buffers for RDMA.
 *
 * Queries the HCA attributes (cached in rds_ibdev->hca_attr for later use
 * by rdsv3_ib_map_fmr()), sizes each FMR to cover IB_FMR_MAX_BUF_SIZE, and
 * records the pool limits in rds_ibdev.  Returns 0 on success or -ENOMEM
 * on any IBTF failure (the specific ibt_status is not propagated).
 */
int
rdsv3_ib_create_mr_pool(struct rdsv3_ib_device *rds_ibdev)
{
	uint_t h_page_sz;
	ibt_fmr_pool_attr_t fmr_attr;
	ibt_status_t ibt_status;
	ibt_hca_hdl_t hca_hdl;

	RDSV3_DPRINTF4("rdsv3_ib_create_mr_pool",
	    "Enter: ibdev: %p", rds_ibdev);

	hca_hdl = ib_get_ibt_hca_hdl(rds_ibdev->dev);
	/* get hca attributes */
	ibt_status = ibt_query_hca(hca_hdl, &rds_ibdev->hca_attr);
	if (ibt_status != IBT_SUCCESS) {
		return (-ENOMEM);
	}

	/* setup FMR pool attributes */
	/* hca_page_sz is reported in KB -- convert to bytes */
	h_page_sz = rds_ibdev->hca_attr.hca_page_sz * 1024;

	/* +2 pages: buffer may straddle a page at each end */
	fmr_attr.fmr_max_pages_per_fmr = (IB_FMR_MAX_BUF_SIZE / h_page_sz) + 2;
	fmr_attr.fmr_pool_size = RDSV3_FMR_POOL_SIZE;
	fmr_attr.fmr_dirty_watermark = 128;
	fmr_attr.fmr_cache = B_FALSE;
	fmr_attr.fmr_flags = IBT_MR_NOSLEEP | IBT_MR_ENABLE_LOCAL_WRITE |
	    IBT_MR_ENABLE_REMOTE_WRITE | IBT_MR_ENABLE_REMOTE_READ;
	fmr_attr.fmr_page_sz = h_page_sz;
	fmr_attr.fmr_func_hdlr = NULL;
	fmr_attr.fmr_func_arg = (void *) NULL;

	/* create the FMR pool */
	ibt_status = ibt_create_fmr_pool(hca_hdl, rds_ibdev->pd->ibt_pd,
	    &fmr_attr, &rds_ibdev->fmr_pool_hdl);
	if (ibt_status != IBT_SUCCESS) {
		return (-ENOMEM);
	}
	rds_ibdev->max_fmrs = fmr_attr.fmr_pool_size;
	rds_ibdev->fmr_message_size = fmr_attr.fmr_max_pages_per_fmr;
	return (0);
}

/*
 * Report the MR pool limits (max MR count, max pages per MR) for the
 * RDS rdma-connection info ioctl.
 */
void
rdsv3_ib_get_mr_info(struct rdsv3_ib_device *rds_ibdev,
    struct rdsv3_info_rdma_connection *iinfo)
{
	iinfo->rdma_mr_max = rds_ibdev->max_fmrs;
	iinfo->rdma_mr_size = rds_ibdev->fmr_message_size;
}

/*
 * Register the user buffer described by 'args' for RDMA on the device
 * bound to 'rs', returning the transport-private rdsv3_ib_mr and storing
 * the remote key in *key_ret.
 *
 * Steps: find the device for the socket's bound address, grab an FMR
 * wrapper, pin the user pages (umem_lockmemory), then map and register
 * them (continued below).  On failure returns an ERR_PTR-style encoded
 * pointer; the caller tests with IS_ERR().
 */
void *
rdsv3_ib_get_mr(struct rdsv3_iovec *args, unsigned long nents,
    struct rdsv3_sock *rs, uint32_t *key_ret)
{
	struct rdsv3_ib_device *rds_ibdev;
	struct rdsv3_ib_mr *ibmr = NULL;
	ddi_umem_cookie_t umem_cookie;
	size_t umem_len;
	caddr_t umem_addr;
	int ret;
	struct buf *bp;

	RDSV3_DPRINTF4("rdsv3_ib_get_mr", "Enter: args.addr: %p", args->addr);

	rds_ibdev = rdsv3_ib_get_device(rs->rs_bound_addr);

	/*
	 * NOTE(review): PTR_ERR(-EFAULT) reads like it should be
	 * ERR_PTR(-EFAULT) (Linux convention: ERR_PTR encodes an errno
	 * into a pointer, PTR_ERR decodes it) -- confirm how these macros
	 * are defined in the rdsv3 headers.
	 */
	if (rds_ibdev == NULL)
		return (void *)(PTR_ERR(-EFAULT));

	ibmr = rdsv3_ib_alloc_fmr(rds_ibdev);
	if (IS_ERR(ibmr))
		return (ibmr);

	/* pin user memory pages; round to whole pages around args->addr */
	umem_len = ptob(btopr(args->bytes +
	    ((uintptr_t)args->addr & PAGEOFFSET)));
	umem_addr = (caddr_t)((uintptr_t)args->addr & ~PAGEOFFSET);
	ret = umem_lockmemory(umem_addr, umem_len,
	    DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ,
	    &umem_cookie, NULL, NULL);
	if (ret != 0) {
		/* pinning failed: release the wrapper, hand back the errno */
		kmem_free((void *) ibmr, sizeof (*ibmr));
		ibmr = ERR_PTR(ret);
		return (ibmr);
	}

/* transpose umem_cookie to buf structure for rdsv3_ib_map_fmr() */ 372 bp = ddi_umem_iosetup(umem_cookie, 0, umem_len, 373 B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP); 374 375 ret = rdsv3_ib_map_fmr(rds_ibdev, ibmr, bp, nents); 376 freerbuf(bp); /* free bp */ 377 if (ret == 0) { 378 ibmr->umem_cookie = umem_cookie; 379 *key_ret = (uint32_t)ibmr->rc_mem_desc.pmd_rkey; 380 ibmr->device = rds_ibdev; 381 RDSV3_DPRINTF4("rdsv3_ib_get_mr", 382 "Return: ibmr: %p umem_cookie %p", ibmr, ibmr->umem_cookie); 383 return (ibmr); 384 } else { /* error return */ 385 RDSV3_DPRINTF2("rdsv3_ib_get_mr", "map_fmr failed (errno=%d)\n", 386 ret); 387 ddi_umem_unlock(umem_cookie); 388 kmem_free((void *)ibmr, sizeof (*ibmr)); 389 return (ERR_PTR(ret)); 390 } 391 } 392 393 static struct rdsv3_ib_mr * 394 rdsv3_ib_alloc_fmr(struct rdsv3_ib_device *rds_ibdev) 395 { 396 struct rdsv3_ib_mr *ibmr; 397 398 RDSV3_DPRINTF4("rdsv3_ib_alloc_fmr", "Enter: ibdev: %p", rds_ibdev); 399 400 if (rds_ibdev->fmr_pool_hdl) { 401 ibmr = (struct rdsv3_ib_mr *)kmem_zalloc(sizeof (*ibmr), 402 KM_SLEEP); 403 ibmr->fmr_pool_hdl = rds_ibdev->fmr_pool_hdl; 404 return (ibmr); 405 } 406 return (struct rdsv3_ib_mr *)(PTR_ERR(-ENOMEM)); 407 } 408 409 static int 410 rdsv3_ib_map_fmr(struct rdsv3_ib_device *rds_ibdev, struct rdsv3_ib_mr *ibmr, 411 struct buf *bp, unsigned int nents) 412 { 413 ibt_va_attr_t va_attr; 414 ibt_reg_req_t reg_req; 415 uint_t paddr_list_len; 416 uint_t page_sz; 417 ibt_status_t ibt_status; 418 /* LINTED E_FUNC_SET_NOT_USED */ 419 unsigned int l_nents = nents; 420 421 RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "Enter: ibmr: %p", ibmr); 422 RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "buf addr: %p", bp->b_un.b_addr); 423 424 /* setup ibt_map_mem_area attributes */ 425 bzero(&va_attr, sizeof (ibt_va_attr_t)); 426 va_attr.va_buf = bp; 427 va_attr.va_flags = IBT_VA_FMR | IBT_VA_BUF; 428 429 page_sz = rds_ibdev->hca_attr.hca_page_sz * 1024; /* in kbytes */ 430 paddr_list_len = (bp->b_bcount / page_sz) + 2; /* start + end pg 
*/ 431 432 /* map user buffer to HCA address */ 433 ibt_status = ibt_map_mem_area(ib_get_ibt_hca_hdl(rds_ibdev->dev), 434 &va_attr, paddr_list_len, ®_req, &ibmr->rc_ma_hdl); 435 if (ibt_status != IBT_SUCCESS) { 436 return (-ENOMEM); 437 } 438 439 /* use a free entry from FMR pool to register the specified memory */ 440 ibt_status = ibt_register_physical_fmr( 441 ib_get_ibt_hca_hdl(rds_ibdev->dev), ibmr->fmr_pool_hdl, 442 ®_req.fn_arg, &ibmr->rc_fmr_hdl, &ibmr->rc_mem_desc); 443 if (ibt_status != IBT_SUCCESS) { 444 (void) ibt_unmap_mem_area(ib_get_ibt_hca_hdl(rds_ibdev->dev), 445 ibmr->rc_ma_hdl); 446 if (ibt_status == IBT_INSUFF_RESOURCE) { 447 return (-ENOBUFS); 448 } 449 return (-EINVAL); 450 } 451 RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "Return: ibmr: %p rkey: 0x%x", 452 ibmr, (uint32_t)ibmr->rc_mem_desc.pmd_rkey); 453 return (0); 454 } 455 456 void 457 rdsv3_ib_sync_mr(void *trans_private, int direction) 458 { 459 /* LINTED E_FUNC_SET_NOT_USED */ 460 void *l_trans_private = trans_private; 461 /* LINTED E_FUNC_SET_NOT_USED */ 462 int l_direction = direction; 463 464 /* FMR Sync not needed in Solaris on PCI-ex systems */ 465 466 RDSV3_DPRINTF4("rdsv3_ib_sync_mr", "Enter:"); 467 } 468 469 void 470 rdsv3_ib_flush_mrs(void) 471 { 472 struct rdsv3_ib_device *rds_ibdev; 473 474 RDSV3_DPRINTF4("rdsv3_ib_flush_mrs", "Enter:"); 475 476 RDSV3_FOR_EACH_LIST_NODE(rds_ibdev, &rdsv3_ib_devices, list) { 477 if (rds_ibdev->fmr_pool_hdl) { 478 (void) rdsv3_ib_flush_mr_pool(rds_ibdev, 479 rds_ibdev->fmr_pool_hdl, 0); 480 } 481 } 482 } 483 484 static void 485 __rdsv3_ib_teardown_mr(struct rdsv3_ib_mr *ibmr) 486 { 487 RDSV3_DPRINTF4("__rdsv3_ib_teardown_mr", 488 "Enter: ibmr: %p umem_cookie %p", ibmr, ibmr->umem_cookie); 489 490 /* unpin memory pages */ 491 (void) ddi_umem_unlock(ibmr->umem_cookie); 492 } 493 494 void 495 rdsv3_ib_free_mr(void *trans_private, int invalidate) 496 { 497 struct rdsv3_ib_mr *ibmr = trans_private; 498 struct rdsv3_ib_device *rds_ibdev = ibmr->device; 499 500 
RDSV3_DPRINTF4("rdsv3_ib_free_mr", "Enter: ibmr: %p inv: %d", 501 ibmr, invalidate); 502 503 /* return the fmr to the IBTF pool */ 504 /* the final punch will come from the ibt_flush_fmr_pool() */ 505 (void) ibt_deregister_fmr(ib_get_ibt_hca_hdl(rds_ibdev->dev), 506 ibmr->rc_fmr_hdl); 507 (void) ibt_unmap_mem_area(ib_get_ibt_hca_hdl(rds_ibdev->dev), 508 ibmr->rc_ma_hdl); 509 __rdsv3_ib_teardown_mr(ibmr); 510 if (invalidate) { 511 rds_ibdev = ibmr->device; 512 (void) rdsv3_ib_flush_mr_pool(rds_ibdev, 513 rds_ibdev->fmr_pool_hdl, 0); 514 } 515 kmem_free((void *) ibmr, sizeof (*ibmr)); 516 } 517 518 static int 519 rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device *rds_ibdev, 520 ibt_fmr_pool_hdl_t pool_hdl, int free_all) 521 { 522 /* LINTED E_FUNC_SET_NOT_USED */ 523 int l_free_all = free_all; 524 525 RDSV3_DPRINTF4("rdsv3_ib_flush_mr_pool", "Enter: pool: %p", pool_hdl); 526 527 rdsv3_ib_stats_inc(s_ib_rdma_mr_pool_flush); 528 529 (void) ibt_flush_fmr_pool(ib_get_ibt_hca_hdl(rds_ibdev->dev), 530 pool_hdl); 531 return (0); 532 } 533