1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. 23 */ 24 25 /* 26 * Copyright (c) 2007 Oracle. All rights reserved. 27 * 28 * This software is available to you under a choice of one of two 29 * licenses. You may choose to be licensed under the terms of the GNU 30 * General Public License (GPL) Version 2, available from the file 31 * COPYING in the main directory of this source tree, or the 32 * OpenIB.org BSD license below: 33 * 34 * Redistribution and use in source and binary forms, with or 35 * without modification, are permitted provided that the following 36 * conditions are met: 37 * 38 * - Redistributions of source code must retain the above 39 * copyright notice, this list of conditions and the following 40 * disclaimer. 41 * 42 * - Redistributions in binary form must reproduce the above 43 * copyright notice, this list of conditions and the following 44 * disclaimer in the documentation and/or other materials 45 * provided with the distribution. 
46 * 47 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 48 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 49 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 50 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 51 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 52 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 53 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 54 * SOFTWARE. 55 * 56 */ 57 #include <sys/ib/clients/of/rdma/ib_verbs.h> 58 #include <sys/ib/clients/of/rdma/ib_addr.h> 59 #include <sys/ib/clients/of/rdma/rdma_cm.h> 60 61 #include <sys/ib/clients/rdsv3/ib.h> 62 #include <sys/ib/clients/rdsv3/rdma.h> 63 #include <sys/ib/clients/rdsv3/rdsv3_debug.h> 64 65 #define DMA_TO_DEVICE 0 66 #define DMA_FROM_DEVICE 1 67 #define RB_CLEAR_NODE(nodep) AVL_SETPARENT(nodep, nodep); 68 69 /* 70 * XXX 71 * - build with sparse 72 * - should we limit the size of a mr region? let transport return failure? 73 * - should we detect duplicate keys on a socket? hmm. 74 * - an rdma is an mlock, apply rlimit? 75 */ 76 77 /* 78 * get the number of pages by looking at the page indices that the start and 79 * end addresses fall in. 80 * 81 * Returns 0 if the vec is invalid. It is invalid if the number of bytes 82 * causes the address to wrap or overflows an unsigned int. This comes 83 * from being stored in the 'length' member of 'struct rdsv3_scatterlist'. 
 */
static unsigned int
rdsv3_pages_in_vec(struct rdsv3_iovec *vec)
{
	/* Reject a vector whose end wraps, or whose length overflows uint. */
	if ((vec->addr + vec->bytes <= vec->addr) ||
	    (vec->bytes > (uint64_t)UINT_MAX)) {
		return (0);
	}

	/* Page index one past the end, minus the page index of the start. */
	return (((vec->addr + vec->bytes + PAGESIZE - 1) >>
	    PAGESHIFT) - (vec->addr >> PAGESHIFT));
}

/*
 * Look up 'key' in the AVL tree at 'root'.  If no entry exists and
 * 'insert' is non-NULL, link 'insert' in at the position avl_find()
 * computed and take a reference on it for the tree.
 *
 * Returns the MR already present under 'key', or NULL when none was
 * found (including the case where 'insert' was just added).
 * Caller must hold rs_rdma_lock.
 */
static struct rdsv3_mr *
rdsv3_mr_tree_walk(struct avl_tree *root, uint32_t key,
    struct rdsv3_mr *insert)
{
	struct rdsv3_mr *mr;
	avl_index_t where;

	mr = avl_find(root, &key, &where);
	if ((mr == NULL) && (insert != NULL)) {
		avl_insert(root, (void *)insert, where);
		/* The tree now holds a reference on the inserted MR. */
		atomic_add_32(&insert->r_refcount, 1);
		return (NULL);
	}

	return (mr);
}

/*
 * Destroy the transport-specific part of a MR.
 * Idempotent: the RDSV3_MR_DEAD bit ensures teardown runs only once
 * even when called from multiple paths (free, unuse, final put).
 */
static void
rdsv3_destroy_mr(struct rdsv3_mr *mr)
{
	struct rdsv3_sock *rs = mr->r_sock;
	void *trans_private = NULL;
	avl_node_t *np;

	RDSV3_DPRINTF5("rdsv3_destroy_mr",
	    "RDS: destroy mr key is %x refcnt %u",
	    mr->r_key, atomic_get(&mr->r_refcount));

	/* Only the first caller to set the bit performs the teardown. */
	if (test_and_set_bit(RDSV3_MR_DEAD, &mr->r_state))
		return;

	mutex_enter(&rs->rs_rdma_lock);
	np = &mr->r_rb_node;
	/*
	 * RB_CLEAR_NODE() marks an unlinked node by pointing its parent
	 * at itself; only remove nodes that are actually in the tree.
	 */
	if (AVL_XPARENT(np) != np)
		avl_remove(&rs->rs_rdma_keys, mr);
	trans_private = mr->r_trans_private;
	mr->r_trans_private = NULL;
	mutex_exit(&rs->rs_rdma_lock);

	/* Release the transport's MR state outside rs_rdma_lock. */
	if (trans_private)
		mr->r_trans->free_mr(trans_private, mr->r_invalidate);
}

/*
 * Final put: tear down transport state and free the MR itself.
 * Invoked when the last reference on the MR is dropped.
 */
void
__rdsv3_put_mr_final(struct rdsv3_mr *mr)
{
	rdsv3_destroy_mr(mr);
	kmem_free(mr, sizeof (*mr));
}

/*
 * By the time this is called we can't have any more ioctls called on
 * the socket so we don't need to worry about racing with others.
 */
void
rdsv3_rdma_drop_keys(struct rdsv3_sock *rs)
{
	struct rdsv3_mr *mr;
	struct avl_node *node;

	/* Release any MRs associated with this socket */
	mutex_enter(&rs->rs_rdma_lock);
	while ((node = avl_first(&rs->rs_rdma_keys))) {
		mr = container_of(node, struct rdsv3_mr, r_rb_node);
		if (mr->r_trans == rs->rs_transport)
			mr->r_invalidate = 0;
		avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
		RB_CLEAR_NODE(&mr->r_rb_node)
		/*
		 * Drop the lock while tearing the MR down:
		 * rdsv3_destroy_mr() re-acquires rs_rdma_lock itself
		 * and may call into the transport.
		 */
		mutex_exit(&rs->rs_rdma_lock);
		rdsv3_destroy_mr(mr);
		rdsv3_mr_put(mr);
		mutex_enter(&rs->rs_rdma_lock);
	}
	mutex_exit(&rs->rs_rdma_lock);

	if (rs->rs_transport && rs->rs_transport->flush_mrs)
		rs->rs_transport->flush_mrs();
}

/*
 * Helper function to pin user pages.
 * Compiled out: on this platform the memory pin happens in
 * rds_ib_get_mr() instead (see comment in the body).
 */
#if 0
static int
rds_pin_pages(unsigned long user_addr, unsigned int nr_pages,
    struct page **pages, int write)
{
	unsigned long l_user_addr = user_addr;
	unsigned int l_nr_pages = nr_pages;
	struct page **l_pages = pages;
	int l_write = write;

	/* memory pin in rds_ib_get_mr() */
	return (0);
}
#endif

/*
 * Map the user buffer described by args->vec for RDMA and register it
 * with the transport.  On success the new MR (initial refcount 1, plus
 * one reference taken by the socket's key tree on insert) is published
 * in rs->rs_rdma_keys; the <R_Key,offset> cookie is returned through
 * *cookie_ret and, if args->cookie_addr is set, copied out to the user.
 * If mr_ret is non-NULL, an extra reference is handed to the caller.
 *
 * Returns 0 or a negative errno.
 */
static int
__rdsv3_rdma_map(struct rdsv3_sock *rs, struct rdsv3_get_mr_args *args,
    uint64_t *cookie_ret, struct rdsv3_mr **mr_ret)
{
	struct rdsv3_mr *mr = NULL, *found;
	void *trans_private;
	rdsv3_rdma_cookie_t cookie;
	unsigned int nents = 0;
	int ret;

	if (rs->rs_bound_addr == 0) {
		ret = -ENOTCONN; /* XXX not a great errno */
		goto out;
	}

	if (rs->rs_transport->get_mr == NULL) {
		ret = -EOPNOTSUPP;
		goto out;
	}

	mr = kmem_zalloc(sizeof (struct rdsv3_mr), KM_NOSLEEP);
	if (mr == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	mr->r_refcount = 1;
	RB_CLEAR_NODE(&mr->r_rb_node);
	mr->r_trans = rs->rs_transport;
	mr->r_sock = rs;

	if (args->flags & RDSV3_RDMA_USE_ONCE)
		mr->r_use_once = 1;
	if (args->flags &
	    RDSV3_RDMA_INVALIDATE)
		mr->r_invalidate = 1;
	if (args->flags & RDSV3_RDMA_READWRITE)
		mr->r_write = 1;

	/*
	 * Obtain a transport specific MR. If this succeeds, the
	 * s/g list is now owned by the MR.
	 * Note that dma_map() implies that pending writes are
	 * flushed to RAM, so no dma_sync is needed here.
	 */
	trans_private = rs->rs_transport->get_mr(&args->vec, nents, rs,
	    &mr->r_key);

	if (IS_ERR(trans_private)) {
		ret = PTR_ERR(trans_private);
		goto out;
	}

	mr->r_trans_private = trans_private;

	/*
	 * The user may pass us an unaligned address, but we can only
	 * map page aligned regions. So we keep the offset, and build
	 * a 64bit cookie containing <R_Key, offset> and pass that
	 * around.
	 */
	cookie = rdsv3_rdma_make_cookie(mr->r_key, args->vec.addr & ~PAGEMASK);
	if (cookie_ret)
		*cookie_ret = cookie;

	/*
	 * copy value of cookie to user address at args->cookie_addr
	 */
	if (args->cookie_addr) {
		ret = ddi_copyout((void *)&cookie,
		    (void *)((intptr_t)args->cookie_addr),
		    sizeof (rdsv3_rdma_cookie_t), 0);
		if (ret != 0) {
			/*
			 * MR not yet in the tree; the 'out' path's
			 * rdsv3_mr_put() drops the initial ref and
			 * frees the transport state.
			 */
			ret = -EFAULT;
			goto out;
		}
	}

	RDSV3_DPRINTF5("__rdsv3_rdma_map",
	    "RDS: get_mr mr 0x%p addr 0x%llx key 0x%x",
	    mr, args->vec.addr, mr->r_key);
	/*
	 * Inserting the new MR into the rbtree bumps its
	 * reference count.
	 */
	mutex_enter(&rs->rs_rdma_lock);
	found = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, mr->r_key, mr);
	mutex_exit(&rs->rs_rdma_lock);

	/* A freshly issued R_Key must not collide with an existing one. */
	ASSERT(!(found && found != mr));

	if (mr_ret) {
		/* Hand the caller its own reference. */
		atomic_add_32(&mr->r_refcount, 1);
		*mr_ret = mr;
	}

	ret = 0;
out:
	/* Drop the initial ref; the tree (and mr_ret) keep theirs. */
	if (mr)
		rdsv3_mr_put(mr);
	return (ret);
}

/*
 * Map the user buffer described by optval (a struct rdsv3_get_mr_args)
 * for RDMA via __rdsv3_rdma_map().
 * Returns 0 or a negative errno.
 */
int
rdsv3_get_mr(struct rdsv3_sock *rs, const void *optval, int optlen)
{
	struct rdsv3_get_mr_args args;

	if (optlen != sizeof (struct rdsv3_get_mr_args))
		return (-EINVAL);

#if 1
	/* optval is already an in-kernel copy, so bcopy (not copyin) */
	bcopy((struct rdsv3_get_mr_args *)optval, &args,
	    sizeof (struct rdsv3_get_mr_args));
#else
	if (ddi_copyin(optval, &args, optlen, 0))
		return (-EFAULT);
#endif

	return (__rdsv3_rdma_map(rs, &args, NULL, NULL));
}

/*
 * Like rdsv3_get_mr(), but takes a struct rdsv3_get_mr_for_dest_args;
 * currently only the get_mr subset of the arguments is used (see TODO
 * below).
 * Returns 0 or a negative errno.
 */
int
rdsv3_get_mr_for_dest(struct rdsv3_sock *rs, const void *optval,
    int optlen)
{
	struct rdsv3_get_mr_for_dest_args args;
	struct rdsv3_get_mr_args new_args;

	if (optlen != sizeof (struct rdsv3_get_mr_for_dest_args))
		return (-EINVAL);

#if 1
	/* optval is already an in-kernel copy, so bcopy (not copyin) */
	bcopy((struct rdsv3_get_mr_for_dest_args *)optval, &args,
	    sizeof (struct rdsv3_get_mr_for_dest_args));
#else
	if (ddi_copyin(optval, &args, optlen, 0))
		return (-EFAULT);
#endif

	/*
	 * Initially, just behave like get_mr().
	 * TODO: Implement get_mr as wrapper around this
	 * and deprecate it.
 */
	new_args.vec = args.vec;
	new_args.cookie_addr = args.cookie_addr;
	new_args.flags = args.flags;

	return (__rdsv3_rdma_map(rs, &new_args, NULL, NULL));
}

/*
 * Free the MR indicated by the given R_Key.
 * A zero cookie is a special case that flushes all unused MRs instead.
 * Returns 0 or a negative errno.
 */
int
rdsv3_free_mr(struct rdsv3_sock *rs, const void *optval, int optlen)
{
	struct rdsv3_free_mr_args args;
	struct rdsv3_mr *mr;

	if (optlen != sizeof (struct rdsv3_free_mr_args))
		return (-EINVAL);

#if 1
	/* optval is already an in-kernel copy, so bcopy (not copyin) */
	bcopy((struct rdsv3_free_mr_args *)optval, &args,
	    sizeof (struct rdsv3_free_mr_args));
#else
	if (ddi_copyin((struct rdsv3_free_mr_args *)optval, &args,
	    sizeof (struct rdsv3_free_mr_args), 0))
		return (-EFAULT);
#endif

	/* Special case - a null cookie means flush all unused MRs */
	if (args.cookie == 0) {
		if (!rs->rs_transport || !rs->rs_transport->flush_mrs)
			return (-EINVAL);
		rs->rs_transport->flush_mrs();
		return (0);
	}

	/*
	 * Look up the MR given its R_key and remove it from the rbtree
	 * so nobody else finds it.
	 * This should also prevent races with rdsv3_rdma_unuse.
	 */
	mutex_enter(&rs->rs_rdma_lock);
	mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys,
	    rdsv3_rdma_cookie_key(args.cookie), NULL);
	if (mr) {
		avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
		RB_CLEAR_NODE(&mr->r_rb_node);
		if (args.flags & RDSV3_RDMA_INVALIDATE)
			mr->r_invalidate = 1;
	}
	mutex_exit(&rs->rs_rdma_lock);

	if (!mr)
		return (-EINVAL);

	/*
	 * call rdsv3_destroy_mr() ourselves so that we're sure it's done
	 * by time we return. If we let rdsv3_mr_put() do it it might not
	 * happen until someone else drops their ref.
	 */
	rdsv3_destroy_mr(mr);
	rdsv3_mr_put(mr);
	return (0);
}

/*
 * This is called when we receive an extension header that
 * tells us this MR was used.
 * It allows us to implement
 * use_once semantics
 */
void
rdsv3_rdma_unuse(struct rdsv3_sock *rs, uint32_t r_key, int force)
{
	struct rdsv3_mr *mr;
	int zot_me = 0;	/* set when we unlinked the MR and must destroy it */

	RDSV3_DPRINTF4("rdsv3_rdma_unuse", "Enter rkey: 0x%x", r_key);

	mutex_enter(&rs->rs_rdma_lock);
	mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL);
	if (mr && (mr->r_use_once || force)) {
		/*
		 * Unlink so nobody else can find it; we inherit the
		 * tree's reference rather than taking a new one.
		 */
		avl_remove(&rs->rs_rdma_keys, &mr->r_rb_node);
		RB_CLEAR_NODE(&mr->r_rb_node);
		zot_me = 1;
	} else if (mr)
		atomic_add_32(&mr->r_refcount, 1);
	mutex_exit(&rs->rs_rdma_lock);

	/*
	 * May have to issue a dma_sync on this memory region.
	 * Note we could avoid this if the operation was a RDMA READ,
	 * but at this point we can't tell.
	 */
	if (mr != NULL) {
		RDSV3_DPRINTF4("rdsv3_rdma_unuse", "mr: %p zot_me %d",
		    mr, zot_me);
		if (mr->r_trans->sync_mr)
			mr->r_trans->sync_mr(mr->r_trans_private,
			    DMA_FROM_DEVICE);

		/*
		 * If the MR was marked as invalidate, this will
		 * trigger an async flush.
		 */
		if (zot_me)
			rdsv3_destroy_mr(mr);
		rdsv3_mr_put(mr);
	}
	RDSV3_DPRINTF4("rdsv3_rdma_unuse", "Return");
}

/*
 * Release everything held by an rdma_op: unlock the pinned user pages,
 * free the optional notifier, then the op itself.
 */
void
rdsv3_rdma_free_op(struct rdsv3_rdma_op *ro)
{
	unsigned int i;

	/* deallocate RDMA resources on rdsv3_message */

	for (i = 0; i < ro->r_nents; i++) {
		ddi_umem_unlock(ro->r_rdma_sg[i].umem_cookie);
	}

	if (ro->r_notifier)
		kmem_free(ro->r_notifier, sizeof (*ro->r_notifier));
	kmem_free(ro, sizeof (*ro));
}

extern struct umem_callback_ops rdsv3_umem_cbops;
/*
 * args is a pointer to an in-kernel copy in the sendmsg cmsg.
 */
/*
 * Build an rdsv3_rdma_op from the user-supplied rdsv3_rdma_args:
 * validate the request, allocate the op (with one trailing
 * r_rdma_sg[] slot per local iovec), optionally allocate a notifier,
 * record the remote <R_Key,offset>, and pin each local iovec with
 * umem_lockmemory().
 * Returns the op on success, or ERR_PTR(-errno); on failure the op
 * is released via rdsv3_rdma_free_op().
 */
static struct rdsv3_rdma_op *
rdsv3_rdma_prepare(struct rdsv3_sock *rs, struct rdsv3_rdma_args *args)
{
	struct rdsv3_iovec vec;
	struct rdsv3_rdma_op *op = NULL;
	unsigned int nr_bytes;
	struct rdsv3_iovec *local_vec;
	unsigned int nr;
	unsigned int i;
	ddi_umem_cookie_t umem_cookie;
	size_t umem_len;
	caddr_t umem_addr;
	int umem_flags;
	int ret;

	if (rs->rs_bound_addr == 0) {
		ret = -ENOTCONN; /* XXX not a great errno */
		goto out;
	}

	/* i below is an unsigned int, so cap the iovec count to match */
	if (args->nr_local > (uint64_t)UINT_MAX) {
		ret = -EMSGSIZE;
		goto out;
	}

	op = kmem_zalloc(offsetof(struct rdsv3_rdma_op,
	    r_rdma_sg[args->nr_local]), KM_NOSLEEP);
	if (op == NULL) {
		ret = -ENOMEM;
		goto out;
	}

	op->r_write = !!(args->flags & RDSV3_RDMA_READWRITE);
	op->r_fence = !!(args->flags & RDSV3_RDMA_FENCE);
	op->r_notify = !!(args->flags & RDSV3_RDMA_NOTIFY_ME);
	op->r_recverr = rs->rs_recverr;

	if (op->r_notify || op->r_recverr) {
		/*
		 * We allocate an uninitialized notifier here, because
		 * we don't want to do that in the completion handler. We
		 * would have to use GFP_ATOMIC there, and don't want to deal
		 * with failed allocations.
		 */
		op->r_notifier = kmem_alloc(sizeof (struct rdsv3_notifier),
		    KM_NOSLEEP);
		if (!op->r_notifier) {
			ret = -ENOMEM;
			goto out;
		}
		op->r_notifier->n_user_token = args->user_token;
		op->r_notifier->n_status = RDSV3_RDMA_SUCCESS;
	}

	/*
	 * The cookie contains the R_Key of the remote memory region, and
	 * optionally an offset into it. This is how we implement RDMA into
	 * unaligned memory.
530 * When setting up the RDMA, we need to add that offset to the 531 * destination address (which is really an offset into the MR) 532 * FIXME: We may want to move this into ib_rdma.c 533 */ 534 op->r_key = rdsv3_rdma_cookie_key(args->cookie); 535 op->r_remote_addr = args->remote_vec.addr + 536 rdsv3_rdma_cookie_offset(args->cookie); 537 538 nr_bytes = 0; 539 540 RDSV3_DPRINTF5("rdsv3_rdma_prepare", 541 "RDS: rdma prepare nr_local %llu rva %llx rkey %x", 542 (unsigned long long)args->nr_local, 543 (unsigned long long)args->remote_vec.addr, 544 op->r_key); 545 546 local_vec = (struct rdsv3_iovec *)(unsigned long) args->local_vec_addr; 547 548 /* pin the scatter list of user buffers */ 549 for (i = 0; i < args->nr_local; i++) { 550 if (ddi_copyin(&local_vec[i], &vec, 551 sizeof (struct rdsv3_iovec), 0)) { 552 ret = -EFAULT; 553 goto out; 554 } 555 556 nr = rdsv3_pages_in_vec(&vec); 557 if (nr == 0) { 558 RDSV3_DPRINTF2("rdsv3_rdma_prepare", 559 "rdsv3_pages_in_vec returned 0"); 560 ret = -EINVAL; 561 goto out; 562 } 563 564 rs->rs_user_addr = vec.addr; 565 rs->rs_user_bytes = vec.bytes; 566 567 /* pin user memory pages */ 568 umem_len = ptob(btopr(vec.bytes + 569 ((uintptr_t)vec.addr & PAGEOFFSET))); 570 umem_addr = (caddr_t)((uintptr_t)vec.addr & ~PAGEOFFSET); 571 umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ | 572 DDI_UMEMLOCK_LONGTERM); 573 ret = umem_lockmemory(umem_addr, umem_len, umem_flags, 574 &umem_cookie, &rdsv3_umem_cbops, NULL); 575 if (ret != 0) { 576 RDSV3_DPRINTF2("rdsv3_rdma_prepare", 577 "umem_lockmemory() returned %d", ret); 578 ret = -EFAULT; 579 goto out; 580 } 581 op->r_rdma_sg[i].umem_cookie = umem_cookie; 582 op->r_rdma_sg[i].iovec = vec; 583 nr_bytes += vec.bytes; 584 585 RDSV3_DPRINTF5("rdsv3_rdma_prepare", 586 "RDS: nr_bytes %u nr %u vec.bytes %llu vec.addr %llx", 587 nr_bytes, nr, vec.bytes, vec.addr); 588 } 589 op->r_nents = i; 590 591 if (nr_bytes > args->remote_vec.bytes) { 592 RDSV3_DPRINTF2("rdsv3_rdma_prepare", 593 "RDS 
nr_bytes %u remote_bytes %u do not match",
		    nr_bytes, (unsigned int) args->remote_vec.bytes);
		ret = -EINVAL;
		goto out;
	}
	op->r_bytes = nr_bytes;

	ret = 0;
out:
	if (ret) {
		if (op)
			rdsv3_rdma_free_op(op);
		/* Encode the errno in the returned pointer. */
		op = ERR_PTR(ret);
	}
	return (op);
}

/*
 * The application asks for a RDMA transfer.
 * Extract all arguments and set up the rdma_op.
 * Returns 0 or a negative errno.
 */
int
rdsv3_cmsg_rdma_args(struct rdsv3_sock *rs, struct rdsv3_message *rm,
    struct cmsghdr *cmsg)
{
	struct rdsv3_rdma_op *op;
	struct rdsv3_rdma_args *ap;

	if (cmsg->cmsg_len < CMSG_LEN(sizeof (struct rdsv3_rdma_args)) ||
	    rm->m_rdma_op != NULL)
		return (-EINVAL);

	/* uint64_t alignment on struct rdsv3_get_mr_args */
	ap = (struct rdsv3_rdma_args *)kmem_alloc(cmsg->cmsg_len, KM_SLEEP);
	bcopy(CMSG_DATA(cmsg), ap, cmsg->cmsg_len);
	op = rdsv3_rdma_prepare(rs, ap);
	/* the aligned copy is only needed for the prepare call */
	kmem_free(ap, cmsg->cmsg_len);
	if (IS_ERR(op))
		return (PTR_ERR(op));
	rdsv3_stats_inc(s_send_rdma);
	rm->m_rdma_op = op;
	return (0);
}

/*
 * The application wants us to pass an RDMA destination (aka MR)
 * to the remote
 */
int
rdsv3_cmsg_rdma_dest(struct rdsv3_sock *rs, struct rdsv3_message *rm,
    struct cmsghdr *cmsg)
{
	struct rdsv3_mr *mr;
	uint32_t r_key;
	int err = 0;

	if (cmsg->cmsg_len < CMSG_LEN(sizeof (rdsv3_rdma_cookie_t)) ||
	    rm->m_rdma_cookie != 0)
		return (-EINVAL);

	(void) memcpy(&rm->m_rdma_cookie, CMSG_DATA(cmsg),
	    sizeof (rm->m_rdma_cookie));

	/*
	 * We are reusing a previously mapped MR here. Most likely, the
	 * application has written to the buffer, so we need to explicitly
	 * flush those writes to RAM. Otherwise the HCA may not see them
	 * when doing a DMA from that buffer.
661 */ 662 r_key = rdsv3_rdma_cookie_key(rm->m_rdma_cookie); 663 664 mutex_enter(&rs->rs_rdma_lock); 665 mr = rdsv3_mr_tree_walk(&rs->rs_rdma_keys, r_key, NULL); 666 if (mr == NULL) 667 err = -EINVAL; /* invalid r_key */ 668 else 669 atomic_add_32(&mr->r_refcount, 1); 670 mutex_exit(&rs->rs_rdma_lock); 671 672 if (mr) { 673 mr->r_trans->sync_mr(mr->r_trans_private, DMA_TO_DEVICE); 674 rm->m_rdma_mr = mr; 675 } 676 return (err); 677 } 678 679 /* 680 * The application passes us an address range it wants to enable RDMA 681 * to/from. We map the area, and save the <R_Key,offset> pair 682 * in rm->m_rdma_cookie. This causes it to be sent along to the peer 683 * in an extension header. 684 */ 685 int 686 rdsv3_cmsg_rdma_map(struct rdsv3_sock *rs, struct rdsv3_message *rm, 687 struct cmsghdr *cmsg) 688 { 689 struct rdsv3_get_mr_args *mrp; 690 int status; 691 692 if (cmsg->cmsg_len < CMSG_LEN(sizeof (struct rdsv3_get_mr_args)) || 693 rm->m_rdma_cookie != 0) 694 return (-EINVAL); 695 696 /* uint64_t alignment on struct rdsv3_get_mr_args */ 697 mrp = (struct rdsv3_get_mr_args *)kmem_alloc(cmsg->cmsg_len, KM_SLEEP); 698 bcopy(CMSG_DATA(cmsg), mrp, cmsg->cmsg_len); 699 status = __rdsv3_rdma_map(rs, mrp, &rm->m_rdma_cookie, &rm->m_rdma_mr); 700 kmem_free(mrp, cmsg->cmsg_len); 701 return (status); 702 } 703