1 /*
2  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
3  */
4 
5 /*
6  * This file contains code imported from the OFED rds source file ib_rdma.c
7  * Oracle elects to have and use the contents of ib_rdma.c under and governed
8  * by the OpenIB.org BSD license (see below for full license text). However,
9  * the following notice accompanied the original version of this file:
10  */
11 
12 /*
13  * Copyright (c) 2006 Oracle.  All rights reserved.
14  *
15  * This software is available to you under a choice of one of two
16  * licenses.  You may choose to be licensed under the terms of the GNU
17  * General Public License (GPL) Version 2, available from the file
18  * COPYING in the main directory of this source tree, or the
19  * OpenIB.org BSD license below:
20  *
21  *     Redistribution and use in source and binary forms, with or
22  *     without modification, are permitted provided that the following
23  *     conditions are met:
24  *
25  *      - Redistributions of source code must retain the above
26  *        copyright notice, this list of conditions and the following
27  *        disclaimer.
28  *
29  *      - Redistributions in binary form must reproduce the above
30  *        copyright notice, this list of conditions and the following
31  *        disclaimer in the documentation and/or other materials
32  *        provided with the distribution.
33  *
34  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
35  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
36  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
37  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
38  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
39  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
40  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
41  * SOFTWARE.
42  *
43  */
44 #include <sys/rds.h>
45 #include <netinet/in.h>
46 
47 #include <sys/ib/clients/rdsv3/rdsv3.h>
48 #include <sys/ib/clients/rdsv3/rdma.h>
49 #include <sys/ib/clients/rdsv3/ib.h>
50 #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
51 
52 /*
53  * This is stored as mr->r_trans_private.
54  */
55 struct rdsv3_ib_mr {
56 	list_node_t		m_obj; /* list obj of rdsv3_fmr_pool list */
57 	struct rdsv3_ib_device	*m_device;
58 	struct rdsv3_fmr_pool	*m_pool; /* hca fmr pool */
59 	unsigned int		m_inval:1;
60 
61 	struct rdsv3_scatterlist	*sg;
62 	unsigned int		sg_len;
63 	uint64_t		*dma;
64 	int			sg_dma_len;
65 
66 	/* DDI pinned memory */
67 	ddi_umem_cookie_t	umem_cookie;
68 	/* IBTF type definitions */
69 	ibt_hca_hdl_t		rc_hca_hdl;
70 	ibt_fmr_pool_hdl_t	fmr_pool_hdl;
71 	ibt_ma_hdl_t		rc_ma_hdl;
72 	ibt_mr_hdl_t		rc_fmr_hdl;
73 	ibt_pmr_desc_t		rc_mem_desc;
74 };
75 
76 /*
77  * delayed freed fmr's
78  */
79 struct rdsv3_fmr_pool {
80 	list_t			f_list;	/* list of freed mr */
81 	kmutex_t		f_lock; /* lock of fmr pool */
82 	int32_t			f_listcnt;
83 };
84 
85 static int rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device *rds_ibdev,
86 	ibt_fmr_pool_hdl_t pool_hdl, int free_all);
87 static void rdsv3_ib_teardown_mr(struct rdsv3_ib_mr *ibmr);
88 static void rdsv3_ib_mr_pool_flush_worker(struct rdsv3_work_s *work);
89 static struct rdsv3_ib_mr *rdsv3_ib_alloc_fmr(struct rdsv3_ib_device
90 	*rds_ibdev);
91 static int rdsv3_ib_map_fmr(struct rdsv3_ib_device *rds_ibdev,
92 	struct rdsv3_ib_mr *ibmr, struct buf *bp, unsigned int nents);
93 
94 static struct rdsv3_ib_device *
rdsv3_ib_get_device(uint32_be_t ipaddr)95 rdsv3_ib_get_device(uint32_be_t ipaddr)
96 {
97 	struct rdsv3_ib_device *rds_ibdev;
98 	struct rdsv3_ib_ipaddr *i_ipaddr;
99 
100 	RDSV3_DPRINTF4("rdsv3_ib_get_device", "Enter: ipaddr: 0x%x", ipaddr);
101 
102 	RDSV3_FOR_EACH_LIST_NODE(rds_ibdev, &rdsv3_ib_devices, list) {
103 		rw_enter(&rds_ibdev->rwlock, RW_READER);
104 		RDSV3_FOR_EACH_LIST_NODE(i_ipaddr, &rds_ibdev->ipaddr_list,
105 		    list) {
106 			if (i_ipaddr->ipaddr == ipaddr) {
107 				rw_exit(&rds_ibdev->rwlock);
108 				return (rds_ibdev);
109 			}
110 		}
111 		rw_exit(&rds_ibdev->rwlock);
112 	}
113 
114 	RDSV3_DPRINTF4("rdsv3_ib_get_device", "Return: ipaddr: 0x%x", ipaddr);
115 
116 	return (NULL);
117 }
118 
119 static int
rdsv3_ib_add_ipaddr(struct rdsv3_ib_device * rds_ibdev,uint32_be_t ipaddr)120 rdsv3_ib_add_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
121 {
122 	struct rdsv3_ib_ipaddr *i_ipaddr;
123 
124 	RDSV3_DPRINTF4("rdsv3_ib_add_ipaddr", "rds_ibdev: %p ipaddr: %x",
125 	    rds_ibdev, ipaddr);
126 
127 	i_ipaddr = kmem_alloc(sizeof (*i_ipaddr), KM_NOSLEEP);
128 	if (!i_ipaddr)
129 		return (-ENOMEM);
130 
131 	i_ipaddr->ipaddr = ipaddr;
132 
133 	rw_enter(&rds_ibdev->rwlock, RW_WRITER);
134 	list_insert_tail(&rds_ibdev->ipaddr_list, i_ipaddr);
135 	rw_exit(&rds_ibdev->rwlock);
136 
137 	return (0);
138 }
139 
140 static void
rdsv3_ib_remove_ipaddr(struct rdsv3_ib_device * rds_ibdev,uint32_be_t ipaddr)141 rdsv3_ib_remove_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
142 {
143 	struct rdsv3_ib_ipaddr *i_ipaddr, *next;
144 	struct rdsv3_ib_ipaddr *to_free = NULL;
145 
146 	RDSV3_DPRINTF4("rdsv3_ib_remove_ipaddr", "rds_ibdev: %p, ipaddr: %x",
147 	    rds_ibdev, ipaddr);
148 
149 	rw_enter(&rds_ibdev->rwlock, RW_WRITER);
150 	RDSV3_FOR_EACH_LIST_NODE_SAFE(i_ipaddr, next, &rds_ibdev->ipaddr_list,
151 	    list) {
152 		if (i_ipaddr->ipaddr == ipaddr) {
153 			list_remove_node(&i_ipaddr->list);
154 			to_free = i_ipaddr;
155 			break;
156 		}
157 	}
158 	rw_exit(&rds_ibdev->rwlock);
159 
160 	if (to_free) {
161 		kmem_free(i_ipaddr, sizeof (*i_ipaddr));
162 	}
163 
164 	RDSV3_DPRINTF4("rdsv3_ib_remove_ipaddr",
165 	    "Return: rds_ibdev: %p, ipaddr: %x", rds_ibdev, ipaddr);
166 }
167 
168 int
rdsv3_ib_update_ipaddr(struct rdsv3_ib_device * rds_ibdev,uint32_be_t ipaddr)169 rdsv3_ib_update_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
170 {
171 	struct rdsv3_ib_device *rds_ibdev_old;
172 
173 	RDSV3_DPRINTF4("rdsv3_ib_update_ipaddr", "rds_ibdev: %p, ipaddr: %x",
174 	    rds_ibdev, ipaddr);
175 
176 	rds_ibdev_old = rdsv3_ib_get_device(ipaddr);
177 	if (rds_ibdev_old)
178 		rdsv3_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
179 
180 	return (rdsv3_ib_add_ipaddr(rds_ibdev, ipaddr));
181 }
182 
183 void
rdsv3_ib_add_conn(struct rdsv3_ib_device * rds_ibdev,struct rdsv3_connection * conn)184 rdsv3_ib_add_conn(struct rdsv3_ib_device *rds_ibdev,
185     struct rdsv3_connection *conn)
186 {
187 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
188 
189 	RDSV3_DPRINTF4("rdsv3_ib_add_conn", "rds_ibdev: %p, conn: %p",
190 	    rds_ibdev, conn);
191 
192 	/* conn was previously on the nodev_conns_list */
193 	mutex_enter(&ib_nodev_conns_lock);
194 	ASSERT(!list_is_empty(&ib_nodev_conns));
195 	ASSERT(list_link_active(&ic->ib_node));
196 	list_remove_node(&ic->ib_node);
197 
198 	mutex_enter(&rds_ibdev->spinlock);
199 	list_insert_tail(&rds_ibdev->conn_list, ic);
200 	ic->i_on_dev_list = B_TRUE;
201 	mutex_exit(&rds_ibdev->spinlock);
202 	mutex_exit(&ib_nodev_conns_lock);
203 }
204 
205 void
rdsv3_ib_remove_conn(struct rdsv3_ib_device * rds_ibdev,struct rdsv3_connection * conn)206 rdsv3_ib_remove_conn(struct rdsv3_ib_device *rds_ibdev,
207     struct rdsv3_connection *conn)
208 {
209 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
210 
211 	RDSV3_DPRINTF4("rdsv3_ib_remove_conn", "rds_ibdev: %p, conn: %p",
212 	    rds_ibdev, conn);
213 
214 	/* place conn on nodev_conns_list */
215 	mutex_enter(&ib_nodev_conns_lock);
216 
217 	mutex_enter(&rds_ibdev->spinlock);
218 	ASSERT(list_link_active(&ic->ib_node));
219 	list_remove_node(&ic->ib_node);
220 	ic->i_on_dev_list = B_FALSE;
221 	mutex_exit(&rds_ibdev->spinlock);
222 
223 	list_insert_tail(&ib_nodev_conns, ic);
224 
225 	mutex_exit(&ib_nodev_conns_lock);
226 
227 	RDSV3_DPRINTF4("rdsv3_ib_remove_conn",
228 	    "Return: rds_ibdev: %p, conn: %p", rds_ibdev, conn);
229 }
230 
231 void
__rdsv3_ib_destroy_conns(struct list * list,kmutex_t * list_lock)232 __rdsv3_ib_destroy_conns(struct list *list, kmutex_t *list_lock)
233 {
234 	struct rdsv3_ib_connection *ic, *_ic;
235 	list_t tmp_list;
236 
237 	RDSV3_DPRINTF4("__rdsv3_ib_destroy_conns", "Enter: list: %p", list);
238 
239 	/* avoid calling conn_destroy with irqs off */
240 	mutex_enter(list_lock);
241 	list_splice(list, &tmp_list);
242 	mutex_exit(list_lock);
243 
244 	RDSV3_FOR_EACH_LIST_NODE_SAFE(ic, _ic, &tmp_list, ib_node) {
245 		rdsv3_conn_destroy(ic->conn);
246 	}
247 
248 	RDSV3_DPRINTF4("__rdsv3_ib_destroy_conns", "Return: list: %p", list);
249 }
250 
251 void
rdsv3_ib_destroy_mr_pool(struct rdsv3_ib_device * rds_ibdev)252 rdsv3_ib_destroy_mr_pool(struct rdsv3_ib_device *rds_ibdev)
253 {
254 	struct rdsv3_fmr_pool *pool = rds_ibdev->fmr_pool;
255 
256 	RDSV3_DPRINTF4("rdsv3_ib_destroy_mr_pool", "Enter: ibdev: %p",
257 	    rds_ibdev);
258 
259 	if (rds_ibdev->fmr_pool_hdl == NULL)
260 		return;
261 
262 	if (pool) {
263 		list_destroy(&pool->f_list);
264 		kmem_free((void *) pool, sizeof (*pool));
265 	}
266 
267 	(void) rdsv3_ib_flush_mr_pool(rds_ibdev, rds_ibdev->fmr_pool_hdl, 1);
268 	(void) ibt_destroy_fmr_pool(ib_get_ibt_hca_hdl(rds_ibdev->dev),
269 	    rds_ibdev->fmr_pool_hdl);
270 }
271 
272 #define	IB_FMR_MAX_BUF_SIZE	0x1000000	/* 16MB max buf */
273 int
rdsv3_ib_create_mr_pool(struct rdsv3_ib_device * rds_ibdev)274 rdsv3_ib_create_mr_pool(struct rdsv3_ib_device *rds_ibdev)
275 {
276 	uint_t h_page_sz;
277 	ibt_fmr_pool_attr_t fmr_attr;
278 	ibt_status_t ibt_status;
279 	struct rdsv3_fmr_pool *pool;
280 
281 	RDSV3_DPRINTF4("rdsv3_ib_create_mr_pool",
282 	    "Enter: ibdev: %p", rds_ibdev);
283 
284 	pool = (struct rdsv3_fmr_pool *)kmem_zalloc(sizeof (*pool), KM_NOSLEEP);
285 	if (pool == NULL) {
286 		return (-ENOMEM);
287 	}
288 
289 	/* setup FMR pool attributes */
290 	h_page_sz = rds_ibdev->hca_attr.hca_page_sz * 1024;
291 
292 	fmr_attr.fmr_max_pages_per_fmr = (IB_FMR_MAX_BUF_SIZE / h_page_sz) + 2;
293 	fmr_attr.fmr_pool_size = RDSV3_FMR_POOL_SIZE;
294 	fmr_attr.fmr_dirty_watermark = 128;
295 	fmr_attr.fmr_cache = B_FALSE;
296 	fmr_attr.fmr_flags = IBT_MR_NOSLEEP  | IBT_MR_ENABLE_LOCAL_WRITE |
297 	    IBT_MR_ENABLE_REMOTE_WRITE | IBT_MR_ENABLE_REMOTE_READ;
298 	fmr_attr.fmr_page_sz = h_page_sz;
299 	fmr_attr.fmr_func_hdlr = NULL;
300 	fmr_attr.fmr_func_arg = (void *) NULL;
301 
302 	/* create the FMR pool */
303 	ibt_status = ibt_create_fmr_pool(rds_ibdev->ibt_hca_hdl,
304 	    rds_ibdev->pd->ibt_pd, &fmr_attr, &rds_ibdev->fmr_pool_hdl);
305 	if (ibt_status != IBT_SUCCESS) {
306 		kmem_free((void *) pool, sizeof (*pool));
307 		rds_ibdev->fmr_pool = NULL;
308 		return (-ENOMEM);
309 	}
310 
311 	list_create(&pool->f_list, sizeof (struct rdsv3_ib_mr),
312 	    offsetof(struct rdsv3_ib_mr, m_obj));
313 	mutex_init(&pool->f_lock, NULL, MUTEX_DRIVER, NULL);
314 	rds_ibdev->fmr_pool = pool;
315 	rds_ibdev->max_fmrs = fmr_attr.fmr_pool_size;
316 	rds_ibdev->fmr_message_size = fmr_attr.fmr_max_pages_per_fmr;
317 
318 	RDSV3_DPRINTF2("rdsv3_ib_create_mr_pool",
319 	    "Exit: ibdev: %p fmr_pool: %p", rds_ibdev, pool);
320 	return (0);
321 }
322 
323 void
rdsv3_ib_get_mr_info(struct rdsv3_ib_device * rds_ibdev,struct rds_info_rdma_connection * iinfo)324 rdsv3_ib_get_mr_info(struct rdsv3_ib_device *rds_ibdev,
325 	struct rds_info_rdma_connection *iinfo)
326 {
327 	iinfo->rdma_mr_max = rds_ibdev->max_fmrs;
328 	iinfo->rdma_mr_size = rds_ibdev->fmr_message_size;
329 }
330 
331 void *
rdsv3_ib_get_mr(struct rds_iovec * args,unsigned long nents,struct rdsv3_sock * rs,uint32_t * key_ret)332 rdsv3_ib_get_mr(struct rds_iovec *args, unsigned long nents,
333 	struct rdsv3_sock *rs, uint32_t *key_ret)
334 {
335 	struct rdsv3_ib_device *rds_ibdev;
336 	struct rdsv3_ib_mr *ibmr = NULL;
337 	ddi_umem_cookie_t umem_cookie;
338 	size_t umem_len;
339 	caddr_t umem_addr;
340 	int ret;
341 	struct buf *bp;
342 
343 	RDSV3_DPRINTF4("rdsv3_ib_get_mr", "Enter: args.addr: %p", args->addr);
344 
345 	rds_ibdev = rdsv3_ib_get_device(rs->rs_bound_addr);
346 
347 	if (rds_ibdev == NULL)
348 		return (void *)(PTR_ERR(-EFAULT));
349 
350 	ibmr = rdsv3_ib_alloc_fmr(rds_ibdev);
351 	if (IS_ERR(ibmr))
352 		return (ibmr);
353 
354 	/* pin user memory pages */
355 	umem_len   = ptob(btopr(args->bytes +
356 	    ((uintptr_t)args->addr & PAGEOFFSET)));
357 	umem_addr  = (caddr_t)((uintptr_t)args->addr & ~PAGEOFFSET);
358 	ret = umem_lockmemory(umem_addr, umem_len,
359 	    DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ,
360 	    &umem_cookie, NULL, NULL);
361 	if (ret != 0) {
362 		kmem_free((void *) ibmr, sizeof (*ibmr));
363 		ibmr = ERR_PTR(-ret);
364 		return (ibmr);
365 	}
366 
367 	/* transpose umem_cookie to buf structure for rdsv3_ib_map_fmr() */
368 	bp = ddi_umem_iosetup(umem_cookie, 0, umem_len,
369 	    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
370 
371 	ret = rdsv3_ib_map_fmr(rds_ibdev, ibmr, bp, nents);
372 	freerbuf(bp);	/* free bp */
373 	if (ret == 0) {
374 		ibmr->umem_cookie = umem_cookie;
375 		*key_ret = (uint32_t)ibmr->rc_mem_desc.pmd_rkey;
376 		ibmr->m_device = rds_ibdev;
377 		ibmr->m_pool = rds_ibdev->fmr_pool;
378 		RDSV3_DPRINTF4("rdsv3_ib_get_mr",
379 		    "Return: ibmr: %p umem_cookie %p", ibmr, ibmr->umem_cookie);
380 		return (ibmr);
381 	} else { /* error return */
382 		RDSV3_DPRINTF2("rdsv3_ib_get_mr", "map_fmr failed (errno=%d)\n",
383 		    ret);
384 		ddi_umem_unlock(umem_cookie);
385 		kmem_free((void *)ibmr, sizeof (*ibmr));
386 		return (ERR_PTR(ret));
387 	}
388 }
389 
390 static struct rdsv3_ib_mr *
rdsv3_ib_alloc_fmr(struct rdsv3_ib_device * rds_ibdev)391 rdsv3_ib_alloc_fmr(struct rdsv3_ib_device *rds_ibdev)
392 {
393 	struct rdsv3_ib_mr *ibmr;
394 
395 	RDSV3_DPRINTF4("rdsv3_ib_alloc_fmr", "Enter: ibdev: %p", rds_ibdev);
396 
397 	if (rds_ibdev->fmr_pool_hdl) {
398 		ibmr = (struct rdsv3_ib_mr *)kmem_zalloc(sizeof (*ibmr),
399 		    KM_SLEEP);
400 		ibmr->rc_hca_hdl = ib_get_ibt_hca_hdl(rds_ibdev->dev);
401 		ibmr->fmr_pool_hdl = rds_ibdev->fmr_pool_hdl;
402 		return (ibmr);
403 	}
404 	return (struct rdsv3_ib_mr *)(PTR_ERR(-ENOMEM));
405 }
406 
407 static int
rdsv3_ib_map_fmr(struct rdsv3_ib_device * rds_ibdev,struct rdsv3_ib_mr * ibmr,struct buf * bp,unsigned int nents)408 rdsv3_ib_map_fmr(struct rdsv3_ib_device *rds_ibdev, struct rdsv3_ib_mr *ibmr,
409 	struct buf *bp, unsigned int nents)
410 {
411 	ibt_va_attr_t va_attr;
412 	ibt_reg_req_t reg_req;
413 	uint_t paddr_list_len;
414 	uint_t page_sz;
415 	ibt_status_t ibt_status;
416 	/* LINTED E_FUNC_SET_NOT_USED */
417 	unsigned int l_nents = nents;
418 
419 	RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "Enter: ibmr: %p", ibmr);
420 	RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "buf addr: %p", bp->b_un.b_addr);
421 
422 	/* setup ibt_map_mem_area attributes */
423 	bzero(&va_attr, sizeof (ibt_va_attr_t));
424 	va_attr.va_buf   = bp;
425 	va_attr.va_flags = IBT_VA_FMR | IBT_VA_BUF;
426 
427 	page_sz = rds_ibdev->hca_attr.hca_page_sz * 1024; /* in kbytes */
428 	paddr_list_len = (bp->b_bcount / page_sz) + 2; /* start + end pg */
429 
430 	/* map user buffer to HCA address */
431 	ibt_status = ibt_map_mem_area(ibmr->rc_hca_hdl,
432 	    &va_attr, paddr_list_len, &reg_req, &ibmr->rc_ma_hdl);
433 	if (ibt_status != IBT_SUCCESS) {
434 		return (-ENOMEM);
435 	}
436 
437 	/*  use a free entry from FMR pool to register the specified memory */
438 	ibt_status = ibt_register_physical_fmr(ibmr->rc_hca_hdl,
439 	    ibmr->fmr_pool_hdl,
440 	    &reg_req.fn_arg, &ibmr->rc_fmr_hdl, &ibmr->rc_mem_desc);
441 	if (ibt_status != IBT_SUCCESS) {
442 		RDSV3_DPRINTF2("rdsv3_ib_map_fmr", "reg_phy_fmr failed %d",
443 		    ibt_status);
444 		(void) ibt_unmap_mem_area(ibmr->rc_hca_hdl,
445 		    ibmr->rc_ma_hdl);
446 		if (ibt_status == IBT_INSUFF_RESOURCE) {
447 			return (-ENOBUFS);
448 		}
449 		return (-EINVAL);
450 	}
451 	RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "Return: ibmr: %p rkey: 0x%x",
452 	    ibmr, (uint32_t)ibmr->rc_mem_desc.pmd_rkey);
453 	return (0);
454 }
455 
/*
 * MR sync entry point.  Performs no synchronisation; both arguments are
 * only copied into locals so that lint does not flag them as unused, and
 * a debug trace is emitted.
 */
void
rdsv3_ib_sync_mr(void *trans_private, int direction)
{
	/* LINTED E_FUNC_SET_NOT_USED */
	void *unused_private = trans_private;
	/* LINTED E_FUNC_SET_NOT_USED */
	int unused_direction = direction;

	/* FMR Sync not needed in Solaris on PCI-ex systems */
	RDSV3_DPRINTF4("rdsv3_ib_sync_mr", "Enter:");
}
468 
469 void
rdsv3_ib_flush_mrs(void)470 rdsv3_ib_flush_mrs(void)
471 {
472 	struct rdsv3_ib_device *rds_ibdev;
473 
474 	RDSV3_DPRINTF4("rdsv3_ib_flush_mrs", "Enter:");
475 
476 	RDSV3_FOR_EACH_LIST_NODE(rds_ibdev, &rdsv3_ib_devices, list) {
477 		if (rds_ibdev->fmr_pool_hdl) {
478 			(void) rdsv3_ib_flush_mr_pool(rds_ibdev,
479 			    rds_ibdev->fmr_pool_hdl, 0);
480 		}
481 	}
482 }
483 
484 static void
rdsv3_ib_drop_mr(struct rdsv3_ib_mr * ibmr)485 rdsv3_ib_drop_mr(struct rdsv3_ib_mr *ibmr)
486 {
487 	/* return the fmr to the IBTF pool */
488 	(void) ibt_deregister_fmr(ibmr->rc_hca_hdl, ibmr->rc_fmr_hdl);
489 	(void) ibt_unmap_mem_area(ibmr->rc_hca_hdl, ibmr->rc_ma_hdl);
490 	(void) ddi_umem_unlock(ibmr->umem_cookie);
491 	kmem_free((void *) ibmr, sizeof (*ibmr));
492 }
493 
494 void
rdsv3_ib_drain_mrlist_fn(void * data)495 rdsv3_ib_drain_mrlist_fn(void *data)
496 {
497 	struct rdsv3_fmr_pool *pool = (struct rdsv3_fmr_pool *)data;
498 	ibt_hca_hdl_t hca_hdl;
499 	ibt_fmr_pool_hdl_t fmr_pool_hdl;
500 	unsigned int inval;
501 	struct rdsv3_ib_mr *ibmr;
502 	list_t *listp = &pool->f_list;
503 	kmutex_t *lockp = &pool->f_lock;
504 	int i;
505 
506 	inval = 0;
507 	i = 0;
508 	for (;;) {
509 		mutex_enter(lockp);
510 		ibmr = (struct rdsv3_ib_mr *)list_remove_head(listp);
511 		if (ibmr)
512 			pool->f_listcnt--;
513 		mutex_exit(lockp);
514 		if (!ibmr)
515 			break;
516 		if ((inval == 0) && ibmr->m_inval) {
517 			inval = 1;
518 			hca_hdl = ibmr->rc_hca_hdl;
519 			fmr_pool_hdl = ibmr->fmr_pool_hdl;
520 		}
521 		i++;
522 		rdsv3_ib_drop_mr(ibmr);
523 	}
524 	if (inval)
525 		(void) ibt_flush_fmr_pool(hca_hdl, fmr_pool_hdl);
526 }
527 
528 void
rdsv3_ib_free_mr(void * trans_private,int invalidate)529 rdsv3_ib_free_mr(void *trans_private, int invalidate)
530 {
531 	struct rdsv3_ib_mr *ibmr = trans_private;
532 	rdsv3_af_thr_t *af_thr;
533 
534 	RDSV3_DPRINTF4("rdsv3_ib_free_mr", "Enter: ibmr: %p inv: %d",
535 	    ibmr, invalidate);
536 
537 	/* save af_thr at local as ibmr might be freed at mutex_exit */
538 	af_thr = ibmr->m_device->fmr_soft_cq;
539 	ibmr->m_inval = (unsigned int) invalidate;
540 	mutex_enter(&ibmr->m_pool->f_lock);
541 	list_insert_tail(&ibmr->m_pool->f_list, ibmr);
542 	ibmr->m_pool->f_listcnt++;
543 	mutex_exit(&ibmr->m_pool->f_lock);
544 
545 	rdsv3_af_thr_fire(af_thr);
546 }
547 
548 static int
rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device * rds_ibdev,ibt_fmr_pool_hdl_t pool_hdl,int free_all)549 rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device *rds_ibdev,
550     ibt_fmr_pool_hdl_t pool_hdl, int free_all)
551 {
552 	/* LINTED E_FUNC_SET_NOT_USED */
553 	int l_free_all = free_all;
554 
555 	RDSV3_DPRINTF4("rdsv3_ib_flush_mr_pool", "Enter: pool: %p", pool_hdl);
556 
557 	rdsv3_ib_stats_inc(s_ib_rdma_mr_pool_flush);
558 
559 	(void) ibt_flush_fmr_pool(ib_get_ibt_hca_hdl(rds_ibdev->dev),
560 	    pool_hdl);
561 	return (0);
562 }
563