/*
 * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * This file contains code imported from the OFED rds source file ib_rdma.c
 * Oracle elects to have and use the contents of ib_rdma.c under and governed
 * by the OpenIB.org BSD license (see below for full license text). However,
 * the following notice accompanied the original version of this file:
 */

/*
 * Copyright (c) 2006 Oracle.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 */
44c0dd49bdSEiji Ota #include <sys/rds.h>
45c0dd49bdSEiji Ota #include <netinet/in.h>
46c0dd49bdSEiji Ota 
47c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3.h>
48c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdma.h>
49c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/ib.h>
50c0dd49bdSEiji Ota #include <sys/ib/clients/rdsv3/rdsv3_debug.h>
51c0dd49bdSEiji Ota 
52c0dd49bdSEiji Ota /*
53c0dd49bdSEiji Ota  * This is stored as mr->r_trans_private.
54c0dd49bdSEiji Ota  */
55c0dd49bdSEiji Ota struct rdsv3_ib_mr {
565d5562f5SEiji Ota 	list_node_t		m_obj; /* list obj of rdsv3_fmr_pool list */
575d5562f5SEiji Ota 	struct rdsv3_ib_device	*m_device;
585d5562f5SEiji Ota 	struct rdsv3_fmr_pool	*m_pool; /* hca fmr pool */
595d5562f5SEiji Ota 	unsigned int		m_inval:1;
60c0dd49bdSEiji Ota 
61c0dd49bdSEiji Ota 	struct rdsv3_scatterlist	*sg;
62c0dd49bdSEiji Ota 	unsigned int		sg_len;
63c0dd49bdSEiji Ota 	uint64_t		*dma;
64c0dd49bdSEiji Ota 	int			sg_dma_len;
65c0dd49bdSEiji Ota 
66c0dd49bdSEiji Ota 	/* DDI pinned memory */
67c0dd49bdSEiji Ota 	ddi_umem_cookie_t	umem_cookie;
68c0dd49bdSEiji Ota 	/* IBTF type definitions */
695d5562f5SEiji Ota 	ibt_hca_hdl_t		rc_hca_hdl;
70c0dd49bdSEiji Ota 	ibt_fmr_pool_hdl_t	fmr_pool_hdl;
71c0dd49bdSEiji Ota 	ibt_ma_hdl_t		rc_ma_hdl;
72c0dd49bdSEiji Ota 	ibt_mr_hdl_t		rc_fmr_hdl;
73c0dd49bdSEiji Ota 	ibt_pmr_desc_t		rc_mem_desc;
74c0dd49bdSEiji Ota };
75c0dd49bdSEiji Ota 
76c0dd49bdSEiji Ota /*
775d5562f5SEiji Ota  * delayed freed fmr's
78c0dd49bdSEiji Ota  */
795d5562f5SEiji Ota struct rdsv3_fmr_pool {
805d5562f5SEiji Ota 	list_t			f_list;	/* list of freed mr */
815d5562f5SEiji Ota 	kmutex_t		f_lock; /* lock of fmr pool */
825d5562f5SEiji Ota 	int32_t			f_listcnt;
83c0dd49bdSEiji Ota };
84c0dd49bdSEiji Ota 
85c0dd49bdSEiji Ota static int rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device *rds_ibdev,
86c0dd49bdSEiji Ota 	ibt_fmr_pool_hdl_t pool_hdl, int free_all);
87c0dd49bdSEiji Ota static void rdsv3_ib_teardown_mr(struct rdsv3_ib_mr *ibmr);
88c0dd49bdSEiji Ota static void rdsv3_ib_mr_pool_flush_worker(struct rdsv3_work_s *work);
89c0dd49bdSEiji Ota static struct rdsv3_ib_mr *rdsv3_ib_alloc_fmr(struct rdsv3_ib_device
90c0dd49bdSEiji Ota 	*rds_ibdev);
91c0dd49bdSEiji Ota static int rdsv3_ib_map_fmr(struct rdsv3_ib_device *rds_ibdev,
92c0dd49bdSEiji Ota 	struct rdsv3_ib_mr *ibmr, struct buf *bp, unsigned int nents);
93c0dd49bdSEiji Ota 
94c0dd49bdSEiji Ota static struct rdsv3_ib_device *
rdsv3_ib_get_device(uint32_be_t ipaddr)95c0dd49bdSEiji Ota rdsv3_ib_get_device(uint32_be_t ipaddr)
96c0dd49bdSEiji Ota {
97c0dd49bdSEiji Ota 	struct rdsv3_ib_device *rds_ibdev;
98c0dd49bdSEiji Ota 	struct rdsv3_ib_ipaddr *i_ipaddr;
99c0dd49bdSEiji Ota 
100c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_get_device", "Enter: ipaddr: 0x%x", ipaddr);
101c0dd49bdSEiji Ota 
102c0dd49bdSEiji Ota 	RDSV3_FOR_EACH_LIST_NODE(rds_ibdev, &rdsv3_ib_devices, list) {
1035d5562f5SEiji Ota 		rw_enter(&rds_ibdev->rwlock, RW_READER);
104c0dd49bdSEiji Ota 		RDSV3_FOR_EACH_LIST_NODE(i_ipaddr, &rds_ibdev->ipaddr_list,
105c0dd49bdSEiji Ota 		    list) {
106c0dd49bdSEiji Ota 			if (i_ipaddr->ipaddr == ipaddr) {
1075d5562f5SEiji Ota 				rw_exit(&rds_ibdev->rwlock);
108c0dd49bdSEiji Ota 				return (rds_ibdev);
109c0dd49bdSEiji Ota 			}
110c0dd49bdSEiji Ota 		}
1115d5562f5SEiji Ota 		rw_exit(&rds_ibdev->rwlock);
112c0dd49bdSEiji Ota 	}
113c0dd49bdSEiji Ota 
114c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_get_device", "Return: ipaddr: 0x%x", ipaddr);
115c0dd49bdSEiji Ota 
116c0dd49bdSEiji Ota 	return (NULL);
117c0dd49bdSEiji Ota }
118c0dd49bdSEiji Ota 
119c0dd49bdSEiji Ota static int
rdsv3_ib_add_ipaddr(struct rdsv3_ib_device * rds_ibdev,uint32_be_t ipaddr)120c0dd49bdSEiji Ota rdsv3_ib_add_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
121c0dd49bdSEiji Ota {
122c0dd49bdSEiji Ota 	struct rdsv3_ib_ipaddr *i_ipaddr;
123c0dd49bdSEiji Ota 
124c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_add_ipaddr", "rds_ibdev: %p ipaddr: %x",
125c0dd49bdSEiji Ota 	    rds_ibdev, ipaddr);
126c0dd49bdSEiji Ota 
127c0dd49bdSEiji Ota 	i_ipaddr = kmem_alloc(sizeof (*i_ipaddr), KM_NOSLEEP);
128c0dd49bdSEiji Ota 	if (!i_ipaddr)
129c0dd49bdSEiji Ota 		return (-ENOMEM);
130c0dd49bdSEiji Ota 
131c0dd49bdSEiji Ota 	i_ipaddr->ipaddr = ipaddr;
132c0dd49bdSEiji Ota 
1335d5562f5SEiji Ota 	rw_enter(&rds_ibdev->rwlock, RW_WRITER);
134c0dd49bdSEiji Ota 	list_insert_tail(&rds_ibdev->ipaddr_list, i_ipaddr);
1355d5562f5SEiji Ota 	rw_exit(&rds_ibdev->rwlock);
136c0dd49bdSEiji Ota 
137c0dd49bdSEiji Ota 	return (0);
138c0dd49bdSEiji Ota }
139c0dd49bdSEiji Ota 
140c0dd49bdSEiji Ota static void
rdsv3_ib_remove_ipaddr(struct rdsv3_ib_device * rds_ibdev,uint32_be_t ipaddr)141c0dd49bdSEiji Ota rdsv3_ib_remove_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
142c0dd49bdSEiji Ota {
143c0dd49bdSEiji Ota 	struct rdsv3_ib_ipaddr *i_ipaddr, *next;
1445d5562f5SEiji Ota 	struct rdsv3_ib_ipaddr *to_free = NULL;
145c0dd49bdSEiji Ota 
146c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_remove_ipaddr", "rds_ibdev: %p, ipaddr: %x",
147c0dd49bdSEiji Ota 	    rds_ibdev, ipaddr);
148c0dd49bdSEiji Ota 
1495d5562f5SEiji Ota 	rw_enter(&rds_ibdev->rwlock, RW_WRITER);
150c0dd49bdSEiji Ota 	RDSV3_FOR_EACH_LIST_NODE_SAFE(i_ipaddr, next, &rds_ibdev->ipaddr_list,
151c0dd49bdSEiji Ota 	    list) {
152c0dd49bdSEiji Ota 		if (i_ipaddr->ipaddr == ipaddr) {
153c0dd49bdSEiji Ota 			list_remove_node(&i_ipaddr->list);
1545d5562f5SEiji Ota 			to_free = i_ipaddr;
155c0dd49bdSEiji Ota 			break;
156c0dd49bdSEiji Ota 		}
157c0dd49bdSEiji Ota 	}
1585d5562f5SEiji Ota 	rw_exit(&rds_ibdev->rwlock);
1595d5562f5SEiji Ota 
1605d5562f5SEiji Ota 	if (to_free) {
1615d5562f5SEiji Ota 		kmem_free(i_ipaddr, sizeof (*i_ipaddr));
1625d5562f5SEiji Ota 	}
163c0dd49bdSEiji Ota 
164c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_remove_ipaddr",
165c0dd49bdSEiji Ota 	    "Return: rds_ibdev: %p, ipaddr: %x", rds_ibdev, ipaddr);
166c0dd49bdSEiji Ota }
167c0dd49bdSEiji Ota 
168c0dd49bdSEiji Ota int
rdsv3_ib_update_ipaddr(struct rdsv3_ib_device * rds_ibdev,uint32_be_t ipaddr)169c0dd49bdSEiji Ota rdsv3_ib_update_ipaddr(struct rdsv3_ib_device *rds_ibdev, uint32_be_t ipaddr)
170c0dd49bdSEiji Ota {
171c0dd49bdSEiji Ota 	struct rdsv3_ib_device *rds_ibdev_old;
172c0dd49bdSEiji Ota 
173c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_update_ipaddr", "rds_ibdev: %p, ipaddr: %x",
174c0dd49bdSEiji Ota 	    rds_ibdev, ipaddr);
175c0dd49bdSEiji Ota 
176c0dd49bdSEiji Ota 	rds_ibdev_old = rdsv3_ib_get_device(ipaddr);
177c0dd49bdSEiji Ota 	if (rds_ibdev_old)
178c0dd49bdSEiji Ota 		rdsv3_ib_remove_ipaddr(rds_ibdev_old, ipaddr);
179c0dd49bdSEiji Ota 
180c0dd49bdSEiji Ota 	return (rdsv3_ib_add_ipaddr(rds_ibdev, ipaddr));
181c0dd49bdSEiji Ota }
182c0dd49bdSEiji Ota 
183c0dd49bdSEiji Ota void
rdsv3_ib_add_conn(struct rdsv3_ib_device * rds_ibdev,struct rdsv3_connection * conn)184c0dd49bdSEiji Ota rdsv3_ib_add_conn(struct rdsv3_ib_device *rds_ibdev,
185c0dd49bdSEiji Ota     struct rdsv3_connection *conn)
186c0dd49bdSEiji Ota {
187c0dd49bdSEiji Ota 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
188c0dd49bdSEiji Ota 
189c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_add_conn", "rds_ibdev: %p, conn: %p",
190c0dd49bdSEiji Ota 	    rds_ibdev, conn);
191c0dd49bdSEiji Ota 
192c0dd49bdSEiji Ota 	/* conn was previously on the nodev_conns_list */
193c0dd49bdSEiji Ota 	mutex_enter(&ib_nodev_conns_lock);
194c0dd49bdSEiji Ota 	ASSERT(!list_is_empty(&ib_nodev_conns));
195c0dd49bdSEiji Ota 	ASSERT(list_link_active(&ic->ib_node));
196c0dd49bdSEiji Ota 	list_remove_node(&ic->ib_node);
197c0dd49bdSEiji Ota 
198c0dd49bdSEiji Ota 	mutex_enter(&rds_ibdev->spinlock);
199c0dd49bdSEiji Ota 	list_insert_tail(&rds_ibdev->conn_list, ic);
200d2b539e7Sagiri 	ic->i_on_dev_list = B_TRUE;
201c0dd49bdSEiji Ota 	mutex_exit(&rds_ibdev->spinlock);
202c0dd49bdSEiji Ota 	mutex_exit(&ib_nodev_conns_lock);
203c0dd49bdSEiji Ota }
204c0dd49bdSEiji Ota 
205c0dd49bdSEiji Ota void
rdsv3_ib_remove_conn(struct rdsv3_ib_device * rds_ibdev,struct rdsv3_connection * conn)206c0dd49bdSEiji Ota rdsv3_ib_remove_conn(struct rdsv3_ib_device *rds_ibdev,
207c0dd49bdSEiji Ota     struct rdsv3_connection *conn)
208c0dd49bdSEiji Ota {
209c0dd49bdSEiji Ota 	struct rdsv3_ib_connection *ic = conn->c_transport_data;
210c0dd49bdSEiji Ota 
211c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_remove_conn", "rds_ibdev: %p, conn: %p",
212c0dd49bdSEiji Ota 	    rds_ibdev, conn);
213c0dd49bdSEiji Ota 
214c0dd49bdSEiji Ota 	/* place conn on nodev_conns_list */
215c0dd49bdSEiji Ota 	mutex_enter(&ib_nodev_conns_lock);
216c0dd49bdSEiji Ota 
217c0dd49bdSEiji Ota 	mutex_enter(&rds_ibdev->spinlock);
218c0dd49bdSEiji Ota 	ASSERT(list_link_active(&ic->ib_node));
219c0dd49bdSEiji Ota 	list_remove_node(&ic->ib_node);
220d2b539e7Sagiri 	ic->i_on_dev_list = B_FALSE;
221c0dd49bdSEiji Ota 	mutex_exit(&rds_ibdev->spinlock);
222c0dd49bdSEiji Ota 
223c0dd49bdSEiji Ota 	list_insert_tail(&ib_nodev_conns, ic);
224c0dd49bdSEiji Ota 
225c0dd49bdSEiji Ota 	mutex_exit(&ib_nodev_conns_lock);
226c0dd49bdSEiji Ota 
227c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_remove_conn",
228c0dd49bdSEiji Ota 	    "Return: rds_ibdev: %p, conn: %p", rds_ibdev, conn);
229c0dd49bdSEiji Ota }
230c0dd49bdSEiji Ota 
231c0dd49bdSEiji Ota void
__rdsv3_ib_destroy_conns(struct list * list,kmutex_t * list_lock)232c0dd49bdSEiji Ota __rdsv3_ib_destroy_conns(struct list *list, kmutex_t *list_lock)
233c0dd49bdSEiji Ota {
234c0dd49bdSEiji Ota 	struct rdsv3_ib_connection *ic, *_ic;
235c0dd49bdSEiji Ota 	list_t tmp_list;
236c0dd49bdSEiji Ota 
237c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("__rdsv3_ib_destroy_conns", "Enter: list: %p", list);
238c0dd49bdSEiji Ota 
239c0dd49bdSEiji Ota 	/* avoid calling conn_destroy with irqs off */
240c0dd49bdSEiji Ota 	mutex_enter(list_lock);
241c0dd49bdSEiji Ota 	list_splice(list, &tmp_list);
242c0dd49bdSEiji Ota 	mutex_exit(list_lock);
243c0dd49bdSEiji Ota 
244c0dd49bdSEiji Ota 	RDSV3_FOR_EACH_LIST_NODE_SAFE(ic, _ic, &tmp_list, ib_node) {
245c0dd49bdSEiji Ota 		rdsv3_conn_destroy(ic->conn);
246c0dd49bdSEiji Ota 	}
247c0dd49bdSEiji Ota 
248c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("__rdsv3_ib_destroy_conns", "Return: list: %p", list);
249c0dd49bdSEiji Ota }
250c0dd49bdSEiji Ota 
251c0dd49bdSEiji Ota void
rdsv3_ib_destroy_mr_pool(struct rdsv3_ib_device * rds_ibdev)252c0dd49bdSEiji Ota rdsv3_ib_destroy_mr_pool(struct rdsv3_ib_device *rds_ibdev)
253c0dd49bdSEiji Ota {
2545d5562f5SEiji Ota 	struct rdsv3_fmr_pool *pool = rds_ibdev->fmr_pool;
2555d5562f5SEiji Ota 
256c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_destroy_mr_pool", "Enter: ibdev: %p",
257c0dd49bdSEiji Ota 	    rds_ibdev);
258c0dd49bdSEiji Ota 
259c0dd49bdSEiji Ota 	if (rds_ibdev->fmr_pool_hdl == NULL)
260c0dd49bdSEiji Ota 		return;
261c0dd49bdSEiji Ota 
2625d5562f5SEiji Ota 	if (pool) {
2635d5562f5SEiji Ota 		list_destroy(&pool->f_list);
2645d5562f5SEiji Ota 		kmem_free((void *) pool, sizeof (*pool));
2655d5562f5SEiji Ota 	}
2665d5562f5SEiji Ota 
267c0dd49bdSEiji Ota 	(void) rdsv3_ib_flush_mr_pool(rds_ibdev, rds_ibdev->fmr_pool_hdl, 1);
268c0dd49bdSEiji Ota 	(void) ibt_destroy_fmr_pool(ib_get_ibt_hca_hdl(rds_ibdev->dev),
269c0dd49bdSEiji Ota 	    rds_ibdev->fmr_pool_hdl);
270c0dd49bdSEiji Ota }
271c0dd49bdSEiji Ota 
272c0dd49bdSEiji Ota #define	IB_FMR_MAX_BUF_SIZE	0x1000000	/* 16MB max buf */
273c0dd49bdSEiji Ota int
rdsv3_ib_create_mr_pool(struct rdsv3_ib_device * rds_ibdev)274c0dd49bdSEiji Ota rdsv3_ib_create_mr_pool(struct rdsv3_ib_device *rds_ibdev)
275c0dd49bdSEiji Ota {
276c0dd49bdSEiji Ota 	uint_t h_page_sz;
277c0dd49bdSEiji Ota 	ibt_fmr_pool_attr_t fmr_attr;
278c0dd49bdSEiji Ota 	ibt_status_t ibt_status;
2795d5562f5SEiji Ota 	struct rdsv3_fmr_pool *pool;
280c0dd49bdSEiji Ota 
281c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_create_mr_pool",
282c0dd49bdSEiji Ota 	    "Enter: ibdev: %p", rds_ibdev);
283c0dd49bdSEiji Ota 
2845d5562f5SEiji Ota 	pool = (struct rdsv3_fmr_pool *)kmem_zalloc(sizeof (*pool), KM_NOSLEEP);
2855d5562f5SEiji Ota 	if (pool == NULL) {
286c0dd49bdSEiji Ota 		return (-ENOMEM);
287c0dd49bdSEiji Ota 	}
288c0dd49bdSEiji Ota 
289c0dd49bdSEiji Ota 	/* setup FMR pool attributes */
290c0dd49bdSEiji Ota 	h_page_sz = rds_ibdev->hca_attr.hca_page_sz * 1024;
291c0dd49bdSEiji Ota 
292c0dd49bdSEiji Ota 	fmr_attr.fmr_max_pages_per_fmr = (IB_FMR_MAX_BUF_SIZE / h_page_sz) + 2;
293c0dd49bdSEiji Ota 	fmr_attr.fmr_pool_size = RDSV3_FMR_POOL_SIZE;
294c0dd49bdSEiji Ota 	fmr_attr.fmr_dirty_watermark = 128;
295c0dd49bdSEiji Ota 	fmr_attr.fmr_cache = B_FALSE;
296c0dd49bdSEiji Ota 	fmr_attr.fmr_flags = IBT_MR_NOSLEEP  | IBT_MR_ENABLE_LOCAL_WRITE |
297c0dd49bdSEiji Ota 	    IBT_MR_ENABLE_REMOTE_WRITE | IBT_MR_ENABLE_REMOTE_READ;
298c0dd49bdSEiji Ota 	fmr_attr.fmr_page_sz = h_page_sz;
299c0dd49bdSEiji Ota 	fmr_attr.fmr_func_hdlr = NULL;
300c0dd49bdSEiji Ota 	fmr_attr.fmr_func_arg = (void *) NULL;
301c0dd49bdSEiji Ota 
302c0dd49bdSEiji Ota 	/* create the FMR pool */
3035d5562f5SEiji Ota 	ibt_status = ibt_create_fmr_pool(rds_ibdev->ibt_hca_hdl,
3045d5562f5SEiji Ota 	    rds_ibdev->pd->ibt_pd, &fmr_attr, &rds_ibdev->fmr_pool_hdl);
305c0dd49bdSEiji Ota 	if (ibt_status != IBT_SUCCESS) {
3065d5562f5SEiji Ota 		kmem_free((void *) pool, sizeof (*pool));
3075d5562f5SEiji Ota 		rds_ibdev->fmr_pool = NULL;
308c0dd49bdSEiji Ota 		return (-ENOMEM);
309c0dd49bdSEiji Ota 	}
3105d5562f5SEiji Ota 
3115d5562f5SEiji Ota 	list_create(&pool->f_list, sizeof (struct rdsv3_ib_mr),
3125d5562f5SEiji Ota 	    offsetof(struct rdsv3_ib_mr, m_obj));
3135d5562f5SEiji Ota 	mutex_init(&pool->f_lock, NULL, MUTEX_DRIVER, NULL);
3145d5562f5SEiji Ota 	rds_ibdev->fmr_pool = pool;
315c0dd49bdSEiji Ota 	rds_ibdev->max_fmrs = fmr_attr.fmr_pool_size;
316c0dd49bdSEiji Ota 	rds_ibdev->fmr_message_size = fmr_attr.fmr_max_pages_per_fmr;
3175d5562f5SEiji Ota 
3185d5562f5SEiji Ota 	RDSV3_DPRINTF2("rdsv3_ib_create_mr_pool",
3195d5562f5SEiji Ota 	    "Exit: ibdev: %p fmr_pool: %p", rds_ibdev, pool);
320c0dd49bdSEiji Ota 	return (0);
321c0dd49bdSEiji Ota }
322c0dd49bdSEiji Ota 
323c0dd49bdSEiji Ota void
rdsv3_ib_get_mr_info(struct rdsv3_ib_device * rds_ibdev,struct rds_info_rdma_connection * iinfo)324c0dd49bdSEiji Ota rdsv3_ib_get_mr_info(struct rdsv3_ib_device *rds_ibdev,
325*fe817b60SEiji Ota 	struct rds_info_rdma_connection *iinfo)
326c0dd49bdSEiji Ota {
327c0dd49bdSEiji Ota 	iinfo->rdma_mr_max = rds_ibdev->max_fmrs;
328c0dd49bdSEiji Ota 	iinfo->rdma_mr_size = rds_ibdev->fmr_message_size;
329c0dd49bdSEiji Ota }
330c0dd49bdSEiji Ota 
331c0dd49bdSEiji Ota void *
rdsv3_ib_get_mr(struct rds_iovec * args,unsigned long nents,struct rdsv3_sock * rs,uint32_t * key_ret)332*fe817b60SEiji Ota rdsv3_ib_get_mr(struct rds_iovec *args, unsigned long nents,
333c0dd49bdSEiji Ota 	struct rdsv3_sock *rs, uint32_t *key_ret)
334c0dd49bdSEiji Ota {
335c0dd49bdSEiji Ota 	struct rdsv3_ib_device *rds_ibdev;
336c0dd49bdSEiji Ota 	struct rdsv3_ib_mr *ibmr = NULL;
337c0dd49bdSEiji Ota 	ddi_umem_cookie_t umem_cookie;
338c0dd49bdSEiji Ota 	size_t umem_len;
339c0dd49bdSEiji Ota 	caddr_t umem_addr;
340c0dd49bdSEiji Ota 	int ret;
341c0dd49bdSEiji Ota 	struct buf *bp;
342c0dd49bdSEiji Ota 
343c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_get_mr", "Enter: args.addr: %p", args->addr);
344c0dd49bdSEiji Ota 
345c0dd49bdSEiji Ota 	rds_ibdev = rdsv3_ib_get_device(rs->rs_bound_addr);
346c0dd49bdSEiji Ota 
347c0dd49bdSEiji Ota 	if (rds_ibdev == NULL)
348c0dd49bdSEiji Ota 		return (void *)(PTR_ERR(-EFAULT));
349c0dd49bdSEiji Ota 
350c0dd49bdSEiji Ota 	ibmr = rdsv3_ib_alloc_fmr(rds_ibdev);
351c0dd49bdSEiji Ota 	if (IS_ERR(ibmr))
352c0dd49bdSEiji Ota 		return (ibmr);
353c0dd49bdSEiji Ota 
354c0dd49bdSEiji Ota 	/* pin user memory pages */
355c0dd49bdSEiji Ota 	umem_len   = ptob(btopr(args->bytes +
356c0dd49bdSEiji Ota 	    ((uintptr_t)args->addr & PAGEOFFSET)));
357c0dd49bdSEiji Ota 	umem_addr  = (caddr_t)((uintptr_t)args->addr & ~PAGEOFFSET);
358d2b539e7Sagiri 	ret = umem_lockmemory(umem_addr, umem_len,
359d2b539e7Sagiri 	    DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ,
360d2b539e7Sagiri 	    &umem_cookie, NULL, NULL);
361c0dd49bdSEiji Ota 	if (ret != 0) {
362c0dd49bdSEiji Ota 		kmem_free((void *) ibmr, sizeof (*ibmr));
363b27516f5Sagiri 		ibmr = ERR_PTR(-ret);
364c0dd49bdSEiji Ota 		return (ibmr);
365c0dd49bdSEiji Ota 	}
366c0dd49bdSEiji Ota 
367c0dd49bdSEiji Ota 	/* transpose umem_cookie to buf structure for rdsv3_ib_map_fmr() */
368c0dd49bdSEiji Ota 	bp = ddi_umem_iosetup(umem_cookie, 0, umem_len,
369c0dd49bdSEiji Ota 	    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
370c0dd49bdSEiji Ota 
371c0dd49bdSEiji Ota 	ret = rdsv3_ib_map_fmr(rds_ibdev, ibmr, bp, nents);
372c0dd49bdSEiji Ota 	freerbuf(bp);	/* free bp */
373c0dd49bdSEiji Ota 	if (ret == 0) {
374c0dd49bdSEiji Ota 		ibmr->umem_cookie = umem_cookie;
375c0dd49bdSEiji Ota 		*key_ret = (uint32_t)ibmr->rc_mem_desc.pmd_rkey;
3765d5562f5SEiji Ota 		ibmr->m_device = rds_ibdev;
3775d5562f5SEiji Ota 		ibmr->m_pool = rds_ibdev->fmr_pool;
378c0dd49bdSEiji Ota 		RDSV3_DPRINTF4("rdsv3_ib_get_mr",
379c0dd49bdSEiji Ota 		    "Return: ibmr: %p umem_cookie %p", ibmr, ibmr->umem_cookie);
380c0dd49bdSEiji Ota 		return (ibmr);
381c0dd49bdSEiji Ota 	} else { /* error return */
3826e18d381Sagiri 		RDSV3_DPRINTF2("rdsv3_ib_get_mr", "map_fmr failed (errno=%d)\n",
383c0dd49bdSEiji Ota 		    ret);
384c0dd49bdSEiji Ota 		ddi_umem_unlock(umem_cookie);
385c0dd49bdSEiji Ota 		kmem_free((void *)ibmr, sizeof (*ibmr));
386c0dd49bdSEiji Ota 		return (ERR_PTR(ret));
387c0dd49bdSEiji Ota 	}
388c0dd49bdSEiji Ota }
389c0dd49bdSEiji Ota 
390c0dd49bdSEiji Ota static struct rdsv3_ib_mr *
rdsv3_ib_alloc_fmr(struct rdsv3_ib_device * rds_ibdev)391c0dd49bdSEiji Ota rdsv3_ib_alloc_fmr(struct rdsv3_ib_device *rds_ibdev)
392c0dd49bdSEiji Ota {
393c0dd49bdSEiji Ota 	struct rdsv3_ib_mr *ibmr;
394c0dd49bdSEiji Ota 
395c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_alloc_fmr", "Enter: ibdev: %p", rds_ibdev);
396c0dd49bdSEiji Ota 
397c0dd49bdSEiji Ota 	if (rds_ibdev->fmr_pool_hdl) {
398c0dd49bdSEiji Ota 		ibmr = (struct rdsv3_ib_mr *)kmem_zalloc(sizeof (*ibmr),
399c0dd49bdSEiji Ota 		    KM_SLEEP);
4005d5562f5SEiji Ota 		ibmr->rc_hca_hdl = ib_get_ibt_hca_hdl(rds_ibdev->dev);
401c0dd49bdSEiji Ota 		ibmr->fmr_pool_hdl = rds_ibdev->fmr_pool_hdl;
402c0dd49bdSEiji Ota 		return (ibmr);
403c0dd49bdSEiji Ota 	}
404c0dd49bdSEiji Ota 	return (struct rdsv3_ib_mr *)(PTR_ERR(-ENOMEM));
405c0dd49bdSEiji Ota }
406c0dd49bdSEiji Ota 
407c0dd49bdSEiji Ota static int
rdsv3_ib_map_fmr(struct rdsv3_ib_device * rds_ibdev,struct rdsv3_ib_mr * ibmr,struct buf * bp,unsigned int nents)408c0dd49bdSEiji Ota rdsv3_ib_map_fmr(struct rdsv3_ib_device *rds_ibdev, struct rdsv3_ib_mr *ibmr,
409c0dd49bdSEiji Ota 	struct buf *bp, unsigned int nents)
410c0dd49bdSEiji Ota {
411c0dd49bdSEiji Ota 	ibt_va_attr_t va_attr;
412c0dd49bdSEiji Ota 	ibt_reg_req_t reg_req;
413c0dd49bdSEiji Ota 	uint_t paddr_list_len;
414c0dd49bdSEiji Ota 	uint_t page_sz;
415c0dd49bdSEiji Ota 	ibt_status_t ibt_status;
416c0dd49bdSEiji Ota 	/* LINTED E_FUNC_SET_NOT_USED */
417c0dd49bdSEiji Ota 	unsigned int l_nents = nents;
418c0dd49bdSEiji Ota 
419c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "Enter: ibmr: %p", ibmr);
420c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "buf addr: %p", bp->b_un.b_addr);
421c0dd49bdSEiji Ota 
422c0dd49bdSEiji Ota 	/* setup ibt_map_mem_area attributes */
423c0dd49bdSEiji Ota 	bzero(&va_attr, sizeof (ibt_va_attr_t));
424c0dd49bdSEiji Ota 	va_attr.va_buf   = bp;
425c0dd49bdSEiji Ota 	va_attr.va_flags = IBT_VA_FMR | IBT_VA_BUF;
426c0dd49bdSEiji Ota 
427c0dd49bdSEiji Ota 	page_sz = rds_ibdev->hca_attr.hca_page_sz * 1024; /* in kbytes */
428c0dd49bdSEiji Ota 	paddr_list_len = (bp->b_bcount / page_sz) + 2; /* start + end pg */
429c0dd49bdSEiji Ota 
430c0dd49bdSEiji Ota 	/* map user buffer to HCA address */
4315d5562f5SEiji Ota 	ibt_status = ibt_map_mem_area(ibmr->rc_hca_hdl,
432c0dd49bdSEiji Ota 	    &va_attr, paddr_list_len, &reg_req, &ibmr->rc_ma_hdl);
433c0dd49bdSEiji Ota 	if (ibt_status != IBT_SUCCESS) {
434c0dd49bdSEiji Ota 		return (-ENOMEM);
435c0dd49bdSEiji Ota 	}
436c0dd49bdSEiji Ota 
437c0dd49bdSEiji Ota 	/*  use a free entry from FMR pool to register the specified memory */
4385d5562f5SEiji Ota 	ibt_status = ibt_register_physical_fmr(ibmr->rc_hca_hdl,
4395d5562f5SEiji Ota 	    ibmr->fmr_pool_hdl,
440c0dd49bdSEiji Ota 	    &reg_req.fn_arg, &ibmr->rc_fmr_hdl, &ibmr->rc_mem_desc);
441c0dd49bdSEiji Ota 	if (ibt_status != IBT_SUCCESS) {
4425d5562f5SEiji Ota 		RDSV3_DPRINTF2("rdsv3_ib_map_fmr", "reg_phy_fmr failed %d",
4435d5562f5SEiji Ota 		    ibt_status);
4445d5562f5SEiji Ota 		(void) ibt_unmap_mem_area(ibmr->rc_hca_hdl,
445c0dd49bdSEiji Ota 		    ibmr->rc_ma_hdl);
446c0dd49bdSEiji Ota 		if (ibt_status == IBT_INSUFF_RESOURCE) {
447c0dd49bdSEiji Ota 			return (-ENOBUFS);
448c0dd49bdSEiji Ota 		}
449c0dd49bdSEiji Ota 		return (-EINVAL);
450c0dd49bdSEiji Ota 	}
451c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_map_fmr", "Return: ibmr: %p rkey: 0x%x",
452c0dd49bdSEiji Ota 	    ibmr, (uint32_t)ibmr->rc_mem_desc.pmd_rkey);
453c0dd49bdSEiji Ota 	return (0);
454c0dd49bdSEiji Ota }
455c0dd49bdSEiji Ota 
/*
 * Sync hook for a memory region.  Both arguments are ignored; the
 * function only emits a debug trace.
 */
void
rdsv3_ib_sync_mr(void *trans_private, int direction)
{
	/* LINTED E_FUNC_SET_NOT_USED */
	int l_direction = direction;
	/* LINTED E_FUNC_SET_NOT_USED */
	void *l_trans_private = trans_private;

	/* FMR Sync not needed in Solaris on PCI-ex systems */

	RDSV3_DPRINTF4("rdsv3_ib_sync_mr", "Enter:");
}
468c0dd49bdSEiji Ota 
469c0dd49bdSEiji Ota void
rdsv3_ib_flush_mrs(void)470c0dd49bdSEiji Ota rdsv3_ib_flush_mrs(void)
471c0dd49bdSEiji Ota {
472c0dd49bdSEiji Ota 	struct rdsv3_ib_device *rds_ibdev;
473c0dd49bdSEiji Ota 
474c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_flush_mrs", "Enter:");
475c0dd49bdSEiji Ota 
476c0dd49bdSEiji Ota 	RDSV3_FOR_EACH_LIST_NODE(rds_ibdev, &rdsv3_ib_devices, list) {
477c0dd49bdSEiji Ota 		if (rds_ibdev->fmr_pool_hdl) {
478c0dd49bdSEiji Ota 			(void) rdsv3_ib_flush_mr_pool(rds_ibdev,
479c0dd49bdSEiji Ota 			    rds_ibdev->fmr_pool_hdl, 0);
480c0dd49bdSEiji Ota 		}
481c0dd49bdSEiji Ota 	}
482c0dd49bdSEiji Ota }
483c0dd49bdSEiji Ota 
484c0dd49bdSEiji Ota static void
rdsv3_ib_drop_mr(struct rdsv3_ib_mr * ibmr)4855d5562f5SEiji Ota rdsv3_ib_drop_mr(struct rdsv3_ib_mr *ibmr)
486c0dd49bdSEiji Ota {
4875d5562f5SEiji Ota 	/* return the fmr to the IBTF pool */
4885d5562f5SEiji Ota 	(void) ibt_deregister_fmr(ibmr->rc_hca_hdl, ibmr->rc_fmr_hdl);
4895d5562f5SEiji Ota 	(void) ibt_unmap_mem_area(ibmr->rc_hca_hdl, ibmr->rc_ma_hdl);
490c0dd49bdSEiji Ota 	(void) ddi_umem_unlock(ibmr->umem_cookie);
4915d5562f5SEiji Ota 	kmem_free((void *) ibmr, sizeof (*ibmr));
4925d5562f5SEiji Ota }
4935d5562f5SEiji Ota 
4945d5562f5SEiji Ota void
rdsv3_ib_drain_mrlist_fn(void * data)4955d5562f5SEiji Ota rdsv3_ib_drain_mrlist_fn(void *data)
4965d5562f5SEiji Ota {
4975d5562f5SEiji Ota 	struct rdsv3_fmr_pool *pool = (struct rdsv3_fmr_pool *)data;
4985d5562f5SEiji Ota 	ibt_hca_hdl_t hca_hdl;
4995d5562f5SEiji Ota 	ibt_fmr_pool_hdl_t fmr_pool_hdl;
5005d5562f5SEiji Ota 	unsigned int inval;
5015d5562f5SEiji Ota 	struct rdsv3_ib_mr *ibmr;
5025d5562f5SEiji Ota 	list_t *listp = &pool->f_list;
5035d5562f5SEiji Ota 	kmutex_t *lockp = &pool->f_lock;
5045d5562f5SEiji Ota 	int i;
5055d5562f5SEiji Ota 
5065d5562f5SEiji Ota 	inval = 0;
5075d5562f5SEiji Ota 	i = 0;
5085d5562f5SEiji Ota 	for (;;) {
5095d5562f5SEiji Ota 		mutex_enter(lockp);
5105d5562f5SEiji Ota 		ibmr = (struct rdsv3_ib_mr *)list_remove_head(listp);
5115d5562f5SEiji Ota 		if (ibmr)
5125d5562f5SEiji Ota 			pool->f_listcnt--;
5135d5562f5SEiji Ota 		mutex_exit(lockp);
5145d5562f5SEiji Ota 		if (!ibmr)
5155d5562f5SEiji Ota 			break;
5165d5562f5SEiji Ota 		if ((inval == 0) && ibmr->m_inval) {
5175d5562f5SEiji Ota 			inval = 1;
5185d5562f5SEiji Ota 			hca_hdl = ibmr->rc_hca_hdl;
5195d5562f5SEiji Ota 			fmr_pool_hdl = ibmr->fmr_pool_hdl;
5205d5562f5SEiji Ota 		}
5215d5562f5SEiji Ota 		i++;
5225d5562f5SEiji Ota 		rdsv3_ib_drop_mr(ibmr);
5235d5562f5SEiji Ota 	}
5245d5562f5SEiji Ota 	if (inval)
5255d5562f5SEiji Ota 		(void) ibt_flush_fmr_pool(hca_hdl, fmr_pool_hdl);
526c0dd49bdSEiji Ota }
527c0dd49bdSEiji Ota 
528c0dd49bdSEiji Ota void
rdsv3_ib_free_mr(void * trans_private,int invalidate)529c0dd49bdSEiji Ota rdsv3_ib_free_mr(void *trans_private, int invalidate)
530c0dd49bdSEiji Ota {
531c0dd49bdSEiji Ota 	struct rdsv3_ib_mr *ibmr = trans_private;
5325d5562f5SEiji Ota 	rdsv3_af_thr_t *af_thr;
533c0dd49bdSEiji Ota 
534c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_free_mr", "Enter: ibmr: %p inv: %d",
535c0dd49bdSEiji Ota 	    ibmr, invalidate);
536c0dd49bdSEiji Ota 
5375d5562f5SEiji Ota 	/* save af_thr at local as ibmr might be freed at mutex_exit */
5385d5562f5SEiji Ota 	af_thr = ibmr->m_device->fmr_soft_cq;
5395d5562f5SEiji Ota 	ibmr->m_inval = (unsigned int) invalidate;
5405d5562f5SEiji Ota 	mutex_enter(&ibmr->m_pool->f_lock);
5415d5562f5SEiji Ota 	list_insert_tail(&ibmr->m_pool->f_list, ibmr);
5425d5562f5SEiji Ota 	ibmr->m_pool->f_listcnt++;
5435d5562f5SEiji Ota 	mutex_exit(&ibmr->m_pool->f_lock);
5445d5562f5SEiji Ota 
5455d5562f5SEiji Ota 	rdsv3_af_thr_fire(af_thr);
546c0dd49bdSEiji Ota }
547c0dd49bdSEiji Ota 
548c0dd49bdSEiji Ota static int
rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device * rds_ibdev,ibt_fmr_pool_hdl_t pool_hdl,int free_all)549c0dd49bdSEiji Ota rdsv3_ib_flush_mr_pool(struct rdsv3_ib_device *rds_ibdev,
550c0dd49bdSEiji Ota     ibt_fmr_pool_hdl_t pool_hdl, int free_all)
551c0dd49bdSEiji Ota {
552c0dd49bdSEiji Ota 	/* LINTED E_FUNC_SET_NOT_USED */
553c0dd49bdSEiji Ota 	int l_free_all = free_all;
554c0dd49bdSEiji Ota 
555c0dd49bdSEiji Ota 	RDSV3_DPRINTF4("rdsv3_ib_flush_mr_pool", "Enter: pool: %p", pool_hdl);
556c0dd49bdSEiji Ota 
557c0dd49bdSEiji Ota 	rdsv3_ib_stats_inc(s_ib_rdma_mr_pool_flush);
558c0dd49bdSEiji Ota 
559c0dd49bdSEiji Ota 	(void) ibt_flush_fmr_pool(ib_get_ibt_hca_hdl(rds_ibdev->dev),
560c0dd49bdSEiji Ota 	    pool_hdl);
561c0dd49bdSEiji Ota 	return (0);
562c0dd49bdSEiji Ota }
563