/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
2530e7468fSPeter Dunlap 
2630e7468fSPeter Dunlap #include <sys/types.h>
2730e7468fSPeter Dunlap #include <sys/ddi.h>
2830e7468fSPeter Dunlap #include <sys/types.h>
2930e7468fSPeter Dunlap #include <sys/socket.h>
3030e7468fSPeter Dunlap #include <netinet/in.h>
3130e7468fSPeter Dunlap #include <sys/sunddi.h>
3230e7468fSPeter Dunlap #include <sys/sysmacros.h>
3330e7468fSPeter Dunlap #include <sys/ib/ibtl/ibti.h>
3430e7468fSPeter Dunlap #include <sys/ib/ibtl/ibtl_types.h>
3530e7468fSPeter Dunlap 
3630e7468fSPeter Dunlap #include <sys/ib/clients/iser/iser.h>
3730e7468fSPeter Dunlap 
/*
 * iser_resource.c
 *    Routines for allocating resources for iSER
 */
4230e7468fSPeter Dunlap 
4330e7468fSPeter Dunlap static iser_mr_t *iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
4430e7468fSPeter Dunlap     ibt_mr_flags_t mr_flags);
4530e7468fSPeter Dunlap 
4630e7468fSPeter Dunlap static void iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr);
4730e7468fSPeter Dunlap 
4830e7468fSPeter Dunlap static iser_mr_t *iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr,
4930e7468fSPeter Dunlap     ib_memlen_t len, ibt_mr_flags_t mr_flags);
5030e7468fSPeter Dunlap 
5130e7468fSPeter Dunlap static void iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr);
5230e7468fSPeter Dunlap 
5330e7468fSPeter Dunlap static int iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2);
5430e7468fSPeter Dunlap 
5530e7468fSPeter Dunlap /*
5630e7468fSPeter Dunlap  * iser_init_hca_caches()
5730e7468fSPeter Dunlap  * Invoked per HCA instance initialization, to establish HCA-wide
5830e7468fSPeter Dunlap  * message and buffer kmem caches. Note we'll uniquify cache names
5930e7468fSPeter Dunlap  * with the lower 32-bits of the HCA GUID.
6030e7468fSPeter Dunlap  */
6130e7468fSPeter Dunlap void
iser_init_hca_caches(iser_hca_t * hca)6230e7468fSPeter Dunlap iser_init_hca_caches(iser_hca_t *hca)
6330e7468fSPeter Dunlap {
6430e7468fSPeter Dunlap 	char name[ISER_CACHE_NAMELEN];
6530e7468fSPeter Dunlap 
6630e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_pool_%08x",
6730e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
6830e7468fSPeter Dunlap 	hca->hca_msg_pool = iser_vmem_create(name, hca, ISER_MSG_MR_CHUNKSIZE,
6930e7468fSPeter Dunlap 	    ISER_MSG_POOL_MAX, ISER_MSG_MR_FLAGS);
7030e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_msg_cache_%08x",
7130e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
7230e7468fSPeter Dunlap 	hca->iser_msg_cache = kmem_cache_create(name, sizeof (iser_msg_t),
7330e7468fSPeter Dunlap 	    0, &iser_msg_cache_constructor, &iser_msg_cache_destructor,
7430e7468fSPeter Dunlap 	    NULL, hca, NULL, KM_SLEEP);
7530e7468fSPeter Dunlap 
7630e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_pool_%08x",
7730e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
7830e7468fSPeter Dunlap 	hca->hca_buf_pool = iser_vmem_create(name, hca, ISER_BUF_MR_CHUNKSIZE,
7930e7468fSPeter Dunlap 	    ISER_BUF_POOL_MAX, ISER_BUF_MR_FLAGS);
8030e7468fSPeter Dunlap 	(void) snprintf(name, ISER_CACHE_NAMELEN, "iser_buf_cache_%08x",
8130e7468fSPeter Dunlap 	    (uint32_t)(hca->hca_guid & 0xFFFFFFFF));
8230e7468fSPeter Dunlap 	hca->iser_buf_cache = kmem_cache_create(name, sizeof (iser_buf_t),
8330e7468fSPeter Dunlap 	    0, &iser_buf_cache_constructor, &iser_buf_cache_destructor,
8430e7468fSPeter Dunlap 	    NULL, hca, NULL, KM_SLEEP);
8530e7468fSPeter Dunlap }
8630e7468fSPeter Dunlap 
8730e7468fSPeter Dunlap /*
8830e7468fSPeter Dunlap  * iser_fini_hca_caches()
8930e7468fSPeter Dunlap  * Invoked per HCA instance teardown, this routine cleans up the
9030e7468fSPeter Dunlap  * message and buffer handle caches.
9130e7468fSPeter Dunlap  */
9230e7468fSPeter Dunlap void
iser_fini_hca_caches(iser_hca_t * hca)9330e7468fSPeter Dunlap iser_fini_hca_caches(iser_hca_t *hca)
9430e7468fSPeter Dunlap {
9530e7468fSPeter Dunlap 	kmem_cache_destroy(hca->iser_buf_cache);
9630e7468fSPeter Dunlap 	iser_vmem_destroy(hca->hca_buf_pool);
9730e7468fSPeter Dunlap 	kmem_cache_destroy(hca->iser_msg_cache);
9830e7468fSPeter Dunlap 	iser_vmem_destroy(hca->hca_msg_pool);
9930e7468fSPeter Dunlap }
10030e7468fSPeter Dunlap 
10130e7468fSPeter Dunlap /*
10230e7468fSPeter Dunlap  * Allocate and initialize an iSER WR handle
10330e7468fSPeter Dunlap  */
10430e7468fSPeter Dunlap iser_wr_t *
iser_wr_get()10530e7468fSPeter Dunlap iser_wr_get()
10630e7468fSPeter Dunlap {
10730e7468fSPeter Dunlap 	iser_wr_t	*iser_wr;
10830e7468fSPeter Dunlap 
10930e7468fSPeter Dunlap 	iser_wr = kmem_cache_alloc(iser_state->iser_wr_cache, KM_NOSLEEP);
11030e7468fSPeter Dunlap 	if (iser_wr != NULL) {
11130e7468fSPeter Dunlap 		iser_wr->iw_type = ISER_WR_UNDEFINED;
11230e7468fSPeter Dunlap 		iser_wr->iw_msg  = NULL;
11330e7468fSPeter Dunlap 		iser_wr->iw_buf  = NULL;
11430e7468fSPeter Dunlap 		iser_wr->iw_pdu  = NULL;
11530e7468fSPeter Dunlap 	}
11630e7468fSPeter Dunlap 
11730e7468fSPeter Dunlap 	return (iser_wr);
11830e7468fSPeter Dunlap }
11930e7468fSPeter Dunlap 
12030e7468fSPeter Dunlap /*
12130e7468fSPeter Dunlap  * Free an iSER WR handle back to the global cache
12230e7468fSPeter Dunlap  */
12330e7468fSPeter Dunlap void
iser_wr_free(iser_wr_t * iser_wr)12430e7468fSPeter Dunlap iser_wr_free(iser_wr_t *iser_wr)
12530e7468fSPeter Dunlap {
12630e7468fSPeter Dunlap 	kmem_cache_free(iser_state->iser_wr_cache, iser_wr);
12730e7468fSPeter Dunlap }
12830e7468fSPeter Dunlap 
12930e7468fSPeter Dunlap /*
13030e7468fSPeter Dunlap  * iser_msg_cache_constructor()
13130e7468fSPeter Dunlap  * Allocate and register memory for an iSER Control-type PDU message.
13230e7468fSPeter Dunlap  * The cached objects will retain this memory registration in the HCA,
13330e7468fSPeter Dunlap  * and thus provide a cache of pre-allocated and registered messages
13430e7468fSPeter Dunlap  * for use in iSER.
13530e7468fSPeter Dunlap  */
13630e7468fSPeter Dunlap /* ARGSUSED */
13730e7468fSPeter Dunlap int
iser_msg_cache_constructor(void * msg_void,void * arg,int flags)13830e7468fSPeter Dunlap iser_msg_cache_constructor(void *msg_void, void *arg, int flags)
13930e7468fSPeter Dunlap {
14030e7468fSPeter Dunlap 	void		*memp = NULL;
14130e7468fSPeter Dunlap 	int		status;
14230e7468fSPeter Dunlap 	iser_msg_t	*msg = (iser_msg_t *)msg_void;
14330e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
14430e7468fSPeter Dunlap 	iser_mr_t	mr;
14530e7468fSPeter Dunlap 
14630e7468fSPeter Dunlap 	memp = iser_vmem_alloc(hca->hca_msg_pool, ISER_MAX_CTRLPDU_LEN);
14730e7468fSPeter Dunlap 	if (memp == NULL) {
14830e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
14930e7468fSPeter Dunlap 		    "failed to allocate backing memory");
15030e7468fSPeter Dunlap 		return (DDI_FAILURE);
15130e7468fSPeter Dunlap 	}
15230e7468fSPeter Dunlap 
15330e7468fSPeter Dunlap 	/* Fill in iser_mr for the memory we just allocated */
15430e7468fSPeter Dunlap 	status = iser_vmem_mr(hca->hca_msg_pool, memp,
15530e7468fSPeter Dunlap 	    ISER_MAX_CTRLPDU_LEN, &mr);
15630e7468fSPeter Dunlap 	if (status != IDM_STATUS_SUCCESS) {
15730e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_msg_cache_constructor: "
15830e7468fSPeter Dunlap 		    "couldn't find mr for %p", memp);
15930e7468fSPeter Dunlap 		iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
16030e7468fSPeter Dunlap 		return (DDI_FAILURE);
16130e7468fSPeter Dunlap 	}
16230e7468fSPeter Dunlap 
16330e7468fSPeter Dunlap 	msg->msg_ds.ds_va	= (ib_vaddr_t)(uintptr_t)memp;
16430e7468fSPeter Dunlap 	msg->msg_ds.ds_key	= mr.is_mrlkey;
16530e7468fSPeter Dunlap 
16630e7468fSPeter Dunlap 	/* Set a backpointer to this cache to save a lookup on free */
16730e7468fSPeter Dunlap 	msg->cache = hca->iser_msg_cache;
16830e7468fSPeter Dunlap 
16930e7468fSPeter Dunlap 	return (DDI_SUCCESS);
17030e7468fSPeter Dunlap }
17130e7468fSPeter Dunlap 
17230e7468fSPeter Dunlap /*
17330e7468fSPeter Dunlap  * Deregister and free registered memory from an iser_msg_t handle.
17430e7468fSPeter Dunlap  */
17530e7468fSPeter Dunlap void
iser_msg_cache_destructor(void * mr,void * arg)17630e7468fSPeter Dunlap iser_msg_cache_destructor(void *mr, void *arg)
17730e7468fSPeter Dunlap {
17830e7468fSPeter Dunlap 	iser_msg_t	*msg = (iser_msg_t *)mr;
17930e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
18030e7468fSPeter Dunlap 	uint8_t		*memp;
18130e7468fSPeter Dunlap 
18230e7468fSPeter Dunlap 	memp = (uint8_t *)(uintptr_t)(ib_vaddr_t)msg->msg_ds.ds_va;
18330e7468fSPeter Dunlap 	iser_vmem_free(hca->hca_msg_pool, memp, ISER_MAX_CTRLPDU_LEN);
18430e7468fSPeter Dunlap }
18530e7468fSPeter Dunlap 
18630e7468fSPeter Dunlap /*
18730e7468fSPeter Dunlap  * Pull a msg handle off of hca's msg cache. If no object is available
18830e7468fSPeter Dunlap  * on the cache, a new message buffer will be allocated and registered
18930e7468fSPeter Dunlap  * with the HCA. Once freed, this message will not be unregistered, thus
19030e7468fSPeter Dunlap  * building up a cache of pre-allocated and registered message buffers
19130e7468fSPeter Dunlap  * over time.
19230e7468fSPeter Dunlap  */
19330e7468fSPeter Dunlap iser_msg_t *
iser_msg_get(iser_hca_t * hca,int num,int * ret)19430e7468fSPeter Dunlap iser_msg_get(iser_hca_t *hca, int num, int *ret)
19530e7468fSPeter Dunlap {
19630e7468fSPeter Dunlap 	iser_msg_t	*tmp, *msg = NULL;
19730e7468fSPeter Dunlap 	int i;
19830e7468fSPeter Dunlap 
19930e7468fSPeter Dunlap 	ASSERT(hca != NULL);
20030e7468fSPeter Dunlap 
20130e7468fSPeter Dunlap 	/*
20230e7468fSPeter Dunlap 	 * Pull num number of message handles off the cache, linking
20330e7468fSPeter Dunlap 	 * them if more than one have been requested.
20430e7468fSPeter Dunlap 	 */
20530e7468fSPeter Dunlap 	for (i = 0; i < num; i++) {
20630e7468fSPeter Dunlap 		tmp = kmem_cache_alloc(hca->iser_msg_cache, KM_NOSLEEP);
20730e7468fSPeter Dunlap 		if (tmp == NULL) {
20830e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_msg_get: alloc failed, "
20930e7468fSPeter Dunlap 			    "requested (%d) allocated (%d)", num, i);
21030e7468fSPeter Dunlap 			break;
21130e7468fSPeter Dunlap 		}
21230e7468fSPeter Dunlap 		tmp->msg_ds.ds_len	= ISER_MAX_CTRLPDU_LEN;
21330e7468fSPeter Dunlap 		tmp->nextp = msg;
21430e7468fSPeter Dunlap 		msg = tmp;
21530e7468fSPeter Dunlap 	}
21630e7468fSPeter Dunlap 
21730e7468fSPeter Dunlap 	if (ret != NULL) {
21830e7468fSPeter Dunlap 		*ret = i;
21930e7468fSPeter Dunlap 	}
22030e7468fSPeter Dunlap 
22130e7468fSPeter Dunlap 	return (msg);
22230e7468fSPeter Dunlap }
22330e7468fSPeter Dunlap 
22430e7468fSPeter Dunlap /*
22530e7468fSPeter Dunlap  * Free this msg back to its cache, leaving the memory contained by
22630e7468fSPeter Dunlap  * it registered for later re-use.
22730e7468fSPeter Dunlap  */
22830e7468fSPeter Dunlap void
iser_msg_free(iser_msg_t * msg)22930e7468fSPeter Dunlap iser_msg_free(iser_msg_t *msg)
23030e7468fSPeter Dunlap {
23130e7468fSPeter Dunlap 	kmem_cache_free(msg->cache, msg);
23230e7468fSPeter Dunlap }
23330e7468fSPeter Dunlap 
23430e7468fSPeter Dunlap /*
23530e7468fSPeter Dunlap  * iser_buf_cache_constructor()
23630e7468fSPeter Dunlap  * Allocate and register memory for an iSER RDMA operation. The cached
23730e7468fSPeter Dunlap  * objects will retain this memory registration in the HCA, and thus
23830e7468fSPeter Dunlap  * provide a cache of pre-allocated and registered messages for use in
23930e7468fSPeter Dunlap  * iSER.
24030e7468fSPeter Dunlap  */
24130e7468fSPeter Dunlap /* ARGSUSED */
24230e7468fSPeter Dunlap int
iser_buf_cache_constructor(void * mr,void * arg,int flags)24330e7468fSPeter Dunlap iser_buf_cache_constructor(void *mr, void *arg, int flags)
24430e7468fSPeter Dunlap {
24530e7468fSPeter Dunlap 	uint8_t		*memp;
24630e7468fSPeter Dunlap 	idm_status_t	status;
24730e7468fSPeter Dunlap 	iser_buf_t	*iser_buf = (iser_buf_t *)mr;
24830e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
24930e7468fSPeter Dunlap 
25030e7468fSPeter Dunlap 	/* Allocate an iser_mr handle for this buffer */
25130e7468fSPeter Dunlap 	iser_buf->iser_mr = kmem_zalloc(sizeof (iser_mr_t), KM_NOSLEEP);
25230e7468fSPeter Dunlap 	if (iser_buf->iser_mr == NULL) {
25330e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_buf_cache_constructor: "
25430e7468fSPeter Dunlap 		    "failed to allocate memory for iser_mr handle");
25530e7468fSPeter Dunlap 		return (DDI_FAILURE);
25630e7468fSPeter Dunlap 	}
25730e7468fSPeter Dunlap 
25830e7468fSPeter Dunlap 	memp = iser_vmem_alloc(hca->hca_buf_pool, ISER_DEFAULT_BUFLEN);
25930e7468fSPeter Dunlap 	if (memp == NULL) {
26030e7468fSPeter Dunlap 		kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
26130e7468fSPeter Dunlap 		return (DDI_FAILURE);
26230e7468fSPeter Dunlap 	}
26330e7468fSPeter Dunlap 
26430e7468fSPeter Dunlap 	/* Fill in iser_mr for the memory we just allocated */
26530e7468fSPeter Dunlap 	status = iser_vmem_mr(hca->hca_buf_pool, memp, ISER_DEFAULT_BUFLEN,
26630e7468fSPeter Dunlap 	    iser_buf->iser_mr);
26730e7468fSPeter Dunlap 
26830e7468fSPeter Dunlap 	if (status != IDM_STATUS_SUCCESS) {
26930e7468fSPeter Dunlap 		return (DDI_FAILURE);
27030e7468fSPeter Dunlap 	}
27130e7468fSPeter Dunlap 
27230e7468fSPeter Dunlap 	/* Set buf pointer and len for later manipulation (if necessary) */
27330e7468fSPeter Dunlap 	iser_buf->buf		= (uint64_t *)(uintptr_t)memp;
27430e7468fSPeter Dunlap 	iser_buf->buflen	= ISER_DEFAULT_BUFLEN;
27530e7468fSPeter Dunlap 
27630e7468fSPeter Dunlap 	/* Populate the SGE Vaddr and L_key for the xfer operation later */
27730e7468fSPeter Dunlap 	iser_buf->buf_ds.ds_va	= iser_buf->iser_mr->is_mrva;
27830e7468fSPeter Dunlap 	iser_buf->buf_ds.ds_key	= iser_buf->iser_mr->is_mrlkey;
27930e7468fSPeter Dunlap 
28030e7468fSPeter Dunlap 	/* Set a backpointer to this cache to save a lookup on free */
28130e7468fSPeter Dunlap 	iser_buf->cache = hca->iser_buf_cache;
28230e7468fSPeter Dunlap 
28330e7468fSPeter Dunlap 	gethrestime(&iser_buf->buf_constructed);
28430e7468fSPeter Dunlap 
28530e7468fSPeter Dunlap 	return (DDI_SUCCESS);
28630e7468fSPeter Dunlap }
28730e7468fSPeter Dunlap 
28830e7468fSPeter Dunlap /*
28930e7468fSPeter Dunlap  * Deregister and free registered memory from an iser_buf_t handle.
29030e7468fSPeter Dunlap  */
29130e7468fSPeter Dunlap void
iser_buf_cache_destructor(void * mr,void * arg)29230e7468fSPeter Dunlap iser_buf_cache_destructor(void *mr, void *arg)
29330e7468fSPeter Dunlap {
29430e7468fSPeter Dunlap 	iser_buf_t	*iser_buf = (iser_buf_t *)mr;
29530e7468fSPeter Dunlap 	iser_hca_t	*hca = (iser_hca_t *)arg;
29630e7468fSPeter Dunlap 
29730e7468fSPeter Dunlap 	gethrestime(&iser_buf->buf_destructed);
29830e7468fSPeter Dunlap 
29930e7468fSPeter Dunlap 	iser_vmem_free(hca->hca_buf_pool, iser_buf->buf, iser_buf->buflen);
30030e7468fSPeter Dunlap 
30130e7468fSPeter Dunlap 	kmem_free(iser_buf->iser_mr, sizeof (iser_mr_t));
30230e7468fSPeter Dunlap }
30330e7468fSPeter Dunlap 
30430e7468fSPeter Dunlap /*
30530e7468fSPeter Dunlap  * Registration for initiator buffers
30630e7468fSPeter Dunlap  */
30730e7468fSPeter Dunlap int
iser_reg_rdma_mem(iser_hca_t * hca,idm_buf_t * idb)30830e7468fSPeter Dunlap iser_reg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
30930e7468fSPeter Dunlap {
31030e7468fSPeter Dunlap 	iser_mr_t	*iser_mr = NULL;
31130e7468fSPeter Dunlap 
31230e7468fSPeter Dunlap 	ASSERT(idb != NULL);
31330e7468fSPeter Dunlap 	ASSERT(idb->idb_buflen > 0);
31430e7468fSPeter Dunlap 
31530e7468fSPeter Dunlap 	iser_mr = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)idb->idb_buf,
31630e7468fSPeter Dunlap 	    idb->idb_buflen, ISER_BUF_MR_FLAGS | IBT_MR_NOSLEEP);
31730e7468fSPeter Dunlap 	if (iser_mr == NULL) {
31830e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_reg_rdma_mem: failed to register "
31930e7468fSPeter Dunlap 		    "memory for idm_buf_t");
32030e7468fSPeter Dunlap 		return (DDI_FAILURE);
32130e7468fSPeter Dunlap 	}
32230e7468fSPeter Dunlap 
32330e7468fSPeter Dunlap 	idb->idb_reg_private	= (void *)iser_mr;
32430e7468fSPeter Dunlap 
32530e7468fSPeter Dunlap 	return (DDI_SUCCESS);
32630e7468fSPeter Dunlap }
32730e7468fSPeter Dunlap 
32830e7468fSPeter Dunlap void
iser_dereg_rdma_mem(iser_hca_t * hca,idm_buf_t * idb)32930e7468fSPeter Dunlap iser_dereg_rdma_mem(iser_hca_t *hca, idm_buf_t *idb)
33030e7468fSPeter Dunlap {
33130e7468fSPeter Dunlap 	iser_mr_t	*mr;
33230e7468fSPeter Dunlap 
33330e7468fSPeter Dunlap 	ASSERT(idb != NULL);
33430e7468fSPeter Dunlap 	mr = (iser_mr_t *)idb->idb_reg_private;
33530e7468fSPeter Dunlap 
33630e7468fSPeter Dunlap 	iser_dereg_mem(hca, mr);
33730e7468fSPeter Dunlap }
33830e7468fSPeter Dunlap 
33930e7468fSPeter Dunlap iser_vmem_mr_pool_t *
iser_vmem_create(const char * name,iser_hca_t * hca,ib_memlen_t chunksize,uint64_t max_total_size,ibt_mr_flags_t arena_mr_flags)34030e7468fSPeter Dunlap iser_vmem_create(const char *name, iser_hca_t *hca, ib_memlen_t chunksize,
34130e7468fSPeter Dunlap     uint64_t max_total_size, ibt_mr_flags_t arena_mr_flags)
34230e7468fSPeter Dunlap {
34330e7468fSPeter Dunlap 	iser_mr_t		*first_chunk;
34430e7468fSPeter Dunlap 	iser_vmem_mr_pool_t	*result;
34530e7468fSPeter Dunlap 
34630e7468fSPeter Dunlap 	ASSERT(chunksize <= max_total_size);
34730e7468fSPeter Dunlap 	result = kmem_zalloc(sizeof (*result), KM_SLEEP);
34830e7468fSPeter Dunlap 	result->ivmp_hca = hca;
34930e7468fSPeter Dunlap 	result->ivmp_mr_flags = arena_mr_flags;
35030e7468fSPeter Dunlap 	result->ivmp_chunksize = chunksize;
35130e7468fSPeter Dunlap 	result->ivmp_max_total_size = max_total_size;
35230e7468fSPeter Dunlap 	mutex_init(&result->ivmp_mutex, NULL, MUTEX_DRIVER, NULL);
35330e7468fSPeter Dunlap 	avl_create(&result->ivmp_mr_list, iser_vmem_mr_compare,
35430e7468fSPeter Dunlap 	    sizeof (iser_mr_t), offsetof(iser_mr_t, is_avl_ln));
35530e7468fSPeter Dunlap 
35630e7468fSPeter Dunlap 	first_chunk = iser_vmem_chunk_alloc(hca, chunksize,
35730e7468fSPeter Dunlap 	    arena_mr_flags | IBT_MR_SLEEP);
35830e7468fSPeter Dunlap 
35930e7468fSPeter Dunlap 	avl_add(&result->ivmp_mr_list, first_chunk);
36030e7468fSPeter Dunlap 	result->ivmp_total_size += chunksize;
36130e7468fSPeter Dunlap 
36230e7468fSPeter Dunlap 	result->ivmp_vmem = vmem_create(name,
36330e7468fSPeter Dunlap 	    (void *)(uintptr_t)first_chunk->is_mrva,
36430e7468fSPeter Dunlap 	    (size_t)first_chunk->is_mrlen, ISER_MR_QUANTSIZE,
36530e7468fSPeter Dunlap 	    NULL, NULL, NULL, 0, VM_SLEEP);
36630e7468fSPeter Dunlap 
36730e7468fSPeter Dunlap 	return (result);
36830e7468fSPeter Dunlap }
36930e7468fSPeter Dunlap 
37030e7468fSPeter Dunlap void
iser_vmem_destroy(iser_vmem_mr_pool_t * vmr_pool)37130e7468fSPeter Dunlap iser_vmem_destroy(iser_vmem_mr_pool_t *vmr_pool)
37230e7468fSPeter Dunlap {
37330e7468fSPeter Dunlap 	iser_mr_t	*chunk, *next_chunk;
37430e7468fSPeter Dunlap 
37530e7468fSPeter Dunlap 	mutex_enter(&vmr_pool->ivmp_mutex);
37630e7468fSPeter Dunlap 	vmem_destroy(vmr_pool->ivmp_vmem);
37730e7468fSPeter Dunlap 
37830e7468fSPeter Dunlap 	for (chunk = avl_first(&vmr_pool->ivmp_mr_list); chunk != NULL;
37930e7468fSPeter Dunlap 	    chunk = next_chunk) {
38030e7468fSPeter Dunlap 		next_chunk = AVL_NEXT(&vmr_pool->ivmp_mr_list, chunk);
38130e7468fSPeter Dunlap 		avl_remove(&vmr_pool->ivmp_mr_list, chunk);
38230e7468fSPeter Dunlap 		iser_vmem_chunk_free(vmr_pool->ivmp_hca, chunk);
38330e7468fSPeter Dunlap 	}
38430e7468fSPeter Dunlap 	mutex_exit(&vmr_pool->ivmp_mutex);
38530e7468fSPeter Dunlap 
38630e7468fSPeter Dunlap 	avl_destroy(&vmr_pool->ivmp_mr_list);
38730e7468fSPeter Dunlap 	mutex_destroy(&vmr_pool->ivmp_mutex);
38830e7468fSPeter Dunlap 
38930e7468fSPeter Dunlap 	kmem_free(vmr_pool, sizeof (*vmr_pool));
39030e7468fSPeter Dunlap }
39130e7468fSPeter Dunlap 
39230e7468fSPeter Dunlap void *
iser_vmem_alloc(iser_vmem_mr_pool_t * vmr_pool,size_t size)39330e7468fSPeter Dunlap iser_vmem_alloc(iser_vmem_mr_pool_t *vmr_pool, size_t size)
39430e7468fSPeter Dunlap {
39530e7468fSPeter Dunlap 	void		*result;
39630e7468fSPeter Dunlap 	iser_mr_t	*next_chunk;
39730e7468fSPeter Dunlap 	ib_memlen_t	chunk_len;
39830e7468fSPeter Dunlap 	result = vmem_alloc(vmr_pool->ivmp_vmem, size,
39930e7468fSPeter Dunlap 	    VM_NOSLEEP | VM_FIRSTFIT);
40030e7468fSPeter Dunlap 	if (result == NULL) {
40130e7468fSPeter Dunlap 		mutex_enter(&vmr_pool->ivmp_mutex);
40230e7468fSPeter Dunlap 		chunk_len = vmr_pool->ivmp_chunksize;
40330e7468fSPeter Dunlap 		if ((vmr_pool->ivmp_total_size + chunk_len) >
40430e7468fSPeter Dunlap 		    vmr_pool->ivmp_max_total_size) {
40530e7468fSPeter Dunlap 			/*
40630e7468fSPeter Dunlap 			 * Don't go over the pool size limit.  We can allocate
40730e7468fSPeter Dunlap 			 * partial chunks so it's not always the case that
40830e7468fSPeter Dunlap 			 * current_size + chunk_size == max_total_size
40930e7468fSPeter Dunlap 			 */
41030e7468fSPeter Dunlap 			if (vmr_pool->ivmp_total_size >=
41130e7468fSPeter Dunlap 			    vmr_pool->ivmp_max_total_size) {
41230e7468fSPeter Dunlap 				mutex_exit(&vmr_pool->ivmp_mutex);
41330e7468fSPeter Dunlap 				return (NULL);
41430e7468fSPeter Dunlap 			} else {
41530e7468fSPeter Dunlap 				chunk_len = vmr_pool->ivmp_max_total_size -
41630e7468fSPeter Dunlap 				    vmr_pool->ivmp_total_size;
41730e7468fSPeter Dunlap 			}
41830e7468fSPeter Dunlap 		}
41930e7468fSPeter Dunlap 		next_chunk = iser_vmem_chunk_alloc(vmr_pool->ivmp_hca,
42030e7468fSPeter Dunlap 		    chunk_len, vmr_pool->ivmp_mr_flags | IBT_MR_NOSLEEP);
42130e7468fSPeter Dunlap 		if (next_chunk != NULL) {
42230e7468fSPeter Dunlap 			if (vmem_add(vmr_pool->ivmp_vmem,
42330e7468fSPeter Dunlap 			    (void *)(uintptr_t)next_chunk->is_mrva,
42430e7468fSPeter Dunlap 			    next_chunk->is_mrlen, VM_NOSLEEP) == NULL) {
42530e7468fSPeter Dunlap 				/* Free the chunk we just allocated */
42630e7468fSPeter Dunlap 				iser_vmem_chunk_free(vmr_pool->ivmp_hca,
42730e7468fSPeter Dunlap 				    next_chunk);
42830e7468fSPeter Dunlap 			} else {
42930e7468fSPeter Dunlap 				vmr_pool->ivmp_total_size +=
43030e7468fSPeter Dunlap 				    next_chunk->is_mrlen;
43130e7468fSPeter Dunlap 				avl_add(&vmr_pool->ivmp_mr_list, next_chunk);
43230e7468fSPeter Dunlap 			}
43330e7468fSPeter Dunlap 
43430e7468fSPeter Dunlap 			result = vmem_alloc(vmr_pool->ivmp_vmem, size,
43530e7468fSPeter Dunlap 			    VM_NOSLEEP | VM_FIRSTFIT);
43630e7468fSPeter Dunlap 		}
43730e7468fSPeter Dunlap 
43830e7468fSPeter Dunlap 		mutex_exit(&vmr_pool->ivmp_mutex);
43930e7468fSPeter Dunlap 	}
44030e7468fSPeter Dunlap 
44130e7468fSPeter Dunlap 	return (result);
44230e7468fSPeter Dunlap }
44330e7468fSPeter Dunlap 
44430e7468fSPeter Dunlap 
44530e7468fSPeter Dunlap void
iser_vmem_free(iser_vmem_mr_pool_t * vmr_pool,void * vaddr,size_t size)44630e7468fSPeter Dunlap iser_vmem_free(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size)
44730e7468fSPeter Dunlap {
44830e7468fSPeter Dunlap 	vmem_free(vmr_pool->ivmp_vmem, vaddr, size);
44930e7468fSPeter Dunlap }
45030e7468fSPeter Dunlap 
45130e7468fSPeter Dunlap idm_status_t
iser_vmem_mr(iser_vmem_mr_pool_t * vmr_pool,void * vaddr,size_t size,iser_mr_t * mr)45230e7468fSPeter Dunlap iser_vmem_mr(iser_vmem_mr_pool_t *vmr_pool, void *vaddr, size_t size,
45330e7468fSPeter Dunlap     iser_mr_t *mr)
45430e7468fSPeter Dunlap {
45530e7468fSPeter Dunlap 	avl_index_t	where;
45630e7468fSPeter Dunlap 	ib_vaddr_t	mrva = (ib_vaddr_t)(uintptr_t)vaddr;
45730e7468fSPeter Dunlap 	iser_mr_t	search_chunk;
45830e7468fSPeter Dunlap 	iser_mr_t	*nearest_chunk;
45930e7468fSPeter Dunlap 	ib_vaddr_t	chunk_end;
46030e7468fSPeter Dunlap 
46130e7468fSPeter Dunlap 	mutex_enter(&vmr_pool->ivmp_mutex);
46230e7468fSPeter Dunlap 	search_chunk.is_mrva = mrva;
46330e7468fSPeter Dunlap 	nearest_chunk = avl_find(&vmr_pool->ivmp_mr_list, &search_chunk,
46430e7468fSPeter Dunlap 	    &where);
46530e7468fSPeter Dunlap 	if (nearest_chunk == NULL) {
46630e7468fSPeter Dunlap 		nearest_chunk = avl_nearest(&vmr_pool->ivmp_mr_list, where,
46730e7468fSPeter Dunlap 		    AVL_BEFORE);
46830e7468fSPeter Dunlap 		if (nearest_chunk == NULL) {
46930e7468fSPeter Dunlap 			mutex_exit(&vmr_pool->ivmp_mutex);
47030e7468fSPeter Dunlap 			return (IDM_STATUS_FAIL);
47130e7468fSPeter Dunlap 		}
47230e7468fSPeter Dunlap 	}
47330e7468fSPeter Dunlap 
47430e7468fSPeter Dunlap 	/* See if this chunk contains the specified address range */
47530e7468fSPeter Dunlap 	ASSERT(nearest_chunk->is_mrva <= mrva);
47630e7468fSPeter Dunlap 	chunk_end = nearest_chunk->is_mrva + nearest_chunk->is_mrlen;
47730e7468fSPeter Dunlap 	if (chunk_end >= mrva + size) {
47830e7468fSPeter Dunlap 		/* Yes, this chunk contains the address range */
47930e7468fSPeter Dunlap 		mr->is_mrhdl = nearest_chunk->is_mrhdl;
48030e7468fSPeter Dunlap 		mr->is_mrva = mrva;
48130e7468fSPeter Dunlap 		mr->is_mrlen = size;
48230e7468fSPeter Dunlap 		mr->is_mrlkey = nearest_chunk->is_mrlkey;
48330e7468fSPeter Dunlap 		mr->is_mrrkey = nearest_chunk->is_mrrkey;
48430e7468fSPeter Dunlap 		mutex_exit(&vmr_pool->ivmp_mutex);
48530e7468fSPeter Dunlap 		return (IDM_STATUS_SUCCESS);
48630e7468fSPeter Dunlap 	}
48730e7468fSPeter Dunlap 	mutex_exit(&vmr_pool->ivmp_mutex);
48830e7468fSPeter Dunlap 
48930e7468fSPeter Dunlap 	return (IDM_STATUS_FAIL);
49030e7468fSPeter Dunlap }
49130e7468fSPeter Dunlap 
49230e7468fSPeter Dunlap static iser_mr_t *
iser_vmem_chunk_alloc(iser_hca_t * hca,ib_memlen_t chunksize,ibt_mr_flags_t mr_flags)49330e7468fSPeter Dunlap iser_vmem_chunk_alloc(iser_hca_t *hca, ib_memlen_t chunksize,
49430e7468fSPeter Dunlap     ibt_mr_flags_t mr_flags)
49530e7468fSPeter Dunlap {
49630e7468fSPeter Dunlap 	void		*chunk = NULL;
49730e7468fSPeter Dunlap 	iser_mr_t	*result = NULL;
49830e7468fSPeter Dunlap 	int		km_flags = 0;
49930e7468fSPeter Dunlap 
50030e7468fSPeter Dunlap 	if (mr_flags & IBT_MR_NOSLEEP)
50130e7468fSPeter Dunlap 		km_flags |= KM_NOSLEEP;
50230e7468fSPeter Dunlap 
50330e7468fSPeter Dunlap 	while ((chunk == NULL) && (chunksize >= ISER_MIN_CHUNKSIZE)) {
50430e7468fSPeter Dunlap 		chunk = kmem_alloc(chunksize, km_flags);
50530e7468fSPeter Dunlap 		if (chunk == NULL) {
50630e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
50730e7468fSPeter Dunlap 			    "chunk alloc of %d failed, trying %d",
50830e7468fSPeter Dunlap 			    (int)chunksize, (int)(chunksize / 2));
50930e7468fSPeter Dunlap 			chunksize /= 2;
51030e7468fSPeter Dunlap 		} else {
51130e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
51230e7468fSPeter Dunlap 			    "New chunk %p size %d", chunk, (int)chunksize);
51330e7468fSPeter Dunlap 		}
51430e7468fSPeter Dunlap 	}
51530e7468fSPeter Dunlap 
51630e7468fSPeter Dunlap 	if (chunk != NULL) {
51730e7468fSPeter Dunlap 		result = iser_reg_mem(hca, (ib_vaddr_t)(uintptr_t)chunk,
51830e7468fSPeter Dunlap 		    chunksize, mr_flags);
51930e7468fSPeter Dunlap 		if (result == NULL) {
52030e7468fSPeter Dunlap 			ISER_LOG(CE_NOTE, "iser_vmem_chunk_alloc: "
52130e7468fSPeter Dunlap 			    "Chunk registration failed");
52230e7468fSPeter Dunlap 			kmem_free(chunk, chunksize);
52330e7468fSPeter Dunlap 		}
52430e7468fSPeter Dunlap 	}
52530e7468fSPeter Dunlap 
52630e7468fSPeter Dunlap 	return (result);
52730e7468fSPeter Dunlap }
52830e7468fSPeter Dunlap 
52930e7468fSPeter Dunlap static void
iser_vmem_chunk_free(iser_hca_t * hca,iser_mr_t * iser_mr)53030e7468fSPeter Dunlap iser_vmem_chunk_free(iser_hca_t *hca, iser_mr_t *iser_mr)
53130e7468fSPeter Dunlap {
53230e7468fSPeter Dunlap 	void		*chunk		= (void *)(uintptr_t)iser_mr->is_mrva;
53330e7468fSPeter Dunlap 	ib_memlen_t	chunksize	= iser_mr->is_mrlen;
53430e7468fSPeter Dunlap 
53530e7468fSPeter Dunlap 	iser_dereg_mem(hca, iser_mr);
53630e7468fSPeter Dunlap 
53730e7468fSPeter Dunlap 	kmem_free(chunk, chunksize);
53830e7468fSPeter Dunlap }
53930e7468fSPeter Dunlap 
54030e7468fSPeter Dunlap iser_mr_t *
iser_reg_mem(iser_hca_t * hca,ib_vaddr_t vaddr,ib_memlen_t len,ibt_mr_flags_t mr_flags)54130e7468fSPeter Dunlap iser_reg_mem(iser_hca_t *hca, ib_vaddr_t vaddr, ib_memlen_t len,
54230e7468fSPeter Dunlap     ibt_mr_flags_t mr_flags)
54330e7468fSPeter Dunlap {
54430e7468fSPeter Dunlap 	iser_mr_t	*result = NULL;
54530e7468fSPeter Dunlap 	ibt_mr_attr_t   mr_attr;
54630e7468fSPeter Dunlap 	ibt_mr_desc_t	mr_desc;
54730e7468fSPeter Dunlap 	ibt_status_t	status;
54830e7468fSPeter Dunlap 	int		km_flags = 0;
54930e7468fSPeter Dunlap 
55030e7468fSPeter Dunlap 	if (mr_flags & IBT_MR_NOSLEEP)
55130e7468fSPeter Dunlap 		mr_flags |= KM_NOSLEEP;
55230e7468fSPeter Dunlap 
55330e7468fSPeter Dunlap 	result = (iser_mr_t *)kmem_zalloc(sizeof (iser_mr_t), km_flags);
55430e7468fSPeter Dunlap 	if (result == NULL) {
55530e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_reg_mem: failed to allocate "
55630e7468fSPeter Dunlap 		    "memory for iser_mr handle");
55730e7468fSPeter Dunlap 		return (NULL);
55830e7468fSPeter Dunlap 	}
55930e7468fSPeter Dunlap 
56030e7468fSPeter Dunlap 	bzero(&mr_attr, sizeof (ibt_mr_attr_t));
56130e7468fSPeter Dunlap 	bzero(&mr_desc, sizeof (ibt_mr_desc_t));
56230e7468fSPeter Dunlap 
56330e7468fSPeter Dunlap 	mr_attr.mr_vaddr	= vaddr;
56430e7468fSPeter Dunlap 	mr_attr.mr_len		= len;
56530e7468fSPeter Dunlap 	mr_attr.mr_as		= NULL;
56630e7468fSPeter Dunlap 	mr_attr.mr_flags	= mr_flags;
56730e7468fSPeter Dunlap 
56830e7468fSPeter Dunlap 	status = ibt_register_mr(hca->hca_hdl, hca->hca_pdhdl, &mr_attr,
56930e7468fSPeter Dunlap 	    &result->is_mrhdl, &mr_desc);
57030e7468fSPeter Dunlap 	if (status != IBT_SUCCESS) {
57130e7468fSPeter Dunlap 		ISER_LOG(CE_NOTE, "iser_reg_mem: ibt_register_mr "
57230e7468fSPeter Dunlap 		    "failure (%d)", status);
57330e7468fSPeter Dunlap 		kmem_free(result, sizeof (iser_mr_t));
57430e7468fSPeter Dunlap 		return (NULL);
57530e7468fSPeter Dunlap 	}
57630e7468fSPeter Dunlap 
57730e7468fSPeter Dunlap 	result->is_mrva		= mr_attr.mr_vaddr;
57830e7468fSPeter Dunlap 	result->is_mrlen	= mr_attr.mr_len;
57930e7468fSPeter Dunlap 	result->is_mrlkey	= mr_desc.md_lkey;
58030e7468fSPeter Dunlap 	result->is_mrrkey	= mr_desc.md_rkey;
58130e7468fSPeter Dunlap 
58230e7468fSPeter Dunlap 	return (result);
58330e7468fSPeter Dunlap }
58430e7468fSPeter Dunlap 
58530e7468fSPeter Dunlap void
iser_dereg_mem(iser_hca_t * hca,iser_mr_t * mr)58630e7468fSPeter Dunlap iser_dereg_mem(iser_hca_t *hca, iser_mr_t *mr)
58730e7468fSPeter Dunlap {
588*aedf2b3bSsrivijitha dugganapalli 	(void) ibt_deregister_mr(hca->hca_hdl, mr->is_mrhdl);
58930e7468fSPeter Dunlap 	kmem_free(mr, sizeof (iser_mr_t));
59030e7468fSPeter Dunlap }
59130e7468fSPeter Dunlap 
59230e7468fSPeter Dunlap static int
iser_vmem_mr_compare(const void * void_mr1,const void * void_mr2)59330e7468fSPeter Dunlap iser_vmem_mr_compare(const void *void_mr1, const void *void_mr2)
59430e7468fSPeter Dunlap {
59530e7468fSPeter Dunlap 	iser_mr_t *mr1 = (iser_mr_t *)void_mr1;
59630e7468fSPeter Dunlap 	iser_mr_t *mr2 = (iser_mr_t *)void_mr2;
59730e7468fSPeter Dunlap 
59830e7468fSPeter Dunlap 	/* Sort memory chunks by their virtual address */
59930e7468fSPeter Dunlap 	if (mr1->is_mrva < mr2->is_mrva)
60030e7468fSPeter Dunlap 		return (-1);
60130e7468fSPeter Dunlap 	else if (mr1->is_mrva > mr2->is_mrva)
60230e7468fSPeter Dunlap 		return (1);
60330e7468fSPeter Dunlap 
60430e7468fSPeter Dunlap 	return (0);
60530e7468fSPeter Dunlap }
606