/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*
 * tavor_mr.c
 *    Tavor Memory Region/Window Routines
 *
 *    Implements all the routines necessary to provide the requisite memory
 *    registration verbs.  These include operations like RegisterMemRegion(),
 *    DeregisterMemRegion(), ReregisterMemRegion(), RegisterSharedMemRegion(),
 *    etc., that affect Memory Regions.  It also includes the verbs that
 *    affect Memory Windows, including AllocMemWindow(), FreeMemWindow(),
 *    and QueryMemWindow().
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/modctl.h>
#include <sys/esunddi.h>

#include <sys/ib/adapters/tavor/tavor.h>


/*
 * Used by tavor_mr_keycalc() below to fill in the "unconstrained" portion
 * of Tavor memory keys (LKeys and RKeys)
 */
static uint_t tavor_debug_memkey_cnt = 0x00000000;

static int tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op);
static int tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op);
static int tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
    uint_t sleep, uint_t *dereg_level);
static uint64_t tavor_mr_nummtt_needed(tavor_state_t *state,
    tavor_bind_info_t *bind, uint_t *mtt_pgsize);
static int tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl, uint_t sleep);
static void tavor_mr_mem_unbind(tavor_state_t *state,
    tavor_bind_info_t *bind);
static int tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
    uint32_t mtt_pgsize_bits);
static int tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc);
static int tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc);

/*
 * The Tavor umem_lockmemory() callback ops.  When userland memory is
 * registered, these callback ops are specified.  The tavor_umap_umemlock_cb()
 * callback will be called whenever the memory for the corresponding
 * ddi_umem_cookie_t is being freed.
 */
static struct umem_callback_ops tavor_umem_cbops = {
	UMEM_CALLBACK_VERSION,
	tavor_umap_umemlock_cb,
};
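
/*
 * Note: when the locked userland pages are freed (e.g. the owning process
 * exits or unmaps the region), tavor_umap_umemlock_cb() runs and partially
 * deregisters the MR; tavor_mr_deregister() below detects this state by
 * finding a NULL "mr_umemcookie" in the MR handle.
 */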


/*
 * tavor_mr_register()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.
	 */
	bind.bi_type  = TAVOR_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);

	return (status);
}
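
/*
 * Illustrative caller sketch (hypothetical values, for exposition only):
 *
 *	ibt_mr_attr_t	attr;
 *	tavor_mrhdl_t	mrhdl;
 *
 *	attr.mr_vaddr = va;		(virtual address to register)
 *	attr.mr_len   = len;		(length in bytes)
 *	attr.mr_as    = as;		(address space; NULL/&kas = kernel)
 *	attr.mr_flags = IBT_MR_ENABLE_LOCAL_WRITE | IBT_MR_NOSLEEP;
 *	status = tavor_mr_register(state, pd, &attr, &mrhdl, NULL);
 */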


/*
 * tavor_mr_register_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_buf(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_smr_attr_t *mr_attr, struct buf *buf, tavor_mrhdl_t *mrhdl,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type  = TAVOR_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr  = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_as    = NULL;
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_reg(state, pd, &bind, mrhdl, op);

	return (status);
}


/*
 * tavor_mr_register_shared()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_register_shared(tavor_state_t *state, tavor_mrhdl_t mrhdl,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc;
	tavor_umap_db_entry_t	*umapdb;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, pgsize_msk;
	uint_t			sleep, mr_is_umem;
	int			status, umem_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (mr_attr->mr_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP :
	    TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrshared_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the shared memory region.
	 * Specifically, it will be made to reference the currently existing
	 * MTT entries and ownership of the MPT will be passed to the hardware
	 * in the last step below.  If we fail here, we must undo the
	 * protection domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail1;
	}

	/*
	 * Allocate the software structure for tracking the shared memory
	 * region (i.e. the Tavor Memory Region handle).  If we fail here, we
	 * must undo the protection domain reference count and the previous
	 * resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrshared_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (mr_attr->mr_flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/* Grab the MR lock for the current memory region */
	mutex_enter(&mrhdl->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
		mutex_exit(&mrhdl->mr_lock);
		status = IBT_MR_HDL_INVALID;
		goto mrshared_fail3;
	}

	/*
	 * Determine if the original memory was from userland and, if so, pin
	 * the pages (again) with umem_lockmemory().  This will guarantee a
	 * separate callback for each of this shared region's MR handles.
	 * If this is userland memory, then allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo all the above setup.
	 */
	mr_is_umem = mrhdl->mr_is_umem;
	if (mr_is_umem) {
		umem_len   = ptob(btopr(mrhdl->mr_bindinfo.bi_len +
		    ((uintptr_t)mrhdl->mr_bindinfo.bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)mrhdl->mr_bindinfo.bi_addr &
		    ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
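		/*
		 * Note: umem_addr/umem_len above describe the same range
		 * rounded out to whole pages: the start is rounded down to
		 * a page boundary and the length (plus the leading page
		 * offset) is rounded up via ptob(btopr()), so that whole
		 * pages are locked.
		 */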
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail3;
		}

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INSUFF_RESOURCE;
			goto mrshared_fail4;
		}
	}

	/*
	 * Copy the MTT resource pointer (and additional parameters) from
	 * the original Tavor Memory Region handle.  Note: this is normally
	 * where the tavor_mr_mem_bind() routine would be called, but because
	 * we already have bound and filled-in MTT entries it is simply a
	 * matter here of managing the MTT reference count and grabbing the
	 * address of the MTT table entries (for filling in the shared region's
	 * MPT entry).
	 */
	mr->mr_mttrsrcp	  = mrhdl->mr_mttrsrcp;
	mr->mr_logmttpgsz = mrhdl->mr_logmttpgsz;
	mr->mr_bindinfo	  = mrhdl->mr_bindinfo;
	mr->mr_mttrefcntp = mrhdl->mr_mttrefcntp;
	mutex_exit(&mrhdl->mr_lock);
	bind = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
	mtt = mr->mr_mttrsrcp;

	/*
	 * Increment the MTT reference count (to reflect the fact that
	 * the MTT is now shared)
	 */
	(void) tavor_mtt_refcnt_inc(mr->mr_mttrefcntp);

	/*
	 * Update the new "bind" virtual address.  Do some extra work here
	 * to ensure proper alignment.  That is, make sure that the page
	 * offset for the beginning of the old range is the same as the
	 * offset for this new mapping
	 */
	pgsize_msk = (((uint64_t)1 << mr->mr_logmttpgsz) - 1);
	bind->bi_addr = ((mr_attr->mr_vaddr & ~pgsize_msk) |
	    (mr->mr_bindinfo.bi_addr & pgsize_msk));
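	/*
	 * For example, with 4KB MTT pages (pgsize_msk == 0xFFF), an old
	 * bi_addr of 0x10234 and a new mr_vaddr of 0x20000 yield a new
	 * bi_addr of 0x20234 (the 0x234 page offset is preserved).
	 */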

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * in the next step when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
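	/* page_sz below is encoded relative to 4KB, i.e. log2(pgsize) - 12 */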
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.start_addr	= bind->bi_addr;
	mpt_entry.reg_win_len	= bind->bi_len;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;
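	/*
	 * The MTT segment address above is stored in 64-byte units (MTT
	 * segments are 64-byte aligned), hence the ">> 6";
	 * TAVOR_MTT_SIZE_SHIFT converts the MTT index to a byte offset.
	 */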

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		status = ibc_get_ci_failure(0);
		goto mrshared_fail5;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl_new = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrshared_fail5:
	(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrshared_fail4:
	if (mr_is_umem) {
		ddi_umem_unlock(umem_cookie);
	}
mrshared_fail3:
	tavor_rsrc_free(state, &rsrc);
mrshared_fail2:
	tavor_rsrc_free(state, &mpt);
mrshared_fail1:
	tavor_pd_refcnt_dec(pd);
mrshared_fail:
	return (status);
}


/*
 * tavor_mr_deregister()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_deregister(tavor_state_t *state, tavor_mrhdl_t *mrhdl, uint_t level,
    uint_t sleep)
{
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_pdhdl_t		pd;
	tavor_mrhdl_t		mr;
	tavor_bind_info_t	*bind;
	uint64_t		value;
	int			status, shared_mtt;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Region
	 * handle.  This is necessary here because the resource for the
	 * MR handle is going to be freed up as part of this
	 * deregistration
	 */
	mr	= *mrhdl;
	mutex_enter(&mr->mr_lock);
	mpt	= mr->mr_mptrsrcp;
	mtt	= mr->mr_mttrsrcp;
	mtt_refcnt = mr->mr_mttrefcntp;
	rsrc	= mr->mr_rsrcp;
	pd	= mr->mr_pdhdl;
	bind	= &mr->mr_bindinfo;

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of the tavor_umap_umemlock_cb() callback.
	 * If so, then jump to the end and free the remaining resources.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		goto mrdereg_finish_cleanup;
	}

	/*
	 * We must drop the "mr_lock" here to ensure that both SLEEP and
	 * NOSLEEP calls into the firmware work as expected.  Also, if two
	 * threads are attempting to access this MR (via de-register,
	 * re-register, or otherwise), then we let the firmware enforce
	 * that only one deregister is valid.
	 */
	mutex_exit(&mr->mr_lock);

	/*
	 * Reclaim MPT entry from hardware (if necessary).  Since the
	 * tavor_mr_deregister() routine is used in the memory region
	 * reregistration process as well, it is possible that we will
	 * not always wish to reclaim ownership of the MPT.  Check the
	 * "level" arg and, if necessary, attempt to reclaim it.  If
	 * the ownership transfer fails for any reason, we check to see
	 * what command status was returned from the hardware.  The only
	 * "expected" error status is the one that indicates an attempt to
	 * deregister a memory region that has memory windows bound to it
	 */
	if (level >= TAVOR_MR_DEREG_ALL) {
		status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT,
		    NULL, 0, mpt->tr_indx, sleep);
		if (status != TAVOR_CMD_SUCCESS) {
			if (status == TAVOR_CMD_REG_BOUND) {
				return (IBT_MR_IN_USE);
			} else {
				cmn_err(CE_CONT, "Tavor: HW2SW_MPT command "
				    "failed: %08x\n", status);
				return (IBT_INVALID_PARAM);
			}
		}
	}

	/*
	 * Re-grab the mr_lock here.  Since further access to the protected
	 * 'mr' structure is needed, and we would have returned previously for
	 * the multiple deregistration case, we can safely grab the lock here.
	 */
	mutex_enter(&mr->mr_lock);

	/*
	 * If the memory had come from userland, then we do a lookup in the
	 * "userland resources database".  On success, we free the entry, call
	 * ddi_umem_unlock(), and continue the cleanup.  On failure (which is
	 * an indication that the umem_lockmemory() callback has called
	 * tavor_mr_deregister()), we call ddi_umem_unlock() and invalidate
	 * the "mr_umemcookie" field in the MR handle (this will be used
	 * later to detect that only partial cleanup still remains to be done
	 * on the MR handle).
	 */
	if (mr->mr_is_umem) {
		status = tavor_umap_db_find(state->ts_instance,
		    (uint64_t)(uintptr_t)mr->mr_umemcookie,
		    MLNX_UMAP_MRMEM_RSRC, &value, TAVOR_UMAP_DB_REMOVE,
		    &umapdb);
		if (status == DDI_SUCCESS) {
			tavor_umap_db_free(umapdb);
			ddi_umem_unlock(mr->mr_umemcookie);
		} else {
			ddi_umem_unlock(mr->mr_umemcookie);
			mr->mr_umemcookie = NULL;
		}
	}

	/* mtt_refcnt is NULL in the case of tavor_dma_mr_register() */
	if (mtt_refcnt != NULL) {
		/*
		 * Decrement the MTT reference count.  Since the MTT resource
		 * may be shared between multiple memory regions (as a result
		 * of a "RegisterSharedMR" verb) it is important that we not
		 * free up or unbind resources prematurely.  If it's not shared
		 * (as indicated by the return status), then free the resource.
		 */
		shared_mtt = tavor_mtt_refcnt_dec(mtt_refcnt);
		if (!shared_mtt) {
			tavor_rsrc_free(state, &mtt_refcnt);
		}

		/*
		 * Free up the MTT entries and unbind the memory.  Here,
		 * as above, we attempt to free these resources only if
		 * it is appropriate to do so.
		 */
		if (!shared_mtt) {
			if (level >= TAVOR_MR_DEREG_NO_HW2SW_MPT) {
				tavor_mr_mem_unbind(state, bind);
			}
			tavor_rsrc_free(state, &mtt);
		}
	}

	/*
	 * If the MR handle has been invalidated, then drop the
	 * lock and return success.  Note: This only happens because
	 * the umem_lockmemory() callback has been triggered.  The
	 * cleanup here is partial, and further cleanup (in a
	 * subsequent tavor_mr_deregister() call) will be necessary.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (DDI_SUCCESS);
	}

mrdereg_finish_cleanup:
	mutex_exit(&mr->mr_lock);

	/* Free the Tavor Memory Region handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mrhdl pointer to NULL and return success */
	*mrhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * tavor_mr_query()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_query(tavor_state_t *state, tavor_mrhdl_t mr,
    ibt_mr_query_attr_t *attr)
{
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*attr))

	mutex_enter(&mr->mr_lock);

	/*
	 * Check here to see if the memory region has already been partially
	 * deregistered as a result of a tavor_umap_umemlock_cb() callback.
	 * If so, this is an error, return failure.
	 */
	if ((mr->mr_is_umem) && (mr->mr_umemcookie == NULL)) {
		mutex_exit(&mr->mr_lock);
		return (IBT_MR_HDL_INVALID);
	}

	/* Fill in the queried attributes */
	attr->mr_attr_flags = mr->mr_accflag;
	attr->mr_pd	= (ibt_pd_hdl_t)mr->mr_pdhdl;

	/* Fill in the "local" attributes */
	attr->mr_lkey = (ibt_lkey_t)mr->mr_lkey;
	attr->mr_lbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
	attr->mr_lbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;

	/*
	 * Fill in the "remote" attributes (if necessary).  Note: the
	 * remote attributes are only valid if the memory region has one
	 * or more of the remote access flags set.
	 */
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		attr->mr_rkey = (ibt_rkey_t)mr->mr_rkey;
		attr->mr_rbounds.pb_addr = (ib_vaddr_t)mr->mr_bindinfo.bi_addr;
		attr->mr_rbounds.pb_len  = (size_t)mr->mr_bindinfo.bi_len;
	}

	/*
	 * If the region is mapped for streaming (i.e. noncoherent), then
	 * a sync is required
	 */
	attr->mr_sync_required = (mr->mr_bindinfo.bi_flags &
	    IBT_MR_NONCOHERENT) ? B_TRUE : B_FALSE;

	mutex_exit(&mr->mr_lock);
	return (DDI_SUCCESS);
}


/*
 * tavor_mr_reregister()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_mr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl_new,
    tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (as is the case here) and a "buf" binding (see
	 * below).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration (and reregistration) routines.
	 */
	bind.bi_type  = TAVOR_BINDHDL_VADDR;
	bind.bi_addr  = mr_attr->mr_vaddr;
	bind.bi_len   = mr_attr->mr_len;
	bind.bi_as    = mr_attr->mr_as;
	bind.bi_flags = mr_attr->mr_flags;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);

	return (status);
}


/*
 * tavor_mr_reregister_buf()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mr_reregister_buf(tavor_state_t *state, tavor_mrhdl_t mr,
    tavor_pdhdl_t pd, ibt_smr_attr_t *mr_attr, struct buf *buf,
    tavor_mrhdl_t *mrhdl_new, tavor_mr_options_t *op)
{
	tavor_bind_info_t	bind;
	int			status;

	/*
	 * Fill in the "bind" struct.  This struct provides the majority
	 * of the information that will be used to distinguish between an
	 * "addr" binding (see above) and a "buf" binding (as is the case
	 * here).  The "bind" struct is later passed to tavor_mr_mem_bind()
	 * which does most of the "heavy lifting" for the Tavor memory
	 * registration routines.  Note: We have chosen to provide
	 * "b_un.b_addr" as the IB address (when the IBT_MR_PHYS_IOVA flag is
	 * not set).  It is not critical what value we choose here as it need
	 * only be unique for the given RKey (which will happen by default),
	 * so the choice here is somewhat arbitrary.
	 */
	bind.bi_type  = TAVOR_BINDHDL_BUF;
	bind.bi_buf   = buf;
	if (mr_attr->mr_flags & IBT_MR_PHYS_IOVA) {
		bind.bi_addr  = mr_attr->mr_vaddr;
	} else {
		bind.bi_addr  = (uint64_t)(uintptr_t)buf->b_un.b_addr;
	}
	bind.bi_len   = (uint64_t)buf->b_bcount;
	bind.bi_flags = mr_attr->mr_flags;
	bind.bi_as = NULL;
	status = tavor_mr_common_rereg(state, mr, pd, &bind, mrhdl_new, op);

	return (status);
}


/*
 * tavor_mr_sync()
 *    Context: Can be called from interrupt or base context.
 */
/* ARGSUSED */
int
tavor_mr_sync(tavor_state_t *state, ibt_mr_sync_t *mr_segs, size_t num_segs)
{
	tavor_mrhdl_t		mrhdl;
	uint64_t		seg_vaddr, seg_len, seg_end;
	uint64_t		mr_start, mr_end;
	uint_t			type;
	int			status, i;

	/* Process each of the ibt_mr_sync_t's */
	for (i = 0; i < num_segs; i++) {
		mrhdl = (tavor_mrhdl_t)mr_segs[i].ms_handle;

		/* Check for valid memory region handle */
		if (mrhdl == NULL) {
			status = IBT_MR_HDL_INVALID;
			goto mrsync_fail;
		}

		mutex_enter(&mrhdl->mr_lock);

		/*
		 * Check here to see if the memory region has already been
		 * partially deregistered as a result of a
		 * tavor_umap_umemlock_cb() callback.  If so, this is an
		 * error, return failure.
		 */
		if ((mrhdl->mr_is_umem) && (mrhdl->mr_umemcookie == NULL)) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_MR_HDL_INVALID;
			goto mrsync_fail;
		}

		/* Check for valid bounds on sync request */
		seg_vaddr = mr_segs[i].ms_vaddr;
		seg_len	  = mr_segs[i].ms_len;
		seg_end	  = seg_vaddr + seg_len - 1;
		mr_start  = mrhdl->mr_bindinfo.bi_addr;
		mr_end	  = mr_start + mrhdl->mr_bindinfo.bi_len - 1;
		if ((seg_vaddr < mr_start) || (seg_vaddr > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_MR_VA_INVALID;
			goto mrsync_fail;
		}
		if ((seg_end < mr_start) || (seg_end > mr_end)) {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_MR_LEN_INVALID;
			goto mrsync_fail;
		}

		/* Determine what type (i.e. direction) for sync */
		if (mr_segs[i].ms_flags & IBT_SYNC_READ) {
			type = DDI_DMA_SYNC_FORDEV;
		} else if (mr_segs[i].ms_flags & IBT_SYNC_WRITE) {
			type = DDI_DMA_SYNC_FORCPU;
		} else {
			mutex_exit(&mrhdl->mr_lock);
			status = IBT_INVALID_PARAM;
			goto mrsync_fail;
		}

		(void) ddi_dma_sync(mrhdl->mr_bindinfo.bi_dmahdl,
		    (off_t)(seg_vaddr - mr_start), (size_t)seg_len, type);
		mutex_exit(&mrhdl->mr_lock);
	}

	return (DDI_SUCCESS);

mrsync_fail:
	return (status);
}


/*
 * tavor_mw_alloc()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_alloc(tavor_state_t *state, tavor_pdhdl_t pd, ibt_mw_flags_t flags,
    tavor_mwhdl_t *mwhdl)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mwhdl_t		mw;
	uint_t			sleep;
	int			status;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MW_NOSLEEP) ? TAVOR_NOSLEEP : TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mwalloc_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry (for use as a memory window).  Since the
	 * Tavor hardware uses the MPT entry for memory regions and for
	 * memory windows, we will fill in this MPT with all the necessary
	 * parameters for the memory window.  And then (just as we do for
	 * memory regions) ownership will be passed to the hardware in the
	 * final step below.  If we fail here, we must undo the protection
	 * domain reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory window (i.e.
	 * the Tavor Memory Window handle).  Note: This is actually the same
	 * software structure used for tracking memory regions, but since many
	 * of the same properties are needed, only a single structure is
	 * necessary.  If we fail here, we must undo the protection domain
	 * reference count and the previous resource allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mwalloc_fail2;
	}
	mw = (tavor_mwhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Calculate an "unbound" RKey from MPT index.  In much the same way
	 * as we do for memory regions (above), this key is constructed from
	 * a "constrained" (which depends on the MPT index) and an
	 * "unconstrained" portion (which may be arbitrarily chosen).
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mw->mr_rkey);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.  Note: fewer entries in the MPT
	 * entry are necessary to allocate a memory window.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.reg_win	= TAVOR_MPT_IS_WINDOW;
	mpt_entry.mem_key	= mw->mr_rkey;
	mpt_entry.pd		= pd->pd_pdnum;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		status = ibc_get_ci_failure(0);
		goto mwalloc_fail3;
	}

	/*
	 * Fill in the rest of the Tavor Memory Window handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MW.
	 */
	mw->mr_mptrsrcp	= mpt;
	mw->mr_pdhdl	= pd;
	mw->mr_rsrcp	= rsrc;
	*mwhdl = mw;

	return (DDI_SUCCESS);

mwalloc_fail3:
	tavor_rsrc_free(state, &rsrc);
mwalloc_fail2:
	tavor_rsrc_free(state, &mpt);
mwalloc_fail1:
	tavor_pd_refcnt_dec(pd);
mwalloc_fail:
	return (status);
}


/*
 * tavor_mw_free()
 *    Context: Can be called from interrupt or base context.
 */
int
tavor_mw_free(tavor_state_t *state, tavor_mwhdl_t *mwhdl, uint_t sleep)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_mwhdl_t		mw;
	int			status;
	tavor_pdhdl_t		pd;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		return (IBT_INVALID_PARAM);
	}

	/*
	 * Pull all the necessary information from the Tavor Memory Window
	 * handle.  This is necessary here because the resource for the
	 * MW handle is going to be freed up as part of this operation.
	 */
	mw	= *mwhdl;
	mutex_enter(&mw->mr_lock);
	mpt	= mw->mr_mptrsrcp;
	rsrc	= mw->mr_rsrcp;
	pd	= mw->mr_pdhdl;
	mutex_exit(&mw->mr_lock);
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mw))

	/*
	 * Reclaim the MPT entry from hardware.  Note: in general, it is
	 * unexpected for this operation to return an error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, NULL,
	    0, mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: %08x\n",
		    status);
		return (IBT_INVALID_PARAM);
	}

	/* Free the Tavor Memory Window handle */
	tavor_rsrc_free(state, &rsrc);

	/* Free up the MPT entry resource */
	tavor_rsrc_free(state, &mpt);

	/* Decrement the reference count on the protection domain (PD) */
	tavor_pd_refcnt_dec(pd);

	/* Set the mwhdl pointer to NULL and return success */
	*mwhdl = NULL;

	return (DDI_SUCCESS);
}


/*
 * tavor_mr_keycalc()
 *    Context: Can be called from interrupt or base context.
 */
void
tavor_mr_keycalc(tavor_state_t *state, uint32_t indx, uint32_t *key)
{
	uint32_t	tmp, log_num_mpt;

	/*
	 * Generate a simple key from counter.  Note:  We increment this
	 * static variable _intentionally_ without any kind of mutex around
	 * it.  First, single-threading all operations through a single lock
	 * would be a bad idea (from a performance point-of-view).  Second,
	 * the upper "unconstrained" bits don't really have to be unique
	 * because the lower bits are guaranteed to be (although we do make a
	 * best effort to ensure that they are).  Third, the window for the
	 * race (where both threads read and update the counter at the same
	 * time) is incredibly small.
	 * And, lastly, we'd like to make this into a "random" key XXX
	 */
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(tavor_debug_memkey_cnt))
	log_num_mpt = state->ts_cfg_profile->cp_log_num_mpt;
	tmp = (tavor_debug_memkey_cnt++) << log_num_mpt;
	*key = tmp | indx;
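
	/*
	 * For example, with cp_log_num_mpt == 17 and an MPT index of 0x2A,
	 * the key generated while the counter holds 2 would be
	 * (2 << 17) | 0x2A.  (Values are illustrative only.)
	 */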
}


/*
 * tavor_mr_common_reg()
 *    Context: Can be called from interrupt or base context.
 */
static int
tavor_mr_common_reg(tavor_state_t *state, tavor_pdhdl_t pd,
    tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl, tavor_mr_options_t *op)
{
	tavor_rsrc_pool_info_t	*rsrc_pool;
	tavor_rsrc_t		*mpt, *mtt, *rsrc, *mtt_refcnt;
	tavor_umap_db_entry_t	*umapdb;
	tavor_sw_refcnt_t	*swrc_tmp;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	tavor_bind_info_t	*bh;
	ddi_dma_handle_t	bind_dmahdl;
	ddi_umem_cookie_t	umem_cookie;
	size_t			umem_len;
	caddr_t			umem_addr;
	uint64_t		mtt_addr, mtt_ddrbaseaddr, max_sz;
	uint_t			sleep, mtt_pgsize_bits, bind_type, mr_is_umem;
	int			status, umem_flags, bind_override_addr;

	/*
	 * Check the "options" flag.  Currently this flag tells the driver
	 * whether or not the region should be bound normally (i.e. with
	 * entries written into the PCI IOMMU), whether it should be
	 * registered to bypass the IOMMU, and whether or not the resulting
	 * address should be "zero-based" (to aid the alignment restrictions
	 * for QPs).
	 */
	if (op == NULL) {
		bind_type   = TAVOR_BINDMEM_NORMAL;
		bind_dmahdl = NULL;
		bind_override_addr = 0;
	} else {
		bind_type	   = op->mro_bind_type;
		bind_dmahdl	   = op->mro_bind_dmahdl;
		bind_override_addr = op->mro_bind_override_addr;
	}

	/* Extract the flags field from the tavor_bind_info_t */
	flags = bind->bi_flags;

	/*
	 * Check for invalid length.  Check if the length is zero or if the
	 * length is larger than the maximum configured value.  Return error
	 * if it is.
	 */
	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
		status = IBT_MR_LEN_INVALID;
		goto mrcommon_fail;
	}

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrcommon_fail;
	}

	/*
	 * Get the base address for the MTT table.  This will be necessary
	 * below when we are setting up the MPT entry.
	 */
	rsrc_pool = &state->ts_rsrc_hdl[TAVOR_MTT];
	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Determine if the memory is from userland and pin the pages
	 * with umem_lockmemory() if necessary.
	 * Then, if this is userland memory, allocate an entry in the
	 * "userland resources database".  This will later be added to
	 * the database (after all further memory registration operations are
	 * successful).  If we fail here, we must undo the reference counts
	 * and the previous resource allocations.
	 */
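	/*
	 * (Userland memory is identified by a non-NULL address space
	 * pointer other than the kernel's own, i.e. other than &kas.)
	 */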
	mr_is_umem = (((bind->bi_as != NULL) && (bind->bi_as != &kas)) ? 1 : 0);
	if (mr_is_umem) {
		umem_len   = ptob(btopr(bind->bi_len +
		    ((uintptr_t)bind->bi_addr & PAGEOFFSET)));
		umem_addr  = (caddr_t)((uintptr_t)bind->bi_addr & ~PAGEOFFSET);
		umem_flags = (DDI_UMEMLOCK_WRITE | DDI_UMEMLOCK_READ |
		    DDI_UMEMLOCK_LONGTERM);
		status = umem_lockmemory(umem_addr, umem_len, umem_flags,
		    &umem_cookie, &tavor_umem_cbops, NULL);
		if (status != 0) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail3;
		}

		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
		_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind->bi_buf))

		bind->bi_buf = ddi_umem_iosetup(umem_cookie, 0, umem_len,
		    B_WRITE, 0, 0, NULL, DDI_UMEM_SLEEP);
		if (bind->bi_buf == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail3;
		}
		bind->bi_type = TAVOR_BINDHDL_UBUF;
		bind->bi_buf->b_flags |= B_READ;

		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind->bi_buf))
		_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))

		umapdb = tavor_umap_db_alloc(state->ts_instance,
		    (uint64_t)(uintptr_t)umem_cookie, MLNX_UMAP_MRMEM_RSRC,
		    (uint64_t)(uintptr_t)rsrc);
		if (umapdb == NULL) {
			status = IBT_INSUFF_RESOURCE;
			goto mrcommon_fail4;
		}
	}

	/*
	 * Setup the bindinfo for the mtt bind call
	 */
	bh = &mr->mr_bindinfo;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bh))
	bcopy(bind, bh, sizeof (tavor_bind_info_t));
	bh->bi_bypass = bind_type;
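	/*
	 * bi_bypass records whether this binding should be mapped through
	 * the PCI IOMMU (TAVOR_BINDMEM_NORMAL) or should bypass it, per
	 * the "options" handling above.
	 */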
	status = tavor_mr_mtt_bind(state, bh, bind_dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		/*
		 * When mtt_bind fails, freerbuf has already been done,
		 * so make sure not to call it again.
		 */
		bind->bi_type = bh->bi_type;
		goto mrcommon_fail5;
	}
	mr->mr_logmttpgsz = mtt_pgsize_bits;

	/*
	 * Allocate MTT reference count (to track shared memory regions).
	 * This reference count resource may never be used on the given
	 * memory region, but if it is ever later registered as "shared"
	 * memory region then this resource will be necessary.  If we fail
	 * here, we do pretty much the same as above to clean up.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1, sleep,
	    &mtt_refcnt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail6;
	}
	mr->mr_mttrefcntp = mtt_refcnt;
	swrc_tmp = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_tmp))
	TAVOR_MTT_REFCNT_INIT(swrc_tmp);

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));
	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;
	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	if (bind_override_addr == 0) {
		mpt_entry.start_addr = bh->bi_addr;
	} else {
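		/*
		 * "Zero-based" region: keep only the offset into the
		 * first MTT page, so the region's IB virtual address
		 * effectively starts at zero.
		 */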
		bh->bi_addr = bh->bi_addr & ((1 << mr->mr_logmttpgsz) - 1);
		mpt_entry.start_addr = bh->bi_addr;
	}
	mpt_entry.reg_win_len	= bh->bi_len;
	mpt_entry.win_cnt_limit	= TAVOR_UNLIMITED_WIN_BIND;
	mtt_addr = mtt_ddrbaseaddr + (mtt->tr_indx << TAVOR_MTT_SIZE_SHIFT);
	mpt_entry.mttseg_addr_h = mtt_addr >> 32;
	mpt_entry.mttseg_addr_l = mtt_addr >> 6;

	/*
	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
	 * the entry to the hardware.  Note: in general, this operation
	 * shouldn't fail.  But if it does, we have to undo everything we've
	 * done above before returning error.
	 */
	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
	if (status != TAVOR_CMD_SUCCESS) {
		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
		    status);
		status = ibc_get_ci_failure(0);
		goto mrcommon_fail7;
	}

	/*
	 * Fill in the rest of the Tavor Memory Region handle.  Having
	 * successfully transferred ownership of the MPT, we can update the
	 * following fields for use in further operations on the MR.
	 */
	mr->mr_mptrsrcp	  = mpt;
	mr->mr_mttrsrcp	  = mtt;
	mr->mr_pdhdl	  = pd;
	mr->mr_rsrcp	  = rsrc;
	mr->mr_is_umem	  = mr_is_umem;
	mr->mr_umemcookie = (mr_is_umem != 0) ? umem_cookie : NULL;
	mr->mr_umem_cbfunc = NULL;
	mr->mr_umem_cbarg1 = NULL;
	mr->mr_umem_cbarg2 = NULL;

	/*
	 * If this is userland memory, then we need to insert the previously
	 * allocated entry into the "userland resources database".  This will
	 * allow for later coordination between the tavor_umap_umemlock_cb()
	 * callback and tavor_mr_deregister().
	 */
	if (mr_is_umem) {
		tavor_umap_db_add(umapdb);
	}

	*mrhdl = mr;

	return (DDI_SUCCESS);

/*
 * The following is cleanup for all possible failure cases in this routine
 */
mrcommon_fail7:
	tavor_rsrc_free(state, &mtt_refcnt);
mrcommon_fail6:
	tavor_rsrc_free(state, &mtt);
	tavor_mr_mem_unbind(state, bh);
	bind->bi_type = bh->bi_type;
mrcommon_fail5:
	if (mr_is_umem) {
		tavor_umap_db_free(umapdb);
	}
mrcommon_fail4:
	if (mr_is_umem) {
		/*
		 * Free up the memory ddi_umem_iosetup() allocates
		 * internally.
		 */
		if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
			freerbuf(bind->bi_buf);
			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
			bind->bi_type = TAVOR_BINDHDL_NONE;
			_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
		}
		ddi_umem_unlock(umem_cookie);
	}
mrcommon_fail3:
	tavor_rsrc_free(state, &rsrc);
mrcommon_fail2:
	tavor_rsrc_free(state, &mpt);
mrcommon_fail1:
	tavor_pd_refcnt_dec(pd);
mrcommon_fail:
	return (status);
}

int
tavor_dma_mr_register(tavor_state_t *state, tavor_pdhdl_t pd,
    ibt_dmr_attr_t *mr_attr, tavor_mrhdl_t *mrhdl)
{
	tavor_rsrc_t		*mpt, *rsrc;
	tavor_hw_mpt_t		mpt_entry;
	tavor_mrhdl_t		mr;
	ibt_mr_flags_t		flags;
	uint_t			sleep;
	int			status;

	/* Extract the flags field */
	flags = mr_attr->dmr_flags;

	/*
	 * Check the sleep flag.  Ensure that it is consistent with the
	 * current thread context (i.e. if we are currently in the interrupt
	 * context, then we shouldn't be attempting to sleep).
	 */
	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
	if ((sleep == TAVOR_SLEEP) &&
	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
		goto mrcommon_fail;
	}

	/* Increment the reference count on the protection domain (PD) */
	tavor_pd_refcnt_inc(pd);

	/*
	 * Allocate an MPT entry.  This will be filled in with all the
	 * necessary parameters to define the memory region.  And then
	 * ownership will be passed to the hardware in the final step
	 * below.  If we fail here, we must undo the protection domain
	 * reference count.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MPT, 1, sleep, &mpt);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail1;
	}

	/*
	 * Allocate the software structure for tracking the memory region (i.e.
	 * the Tavor Memory Region handle).  If we fail here, we must undo
	 * the protection domain reference count and the previous resource
	 * allocation.
	 */
	status = tavor_rsrc_alloc(state, TAVOR_MRHDL, 1, sleep, &rsrc);
	if (status != DDI_SUCCESS) {
		status = IBT_INSUFF_RESOURCE;
		goto mrcommon_fail2;
	}
	mr = (tavor_mrhdl_t)rsrc->tr_addr;
	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*mr))
	bzero(mr, sizeof (*mr));

	/*
	 * Setup and validate the memory region access flags.  This means
	 * translating the IBTF's enable flags into the access flags that
	 * will be used in later operations.
	 */
	mr->mr_accflag = 0;
	if (flags & IBT_MR_ENABLE_WINDOW_BIND)
		mr->mr_accflag |= IBT_MR_WINDOW_BIND;
	if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
		mr->mr_accflag |= IBT_MR_LOCAL_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_READ)
		mr->mr_accflag |= IBT_MR_REMOTE_READ;
	if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
		mr->mr_accflag |= IBT_MR_REMOTE_WRITE;
	if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
		mr->mr_accflag |= IBT_MR_REMOTE_ATOMIC;

	/*
	 * Calculate keys (Lkey, Rkey) from MPT index.  Each key is formed
	 * from a certain number of "constrained" bits (the least significant
	 * bits) and some number of "unconstrained" bits.  The constrained
	 * bits must be set to the index of the entry in the MPT table, but
	 * the unconstrained bits can be set to any value we wish.  Note:
	 * if no remote access is required, then the RKey value is not filled
	 * in.  Otherwise both Rkey and LKey are given the same value.
	 */
	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
	if ((mr->mr_accflag & IBT_MR_REMOTE_READ) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_WRITE) ||
	    (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC)) {
		mr->mr_rkey = mr->mr_lkey;
	}

	/*
	 * Fill in the MPT entry.  This is the final step before passing
	 * ownership of the MPT entry to the Tavor hardware.  We use all of
	 * the information collected/calculated above to fill in the
	 * requisite portions of the MPT.
	 */
	bzero(&mpt_entry, sizeof (tavor_hw_mpt_t));

	mpt_entry.m_io	  = TAVOR_MEM_CYCLE_GENERATE;
	mpt_entry.en_bind = (mr->mr_accflag & IBT_MR_WINDOW_BIND)   ? 1 : 0;
	mpt_entry.atomic  = (mr->mr_accflag & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
	mpt_entry.rw	  = (mr->mr_accflag & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
	mpt_entry.rr	  = (mr->mr_accflag & IBT_MR_REMOTE_READ)   ? 1 : 0;
	mpt_entry.lw	  = (mr->mr_accflag & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
	mpt_entry.lr	  = 1;
	mpt_entry.phys_addr = 1;	/* the key bit for a DMA (physical) MR */
	mpt_entry.reg_win = TAVOR_MPT_IS_REGION;

	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
	mpt_entry.mem_key	= mr->mr_lkey;
	mpt_entry.pd		= pd->pd_pdnum;
	mpt_entry.win_cnt_limit = TAVOR_UNLIMITED_WIN_BIND;

	mpt_entry.start_addr = mr_attr->dmr_paddr;
	mpt_entry.reg_win_len = mr_attr->dmr_len;

	mpt_entry.mttseg_addr_h = 0;
	mpt_entry.mttseg_addr_l = 0;
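
	/*
	 * With "phys_addr" set above, start_addr/reg_win_len describe the
	 * physical range directly; no MTT translation is used, so the
	 * MTT segment address is simply zeroed.
	 */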
1450 
1451 	/*
1452 	 * Write the MPT entry to hardware.  Lastly, we pass ownership of
1453 	 * the entry to the hardware if needed.  Note: in general, this
1454 	 * operation shouldn't fail.  But if it does, we have to undo
1455 	 * everything we've done above before returning error.
1456 	 *
1457 	 * For Tavor, this routine (which is common to the contexts) will only
1458 	 * set the ownership if needed - the process of passing the context
1459 	 * itself to HW will take care of setting up the MPT (based on type
1460 	 * and index).
1461 	 */
1462 
1463 	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1464 	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, sleep);
1465 	if (status != TAVOR_CMD_SUCCESS) {
1466 		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
1467 		    status);
1468 		status = ibc_get_ci_failure(0);
1469 		goto mrcommon_fail7;
1470 	}
1471 
1472 	/*
1473 	 * Fill in the rest of the Tavor Memory Region handle.  Having
1474 	 * successfully transferred ownership of the MPT, we can update the
1475 	 * following fields for use in further operations on the MR.
1476 	 */
1477 	mr->mr_mptrsrcp	   = mpt;
1478 	mr->mr_mttrsrcp	   = NULL;
1479 	mr->mr_pdhdl	   = pd;
1480 	mr->mr_rsrcp	   = rsrc;
1481 	mr->mr_is_umem	   = 0;
1482 	mr->mr_umemcookie  = NULL;
1483 	mr->mr_umem_cbfunc = NULL;
1484 	mr->mr_umem_cbarg1 = NULL;
1485 	mr->mr_umem_cbarg2 = NULL;
1486 
1487 	*mrhdl = mr;
1488 
1489 	return (DDI_SUCCESS);
1490 
1491 /*
1492  * The following is cleanup for all possible failure cases in this routine
1493  */
1494 mrcommon_fail7:
1495 	tavor_rsrc_free(state, &rsrc);
1496 mrcommon_fail2:
1497 	tavor_rsrc_free(state, &mpt);
1498 mrcommon_fail1:
1499 	tavor_pd_refcnt_dec(pd);
1500 mrcommon_fail:
1501 	return (status);
1502 }
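
/*
 * Illustrative sketch (an assumption-laden example, not the driver's
 * actual tavor_mr_keycalc()): per the comments above, a memory key can
 * be viewed as the MPT index occupying the low-order "constrained" bits
 * with an arbitrary value (here, a rolling counter) filling the
 * high-order "unconstrained" bits.  The "log_num_mpt" width parameter
 * and the helper name are hypothetical.
 */
static uint32_t
example_memkey_calc(uint32_t mpt_indx, uint32_t counter, uint_t log_num_mpt)
{
	/* Constrained bits: the MPT index occupies the low log_num_mpt bits */
	/* Unconstrained bits: the counter fills whatever remains */
	return ((counter << log_num_mpt) | mpt_indx);
}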
1503 
1504 /*
1505  * tavor_mr_mtt_bind()
1506  *    Context: Can be called from interrupt or base context.
1507  */
1508 int
1509 tavor_mr_mtt_bind(tavor_state_t *state, tavor_bind_info_t *bind,
1510     ddi_dma_handle_t bind_dmahdl, tavor_rsrc_t **mtt, uint_t *mtt_pgsize_bits)
1511 {
1512 	uint64_t		nummtt;
1513 	uint_t			sleep;
1514 	int			status;
1515 
1516 	/*
1517 	 * Check the sleep flag.  Ensure that it is consistent with the
1518 	 * current thread context (i.e. if we are currently in the interrupt
1519 	 * context, then we shouldn't be attempting to sleep).
1520 	 */
1521 	sleep = (bind->bi_flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
1522 	if ((sleep == TAVOR_SLEEP) &&
1523 	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
		status = IBT_INVALID_PARAM;
1524 		goto mrmttbind_fail;
1525 	}
1526 
1527 	/*
1528 	 * Bind the memory and determine the mapped addresses.  This is
1529 	 * the first of two routines that do all the "heavy lifting" for
1530 	 * the Tavor memory registration routines.  The tavor_mr_mem_bind()
1531 	 * routine takes the "bind" struct with all its fields filled
1532 	 * in and returns a list of DMA cookies (for the PCI mapped addresses
1533 	 * corresponding to the specified address region) which are used by
1534 	 * the tavor_mr_fast_mtt_write() routine below.  If we fail here,
1535 	 * nothing has yet been allocated in this routine, so we simply
1536 	 * return the error to the caller.
1537 	 */
1538 	status = tavor_mr_mem_bind(state, bind, bind_dmahdl, sleep);
1539 	if (status != DDI_SUCCESS) {
1540 		goto mrmttbind_fail;
1541 	}
1542 
1543 	/*
1544 	 * Determine number of pages spanned.  This routine uses the
1545 	 * information in the "bind" struct to determine the required
1546 	 * number of MTT entries needed (and returns the suggested page size -
1547 	 * as a "power-of-2" - for each MTT entry).
1548 	 */
1549 	nummtt = tavor_mr_nummtt_needed(state, bind, mtt_pgsize_bits);
1550 
1551 	/*
1552 	 * Allocate the MTT entries.  Use the calculations performed above to
1553 	 * allocate the required number of MTT entries.  Note: MTT entries are
1554 	 * allocated in "MTT segments" which consist of complete cachelines
1555 	 * (i.e. 8 entries, 16 entries, etc.)  So the TAVOR_NUMMTT_TO_MTTSEG()
1556 	 * macro is used to do the proper conversion.  If we fail here, we
1557 	 * must unbind the memory that was bound above before returning the
1558 	 * error to the caller.
1559 	 */
1560 	status = tavor_rsrc_alloc(state, TAVOR_MTT,
1561 	    TAVOR_NUMMTT_TO_MTTSEG(nummtt), sleep, mtt);
1562 	if (status != DDI_SUCCESS) {
1563 		goto mrmttbind_fail2;
1564 	}
1565 
1566 	/*
1567 	 * Write the mapped addresses into the MTT entries.  This is part two
1568 	 * of the "heavy lifting" routines that we talked about above.  Note:
1569 	 * we pass the suggested page size from the earlier operation here.
1570 	 * And if we fail here, we again do pretty much the same huge clean up.
1571 	 */
1572 	status = tavor_mr_fast_mtt_write(*mtt, bind, *mtt_pgsize_bits);
1573 	if (status != DDI_SUCCESS) {
1574 		goto mrmttbind_fail3;
1575 	}
1576 	return (DDI_SUCCESS);
1577 
1578 /*
1579  * The following is cleanup for all possible failure cases in this routine
1580  */
1581 mrmttbind_fail3:
1582 	tavor_rsrc_free(state, mtt);
1583 mrmttbind_fail2:
1584 	tavor_mr_mem_unbind(state, bind);
1585 mrmttbind_fail:
1586 	return (status);
1587 }
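
/*
 * Illustrative sketch of the segment rounding mentioned above: if MTT
 * entries are grouped into "MTT segments" of one cacheline (assumed
 * here to be 8 entries), an entry count must be rounded up to a whole
 * number of segments.  The macro and segment size below are assumptions
 * for this example only; the driver's own conversion is
 * TAVOR_NUMMTT_TO_MTTSEG().
 */
#define	EXAMPLE_MTT_PER_SEG	8	/* assumed entries per segment */
#define	EXAMPLE_NUMMTT_TO_MTTSEG(num) \
	(((num) + EXAMPLE_MTT_PER_SEG - 1) / EXAMPLE_MTT_PER_SEG)
/* EXAMPLE_NUMMTT_TO_MTTSEG(1) == 1, (8) == 1, (9) == 2 */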
1588 
1589 
1590 /*
1591  * tavor_mr_mtt_unbind()
1592  *    Context: Can be called from interrupt or base context.
1593  */
1594 int
1595 tavor_mr_mtt_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
1596     tavor_rsrc_t *mtt)
1597 {
1598 	/*
1599 	 * Free up the MTT entries and unbind the memory.  The caller is
1600 	 * responsible for ensuring it is appropriate to release these resources.
1601 	 */
1602 	tavor_mr_mem_unbind(state, bind);
1603 	tavor_rsrc_free(state, &mtt);
1604 
1605 	return (DDI_SUCCESS);
1606 }
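
/*
 * Illustrative usage (a hedged sketch, not driver code): the typical
 * pairing of tavor_mr_mtt_bind() and tavor_mr_mtt_unbind() above.  The
 * caller is assumed to have filled in "bind"; whatever consumes the MTT
 * entries between the two calls is elided.
 */
static int
example_mtt_bind_unbind(tavor_state_t *state, tavor_bind_info_t *bind,
    ddi_dma_handle_t dmahdl)
{
	tavor_rsrc_t	*mtt;
	uint_t		mtt_pgsize_bits;
	int		status;

	status = tavor_mr_mtt_bind(state, bind, dmahdl, &mtt,
	    &mtt_pgsize_bits);
	if (status != DDI_SUCCESS) {
		return (status);
	}

	/* ... the MTT entries may now be referenced from an MPT ... */

	return (tavor_mr_mtt_unbind(state, bind, mtt));
}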
1607 
1608 
1609 /*
1610  * tavor_mr_common_rereg()
1611  *    Context: Can be called from interrupt or base context.
1612  */
1613 static int
1614 tavor_mr_common_rereg(tavor_state_t *state, tavor_mrhdl_t mr,
1615     tavor_pdhdl_t pd, tavor_bind_info_t *bind, tavor_mrhdl_t *mrhdl_new,
1616     tavor_mr_options_t *op)
1617 {
1618 	tavor_rsrc_t		*mpt;
1619 	ibt_mr_attr_flags_t	acc_flags_to_use;
1620 	ibt_mr_flags_t		flags;
1621 	tavor_pdhdl_t		pd_to_use;
1622 	tavor_hw_mpt_t		mpt_entry;
1623 	uint64_t		mtt_addr_to_use, vaddr_to_use, len_to_use;
1624 	uint_t			sleep, dereg_level;
1625 	int			status;
1626 
1627 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
1628 
1629 	/*
1630 	 * Check here to see if the memory region corresponds to a userland
1631 	 * mapping.  Reregistration of userland memory regions is not
1632 	 * currently supported.  Return failure. XXX
1633 	 */
1634 	if (mr->mr_is_umem) {
		status = IBT_MR_HDL_INVALID;
1635 		goto mrrereg_fail;
1636 	}
1637 
1638 	mutex_enter(&mr->mr_lock);
1639 
1640 	/* Pull MPT resource pointer from the Tavor Memory Region handle */
1641 	mpt = mr->mr_mptrsrcp;
1642 
1643 	/* Extract the flags field from the tavor_bind_info_t */
1644 	flags = bind->bi_flags;
1645 
1646 	/*
1647 	 * Check the sleep flag.  Ensure that it is consistent with the
1648 	 * current thread context (i.e. if we are currently in the interrupt
1649 	 * context, then we shouldn't be attempting to sleep).
1650 	 */
1651 	sleep = (flags & IBT_MR_NOSLEEP) ? TAVOR_NOSLEEP: TAVOR_SLEEP;
1652 	if ((sleep == TAVOR_SLEEP) &&
1653 	    (sleep != TAVOR_SLEEPFLAG_FOR_CONTEXT())) {
1654 		mutex_exit(&mr->mr_lock);
		status = IBT_INVALID_PARAM;
1655 		goto mrrereg_fail;
1656 	}
1657 
1658 	/*
1659 	 * First step is to temporarily invalidate the MPT entry.  This
1660 	 * regains ownership from the hardware, and gives us the opportunity
1661 	 * to modify the entry.  Note: The HW2SW_MPT command returns the
1662 	 * current MPT entry contents.  These are saved away here because
1663 	 * they will be reused in a later step below.  If the region has
1664 	 * bound memory windows, then we fail, returning an "in use" error
1665 	 * code.  Any other failure is unexpected; in that case we deregister
1666 	 * the memory region and return an error.
1667 	 *
1668 	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
1669 	 * against holding the lock around this rereg call in all contexts.
1670 	 */
1671 	status = tavor_cmn_ownership_cmd_post(state, HW2SW_MPT, &mpt_entry,
1672 	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
1673 	if (status != TAVOR_CMD_SUCCESS) {
1674 		mutex_exit(&mr->mr_lock);
1675 		if (status == TAVOR_CMD_REG_BOUND) {
1676 			return (IBT_MR_IN_USE);
1677 		} else {
1678 			cmn_err(CE_CONT, "Tavor: HW2SW_MPT command failed: "
1679 			    "%08x\n", status);
1680 
1681 			/*
1682 			 * Call deregister and ensure that all current
1683 			 * resources get freed up
1684 			 */
1685 			if (tavor_mr_deregister(state, &mr,
1686 			    TAVOR_MR_DEREG_ALL, sleep) != DDI_SUCCESS) {
1687 				TAVOR_WARNING(state, "failed to deregister "
1688 				    "memory region");
1689 			}
1690 			return (ibc_get_ci_failure(0));
1691 		}
1692 	}
1693 
1694 	/*
1695 	 * If we're changing the protection domain, then validate the new one
1696 	 */
1697 	if (flags & IBT_MR_CHANGE_PD) {
1698 
1699 		/* Check for valid PD handle pointer */
1700 		if (pd == NULL) {
1701 			mutex_exit(&mr->mr_lock);
1702 			/*
1703 			 * Call deregister and ensure that all current
1704 			 * resources get properly freed up. Unnecessary
1705 			 * here to attempt to regain software ownership
1706 			 * of the MPT entry as that has already been
1707 			 * done above.
1708 			 */
1709 			if (tavor_mr_deregister(state, &mr,
1710 			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
1711 			    DDI_SUCCESS) {
1712 				TAVOR_WARNING(state, "failed to deregister "
1713 				    "memory region");
1714 			}
			status = IBT_PD_HDL_INVALID;
1715 			goto mrrereg_fail;
1716 		}
1717 
1718 		/* Use the new PD handle in all operations below */
1719 		pd_to_use = pd;
1720 
1721 	} else {
1722 		/* Use the current PD handle in all operations below */
1723 		pd_to_use = mr->mr_pdhdl;
1724 	}
1725 
1726 	/*
1727 	 * If we're changing access permissions, then validate the new ones
1728 	 */
1729 	if (flags & IBT_MR_CHANGE_ACCESS) {
1730 		/*
1731 		 * Validate the access flags.  Both remote write and remote
1732 		 * atomic require the local write flag to be set
1733 		 */
1734 		if (((flags & IBT_MR_ENABLE_REMOTE_WRITE) ||
1735 		    (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)) &&
1736 		    !(flags & IBT_MR_ENABLE_LOCAL_WRITE)) {
1737 			mutex_exit(&mr->mr_lock);
1738 			/*
1739 			 * Call deregister and ensure that all current
1740 			 * resources get properly freed up. Unnecessary
1741 			 * here to attempt to regain software ownership
1742 			 * of the MPT entry as that has already been
1743 			 * done above.
1744 			 */
1745 			if (tavor_mr_deregister(state, &mr,
1746 			    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) !=
1747 			    DDI_SUCCESS) {
1748 				TAVOR_WARNING(state, "failed to deregister "
1749 				    "memory region");
1750 			}
			status = IBT_MR_ACCESS_REQ_INVALID;
1751 			goto mrrereg_fail;
1752 		}
1753 
1754 		/*
1755 		 * Setup and validate the memory region access flags.  This
1756 		 * means translating the IBTF's enable flags into the access
1757 		 * flags that will be used in later operations.
1758 		 */
1759 		acc_flags_to_use = 0;
1760 		if (flags & IBT_MR_ENABLE_WINDOW_BIND)
1761 			acc_flags_to_use |= IBT_MR_WINDOW_BIND;
1762 		if (flags & IBT_MR_ENABLE_LOCAL_WRITE)
1763 			acc_flags_to_use |= IBT_MR_LOCAL_WRITE;
1764 		if (flags & IBT_MR_ENABLE_REMOTE_READ)
1765 			acc_flags_to_use |= IBT_MR_REMOTE_READ;
1766 		if (flags & IBT_MR_ENABLE_REMOTE_WRITE)
1767 			acc_flags_to_use |= IBT_MR_REMOTE_WRITE;
1768 		if (flags & IBT_MR_ENABLE_REMOTE_ATOMIC)
1769 			acc_flags_to_use |= IBT_MR_REMOTE_ATOMIC;
1770 
1771 	} else {
1772 		acc_flags_to_use = mr->mr_accflag;
1773 	}
1774 
1775 	/*
1776 	 * If we're modifying the translation, then figure out whether
1777 	 * we can reuse the current MTT resources.  This means calling
1778 	 * tavor_mr_rereg_xlat_helper() which does most of the heavy lifting
1779 	 * for the reregistration.  If the current memory region contains
1780 	 * sufficient MTT entries for the new regions, then it will be
1781 	 * reused and filled in.  Otherwise, new entries will be allocated,
1782 	 * the old ones will be freed, and the new entries will be filled
1783 	 * in.  Note:  If we're not modifying the translation, then we
1784 	 * should already have all the information we need to update the MPT.
1785 	 * Also note: If tavor_mr_rereg_xlat_helper() fails, it will return
1786 	 * a "dereg_level" which is the level of cleanup that needs to be
1787 	 * passed to tavor_mr_deregister() to finish the cleanup.
1788 	 */
1789 	if (flags & IBT_MR_CHANGE_TRANSLATION) {
1790 		status = tavor_mr_rereg_xlat_helper(state, mr, bind, op,
1791 		    &mtt_addr_to_use, sleep, &dereg_level);
1792 		if (status != DDI_SUCCESS) {
1793 			mutex_exit(&mr->mr_lock);
1794 			/*
1795 			 * Call deregister and ensure that all resources get
1796 			 * properly freed up.
1797 			 */
1798 			if (tavor_mr_deregister(state, &mr, dereg_level,
1799 			    sleep) != DDI_SUCCESS) {
1800 				TAVOR_WARNING(state, "failed to deregister "
1801 				    "memory region");
1802 			}
1803 
1804 			goto mrrereg_fail;
1805 		}
1806 		vaddr_to_use = mr->mr_bindinfo.bi_addr;
1807 		len_to_use   = mr->mr_bindinfo.bi_len;
1808 	} else {
1809 		mtt_addr_to_use = (((uint64_t)mpt_entry.mttseg_addr_h << 32) |
1810 		    ((uint64_t)mpt_entry.mttseg_addr_l << 6));
1811 		vaddr_to_use = mr->mr_bindinfo.bi_addr;
1812 		len_to_use   = mr->mr_bindinfo.bi_len;
1813 	}
1814 
1815 	/*
1816 	 * Calculate new keys (LKey, RKey) from MPT index.  Just like they were
1817 	 * when the region was first registered, each key is formed from
1818 	 * "constrained" bits and "unconstrained" bits.  Note:  If no remote
1819 	 * access is required, then the RKey value is not filled in.  Otherwise
1820 	 * both RKey and LKey are given the same value.
1821 	 */
1822 	tavor_mr_keycalc(state, mpt->tr_indx, &mr->mr_lkey);
1823 	if ((acc_flags_to_use & IBT_MR_REMOTE_READ) ||
1824 	    (acc_flags_to_use & IBT_MR_REMOTE_WRITE) ||
1825 	    (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC)) {
1826 		mr->mr_rkey = mr->mr_lkey;
1827 	}
1828 
1829 	/*
1830 	 * Update the MPT entry with the new information.  Some of this
1831 	 * information is retained from the previous operation, some of
1832 	 * it is new based on request.
1833 	 */
1834 	mpt_entry.en_bind = (acc_flags_to_use & IBT_MR_WINDOW_BIND)   ? 1 : 0;
1835 	mpt_entry.atomic  = (acc_flags_to_use & IBT_MR_REMOTE_ATOMIC) ? 1 : 0;
1836 	mpt_entry.rw	  = (acc_flags_to_use & IBT_MR_REMOTE_WRITE)  ? 1 : 0;
1837 	mpt_entry.rr	  = (acc_flags_to_use & IBT_MR_REMOTE_READ)   ? 1 : 0;
1838 	mpt_entry.lw	  = (acc_flags_to_use & IBT_MR_LOCAL_WRITE)   ? 1 : 0;
1839 	mpt_entry.page_sz	= mr->mr_logmttpgsz - 0xC;
1840 	mpt_entry.mem_key	= mr->mr_lkey;
1841 	mpt_entry.pd		= pd_to_use->pd_pdnum;
1842 	mpt_entry.start_addr	= vaddr_to_use;
1843 	mpt_entry.reg_win_len	= len_to_use;
1844 	mpt_entry.mttseg_addr_h = mtt_addr_to_use >> 32;
1845 	mpt_entry.mttseg_addr_l = mtt_addr_to_use >> 6;
1846 
1847 	/*
1848 	 * Write the updated MPT entry to hardware
1849 	 *
1850 	 * We use TAVOR_CMD_NOSLEEP_SPIN here always because we must protect
1851 	 * against holding the lock around this rereg call in all contexts.
1852 	 */
1853 	status = tavor_cmn_ownership_cmd_post(state, SW2HW_MPT, &mpt_entry,
1854 	    sizeof (tavor_hw_mpt_t), mpt->tr_indx, TAVOR_CMD_NOSLEEP_SPIN);
1855 	if (status != TAVOR_CMD_SUCCESS) {
1856 		mutex_exit(&mr->mr_lock);
1857 		cmn_err(CE_CONT, "Tavor: SW2HW_MPT command failed: %08x\n",
1858 		    status);
1859 		/*
1860 		 * Call deregister and ensure that all current resources get
1861 		 * properly freed up. Unnecessary here to attempt to regain
1862 		 * software ownership of the MPT entry as that has already
1863 		 * been done above.
1864 		 */
1865 		if (tavor_mr_deregister(state, &mr,
1866 		    TAVOR_MR_DEREG_NO_HW2SW_MPT, sleep) != DDI_SUCCESS) {
1867 			TAVOR_WARNING(state, "failed to deregister memory "
1868 			    "region");
1869 		}
1870 		return (ibc_get_ci_failure(0));
1871 	}
1872 
1873 	/*
1874 	 * If we're changing the PD, then update the PD reference counts now.
1875 	 * This means decrementing the reference count on the old PD and
1876 	 * incrementing the reference count on the new PD.
1877 	 */
1878 	if (flags & IBT_MR_CHANGE_PD) {
1879 		tavor_pd_refcnt_dec(mr->mr_pdhdl);
1880 		tavor_pd_refcnt_inc(pd);
1881 	}
1882 
1883 	/*
1884 	 * Update the contents of the Tavor Memory Region handle to reflect
1885 	 * what has been changed.
1886 	 */
1887 	mr->mr_pdhdl	  = pd_to_use;
1888 	mr->mr_accflag	  = acc_flags_to_use;
1889 	mr->mr_is_umem	  = 0;
1890 	mr->mr_umemcookie = NULL;
1891 
1892 	/* New MR handle is same as the old */
1893 	*mrhdl_new = mr;
1894 	mutex_exit(&mr->mr_lock);
1895 
1896 	return (DDI_SUCCESS);
1897 
1898 mrrereg_fail:
1899 	return (status);
1900 }
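
/*
 * Illustrative sketch of the MTT address split used above: the 64-bit
 * MTT segment address is stored in the MPT as a high 32-bit half plus a
 * low half shifted right by 6, since the address is 64-byte aligned and
 * its low six bits are implicitly zero.  The 26-bit width assumed for
 * the low field is an assumption for this example only.
 */
static uint64_t
example_mtt_addr_roundtrip(uint64_t mtt_addr)
{
	uint32_t	addr_h, addr_l;

	addr_h = (uint32_t)(mtt_addr >> 32);
	addr_l = (uint32_t)(mtt_addr >> 6) & 0x3FFFFFF;	/* bits 6..31 */

	/* Reassembled exactly as in the IBT_MR_CHANGE_TRANSLATION path */
	return (((uint64_t)addr_h << 32) | ((uint64_t)addr_l << 6));
}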
1901 
1902 
1903 /*
1904  * tavor_mr_rereg_xlat_helper
1905  *    Context: Can be called from interrupt or base context.
1906  *    Note: This routine expects the "mr_lock" to be held when it
1907  *    is called.  Upon returning failure, this routine passes information
1908  *    about what "dereg_level" should be passed to tavor_mr_deregister().
1909  */
1910 static int
1911 tavor_mr_rereg_xlat_helper(tavor_state_t *state, tavor_mrhdl_t mr,
1912     tavor_bind_info_t *bind, tavor_mr_options_t *op, uint64_t *mtt_addr,
1913     uint_t sleep, uint_t *dereg_level)
1914 {
1915 	tavor_rsrc_pool_info_t	*rsrc_pool;
1916 	tavor_rsrc_t		*mtt, *mtt_refcnt;
1917 	tavor_sw_refcnt_t	*swrc_old, *swrc_new;
1918 	ddi_dma_handle_t	dmahdl;
1919 	uint64_t		nummtt_needed, nummtt_in_currrsrc, max_sz;
1920 	uint64_t		mtt_ddrbaseaddr;
1921 	uint_t			mtt_pgsize_bits, bind_type, reuse_dmahdl;
1922 	int			status;
1923 
1924 	ASSERT(MUTEX_HELD(&mr->mr_lock));
1925 
1926 	/*
1927 	 * Check the "options" flag.  Currently this flag tells the driver
1928 	 * whether or not the region should be bound normally (i.e. with
1929 	 * entries written into the PCI IOMMU) or whether it should be
1930 	 * registered to bypass the IOMMU.
1931 	 */
1932 	if (op == NULL) {
1933 		bind_type = TAVOR_BINDMEM_NORMAL;
1934 	} else {
1935 		bind_type = op->mro_bind_type;
1936 	}
1937 
1938 	/*
1939 	 * Check for invalid length.  The length is invalid if it is zero or
1940 	 * if it is larger than the maximum configured value.  Return error
1941 	 * if it is.
1942 	 */
1943 	max_sz = ((uint64_t)1 << state->ts_cfg_profile->cp_log_max_mrw_sz);
1944 	if ((bind->bi_len == 0) || (bind->bi_len > max_sz)) {
1945 		/*
1946 		 * Deregister will be called upon returning failure from this
1947 		 * routine. This will ensure that all current resources get
1948 		 * properly freed up. Unnecessary to attempt to regain
1949 		 * software ownership of the MPT entry as that has already
1950 		 * been done above (in tavor_mr_common_rereg())
1951 		 */
1952 		*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT;
1953 
		status = IBT_MR_LEN_INVALID;
1954 		goto mrrereghelp_fail;
1955 	}
1956 
1957 	/*
1958 	 * Determine the number of pages necessary for new region and the
1959 	 * number of pages supported by the current MTT resources
1960 	 */
1961 	nummtt_needed = tavor_mr_nummtt_needed(state, bind, &mtt_pgsize_bits);
1962 	nummtt_in_currrsrc = mr->mr_mttrsrcp->tr_len >> TAVOR_MTT_SIZE_SHIFT;
1963 
1964 	/*
1965 	 * Depending on whether we have enough pages or not, the next step is
1966 	 * to fill in a set of MTT entries that reflect the new mapping.  In
1967 	 * the first case below, we already have enough entries.  This means
1968 	 * we need to unbind the memory from the previous mapping, bind the
1969 	 * memory for the new mapping, write the new MTT entries, and update
1970 	 * the mr to reflect the changes.
1971 	 * In the second case below, we do not have enough entries in the
1972 	 * current mapping.  So, in this case, we need not only to unbind the
1973 	 * current mapping, but we need to free up the MTT resources associated
1974 	 * with that mapping.  After we've successfully done that, we continue
1975 	 * by binding the new memory, allocating new MTT entries, writing the
1976 	 * new MTT entries, and updating the mr to reflect the changes.
1977 	 */
1978 
1979 	/*
1980 	 * If this region is being shared (i.e. MTT refcount != 1), then we
1981 	 * can't reuse the current MTT resources regardless of their size.
1982 	 * Instead we'll need to alloc new ones (below) just as if there
1983 	 * hadn't been enough room in the current entries.
1984 	 */
1985 	swrc_old = (tavor_sw_refcnt_t *)mr->mr_mttrefcntp->tr_addr;
1986 	if (TAVOR_MTT_IS_NOT_SHARED(swrc_old) &&
1987 	    (nummtt_needed <= nummtt_in_currrsrc)) {
1988 
1989 		/*
1990 		 * Unbind the old mapping for this memory region, but retain
1991 		 * the ddi_dma_handle_t (if possible) for reuse in the bind
1992 		 * operation below.  Note:  If original memory region was
1993 		 * bound for IOMMU bypass and the new region can not use
1994 		 * bypass, then a new DMA handle will be necessary.
1995 		 */
1996 		if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
1997 			mr->mr_bindinfo.bi_free_dmahdl = 0;
1998 			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
1999 			dmahdl = mr->mr_bindinfo.bi_dmahdl;
2000 			reuse_dmahdl = 1;
2001 		} else {
2002 			tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2003 			dmahdl = NULL;
2004 			reuse_dmahdl = 0;
2005 		}
2006 
2007 		/*
2008 		 * Bind the new memory and determine the mapped addresses.
2009 		 * As described, this routine and tavor_mr_fast_mtt_write()
2010 		 * do the majority of the work for the memory registration
2011 		 * operations.  Note:  When we successfully finish the binding,
2012 		 * we will set the "bi_free_dmahdl" flag to indicate that
2013 		 * even though we may have reused the ddi_dma_handle_t we do
2014 		 * wish it to be freed up at some later time.  Note also that
2015 		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2016 		 */
2017 		bind->bi_bypass	= bind_type;
2018 		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
2019 		if (status != DDI_SUCCESS) {
2020 			if (reuse_dmahdl) {
2021 				ddi_dma_free_handle(&dmahdl);
2022 			}
2023 
2024 			/*
2025 			 * Deregister will be called upon returning failure
2026 			 * from this routine. This will ensure that all
2027 			 * current resources get properly freed up.
2028 			 * Unnecessary to attempt to regain software ownership
2029 			 * of the MPT entry as that has already been done
2030 			 * above (in tavor_mr_common_rereg()).  Also unnecessary
2031 			 * to attempt to unbind the memory.
2032 			 */
2033 			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2034 
2035 			goto mrrereghelp_fail;
2036 		}
2037 		if (reuse_dmahdl) {
2038 			bind->bi_free_dmahdl = 1;
2039 		}
2040 
2041 		/*
2042 		 * Using the new mapping, but reusing the current MTT
2043 		 * resources, write the updated entries to MTT
2044 		 */
2045 		mtt    = mr->mr_mttrsrcp;
2046 		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2047 		if (status != DDI_SUCCESS) {
2048 			/*
2049 			 * Deregister will be called upon returning failure
2050 			 * from this routine. This will ensure that all
2051 			 * current resources get properly freed up.
2052 			 * Unnecessary to attempt to regain software ownership
2053 			 * of the MPT entry as that has already been done
2054 			 * above (in tavor_mr_common_rereg()).  Also unnecessary
2055 			 * to attempt to unbind the memory.
2056 			 *
2057 			 * But we do need to unbind the newly bound memory
2058 			 * before returning.
2059 			 */
2060 			tavor_mr_mem_unbind(state, bind);
2061 			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2062 
2063 			goto mrrereghelp_fail;
2064 		}
2065 
2066 		/* Put the updated information into the Mem Region handle */
2067 		mr->mr_bindinfo	  = *bind;
2068 		mr->mr_logmttpgsz = mtt_pgsize_bits;
2069 
2070 	} else {
2071 		/*
2072 		 * Check if the memory region MTT is shared by any other MRs.
2073 		 * Since the resource may be shared between multiple memory
2074 		 * regions (as a result of a "RegisterSharedMR()" verb) it is
2075 		 * important that we not unbind any resources prematurely.
2076 		 */
2077 		if (!TAVOR_MTT_IS_SHARED(swrc_old)) {
2078 			/*
2079 			 * Unbind the old mapping for this memory region, but
2080 			 * retain the ddi_dma_handle_t for reuse in the bind
2081 			 * operation below. Note: This can only be done here
2082 			 * because the region being reregistered is not
2083 			 * currently shared.  Also if original memory region
2084 			 * was bound for IOMMU bypass and the new region can
2085 			 * not use bypass, then a new DMA handle will be
2086 			 * necessary.
2087 			 */
2088 			if (TAVOR_MR_REUSE_DMAHDL(mr, bind->bi_flags)) {
2089 				mr->mr_bindinfo.bi_free_dmahdl = 0;
2090 				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2091 				dmahdl = mr->mr_bindinfo.bi_dmahdl;
2092 				reuse_dmahdl = 1;
2093 			} else {
2094 				tavor_mr_mem_unbind(state, &mr->mr_bindinfo);
2095 				dmahdl = NULL;
2096 				reuse_dmahdl = 0;
2097 			}
2098 		} else {
2099 			dmahdl = NULL;
2100 			reuse_dmahdl = 0;
2101 		}
2102 
2103 		/*
2104 		 * Bind the new memory and determine the mapped addresses.
2105 		 * As described, this routine and tavor_mr_fast_mtt_write()
2106 		 * do the majority of the work for the memory registration
2107 		 * operations.  Note:  When we successfully finish the binding,
2108 		 * we will set the "bi_free_dmahdl" flag to indicate that
2109 		 * even though we may have reused the ddi_dma_handle_t we do
2110 		 * wish it to be freed up at some later time.  Note also that
2111 		 * if we fail, we may need to cleanup the ddi_dma_handle_t.
2112 		 */
2113 		bind->bi_bypass	= bind_type;
2114 		status = tavor_mr_mem_bind(state, bind, dmahdl, sleep);
2115 		if (status != DDI_SUCCESS) {
2116 			if (reuse_dmahdl) {
2117 				ddi_dma_free_handle(&dmahdl);
2118 			}
2119 
2120 			/*
2121 			 * Deregister will be called upon returning failure
2122 			 * from this routine. This will ensure that all
2123 			 * current resources get properly freed up.
2124 			 * Unnecessary to attempt to regain software ownership
2125 			 * of the MPT entry as that has already been done
2126 			 * above (in tavor_mr_common_rereg()).  Also unnecessary
2127 			 * to attempt to unbind the memory.
2128 			 */
2129 			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2130 
2131 			goto mrrereghelp_fail;
2132 		}
2133 		if (reuse_dmahdl) {
2134 			bind->bi_free_dmahdl = 1;
2135 		}
2136 
2137 		/*
2138 		 * Allocate the new MTT entries resource
2139 		 */
2140 		status = tavor_rsrc_alloc(state, TAVOR_MTT,
2141 		    TAVOR_NUMMTT_TO_MTTSEG(nummtt_needed), sleep, &mtt);
2142 		if (status != DDI_SUCCESS) {
2143 			/*
2144 			 * Deregister will be called upon returning failure
2145 			 * from this routine. This will ensure that all
2146 			 * current resources get properly freed up.
2147 			 * Unnecessary to attempt to regain software ownership
2148 			 * of the MPT entry as that has already been done
2149 			 * above (in tavor_mr_common_rereg()).  Also unnecessary
2150 			 * to attempt to unbind the memory.
2151 			 *
2152 			 * But we do need to unbind the newly bound memory
2153 			 * before returning.
2154 			 */
2155 			tavor_mr_mem_unbind(state, bind);
2156 			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2157 
2158 			goto mrrereghelp_fail;
2159 		}
2160 
2161 		/*
2162 		 * Allocate MTT reference count (to track shared memory
2163 		 * regions).  As mentioned elsewhere above, this reference
2164 		 * count resource may never be used on the given memory region,
2165 		 * but if it is ever later registered as a "shared" memory
2166 		 * region then this resource will be necessary.  Note:  This
2167 		 * is only necessary here if the existing memory region is
2168 		 * already being shared (because otherwise we already have
2169 		 * a useable reference count resource).
2170 		 */
2171 		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2172 			status = tavor_rsrc_alloc(state, TAVOR_REFCNT, 1,
2173 			    sleep, &mtt_refcnt);
2174 			if (status != DDI_SUCCESS) {
2175 				/*
2176 				 * Deregister will be called upon returning
2177 				 * failure from this routine. This will ensure
2178 				 * that all current resources get properly
2179 				 * freed up.  Unnecessary to attempt to regain
2180 				 * software ownership of the MPT entry as that
2181 				 * has already been done above (in
2182 				 * tavor_mr_common_rereg()).  Also unnecessary
2183 				 * to attempt to unbind the memory.
2184 				 *
2185 				 * But we need to unbind the newly bound
2186 				 * memory and free up the newly allocated MTT
2187 				 * entries before returning.
2188 				 */
2189 				tavor_mr_mem_unbind(state, bind);
2190 				tavor_rsrc_free(state, &mtt);
2191 				*dereg_level =
2192 				    TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2193 
2194 				goto mrrereghelp_fail;
2195 			}
2196 			swrc_new = (tavor_sw_refcnt_t *)mtt_refcnt->tr_addr;
2197 			_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*swrc_new))
2198 			TAVOR_MTT_REFCNT_INIT(swrc_new);
2199 		} else {
2200 			mtt_refcnt = mr->mr_mttrefcntp;
2201 		}
2202 
2203 		/*
2204 		 * Using the new mapping and the new MTT resources, write the
2205 		 * updated entries to MTT
2206 		 */
2207 		status = tavor_mr_fast_mtt_write(mtt, bind, mtt_pgsize_bits);
2208 		if (status != DDI_SUCCESS) {
2209 			/*
2210 			 * Deregister will be called upon returning failure
2211 			 * from this routine. This will ensure that all
2212 			 * current resources get properly freed up.
2213 			 * Unnecessary to attempt to regain software ownership
2214 			 * of the MPT entry as that has already been done
2215 			 * above (in tavor_mr_common_rereg()).  Also unnecessary
2216 			 * to attempt to unbind the memory.
2217 			 *
2218 			 * But we need to unbind the newly bound memory,
2219 			 * free up the newly allocated MTT entries, and
2220 			 * (possibly) free the new MTT reference count
2221 			 * resource before returning.
2222 			 */
2223 			if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2224 				tavor_rsrc_free(state, &mtt_refcnt);
2225 			}
2226 			tavor_mr_mem_unbind(state, bind);
2227 			tavor_rsrc_free(state, &mtt);
2228 			*dereg_level = TAVOR_MR_DEREG_NO_HW2SW_MPT_OR_UNBIND;
2229 
2230 			goto mrrereghelp_fail;
2231 		}
2232 
2233 		/*
2234 		 * Check if the memory region MTT is shared by any other MRs.
2235 		 * Since the resource may be shared between multiple memory
2236 		 * regions (as a result of a "RegisterSharedMR()" verb) it is
2237 		 * important that we not free up any resources prematurely.
2238 		 */
2239 		if (TAVOR_MTT_IS_SHARED(swrc_old)) {
2240 			/* Decrement MTT reference count for "old" region */
2241 			(void) tavor_mtt_refcnt_dec(mr->mr_mttrefcntp);
2242 		} else {
2243 			/* Free up the old MTT entries resource */
2244 			tavor_rsrc_free(state, &mr->mr_mttrsrcp);
2245 		}
2246 
2247 		/* Put the updated information into the mrhdl */
2248 		mr->mr_bindinfo	  = *bind;
2249 		mr->mr_logmttpgsz = mtt_pgsize_bits;
2250 		mr->mr_mttrsrcp   = mtt;
2251 		mr->mr_mttrefcntp = mtt_refcnt;
2252 	}
2253 
2254 	/*
2255 	 * Calculate and return the updated MTT address (in the DDR address
2256 	 * space).  This will be used by the caller (tavor_mr_reregister) in
2257 	 * the updated MPT entry
2258 	 */
2259 	rsrc_pool	= &state->ts_rsrc_hdl[TAVOR_MTT];
2260 	mtt_ddrbaseaddr = (uint64_t)(uintptr_t)rsrc_pool->rsrc_ddr_offset;
2261 	*mtt_addr	= mtt_ddrbaseaddr + (mtt->tr_indx <<
2262 	    TAVOR_MTT_SIZE_SHIFT);
2263 
2264 	return (DDI_SUCCESS);
2265 
2266 mrrereghelp_fail:
2267 	return (status);
2268 }
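
/*
 * Illustrative sketch (hypothetical helper name) of the MTT-reuse
 * decision made above: the current MTT resources can be refilled in
 * place only if they are not shared with another region and are large
 * enough to hold the new mapping; otherwise new entries must be
 * allocated and the old ones freed.
 */
static int
example_can_reuse_mtt(int mtt_is_shared, uint64_t nummtt_needed,
    uint64_t nummtt_in_currrsrc)
{
	return (!mtt_is_shared && (nummtt_needed <= nummtt_in_currrsrc));
}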
2269 
2270 
2271 /*
2272  * tavor_mr_nummtt_needed()
2273  *    Context: Can be called from interrupt or base context.
2274  */
2275 /* ARGSUSED */
2276 static uint64_t
2277 tavor_mr_nummtt_needed(tavor_state_t *state, tavor_bind_info_t *bind,
2278     uint_t *mtt_pgsize_bits)
2279 {
2280 	uint64_t	pg_offset_mask;
2281 	uint64_t	pg_offset, tmp_length;
2282 
2283 	/*
2284 	 * For now we specify the page size as 8KB (the default page size for
2285 	 * the sun4u architecture), or 4KB for x86.  XXX: Figure out the
2286 	 * optimal page size by examining the dmacookies.
2287 	 */
2288 	*mtt_pgsize_bits = PAGESHIFT;
2289 
2290 	pg_offset_mask = ((uint64_t)1 << *mtt_pgsize_bits) - 1;
2291 	pg_offset = bind->bi_addr & pg_offset_mask;
2292 	tmp_length = pg_offset + (bind->bi_len - 1);
2293 	return ((tmp_length >> *mtt_pgsize_bits) + 1);
2294 }
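
/*
 * Worked example for the calculation above (assuming 4KB pages, i.e.
 * mtt_pgsize_bits == 12): for bi_addr == 0x1FF0 and bi_len == 0x20,
 * pg_offset == 0xFF0 and tmp_length == 0xFF0 + 0x1F == 0x100F, so the
 * result is (0x100F >> 12) + 1 == 2 MTT entries -- correct, since the
 * 32-byte range straddles the pages at 0x1000 and 0x2000.
 */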
2295 
2296 
2297 /*
2298  * tavor_mr_mem_bind()
2299  *    Context: Can be called from interrupt or base context.
2300  */
2301 static int
2302 tavor_mr_mem_bind(tavor_state_t *state, tavor_bind_info_t *bind,
2303     ddi_dma_handle_t dmahdl, uint_t sleep)
2304 {
2305 	ddi_dma_attr_t	dma_attr;
2306 	int		(*callback)(caddr_t);
2307 	uint_t		dma_xfer_mode;
2308 	int		status;
2309 
2310 	/* bi_type must be set to a meaningful value to get a bind handle */
2311 	ASSERT(bind->bi_type == TAVOR_BINDHDL_VADDR ||
2312 	    bind->bi_type == TAVOR_BINDHDL_BUF ||
2313 	    bind->bi_type == TAVOR_BINDHDL_UBUF);
2314 
2315 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2316 
2317 	/* Set the callback flag appropriately */
2318 	callback = (sleep == TAVOR_SLEEP) ? DDI_DMA_SLEEP : DDI_DMA_DONTWAIT;
2319 
2320 	/* Determine whether to map STREAMING or CONSISTENT */
2321 	dma_xfer_mode = (bind->bi_flags & IBT_MR_NONCOHERENT) ?
2322 	    DDI_DMA_STREAMING : DDI_DMA_CONSISTENT;
2323 
2324 	/*
2325 	 * Initialize many of the default DMA attributes.  Then, if we're
2326 	 * bypassing the IOMMU, set the DDI_DMA_FORCE_PHYSICAL flag.
2327 	 */
2328 	if (dmahdl == NULL) {
2329 		tavor_dma_attr_init(&dma_attr);
2330 #ifdef	__sparc
2331 		/*
2332 		 * First, disable streaming and switch to consistent if
2333 		 * configured to do so and IOMMU BYPASS is enabled.
2334 		 */
2335 		if (state->ts_cfg_profile->cp_disable_streaming_on_bypass &&
2336 		    dma_xfer_mode == DDI_DMA_STREAMING &&
2337 		    bind->bi_bypass == TAVOR_BINDMEM_BYPASS) {
2338 			dma_xfer_mode = DDI_DMA_CONSISTENT;
2339 		}
2340 
2341 		/*
2342 		 * Then, if streaming is still specified, "bypass" is not
2343 		 * allowed; the DDI_DMA_FORCE_PHYSICAL flag is set below only
		 * when the mapping is consistent.
2344 		 */
2345 		if ((dma_xfer_mode == DDI_DMA_CONSISTENT) &&
2346 		    (bind->bi_bypass == TAVOR_BINDMEM_BYPASS)) {
2347 			dma_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;
2348 		}
2349 #endif
2350 		/* Allocate a DMA handle for the binding */
2351 		status = ddi_dma_alloc_handle(state->ts_dip, &dma_attr,
2352 		    callback, NULL, &bind->bi_dmahdl);
2353 		if (status != DDI_SUCCESS) {
2354 			return (status);
2355 		}
2356 		bind->bi_free_dmahdl = 1;
2357 
2358 	} else  {
2359 		bind->bi_dmahdl = dmahdl;
2360 		bind->bi_free_dmahdl = 0;
2361 	}
2362 
2363 	/*
2364 	 * Bind the memory to get the PCI mapped addresses.  The decision
2365 	 * to call ddi_dma_addr_bind_handle() or ddi_dma_buf_bind_handle()
2366 	 * is determined by the "bi_type" flag.  Note: if the bind operation
2367 	 * fails then we have to free up the DMA handle and return error.
2368 	 */
2369 	if (bind->bi_type == TAVOR_BINDHDL_VADDR) {
2370 		status = ddi_dma_addr_bind_handle(bind->bi_dmahdl, NULL,
2371 		    (caddr_t)(uintptr_t)bind->bi_addr, bind->bi_len,
2372 		    (DDI_DMA_RDWR | dma_xfer_mode), callback, NULL,
2373 		    &bind->bi_dmacookie, &bind->bi_cookiecnt);
2374 	} else { /* TAVOR_BINDHDL_BUF || TAVOR_BINDHDL_UBUF */
2375 		status = ddi_dma_buf_bind_handle(bind->bi_dmahdl,
2376 		    bind->bi_buf, (DDI_DMA_RDWR | dma_xfer_mode), callback,
2377 		    NULL, &bind->bi_dmacookie, &bind->bi_cookiecnt);
2378 	}
2379 
2380 	if (status != DDI_DMA_MAPPED) {
2381 		if (bind->bi_free_dmahdl != 0) {
2382 			ddi_dma_free_handle(&bind->bi_dmahdl);
2383 		}
2384 		return (status);
2385 	}
2386 
2387 	return (DDI_SUCCESS);
2388 }
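
/*
 * Illustrative sketch of the core bind sequence above, reduced to the
 * virtual-address case with sleeping callbacks.  The buf(9S)-based
 * variant and the IOMMU-bypass attribute handling are omitted, and the
 * "drv_"-prefixed parameters are hypothetical stand-ins supplied by the
 * caller.
 */
static int
example_dma_addr_bind(dev_info_t *drv_dip, ddi_dma_attr_t *drv_attr,
    caddr_t vaddr, size_t len, ddi_dma_handle_t *dmahdl,
    ddi_dma_cookie_t *cookie, uint_t *cookiecnt)
{
	int	status;

	/* Allocate a DMA handle using the caller's DMA attributes */
	status = ddi_dma_alloc_handle(drv_dip, drv_attr, DDI_DMA_SLEEP,
	    NULL, dmahdl);
	if (status != DDI_SUCCESS) {
		return (status);
	}

	/* Bind the virtual address range and collect the first cookie */
	status = ddi_dma_addr_bind_handle(*dmahdl, NULL, vaddr, len,
	    (DDI_DMA_RDWR | DDI_DMA_CONSISTENT), DDI_DMA_SLEEP, NULL,
	    cookie, cookiecnt);
	if (status != DDI_DMA_MAPPED) {
		ddi_dma_free_handle(dmahdl);
		return (status);
	}

	return (DDI_SUCCESS);
}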
2389 
2390 
2391 /*
2392  * tavor_mr_mem_unbind()
2393  *    Context: Can be called from interrupt or base context.
2394  */
2395 static void
2396 tavor_mr_mem_unbind(tavor_state_t *state, tavor_bind_info_t *bind)
2397 {
2398 	int	status;
2399 
2400 	/*
2401 	 * In the TAVOR_BINDHDL_UBUF case, the memory that bi_buf points to
2402 	 * was allocated internally by ddi_umem_iosetup(), so it must be
2403 	 * freed here.  Reset bi_type to TAVOR_BINDHDL_NONE so that it is
2404 	 * not freed again later.
2405 	 */
2406 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*bind))
2407 	if (bind->bi_type == TAVOR_BINDHDL_UBUF) {
2408 		freerbuf(bind->bi_buf);
2409 		bind->bi_type = TAVOR_BINDHDL_NONE;
2410 	}
2411 	_NOTE(NOW_VISIBLE_TO_OTHER_THREADS(*bind))
2412 
2413 	/*
2414 	 * Unbind the DMA memory for the region
2415 	 *
2416 	 * Note: The only way ddi_dma_unbind_handle() currently
2417 	 * can return an error is if the handle passed in is invalid.
2418 	 * Since this should never happen, we choose to return void
2419 	 * from this function.  If the unbind does return an error,
2420 	 * however, we print a warning message to the console.
2421 	 */
2422 	status = ddi_dma_unbind_handle(bind->bi_dmahdl);
2423 	if (status != DDI_SUCCESS) {
2424 		TAVOR_WARNING(state, "failed to unbind DMA mapping");
2425 		return;
2426 	}
2427 
2428 	/* Free up the DMA handle */
2429 	if (bind->bi_free_dmahdl != 0) {
2430 		ddi_dma_free_handle(&bind->bi_dmahdl);
2431 	}
2432 }
2433 
2434 
2435 /*
2436  * tavor_mr_fast_mtt_write()
2437  *    Context: Can be called from interrupt or base context.
2438  */
2439 static int
2440 tavor_mr_fast_mtt_write(tavor_rsrc_t *mtt, tavor_bind_info_t *bind,
2441     uint32_t mtt_pgsize_bits)
2442 {
2443 	ddi_dma_cookie_t	dmacookie;
2444 	uint_t			cookie_cnt;
2445 	uint64_t		*mtt_table;
2446 	uint64_t		mtt_entry;
2447 	uint64_t		addr, endaddr;
2448 	uint64_t		pagesize;
2449 	int			i;
2450 
2451 	/* Calculate page size from the suggested value passed in */
2452 	pagesize = ((uint64_t)1 << mtt_pgsize_bits);
2453 
2454 	/*
2455 	 * Walk the "cookie list" and fill in the MTT table entries
2456 	 */
2457 	i = 0;
2458 	mtt_table  = (uint64_t *)mtt->tr_addr;
2459 	dmacookie  = bind->bi_dmacookie;
2460 	cookie_cnt = bind->bi_cookiecnt;
2461 	while (cookie_cnt-- > 0) {
2462 		addr	= dmacookie.dmac_laddress;
2463 		endaddr = addr + (dmacookie.dmac_size - 1);
2464 		addr	= addr & ~((uint64_t)pagesize - 1);
2465 		while (addr <= endaddr) {
2466 			/*
2467 			 * Fill in the mapped addresses (calculated above) and
2468 			 * set TAVOR_MTT_ENTRY_PRESET flag for each MTT entry.
2469 			 */
2470 			mtt_entry = addr | TAVOR_MTT_ENTRY_PRESET;
2471 			ddi_put64(mtt->tr_acchdl, &mtt_table[i], mtt_entry);
2472 			addr += pagesize;
2473 			i++;
2474 
2475 			if (addr == 0) {
2476 				static int do_once = 1;
2477 				_NOTE(SCHEME_PROTECTS_DATA("safe sharing",
2478 				    do_once))
2479 				if (do_once) {
2480 					do_once = 0;
2481 					cmn_err(CE_NOTE, "probable error in "
2482 					    "dma_cookie address from caller\n");
2483 				}
2484 				break;
2485 			}
2486 		}
2487 
2488 		/*
2489 		 * When we've reached the end of the current DMA cookie,
2490 		 * jump to the next cookie (if there are more)
2491 		 */
2492 		if (cookie_cnt != 0) {
2493 			ddi_dma_nextcookie(bind->bi_dmahdl, &dmacookie);
2494 		}
2495 	}
2496 
2497 	return (DDI_SUCCESS);
2498 }
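
/*
 * Worked example for the cookie walk above (assuming 4KB pages): a
 * single cookie with dmac_laddress == 0x101800 and dmac_size == 0x2000
 * covers bytes 0x101800 through 0x1037FF.  Aligning the start down to
 * 0x101000 produces three MTT entries -- 0x101000, 0x102000, and
 * 0x103000 -- each written with TAVOR_MTT_ENTRY_PRESET set.
 */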
2499 
2500 /*
2501  * tavor_mtt_refcnt_inc()
2502  *    Context: Can be called from interrupt or base context.
2503  */
2504 static int
2505 tavor_mtt_refcnt_inc(tavor_rsrc_t *rsrc)
2506 {
2507 	tavor_sw_refcnt_t *rc;
2508 	uint32_t	  cnt;
2509 
2510 	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;
2511 
2512 	/* Increment the MTT's reference count */
2513 	mutex_enter(&rc->swrc_lock);
2514 	cnt = rc->swrc_refcnt++;
2515 	mutex_exit(&rc->swrc_lock);
2516 
2517 	return (cnt);
2518 }
2519 
2520 
2521 /*
2522  * tavor_mtt_refcnt_dec()
2523  *    Context: Can be called from interrupt or base context.
2524  */
2525 static int
2526 tavor_mtt_refcnt_dec(tavor_rsrc_t *rsrc)
2527 {
2528 	tavor_sw_refcnt_t *rc;
2529 	uint32_t	  cnt;
2530 
2531 	rc = (tavor_sw_refcnt_t *)rsrc->tr_addr;
2532 
2533 	/* Decrement the MTT's reference count */
2534 	mutex_enter(&rc->swrc_lock);
2535 	cnt = --rc->swrc_refcnt;
2536 	mutex_exit(&rc->swrc_lock);
2537 
2538 	return (cnt);
2539 }
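
/*
 * Note the asymmetry above: tavor_mtt_refcnt_inc() returns the count
 * prior to the increment, while tavor_mtt_refcnt_dec() returns the
 * count after the decrement.  A hedged usage sketch follows (the
 * callback parameter is hypothetical): a decrement result of zero
 * identifies the last reference, at which point the shared MTT
 * resources may be released.
 */
static void
example_mtt_release(tavor_state_t *state, tavor_rsrc_t *mtt_refcnt,
    void (*free_mtt_cb)(tavor_state_t *))
{
	/* Last reference dropped: safe to free the shared MTT resources */
	if (tavor_mtt_refcnt_dec(mtt_refcnt) == 0) {
		(*free_mtt_cb)(state);
	}
}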
2540