1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * tavor_umap.c
29  *    Tavor Userland Mapping Routines
30  *
31  *    Implements all the routines necessary for enabling direct userland
32  *    access to the Tavor hardware.  This includes all routines necessary for
33  *    maintaining the "userland resources database" and all the support routines
34  *    for the devmap calls.
35  */
36 
37 #include <sys/types.h>
38 #include <sys/conf.h>
39 #include <sys/ddi.h>
40 #include <sys/sunddi.h>
41 #include <sys/modctl.h>
42 #include <sys/file.h>
43 #include <sys/avl.h>
44 #include <sys/sysmacros.h>
45 
46 #include <sys/ib/adapters/tavor/tavor.h>
47 
48 /* Tavor HCA state pointer (extern) */
49 extern void *tavor_statep;
50 
51 /* Tavor HCA Userland Resource Database (extern) */
52 extern tavor_umap_db_t tavor_userland_rsrc_db;
53 
54 static int tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
55     tavor_rsrc_t *rsrcp, size_t *maplen, int *err);
56 static int tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
57     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
58 static int tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
59     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
60 static int tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
61     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err);
62 static int tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
63     offset_t off, size_t len, void **pvtp);
64 static int tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp,
65     devmap_cookie_t new_dhp, void **new_pvtp);
66 static void tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp,
67     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
68     devmap_cookie_t new_dhp2, void **pvtp2);
69 static int tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
70     offset_t off, size_t len, void **pvtp);
71 static int tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
72     devmap_cookie_t new_dhp, void **new_pvtp);
73 static void tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp,
74     offset_t off, size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
75     devmap_cookie_t new_dhp2, void **pvtp2);
76 static ibt_status_t tavor_umap_mr_data_in(tavor_mrhdl_t mr,
77     ibt_mr_data_in_t *data, size_t data_sz);
78 static ibt_status_t tavor_umap_cq_data_out(tavor_cqhdl_t cq,
79     mlnx_umap_cq_data_out_t *data, size_t data_sz);
80 static ibt_status_t tavor_umap_qp_data_out(tavor_qphdl_t qp,
81     mlnx_umap_qp_data_out_t *data, size_t data_sz);
82 static ibt_status_t tavor_umap_srq_data_out(tavor_srqhdl_t srq,
83     mlnx_umap_srq_data_out_t *data, size_t data_sz);
84 static int tavor_umap_db_compare(const void *query, const void *entry);
85 static ibt_status_t tavor_umap_pd_data_out(tavor_pdhdl_t pd,
86     mlnx_umap_pd_data_out_t *data, size_t data_sz);
87 
88 
89 /*
90  * These callbacks are passed to devmap_umem_setup() and devmap_devmem_setup(),
91  * respectively.  They are used to handle (among other things) partial
92  * unmappings and to provide a method for invalidating mappings inherited
93  * as a result of a fork(2) system call.
94  */
95 static struct devmap_callback_ctl tavor_devmap_umem_cbops = {
96 	DEVMAP_OPS_REV,
97 	tavor_devmap_umem_map,
98 	NULL,
99 	tavor_devmap_umem_dup,
100 	tavor_devmap_umem_unmap
101 };
102 static struct devmap_callback_ctl tavor_devmap_devmem_cbops = {
103 	DEVMAP_OPS_REV,
104 	tavor_devmap_devmem_map,
105 	NULL,
106 	tavor_devmap_devmem_dup,
107 	tavor_devmap_devmem_unmap
108 };
109 
110 /*
111  * tavor_devmap()
112  *    Context: Can be called from user context.
113  */
114 /* ARGSUSED */
115 int
tavor_devmap(dev_t dev,devmap_cookie_t dhp,offset_t off,size_t len,size_t * maplen,uint_t model)116 tavor_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
117     size_t *maplen, uint_t model)
118 {
119 	tavor_state_t	*state;
120 	tavor_rsrc_t 	*rsrcp;
121 	minor_t		instance;
122 	uint64_t	key, value;
123 	uint_t		type;
124 	int		err, status;
125 
126 	/* Get Tavor softstate structure from instance */
127 	instance = TAVOR_DEV_INSTANCE(dev);
128 	state = ddi_get_soft_state(tavor_statep, instance);
129 	if (state == NULL) {
130 		return (ENXIO);
131 	}
132 
133 	/*
134 	 * Access to Tavor devmap interface is not allowed in
135 	 * "maintenance mode".
136 	 */
137 	if (state->ts_operational_mode == TAVOR_MAINTENANCE_MODE) {
138 		return (EFAULT);
139 	}
140 
141 	/*
142 	 * The bottom bits of "offset" are undefined (number depends on
143 	 * system PAGESIZE).  Shifting these off leaves us with a "key".
144 	 * The "key" is actually a combination of both a real key value
145 	 * (for the purpose of database lookup) and a "type" value.  We
146 	 * extract this information before doing the database lookup.
147 	 */
148 	key  = off >> PAGESHIFT;
149 	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
150 	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
151 	status = tavor_umap_db_find(instance, key, type, &value, 0, NULL);
152 	if (status == DDI_SUCCESS) {
153 		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
154 
155 		switch (type) {
156 		case MLNX_UMAP_UARPG_RSRC:
157 			/*
158 			 * Double check that process who open()'d Tavor is
159 			 * same process attempting to mmap() UAR page.
160 			 */
161 			if (key != ddi_get_pid()) {
162 				return (EINVAL);
163 			}
164 
165 			/* Map the UAR page out for userland access */
166 			status = tavor_umap_uarpg(state, dhp, rsrcp, maplen,
167 			    &err);
168 			if (status != DDI_SUCCESS) {
169 				return (err);
170 			}
171 			break;
172 
173 		case MLNX_UMAP_CQMEM_RSRC:
174 			/* Map the CQ memory out for userland access */
175 			status = tavor_umap_cqmem(state, dhp, rsrcp, off,
176 			    maplen, &err);
177 			if (status != DDI_SUCCESS) {
178 				return (err);
179 			}
180 			break;
181 
182 		case MLNX_UMAP_QPMEM_RSRC:
183 			/* Map the QP memory out for userland access */
184 			status = tavor_umap_qpmem(state, dhp, rsrcp, off,
185 			    maplen, &err);
186 			if (status != DDI_SUCCESS) {
187 				return (err);
188 			}
189 			break;
190 
191 		case MLNX_UMAP_SRQMEM_RSRC:
192 			/* Map the SRQ memory out for userland access */
193 			status = tavor_umap_srqmem(state, dhp, rsrcp, off,
194 			    maplen, &err);
195 			if (status != DDI_SUCCESS) {
196 				return (err);
197 			}
198 			break;
199 
200 		default:
201 			TAVOR_WARNING(state, "unexpected rsrc type in devmap");
202 			return (EINVAL);
203 		}
204 	} else {
205 		return (EINVAL);
206 	}
207 
208 	return (0);
209 }
210 
211 
212 /*
213  * tavor_umap_uarpg()
214  *    Context: Can be called from user context.
215  */
216 static int
tavor_umap_uarpg(tavor_state_t * state,devmap_cookie_t dhp,tavor_rsrc_t * rsrcp,size_t * maplen,int * err)217 tavor_umap_uarpg(tavor_state_t *state, devmap_cookie_t dhp,
218     tavor_rsrc_t *rsrcp, size_t *maplen, int *err)
219 {
220 	int		status;
221 	uint_t		maxprot;
222 
223 	/* Map out the UAR page (doorbell page) */
224 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
225 	status = devmap_devmem_setup(dhp, state->ts_dip,
226 	    &tavor_devmap_devmem_cbops, TAVOR_UAR_BAR, (rsrcp->tr_indx <<
227 	    PAGESHIFT), PAGESIZE, maxprot, DEVMAP_ALLOW_REMAP,
228 	    &state->ts_reg_accattr);
229 	if (status < 0) {
230 		*err = status;
231 		return (DDI_FAILURE);
232 	}
233 
234 	*maplen = PAGESIZE;
235 	return (DDI_SUCCESS);
236 }
237 
238 
239 /*
240  * tavor_umap_cqmem()
241  *    Context: Can be called from user context.
242  */
243 /* ARGSUSED */
244 static int
tavor_umap_cqmem(tavor_state_t * state,devmap_cookie_t dhp,tavor_rsrc_t * rsrcp,offset_t off,size_t * maplen,int * err)245 tavor_umap_cqmem(tavor_state_t *state, devmap_cookie_t dhp,
246     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
247 {
248 	tavor_cqhdl_t	cq;
249 	size_t		size;
250 	uint_t		maxprot;
251 	int		status;
252 
253 	/* Extract the Tavor CQ handle pointer from the tavor_rsrc_t */
254 	cq = (tavor_cqhdl_t)rsrcp->tr_addr;
255 
256 	/* Round-up the CQ size to system page size */
257 	size = ptob(btopr(cq->cq_cqinfo.qa_size));
258 
259 	/* Map out the CQ memory */
260 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
261 	status = devmap_umem_setup(dhp, state->ts_dip,
262 	    &tavor_devmap_umem_cbops, cq->cq_cqinfo.qa_umemcookie, 0, size,
263 	    maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
264 	if (status < 0) {
265 		*err = status;
266 		return (DDI_FAILURE);
267 	}
268 	*maplen = size;
269 
270 	return (DDI_SUCCESS);
271 }
272 
273 
274 /*
275  * tavor_umap_qpmem()
276  *    Context: Can be called from user context.
277  */
278 /* ARGSUSED */
279 static int
tavor_umap_qpmem(tavor_state_t * state,devmap_cookie_t dhp,tavor_rsrc_t * rsrcp,offset_t off,size_t * maplen,int * err)280 tavor_umap_qpmem(tavor_state_t *state, devmap_cookie_t dhp,
281     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
282 {
283 	tavor_qphdl_t	qp;
284 	offset_t	offset;
285 	size_t		size;
286 	uint_t		maxprot;
287 	int		status;
288 
289 	/* Extract the Tavor QP handle pointer from the tavor_rsrc_t */
290 	qp = (tavor_qphdl_t)rsrcp->tr_addr;
291 
292 	/*
293 	 * Calculate the offset of the first work queue (send or recv) into
294 	 * the memory (ddi_umem_alloc()) allocated previously for the QP.
295 	 */
296 	offset = (offset_t)((uintptr_t)qp->qp_wqinfo.qa_buf_aligned -
297 	    (uintptr_t)qp->qp_wqinfo.qa_buf_real);
298 
299 	/* Round-up the QP work queue sizes to system page size */
300 	size = ptob(btopr(qp->qp_wqinfo.qa_size));
301 
302 	/* Map out the QP memory */
303 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
304 	status = devmap_umem_setup(dhp, state->ts_dip,
305 	    &tavor_devmap_umem_cbops, qp->qp_wqinfo.qa_umemcookie, offset,
306 	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
307 	if (status < 0) {
308 		*err = status;
309 		return (DDI_FAILURE);
310 	}
311 	*maplen = size;
312 
313 	return (DDI_SUCCESS);
314 }
315 
316 
317 /*
318  * tavor_umap_srqmem()
319  *    Context: Can be called from user context.
320  */
321 /* ARGSUSED */
322 static int
tavor_umap_srqmem(tavor_state_t * state,devmap_cookie_t dhp,tavor_rsrc_t * rsrcp,offset_t off,size_t * maplen,int * err)323 tavor_umap_srqmem(tavor_state_t *state, devmap_cookie_t dhp,
324     tavor_rsrc_t *rsrcp, offset_t off, size_t *maplen, int *err)
325 {
326 	tavor_srqhdl_t	srq;
327 	offset_t	offset;
328 	size_t		size;
329 	uint_t		maxprot;
330 	int		status;
331 
332 	/* Extract the Tavor SRQ handle pointer from the tavor_rsrc_t */
333 	srq = (tavor_srqhdl_t)rsrcp->tr_addr;
334 
335 	/*
336 	 * Calculate the offset of the first shared recv queue into the memory
337 	 * (ddi_umem_alloc()) allocated previously for the SRQ.
338 	 */
339 	offset = (offset_t)((uintptr_t)srq->srq_wqinfo.qa_buf_aligned -
340 	    (uintptr_t)srq->srq_wqinfo.qa_buf_real);
341 
342 	/* Round-up the SRQ work queue sizes to system page size */
343 	size = ptob(btopr(srq->srq_wqinfo.qa_size));
344 
345 	/* Map out the QP memory */
346 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
347 	status = devmap_umem_setup(dhp, state->ts_dip,
348 	    &tavor_devmap_umem_cbops, srq->srq_wqinfo.qa_umemcookie, offset,
349 	    size, maxprot, (DEVMAP_ALLOW_REMAP | DEVMAP_DEFAULTS), NULL);
350 	if (status < 0) {
351 		*err = status;
352 		return (DDI_FAILURE);
353 	}
354 	*maplen = size;
355 
356 	return (DDI_SUCCESS);
357 }
358 
359 
360 /*
361  * tavor_devmap_umem_map()
362  *    Context: Can be called from kernel context.
363  */
364 /* ARGSUSED */
365 static int
tavor_devmap_umem_map(devmap_cookie_t dhp,dev_t dev,uint_t flags,offset_t off,size_t len,void ** pvtp)366 tavor_devmap_umem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
367     offset_t off, size_t len, void **pvtp)
368 {
369 	tavor_state_t		*state;
370 	tavor_devmap_track_t	*dvm_track;
371 	tavor_cqhdl_t		cq;
372 	tavor_qphdl_t		qp;
373 	tavor_srqhdl_t		srq;
374 	minor_t			instance;
375 	uint64_t		key;
376 	uint_t			type;
377 
378 	/* Get Tavor softstate structure from instance */
379 	instance = TAVOR_DEV_INSTANCE(dev);
380 	state = ddi_get_soft_state(tavor_statep, instance);
381 	if (state == NULL) {
382 		return (ENXIO);
383 	}
384 
385 	/*
386 	 * The bottom bits of "offset" are undefined (number depends on
387 	 * system PAGESIZE).  Shifting these off leaves us with a "key".
388 	 * The "key" is actually a combination of both a real key value
389 	 * (for the purpose of database lookup) and a "type" value.  Although
390 	 * we are not going to do any database lookup per se, we do want
391 	 * to extract the "key" and the "type" (to enable faster lookup of
392 	 * the appropriate CQ or QP handle).
393 	 */
394 	key  = off >> PAGESHIFT;
395 	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
396 	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
397 
398 	/*
399 	 * Allocate an entry to track the mapping and unmapping (specifically,
400 	 * partial unmapping) of this resource.
401 	 */
402 	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
403 	    sizeof (tavor_devmap_track_t), KM_SLEEP);
404 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
405 	dvm_track->tdt_offset = off;
406 	dvm_track->tdt_state  = state;
407 	dvm_track->tdt_refcnt = 1;
408 	mutex_init(&dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
409 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
410 
411 	/*
412 	 * Depending of the type of resource that has been mapped out, we
413 	 * need to update the QP or CQ handle to reflect that it has, in
414 	 * fact, been mapped.  This allows the driver code which frees a QP
415 	 * or a CQ to know whether it is appropriate to do a
416 	 * devmap_devmem_remap() to invalidate the userland mapping for the
417 	 * corresponding queue's memory.
418 	 */
419 	if (type == MLNX_UMAP_CQMEM_RSRC) {
420 
421 		/* Use "key" (CQ number) to do fast lookup of CQ handle */
422 		cq = tavor_cqhdl_from_cqnum(state, key);
423 
424 		/*
425 		 * Update the handle to the userland mapping.  Note:  If
426 		 * the CQ already has a valid userland mapping, then stop
427 		 * and return failure.
428 		 */
429 		mutex_enter(&cq->cq_lock);
430 		if (cq->cq_umap_dhp == NULL) {
431 			cq->cq_umap_dhp = dhp;
432 			dvm_track->tdt_size = cq->cq_cqinfo.qa_size;
433 			mutex_exit(&cq->cq_lock);
434 		} else {
435 			mutex_exit(&cq->cq_lock);
436 			goto umem_map_fail;
437 		}
438 
439 	} else if (type == MLNX_UMAP_QPMEM_RSRC) {
440 
441 		/* Use "key" (QP number) to do fast lookup of QP handle */
442 		qp = tavor_qphdl_from_qpnum(state, key);
443 
444 		/*
445 		 * Update the handle to the userland mapping.  Note:  If
446 		 * the CQ already has a valid userland mapping, then stop
447 		 * and return failure.
448 		 */
449 		mutex_enter(&qp->qp_lock);
450 		if (qp->qp_umap_dhp == NULL) {
451 			qp->qp_umap_dhp = dhp;
452 			dvm_track->tdt_size = qp->qp_wqinfo.qa_size;
453 			mutex_exit(&qp->qp_lock);
454 		} else {
455 			mutex_exit(&qp->qp_lock);
456 			goto umem_map_fail;
457 		}
458 
459 	} else if (type == MLNX_UMAP_SRQMEM_RSRC) {
460 
461 		/* Use "key" (SRQ number) to do fast lookup on SRQ handle */
462 		srq = tavor_srqhdl_from_srqnum(state, key);
463 
464 		/*
465 		 * Update the handle to the userland mapping.  Note:  If the
466 		 * SRQ already has a valid userland mapping, then stop and
467 		 * return failure.
468 		 */
469 		mutex_enter(&srq->srq_lock);
470 		if (srq->srq_umap_dhp == NULL) {
471 			srq->srq_umap_dhp = dhp;
472 			dvm_track->tdt_size = srq->srq_wqinfo.qa_size;
473 			mutex_exit(&srq->srq_lock);
474 		} else {
475 			mutex_exit(&srq->srq_lock);
476 			goto umem_map_fail;
477 		}
478 	}
479 
480 	/*
481 	 * Pass the private "Tavor devmap tracking structure" back.  This
482 	 * pointer will be returned in subsequent "unmap" callbacks.
483 	 */
484 	*pvtp = dvm_track;
485 
486 	return (DDI_SUCCESS);
487 
488 umem_map_fail:
489 	mutex_destroy(&dvm_track->tdt_lock);
490 	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
491 	return (DDI_FAILURE);
492 }
493 
494 
495 /*
496  * tavor_devmap_umem_dup()
497  *    Context: Can be called from kernel context.
498  */
499 /* ARGSUSED */
500 static int
tavor_devmap_umem_dup(devmap_cookie_t dhp,void * pvtp,devmap_cookie_t new_dhp,void ** new_pvtp)501 tavor_devmap_umem_dup(devmap_cookie_t dhp, void *pvtp, devmap_cookie_t new_dhp,
502     void **new_pvtp)
503 {
504 	tavor_state_t		*state;
505 	tavor_devmap_track_t	*dvm_track, *new_dvm_track;
506 	uint_t			maxprot;
507 	int			status;
508 
509 	/*
510 	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
511 	 * structure" (in "pvtp").
512 	 */
513 	dvm_track = (tavor_devmap_track_t *)pvtp;
514 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
515 	state = dvm_track->tdt_state;
516 
517 	/*
518 	 * Since this devmap_dup() entry point is generally called
519 	 * when a process does fork(2), it is incumbent upon the driver
520 	 * to insure that the child does not inherit a valid copy of
521 	 * the parent's QP or CQ resource.  This is accomplished by using
522 	 * devmap_devmem_remap() to invalidate the child's mapping to the
523 	 * kernel memory.
524 	 */
525 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
526 	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
527 	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
528 	if (status != DDI_SUCCESS) {
529 		TAVOR_WARNING(state, "failed in tavor_devmap_umem_dup()");
530 		return (status);
531 	}
532 
533 	/*
534 	 * Allocate a new entry to track the subsequent unmapping
535 	 * (specifically, all partial unmappings) of the child's newly
536 	 * invalidated resource.  Note: Setting the "tdt_size" field to
537 	 * zero here is an indication to the devmap_unmap() entry point
538 	 * that this mapping is invalid, and that its subsequent unmapping
539 	 * should not affect any of the parent's CQ or QP resources.
540 	 */
541 	new_dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
542 	    sizeof (tavor_devmap_track_t), KM_SLEEP);
543 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*new_dvm_track))
544 	new_dvm_track->tdt_offset = 0;
545 	new_dvm_track->tdt_state  = state;
546 	new_dvm_track->tdt_refcnt = 1;
547 	new_dvm_track->tdt_size	  = 0;
548 	mutex_init(&new_dvm_track->tdt_lock, NULL, MUTEX_DRIVER,
549 	    DDI_INTR_PRI(state->ts_intrmsi_pri));
550 	*new_pvtp = new_dvm_track;
551 
552 	return (DDI_SUCCESS);
553 }
554 
555 
556 /*
557  * tavor_devmap_umem_unmap()
558  *    Context: Can be called from kernel context.
559  */
560 /* ARGSUSED */
561 static void
tavor_devmap_umem_unmap(devmap_cookie_t dhp,void * pvtp,offset_t off,size_t len,devmap_cookie_t new_dhp1,void ** pvtp1,devmap_cookie_t new_dhp2,void ** pvtp2)562 tavor_devmap_umem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
563     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
564     devmap_cookie_t new_dhp2, void **pvtp2)
565 {
566 	tavor_state_t 		*state;
567 	tavor_rsrc_t 		*rsrcp;
568 	tavor_devmap_track_t	*dvm_track;
569 	tavor_cqhdl_t		cq;
570 	tavor_qphdl_t		qp;
571 	tavor_srqhdl_t		srq;
572 	uint64_t		key, value;
573 	uint_t			type;
574 	uint_t			size;
575 	int			status;
576 
577 	/*
578 	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
579 	 * structure" (in "pvtp").
580 	 */
581 	dvm_track = (tavor_devmap_track_t *)pvtp;
582 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
583 	state	  = dvm_track->tdt_state;
584 
585 	/*
586 	 * Extract the "offset" from the "Tavor devmap tracking structure".
587 	 * Note: The input argument "off" is ignored here because the
588 	 * Tavor mapping interfaces define a very specific meaning to
589 	 * each "logical offset".  Also extract the "key" and "type" encoded
590 	 * in the logical offset.
591 	 */
592 	key  = dvm_track->tdt_offset >> PAGESHIFT;
593 	type = key & MLNX_UMAP_RSRC_TYPE_MASK;
594 	key  = key >> MLNX_UMAP_RSRC_TYPE_SHIFT;
595 
596 	/*
597 	 * Extract the "size" of the mapping.  If this size is determined
598 	 * to be zero, then it is an indication of a previously invalidated
599 	 * mapping, and no CQ or QP resources should be affected.
600 	 */
601 	size = dvm_track->tdt_size;
602 
603 	/*
604 	 * If only the "middle portion of a given mapping is being unmapped,
605 	 * then we are effectively creating one new piece of mapped memory.
606 	 * (Original region is divided into three pieces of which the middle
607 	 * piece is being removed.  This leaves two pieces.  Since we started
608 	 * with one piece and now have two pieces, we need to increment the
609 	 * counter in the "Tavor devmap tracking structure".
610 	 *
611 	 * If, however, the whole mapped region is being unmapped, then we
612 	 * have started with one region which we are completely removing.
613 	 * In this case, we need to decrement the counter in the "Tavor
614 	 * devmap tracking structure".
615 	 *
616 	 * In each of the remaining cases, we will have started with one
617 	 * mapped region and ended with one (different) region.  So no counter
618 	 * modification is necessary.
619 	 */
620 	mutex_enter(&dvm_track->tdt_lock);
621 	if ((new_dhp1 == NULL) && (new_dhp2 == NULL)) {
622 		dvm_track->tdt_refcnt--;
623 	} else if ((new_dhp1 != NULL) && (new_dhp2 != NULL)) {
624 		dvm_track->tdt_refcnt++;
625 	}
626 	mutex_exit(&dvm_track->tdt_lock);
627 
628 	/*
629 	 * For each of the cases where the region is being divided, then we
630 	 * need to pass back the "Tavor devmap tracking structure".  This way
631 	 * we get it back when each of the remaining pieces is subsequently
632 	 * unmapped.
633 	 */
634 	if (new_dhp1 != NULL) {
635 		*pvtp1 = pvtp;
636 	}
637 	if (new_dhp2 != NULL) {
638 		*pvtp2 = pvtp;
639 	}
640 
641 	/*
642 	 * If the "Tavor devmap tracking structure" is no longer being
643 	 * referenced, then free it up.  Otherwise, return.
644 	 */
645 	if (dvm_track->tdt_refcnt == 0) {
646 		mutex_destroy(&dvm_track->tdt_lock);
647 		kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
648 
649 		/*
650 		 * If the mapping was invalid (see explanation above), then
651 		 * no further processing is necessary.
652 		 */
653 		if (size == 0) {
654 			return;
655 		}
656 	} else {
657 		return;
658 	}
659 
660 	/*
661 	 * Now that we can guarantee that the user memory is fully unmapped,
662 	 * we can use the "key" and "type" values to try to find the entry
663 	 * in the "userland resources database".  If it's found, then it
664 	 * indicates that the queue memory (CQ or QP) has not yet been freed.
665 	 * In this case, we update the corresponding CQ or QP handle to
666 	 * indicate that the "devmap_devmem_remap()" call will be unnecessary.
667 	 * If it's _not_ found, then it indicates that the CQ or QP memory
668 	 * was, in fact, freed before it was unmapped (thus requiring a
669 	 * previous invalidation by remapping - which will already have
670 	 * been done in the free routine).
671 	 */
672 	status = tavor_umap_db_find(state->ts_instance, key, type, &value,
673 	    0, NULL);
674 	if (status == DDI_SUCCESS) {
675 		/*
676 		 * Depending on the type of the mapped resource (CQ or QP),
677 		 * update handle to indicate that no invalidation remapping
678 		 * will be necessary.
679 		 */
680 		if (type == MLNX_UMAP_CQMEM_RSRC) {
681 
682 			/* Use "value" to convert to CQ handle */
683 			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
684 			cq = (tavor_cqhdl_t)rsrcp->tr_addr;
685 
686 			/*
687 			 * Invalidate the handle to the userland mapping.
688 			 * Note: We must ensure that the mapping being
689 			 * unmapped here is the current one for the CQ.  It
690 			 * is possible that it might not be if this CQ has
691 			 * been resized and the previous CQ memory has not
692 			 * yet been unmapped.  But in that case, because of
693 			 * the devmap_devmem_remap(), there is no longer any
694 			 * association between the mapping and the real CQ
695 			 * kernel memory.
696 			 */
697 			mutex_enter(&cq->cq_lock);
698 			if (cq->cq_umap_dhp == dhp) {
699 				cq->cq_umap_dhp = (devmap_cookie_t)NULL;
700 			}
701 			mutex_exit(&cq->cq_lock);
702 
703 		} else if (type == MLNX_UMAP_QPMEM_RSRC) {
704 
705 			/* Use "value" to convert to QP handle */
706 			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
707 			qp = (tavor_qphdl_t)rsrcp->tr_addr;
708 
709 			/*
710 			 * Invalidate the handle to the userland mapping.
711 			 * Note: we ensure that the mapping being unmapped
712 			 * here is the current one for the QP.  This is
713 			 * more of a sanity check here since, unlike CQs
714 			 * (above) we do not support resize of QPs.
715 			 */
716 			mutex_enter(&qp->qp_lock);
717 			if (qp->qp_umap_dhp == dhp) {
718 				qp->qp_umap_dhp = (devmap_cookie_t)NULL;
719 			}
720 			mutex_exit(&qp->qp_lock);
721 
722 		} else if (type == MLNX_UMAP_SRQMEM_RSRC) {
723 
724 			/* Use "value" to convert to SRQ handle */
725 			rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
726 			srq = (tavor_srqhdl_t)rsrcp->tr_addr;
727 
728 			/*
729 			 * Invalidate the handle to the userland mapping.
730 			 * Note: we ensure that the mapping being unmapped
731 			 * here is the current one for the QP.  This is
732 			 * more of a sanity check here since, unlike CQs
733 			 * (above) we do not support resize of QPs.
734 			 */
735 			mutex_enter(&srq->srq_lock);
736 			if (srq->srq_umap_dhp == dhp) {
737 				srq->srq_umap_dhp = (devmap_cookie_t)NULL;
738 			}
739 			mutex_exit(&srq->srq_lock);
740 		}
741 	}
742 }
743 
744 
745 /*
746  * tavor_devmap_devmem_map()
747  *    Context: Can be called from kernel context.
748  */
749 /* ARGSUSED */
750 static int
tavor_devmap_devmem_map(devmap_cookie_t dhp,dev_t dev,uint_t flags,offset_t off,size_t len,void ** pvtp)751 tavor_devmap_devmem_map(devmap_cookie_t dhp, dev_t dev, uint_t flags,
752     offset_t off, size_t len, void **pvtp)
753 {
754 	tavor_state_t		*state;
755 	tavor_devmap_track_t	*dvm_track;
756 	minor_t			instance;
757 
758 	/* Get Tavor softstate structure from instance */
759 	instance = TAVOR_DEV_INSTANCE(dev);
760 	state = ddi_get_soft_state(tavor_statep, instance);
761 	if (state == NULL) {
762 		return (ENXIO);
763 	}
764 
765 	/*
766 	 * Allocate an entry to track the mapping and unmapping of this
767 	 * resource.  Note:  We don't need to initialize the "refcnt" or
768 	 * "offset" fields here, nor do we need to initialize the mutex
769 	 * used with the "refcnt".  Since UAR pages are single pages, they
770 	 * are not subject to "partial" unmappings.  This makes these other
771 	 * fields unnecessary.
772 	 */
773 	dvm_track = (tavor_devmap_track_t *)kmem_zalloc(
774 	    sizeof (tavor_devmap_track_t), KM_SLEEP);
775 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
776 	dvm_track->tdt_state  = state;
777 	dvm_track->tdt_size   = PAGESIZE;
778 
779 	/*
780 	 * Pass the private "Tavor devmap tracking structure" back.  This
781 	 * pointer will be returned in a subsequent "unmap" callback.
782 	 */
783 	*pvtp = dvm_track;
784 
785 	return (DDI_SUCCESS);
786 }
787 
788 
789 /*
790  * tavor_devmap_devmem_dup()
791  *    Context: Can be called from kernel context.
792  */
793 /* ARGSUSED */
794 static int
tavor_devmap_devmem_dup(devmap_cookie_t dhp,void * pvtp,devmap_cookie_t new_dhp,void ** new_pvtp)795 tavor_devmap_devmem_dup(devmap_cookie_t dhp, void *pvtp,
796     devmap_cookie_t new_dhp, void **new_pvtp)
797 {
798 	tavor_state_t		*state;
799 	tavor_devmap_track_t	*dvm_track;
800 	uint_t			maxprot;
801 	int			status;
802 
803 	/*
804 	 * Extract the Tavor softstate pointer from "Tavor devmap tracking
805 	 * structure" (in "pvtp").  Note: If the tracking structure is NULL
806 	 * here, it means that the mapping corresponds to an invalid mapping.
807 	 * In this case, it can be safely ignored ("new_pvtp" set to NULL).
808 	 */
809 	dvm_track = (tavor_devmap_track_t *)pvtp;
810 	if (dvm_track == NULL) {
811 		*new_pvtp = NULL;
812 		return (DDI_SUCCESS);
813 	}
814 
815 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
816 	state = dvm_track->tdt_state;
817 
818 	/*
819 	 * Since this devmap_dup() entry point is generally called
820 	 * when a process does fork(2), it is incumbent upon the driver
821 	 * to insure that the child does not inherit a valid copy of
822 	 * the parent's resource.  This is accomplished by using
823 	 * devmap_devmem_remap() to invalidate the child's mapping to the
824 	 * kernel memory.
825 	 */
826 	maxprot = (PROT_READ | PROT_WRITE | PROT_USER);
827 	status = devmap_devmem_remap(new_dhp, state->ts_dip, 0, 0,
828 	    dvm_track->tdt_size, maxprot, DEVMAP_MAPPING_INVALID, NULL);
829 	if (status != DDI_SUCCESS) {
830 		TAVOR_WARNING(state, "failed in tavor_devmap_devmem_dup()");
831 		return (status);
832 	}
833 
834 	/*
835 	 * Since the region is invalid, there is no need for us to
836 	 * allocate and continue to track an additional "Tavor devmap
837 	 * tracking structure".  Instead we return NULL here, which is an
838 	 * indication to the devmap_unmap() entry point that this entry
839 	 * can be safely ignored.
840 	 */
841 	*new_pvtp = NULL;
842 
843 	return (DDI_SUCCESS);
844 }
845 
846 
847 /*
848  * tavor_devmap_devmem_unmap()
849  *    Context: Can be called from kernel context.
850  */
851 /* ARGSUSED */
852 static void
tavor_devmap_devmem_unmap(devmap_cookie_t dhp,void * pvtp,offset_t off,size_t len,devmap_cookie_t new_dhp1,void ** pvtp1,devmap_cookie_t new_dhp2,void ** pvtp2)853 tavor_devmap_devmem_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off,
854     size_t len, devmap_cookie_t new_dhp1, void **pvtp1,
855     devmap_cookie_t new_dhp2, void **pvtp2)
856 {
857 	tavor_devmap_track_t	*dvm_track;
858 
859 	/*
860 	 * Free up the "Tavor devmap tracking structure" (in "pvtp").
861 	 * There cannot be "partial" unmappings here because all UAR pages
862 	 * are single pages.  Note: If the tracking structure is NULL here,
863 	 * it means that the mapping corresponds to an invalid mapping.  In
864 	 * this case, it can be safely ignored.
865 	 */
866 	dvm_track = (tavor_devmap_track_t *)pvtp;
867 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*dvm_track))
868 	if (dvm_track == NULL) {
869 		return;
870 	}
871 
872 	kmem_free(dvm_track, sizeof (tavor_devmap_track_t));
873 }
874 
875 
876 /*
877  * tavor_umap_ci_data_in()
878  *    Context: Can be called from user or kernel context.
879  */
880 /* ARGSUSED */
881 ibt_status_t
tavor_umap_ci_data_in(tavor_state_t * state,ibt_ci_data_flags_t flags,ibt_object_type_t object,void * hdl,void * data_p,size_t data_sz)882 tavor_umap_ci_data_in(tavor_state_t *state, ibt_ci_data_flags_t flags,
883     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
884 {
885 	int	status;
886 
887 	/*
888 	 * Depending on the type of object about which additional information
889 	 * is being provided (currently only MR is supported), we call the
890 	 * appropriate resource-specific function.
891 	 */
892 	switch (object) {
893 	case IBT_HDL_MR:
894 		status = tavor_umap_mr_data_in((tavor_mrhdl_t)hdl,
895 		    (ibt_mr_data_in_t *)data_p, data_sz);
896 		if (status != DDI_SUCCESS) {
897 			return (status);
898 		}
899 		break;
900 
901 	/*
902 	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
903 	 * since the Tavor driver does not support these.
904 	 */
905 	case IBT_HDL_HCA:
906 	case IBT_HDL_QP:
907 	case IBT_HDL_CQ:
908 	case IBT_HDL_PD:
909 	case IBT_HDL_MW:
910 	case IBT_HDL_AH:
911 	case IBT_HDL_SCHED:
912 	case IBT_HDL_EEC:
913 	case IBT_HDL_RDD:
914 	case IBT_HDL_SRQ:
915 		return (IBT_NOT_SUPPORTED);
916 
917 	/*
918 	 * Any other types are invalid.
919 	 */
920 	default:
921 		return (IBT_INVALID_PARAM);
922 	}
923 
924 	return (DDI_SUCCESS);
925 }
926 
927 
928 /*
929  * tavor_umap_mr_data_in()
930  *    Context: Can be called from user or kernel context.
931  */
932 static ibt_status_t
tavor_umap_mr_data_in(tavor_mrhdl_t mr,ibt_mr_data_in_t * data,size_t data_sz)933 tavor_umap_mr_data_in(tavor_mrhdl_t mr, ibt_mr_data_in_t *data,
934     size_t data_sz)
935 {
936 	if (data->mr_rev != IBT_MR_DATA_IN_IF_VERSION) {
937 		return (IBT_NOT_SUPPORTED);
938 	}
939 
940 	/* Check for valid MR handle pointer */
941 	if (mr == NULL) {
942 		return (IBT_MR_HDL_INVALID);
943 	}
944 
945 	/* Check for valid MR input structure size */
946 	if (data_sz < sizeof (ibt_mr_data_in_t)) {
947 		return (IBT_INSUFF_RESOURCE);
948 	}
949 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
950 
951 	/*
952 	 * Ensure that the MR corresponds to userland memory and that it is
953 	 * a currently valid memory region as well.
954 	 */
955 	mutex_enter(&mr->mr_lock);
956 	if ((mr->mr_is_umem == 0) || (mr->mr_umemcookie == NULL)) {
957 		mutex_exit(&mr->mr_lock);
958 		return (IBT_MR_HDL_INVALID);
959 	}
960 
961 	/*
962 	 * If it has passed all the above checks, then extract the callback
963 	 * function and argument from the input structure.  Copy them into
964 	 * the MR handle.  This function will be called only if the memory
965 	 * corresponding to the MR handle gets a umem_lockmemory() callback.
966 	 */
967 	mr->mr_umem_cbfunc = data->mr_func;
968 	mr->mr_umem_cbarg1 = data->mr_arg1;
969 	mr->mr_umem_cbarg2 = data->mr_arg2;
970 	mutex_exit(&mr->mr_lock);
971 
972 	return (DDI_SUCCESS);
973 }
974 
975 
976 /*
977  * tavor_umap_ci_data_out()
978  *    Context: Can be called from user or kernel context.
979  */
980 /* ARGSUSED */
981 ibt_status_t
tavor_umap_ci_data_out(tavor_state_t * state,ibt_ci_data_flags_t flags,ibt_object_type_t object,void * hdl,void * data_p,size_t data_sz)982 tavor_umap_ci_data_out(tavor_state_t *state, ibt_ci_data_flags_t flags,
983     ibt_object_type_t object, void *hdl, void *data_p, size_t data_sz)
984 {
985 	int	status;
986 
987 	/*
988 	 * Depending on the type of object about which additional information
989 	 * is being requested (CQ or QP), we call the appropriate resource-
990 	 * specific mapping function.
991 	 */
992 	switch (object) {
993 	case IBT_HDL_CQ:
994 		status = tavor_umap_cq_data_out((tavor_cqhdl_t)hdl,
995 		    (mlnx_umap_cq_data_out_t *)data_p, data_sz);
996 		if (status != DDI_SUCCESS) {
997 			return (status);
998 		}
999 		break;
1000 
1001 	case IBT_HDL_QP:
1002 		status = tavor_umap_qp_data_out((tavor_qphdl_t)hdl,
1003 		    (mlnx_umap_qp_data_out_t *)data_p, data_sz);
1004 		if (status != DDI_SUCCESS) {
1005 			return (status);
1006 		}
1007 		break;
1008 
1009 	case IBT_HDL_SRQ:
1010 		status = tavor_umap_srq_data_out((tavor_srqhdl_t)hdl,
1011 		    (mlnx_umap_srq_data_out_t *)data_p, data_sz);
1012 		if (status != DDI_SUCCESS) {
1013 			return (status);
1014 		}
1015 		break;
1016 
1017 	/*
1018 	 * For other possible valid IBT types, we return IBT_NOT_SUPPORTED,
1019 	 * since the Tavor driver does not support these.
1020 	 */
1021 	case IBT_HDL_PD:
1022 		status = tavor_umap_pd_data_out((tavor_pdhdl_t)hdl,
1023 		    (mlnx_umap_pd_data_out_t *)data_p, data_sz);
1024 		if (status != DDI_SUCCESS) {
1025 			return (status);
1026 		}
1027 		break;
1028 
1029 	case IBT_HDL_HCA:
1030 	case IBT_HDL_MR:
1031 	case IBT_HDL_MW:
1032 	case IBT_HDL_AH:
1033 	case IBT_HDL_SCHED:
1034 	case IBT_HDL_EEC:
1035 	case IBT_HDL_RDD:
1036 		return (IBT_NOT_SUPPORTED);
1037 
1038 	/*
1039 	 * Any other types are invalid.
1040 	 */
1041 	default:
1042 		return (IBT_INVALID_PARAM);
1043 	}
1044 
1045 	return (DDI_SUCCESS);
1046 }
1047 
1048 
1049 /*
1050  * tavor_umap_cq_data_out()
1051  *    Context: Can be called from user or kernel context.
1052  */
1053 static ibt_status_t
tavor_umap_cq_data_out(tavor_cqhdl_t cq,mlnx_umap_cq_data_out_t * data,size_t data_sz)1054 tavor_umap_cq_data_out(tavor_cqhdl_t cq, mlnx_umap_cq_data_out_t *data,
1055     size_t data_sz)
1056 {
1057 	/* Check for valid CQ handle pointer */
1058 	if (cq == NULL) {
1059 		return (IBT_CQ_HDL_INVALID);
1060 	}
1061 
1062 	/* Check for valid CQ mapping structure size */
1063 	if (data_sz < sizeof (mlnx_umap_cq_data_out_t)) {
1064 		return (IBT_INSUFF_RESOURCE);
1065 	}
1066 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1067 
1068 	/*
1069 	 * If it has passed all the above checks, then fill in all the useful
1070 	 * mapping information (including the mapping offset that will be
1071 	 * passed back to the devmap() interface during a subsequent mmap()
1072 	 * call.
1073 	 *
1074 	 * The "offset" for CQ mmap()'s looks like this:
1075 	 * +----------------------------------------+--------+--------------+
1076 	 * |		   CQ Number		    |  0x33  | Reserved (0) |
1077 	 * +----------------------------------------+--------+--------------+
1078 	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1079 	 *
1080 	 * This returns information about the mapping offset, the length of
1081 	 * the CQ memory, the CQ number (for use in later CQ doorbells), the
1082 	 * number of CQEs the CQ memory can hold, and the size of each CQE.
1083 	 */
1084 	data->mcq_rev		= MLNX_UMAP_IF_VERSION;
1085 	data->mcq_mapoffset	= ((((uint64_t)cq->cq_cqnum <<
1086 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_CQMEM_RSRC) << PAGESHIFT);
1087 	data->mcq_maplen	= cq->cq_cqinfo.qa_size;
1088 	data->mcq_cqnum		= cq->cq_cqnum;
1089 	data->mcq_numcqe	= cq->cq_bufsz;
1090 	data->mcq_cqesz		= sizeof (tavor_hw_cqe_t);
1091 
1092 	return (DDI_SUCCESS);
1093 }
1094 
1095 
1096 /*
1097  * tavor_umap_qp_data_out()
1098  *    Context: Can be called from user or kernel context.
1099  */
1100 static ibt_status_t
tavor_umap_qp_data_out(tavor_qphdl_t qp,mlnx_umap_qp_data_out_t * data,size_t data_sz)1101 tavor_umap_qp_data_out(tavor_qphdl_t qp, mlnx_umap_qp_data_out_t *data,
1102     size_t data_sz)
1103 {
1104 	/* Check for valid QP handle pointer */
1105 	if (qp == NULL) {
1106 		return (IBT_QP_HDL_INVALID);
1107 	}
1108 
1109 	/* Check for valid QP mapping structure size */
1110 	if (data_sz < sizeof (mlnx_umap_qp_data_out_t)) {
1111 		return (IBT_INSUFF_RESOURCE);
1112 	}
1113 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1114 
1115 	/*
1116 	 * If it has passed all the checks, then fill in all the useful
1117 	 * mapping information (including the mapping offset that will be
1118 	 * passed back to the devmap() interface during a subsequent mmap()
1119 	 * call.
1120 	 *
1121 	 * The "offset" for QP mmap()'s looks like this:
1122 	 * +----------------------------------------+--------+--------------+
1123 	 * |		   QP Number		    |  0x44  | Reserved (0) |
1124 	 * +----------------------------------------+--------+--------------+
1125 	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1126 	 *
1127 	 * This returns information about the mapping offset, the length of
1128 	 * the QP memory, and the QP number (for use in later send and recv
1129 	 * doorbells).  It also returns the following information for both
1130 	 * the receive work queue and the send work queue, respectively:  the
1131 	 * offset (from the base mapped address) of the start of the given
1132 	 * work queue, the 64-bit IB virtual address that corresponds to
1133 	 * the base mapped address (needed for posting WQEs though the
1134 	 * QP doorbells), the number of WQEs the given work queue can hold,
1135 	 * and the size of each WQE for the given work queue.
1136 	 */
1137 	data->mqp_rev		= MLNX_UMAP_IF_VERSION;
1138 	data->mqp_mapoffset	= ((((uint64_t)qp->qp_qpnum <<
1139 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_QPMEM_RSRC) << PAGESHIFT);
1140 	data->mqp_maplen	= qp->qp_wqinfo.qa_size;
1141 	data->mqp_qpnum		= qp->qp_qpnum;
1142 
1143 	/*
1144 	 * If this QP is associated with a shared receive queue (SRQ),
1145 	 * then return invalid RecvQ parameters.  Otherwise, return
1146 	 * the proper parameter values.
1147 	 */
1148 	if (qp->qp_srq_en == TAVOR_QP_SRQ_ENABLED) {
1149 		data->mqp_rq_off	= (uint32_t)qp->qp_wqinfo.qa_size;
1150 		data->mqp_rq_desc_addr	= (uint32_t)qp->qp_wqinfo.qa_size;
1151 		data->mqp_rq_numwqe	= 0;
1152 		data->mqp_rq_wqesz	= 0;
1153 	} else {
1154 		data->mqp_rq_off	= (uintptr_t)qp->qp_rq_buf -
1155 		    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1156 		data->mqp_rq_desc_addr	= (uint32_t)((uintptr_t)qp->qp_rq_buf -
1157 		    qp->qp_desc_off);
1158 		data->mqp_rq_numwqe	= qp->qp_rq_bufsz;
1159 		data->mqp_rq_wqesz	= (1 << qp->qp_rq_log_wqesz);
1160 	}
1161 	data->mqp_sq_off	= (uintptr_t)qp->qp_sq_buf -
1162 	    (uintptr_t)qp->qp_wqinfo.qa_buf_aligned;
1163 	data->mqp_sq_desc_addr	= (uint32_t)((uintptr_t)qp->qp_sq_buf -
1164 	    qp->qp_desc_off);
1165 	data->mqp_sq_numwqe	= qp->qp_sq_bufsz;
1166 	data->mqp_sq_wqesz	= (1 << qp->qp_sq_log_wqesz);
1167 
1168 	return (DDI_SUCCESS);
1169 }
1170 
1171 
1172 /*
1173  * tavor_umap_srq_data_out()
1174  *    Context: Can be called from user or kernel context.
1175  */
1176 static ibt_status_t
tavor_umap_srq_data_out(tavor_srqhdl_t srq,mlnx_umap_srq_data_out_t * data,size_t data_sz)1177 tavor_umap_srq_data_out(tavor_srqhdl_t srq, mlnx_umap_srq_data_out_t *data,
1178     size_t data_sz)
1179 {
1180 	/* Check for valid SRQ handle pointer */
1181 	if (srq == NULL) {
1182 		return (IBT_SRQ_HDL_INVALID);
1183 	}
1184 
1185 	/* Check for valid SRQ mapping structure size */
1186 	if (data_sz < sizeof (mlnx_umap_srq_data_out_t)) {
1187 		return (IBT_INSUFF_RESOURCE);
1188 	}
1189 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1190 
1191 	/*
1192 	 * If it has passed all the checks, then fill in all the useful
1193 	 * mapping information (including the mapping offset that will be
1194 	 * passed back to the devmap() interface during a subsequent mmap()
1195 	 * call.
1196 	 *
1197 	 * The "offset" for SRQ mmap()'s looks like this:
1198 	 * +----------------------------------------+--------+--------------+
1199 	 * |		   SRQ Number		    |  0x66  | Reserved (0) |
1200 	 * +----------------------------------------+--------+--------------+
1201 	 *	   (64 - 8 - PAGESHIFT) bits	    8 bits	PAGESHIFT bits
1202 	 *
1203 	 * This returns information about the mapping offset, the length of the
1204 	 * SRQ memory, and the SRQ number (for use in later send and recv
1205 	 * doorbells).  It also returns the following information for the
1206 	 * shared receive queue: the offset (from the base mapped address) of
1207 	 * the start of the given work queue, the 64-bit IB virtual address
1208 	 * that corresponds to the base mapped address (needed for posting WQEs
1209 	 * though the QP doorbells), the number of WQEs the given work queue
1210 	 * can hold, and the size of each WQE for the given work queue.
1211 	 */
1212 	data->msrq_rev		= MLNX_UMAP_IF_VERSION;
1213 	data->msrq_mapoffset	= ((((uint64_t)srq->srq_srqnum <<
1214 	    MLNX_UMAP_RSRC_TYPE_SHIFT) | MLNX_UMAP_SRQMEM_RSRC) << PAGESHIFT);
1215 	data->msrq_maplen	= srq->srq_wqinfo.qa_size;
1216 	data->msrq_srqnum	= srq->srq_srqnum;
1217 
1218 	data->msrq_desc_addr	= (uint32_t)((uintptr_t)srq->srq_wq_buf -
1219 	    srq->srq_desc_off);
1220 	data->msrq_numwqe	= srq->srq_wq_bufsz;
1221 	data->msrq_wqesz	= (1 << srq->srq_wq_log_wqesz);
1222 
1223 	return (DDI_SUCCESS);
1224 }
1225 
1226 /*
1227  * tavor_umap_pd_data_out()
1228  *    Context: Can be called from user or kernel context.
1229  */
1230 static ibt_status_t
tavor_umap_pd_data_out(tavor_pdhdl_t pd,mlnx_umap_pd_data_out_t * data,size_t data_sz)1231 tavor_umap_pd_data_out(tavor_pdhdl_t pd, mlnx_umap_pd_data_out_t *data,
1232     size_t data_sz)
1233 {
1234 	/* Check for valid PD handle pointer */
1235 	if (pd == NULL) {
1236 		return (IBT_PD_HDL_INVALID);
1237 	}
1238 
1239 	/* Check for valid PD mapping structure size */
1240 	if (data_sz < sizeof (mlnx_umap_pd_data_out_t)) {
1241 		return (IBT_INSUFF_RESOURCE);
1242 	}
1243 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
1244 
1245 	/*
1246 	 * If it has passed all the checks, then fill the PD table index
1247 	 * (the PD table allocated index for the PD pd_pdnum)
1248 	 */
1249 	data->mpd_rev	= MLNX_UMAP_IF_VERSION;
1250 	data->mpd_pdnum	= pd->pd_pdnum;
1251 
1252 	return (DDI_SUCCESS);
1253 }
1254 
1255 /*
1256  * tavor_umap_db_init()
1257  *    Context: Only called from attach() path context
1258  */
1259 void
tavor_umap_db_init(void)1260 tavor_umap_db_init(void)
1261 {
1262 	/*
1263 	 * Initialize the lock used by the Tavor "userland resources database"
1264 	 * This is used to ensure atomic access to add, remove, and find
1265 	 * entries in the database.
1266 	 */
1267 	mutex_init(&tavor_userland_rsrc_db.tdl_umapdb_lock, NULL,
1268 	    MUTEX_DRIVER, NULL);
1269 
1270 	/*
1271 	 * Initialize the AVL tree used for the "userland resources
1272 	 * database".  Using an AVL tree here provides the ability to
1273 	 * scale the database size to large numbers of resources.  The
1274 	 * entries in the tree are "tavor_umap_db_entry_t".
1275 	 * The tree is searched with the help of the
1276 	 * tavor_umap_db_compare() routine.
1277 	 */
1278 	avl_create(&tavor_userland_rsrc_db.tdl_umapdb_avl,
1279 	    tavor_umap_db_compare, sizeof (tavor_umap_db_entry_t),
1280 	    offsetof(tavor_umap_db_entry_t, tdbe_avlnode));
1281 }
1282 
1283 
1284 /*
1285  * tavor_umap_db_fini()
1286  *    Context: Only called from attach() and/or detach() path contexts
1287  */
1288 void
tavor_umap_db_fini(void)1289 tavor_umap_db_fini(void)
1290 {
1291 	/* Destroy the AVL tree for the "userland resources database" */
1292 	avl_destroy(&tavor_userland_rsrc_db.tdl_umapdb_avl);
1293 
1294 	/* Destroy the lock for the "userland resources database" */
1295 	mutex_destroy(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1296 }
1297 
1298 
1299 /*
1300  * tavor_umap_db_alloc()
1301  *    Context: Can be called from user or kernel context.
1302  */
1303 tavor_umap_db_entry_t *
tavor_umap_db_alloc(uint_t instance,uint64_t key,uint_t type,uint64_t value)1304 tavor_umap_db_alloc(uint_t instance, uint64_t key, uint_t type, uint64_t value)
1305 {
1306 	tavor_umap_db_entry_t	*umapdb;
1307 
1308 	/* Allocate an entry to add to the "userland resources database" */
1309 	umapdb = kmem_zalloc(sizeof (tavor_umap_db_entry_t), KM_NOSLEEP);
1310 	if (umapdb == NULL) {
1311 		return (NULL);
1312 	}
1313 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1314 
1315 	/* Fill in the fields in the database entry */
1316 	umapdb->tdbe_common.tdb_instance  = instance;
1317 	umapdb->tdbe_common.tdb_type	  = type;
1318 	umapdb->tdbe_common.tdb_key	  = key;
1319 	umapdb->tdbe_common.tdb_value	  = value;
1320 
1321 	return (umapdb);
1322 }
1323 
1324 
1325 /*
1326  * tavor_umap_db_free()
1327  *    Context: Can be called from user or kernel context.
1328  */
1329 void
tavor_umap_db_free(tavor_umap_db_entry_t * umapdb)1330 tavor_umap_db_free(tavor_umap_db_entry_t *umapdb)
1331 {
1332 	/* Free the database entry */
1333 	kmem_free(umapdb, sizeof (tavor_umap_db_entry_t));
1334 }
1335 
1336 
1337 /*
1338  * tavor_umap_db_add()
1339  *    Context: Can be called from user or kernel context.
1340  */
1341 void
tavor_umap_db_add(tavor_umap_db_entry_t * umapdb)1342 tavor_umap_db_add(tavor_umap_db_entry_t *umapdb)
1343 {
1344 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1345 	tavor_umap_db_add_nolock(umapdb);
1346 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1347 }
1348 
1349 
1350 /*
1351  * tavor_umap_db_add_nolock()
1352  *    Context: Can be called from user or kernel context.
1353  */
1354 void
tavor_umap_db_add_nolock(tavor_umap_db_entry_t * umapdb)1355 tavor_umap_db_add_nolock(tavor_umap_db_entry_t *umapdb)
1356 {
1357 	tavor_umap_db_query_t	query;
1358 	avl_index_t		where;
1359 
1360 	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1361 
1362 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1363 
1364 	/*
1365 	 * Copy the common portion of the "to-be-added" database entry
1366 	 * into the "tavor_umap_db_query_t" structure.  We use this structure
1367 	 * (with no flags set) to find the appropriate location in the
1368 	 * "userland resources database" for the new entry to be added.
1369 	 *
1370 	 * Note: we expect that this entry should not be found in the
1371 	 * database (unless something bad has happened).
1372 	 */
1373 	query.tqdb_common = umapdb->tdbe_common;
1374 	query.tqdb_flags  = 0;
1375 	(void) avl_find(&tavor_userland_rsrc_db.tdl_umapdb_avl, &query,
1376 	    &where);
1377 
1378 	/*
1379 	 * Now, using the "where" field from the avl_find() operation
1380 	 * above, we will insert the new database entry ("umapdb").
1381 	 */
1382 	avl_insert(&tavor_userland_rsrc_db.tdl_umapdb_avl, umapdb,
1383 	    where);
1384 }
1385 
1386 
1387 /*
1388  * tavor_umap_db_find()
1389  *    Context: Can be called from user or kernel context.
1390  */
1391 int
tavor_umap_db_find(uint_t instance,uint64_t key,uint_t type,uint64_t * value,uint_t flag,tavor_umap_db_entry_t ** umapdb)1392 tavor_umap_db_find(uint_t instance, uint64_t key, uint_t type,
1393     uint64_t *value, uint_t flag, tavor_umap_db_entry_t	**umapdb)
1394 {
1395 	int	status;
1396 
1397 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1398 	status = tavor_umap_db_find_nolock(instance, key, type, value, flag,
1399 	    umapdb);
1400 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1401 
1402 	return (status);
1403 }
1404 
1405 
1406 /*
1407  * tavor_umap_db_find_nolock()
1408  *    Context: Can be called from user or kernel context.
1409  */
1410 int
tavor_umap_db_find_nolock(uint_t instance,uint64_t key,uint_t type,uint64_t * value,uint_t flags,tavor_umap_db_entry_t ** umapdb)1411 tavor_umap_db_find_nolock(uint_t instance, uint64_t key, uint_t type,
1412     uint64_t *value, uint_t flags, tavor_umap_db_entry_t **umapdb)
1413 {
1414 	tavor_umap_db_query_t	query;
1415 	tavor_umap_db_entry_t	*entry;
1416 	avl_index_t		where;
1417 
1418 	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1419 
1420 	/*
1421 	 * Fill in key, type, instance, and flags values of the
1422 	 * tavor_umap_db_query_t in preparation for the database
1423 	 * lookup.
1424 	 */
1425 	query.tqdb_flags		= flags;
1426 	query.tqdb_common.tdb_key	= key;
1427 	query.tqdb_common.tdb_type	= type;
1428 	query.tqdb_common.tdb_instance	= instance;
1429 
1430 	/*
1431 	 * Perform the database query.  If no entry is found, then
1432 	 * return failure, else continue.
1433 	 */
1434 	entry = (tavor_umap_db_entry_t *)avl_find(
1435 	    &tavor_userland_rsrc_db.tdl_umapdb_avl, &query, &where);
1436 	if (entry == NULL) {
1437 		return (DDI_FAILURE);
1438 	}
1439 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*entry))
1440 
1441 	/*
1442 	 * If the flags argument specifies that the entry should
1443 	 * be removed if found, then call avl_remove() to remove
1444 	 * the entry from the database.
1445 	 */
1446 	if (flags & TAVOR_UMAP_DB_REMOVE) {
1447 
1448 		avl_remove(&tavor_userland_rsrc_db.tdl_umapdb_avl, entry);
1449 
1450 		/*
1451 		 * The database entry is returned with the expectation
1452 		 * that the caller will use tavor_umap_db_free() to
1453 		 * free the entry's memory.  ASSERT that this is non-NULL.
1454 		 * NULL pointer should never be passed for the
1455 		 * TAVOR_UMAP_DB_REMOVE case.
1456 		 */
1457 		ASSERT(umapdb != NULL);
1458 	}
1459 
1460 	/*
1461 	 * If the caller would like visibility to the database entry
1462 	 * (indicated through the use of a non-NULL "umapdb" argument),
1463 	 * then fill it in.
1464 	 */
1465 	if (umapdb != NULL) {
1466 		*umapdb = entry;
1467 	}
1468 
1469 	/* Extract value field from database entry and return success */
1470 	*value = entry->tdbe_common.tdb_value;
1471 
1472 	return (DDI_SUCCESS);
1473 }
1474 
1475 
1476 /*
1477  * tavor_umap_umemlock_cb()
1478  *    Context: Can be called from callback context.
1479  */
1480 void
tavor_umap_umemlock_cb(ddi_umem_cookie_t * umem_cookie)1481 tavor_umap_umemlock_cb(ddi_umem_cookie_t *umem_cookie)
1482 {
1483 	tavor_umap_db_entry_t	*umapdb;
1484 	tavor_state_t		*state;
1485 	tavor_rsrc_t 		*rsrcp;
1486 	tavor_mrhdl_t		mr;
1487 	uint64_t		value;
1488 	uint_t			instance;
1489 	int			status;
1490 	void			(*mr_callback)(void *, void *);
1491 	void			*mr_cbarg1, *mr_cbarg2;
1492 
1493 	/*
1494 	 * If this was userland memory, then we need to remove its entry
1495 	 * from the "userland resources database".  Note:  We use the
1496 	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flag here because we don't know
1497 	 * which instance was used when the entry was added (but we want
1498 	 * to know after the entry is found using the other search criteria).
1499 	 */
1500 	status = tavor_umap_db_find(0, (uint64_t)(uintptr_t)umem_cookie,
1501 	    MLNX_UMAP_MRMEM_RSRC, &value, (TAVOR_UMAP_DB_REMOVE |
1502 	    TAVOR_UMAP_DB_IGNORE_INSTANCE), &umapdb);
1503 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*umapdb))
1504 	if (status == DDI_SUCCESS) {
1505 		instance = umapdb->tdbe_common.tdb_instance;
1506 		state = ddi_get_soft_state(tavor_statep, instance);
1507 		if (state == NULL) {
1508 			cmn_err(CE_WARN, "Unable to match Tavor instance\n");
1509 			return;
1510 		}
1511 
1512 		/* Free the database entry */
1513 		tavor_umap_db_free(umapdb);
1514 
1515 		/* Use "value" to convert to an MR handle */
1516 		rsrcp = (tavor_rsrc_t *)(uintptr_t)value;
1517 		mr = (tavor_mrhdl_t)rsrcp->tr_addr;
1518 
1519 		/*
1520 		 * If a callback has been provided, call it first.  This
1521 		 * callback is expected to do any cleanup necessary to
1522 		 * guarantee that the subsequent MR deregister (below)
1523 		 * will succeed.  Specifically, this means freeing up memory
1524 		 * windows which might have been associated with the MR.
1525 		 */
1526 		mutex_enter(&mr->mr_lock);
1527 		mr_callback = mr->mr_umem_cbfunc;
1528 		mr_cbarg1   = mr->mr_umem_cbarg1;
1529 		mr_cbarg2   = mr->mr_umem_cbarg2;
1530 		mutex_exit(&mr->mr_lock);
1531 		if (mr_callback != NULL) {
1532 			mr_callback(mr_cbarg1, mr_cbarg2);
1533 		}
1534 
1535 		/*
1536 		 * Then call tavor_mr_deregister() to release the resources
1537 		 * associated with the MR handle.  Note: Because this routine
1538 		 * will also check for whether the ddi_umem_cookie_t is in the
1539 		 * database, it will take responsibility for disabling the
1540 		 * memory region and calling ddi_umem_unlock().
1541 		 */
1542 		status = tavor_mr_deregister(state, &mr, TAVOR_MR_DEREG_ALL,
1543 		    TAVOR_SLEEP);
1544 		if (status != DDI_SUCCESS) {
1545 			TAVOR_WARNING(state, "Unexpected failure in "
1546 			    "deregister from callback\n");
1547 		}
1548 	}
1549 }
1550 
1551 
1552 /*
1553  * tavor_umap_db_compare()
1554  *    Context: Can be called from user or kernel context.
1555  */
1556 static int
tavor_umap_db_compare(const void * q,const void * e)1557 tavor_umap_db_compare(const void *q, const void *e)
1558 {
1559 	tavor_umap_db_common_t	*entry_common, *query_common;
1560 	uint_t			query_flags;
1561 
1562 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*((tavor_umap_db_query_t *)q)))
1563 
1564 	entry_common = &((tavor_umap_db_entry_t *)e)->tdbe_common;
1565 	query_common = &((tavor_umap_db_query_t *)q)->tqdb_common;
1566 	query_flags  = ((tavor_umap_db_query_t *)q)->tqdb_flags;
1567 
1568 	/*
1569 	 * The first comparison is done on the "key" value in "query"
1570 	 * and "entry".  If they are not equal, then the appropriate
1571 	 * search direction is returned.  Else, we continue by
1572 	 * comparing "type".
1573 	 */
1574 	if (query_common->tdb_key < entry_common->tdb_key) {
1575 		return (-1);
1576 	} else if (query_common->tdb_key > entry_common->tdb_key) {
1577 		return (+1);
1578 	}
1579 
1580 	/*
1581 	 * If the search reaches this point, then "query" and "entry"
1582 	 * have equal key values.  So we continue be comparing their
1583 	 * "type" values.  Again, if they are not equal, then the
1584 	 * appropriate search direction is returned.  Else, we continue
1585 	 * by comparing "instance".
1586 	 */
1587 	if (query_common->tdb_type < entry_common->tdb_type) {
1588 		return (-1);
1589 	} else if (query_common->tdb_type > entry_common->tdb_type) {
1590 		return (+1);
1591 	}
1592 
1593 	/*
1594 	 * If the search reaches this point, then "query" and "entry"
1595 	 * have exactly the same key and type values.  Now we consult
1596 	 * the "flags" field in the query to determine whether the
1597 	 * "instance" is relevant to the search.  If the
1598 	 * TAVOR_UMAP_DB_IGNORE_INSTANCE flags is set, then return
1599 	 * success (0) here.  Otherwise, continue the search by comparing
1600 	 * instance values and returning the appropriate search direction.
1601 	 */
1602 	if (query_flags & TAVOR_UMAP_DB_IGNORE_INSTANCE) {
1603 		return (0);
1604 	}
1605 
1606 	/*
1607 	 * If the search has reached this point, then "query" and "entry"
1608 	 * can only be differentiated by their instance values.  If these
1609 	 * are not equal, then return the appropriate search direction.
1610 	 * Else, we return success (0).
1611 	 */
1612 	if (query_common->tdb_instance < entry_common->tdb_instance) {
1613 		return (-1);
1614 	} else if (query_common->tdb_instance > entry_common->tdb_instance) {
1615 		return (+1);
1616 	}
1617 
1618 	/* Everything matches... so return success */
1619 	return (0);
1620 }
1621 
1622 
1623 /*
1624  * tavor_umap_db_set_onclose_cb()
1625  *    Context: Can be called from user or kernel context.
1626  */
1627 int
tavor_umap_db_set_onclose_cb(dev_t dev,uint64_t flag,void (* callback)(void *),void * arg)1628 tavor_umap_db_set_onclose_cb(dev_t dev, uint64_t flag,
1629     void (*callback)(void *), void *arg)
1630 {
1631 	tavor_umap_db_priv_t	*priv;
1632 	tavor_umap_db_entry_t	*umapdb;
1633 	minor_t			instance;
1634 	uint64_t		value;
1635 	int			status;
1636 
1637 	instance = TAVOR_DEV_INSTANCE(dev);
1638 	if (instance == -1) {
1639 		return (DDI_FAILURE);
1640 	}
1641 
1642 	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1643 		return (DDI_FAILURE);
1644 	}
1645 
1646 	/*
1647 	 * Grab the lock for the "userland resources database" and find
1648 	 * the entry corresponding to this minor number.  Once it's found,
1649 	 * allocate (if necessary) and add an entry (in the "tdb_priv"
1650 	 * field) to indicate that further processing may be needed during
1651 	 * Tavor's close() handling.
1652 	 */
1653 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1654 	status = tavor_umap_db_find_nolock(instance, dev,
1655 	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1656 	if (status != DDI_SUCCESS) {
1657 		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1658 		return (DDI_FAILURE);
1659 	}
1660 
1661 	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1662 	if (priv == NULL) {
1663 		priv = (tavor_umap_db_priv_t *)kmem_zalloc(
1664 		    sizeof (tavor_umap_db_priv_t), KM_NOSLEEP);
1665 		if (priv == NULL) {
1666 			mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1667 			return (DDI_FAILURE);
1668 		}
1669 	}
1670 
1671 	/*
1672 	 * Save away the callback and argument to be used during Tavor's
1673 	 * close() processing.
1674 	 */
1675 	priv->tdp_cb	= callback;
1676 	priv->tdp_arg	= arg;
1677 
1678 	umapdb->tdbe_common.tdb_priv = (void *)priv;
1679 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1680 
1681 	return (DDI_SUCCESS);
1682 }
1683 
1684 
1685 /*
1686  * tavor_umap_db_clear_onclose_cb()
1687  *    Context: Can be called from user or kernel context.
1688  */
1689 int
tavor_umap_db_clear_onclose_cb(dev_t dev,uint64_t flag)1690 tavor_umap_db_clear_onclose_cb(dev_t dev, uint64_t flag)
1691 {
1692 	tavor_umap_db_priv_t	*priv;
1693 	tavor_umap_db_entry_t	*umapdb;
1694 	minor_t			instance;
1695 	uint64_t		value;
1696 	int			status;
1697 
1698 	instance = TAVOR_DEV_INSTANCE(dev);
1699 	if (instance == -1) {
1700 		return (DDI_FAILURE);
1701 	}
1702 
1703 	if (flag != TAVOR_ONCLOSE_FLASH_INPROGRESS) {
1704 		return (DDI_FAILURE);
1705 	}
1706 
1707 	/*
1708 	 * Grab the lock for the "userland resources database" and find
1709 	 * the entry corresponding to this minor number.  Once it's found,
1710 	 * remove the entry (in the "tdb_priv" field) that indicated the
1711 	 * need for further processing during Tavor's close().  Free the
1712 	 * entry, if appropriate.
1713 	 */
1714 	mutex_enter(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1715 	status = tavor_umap_db_find_nolock(instance, dev,
1716 	    MLNX_UMAP_PID_RSRC, &value, 0, &umapdb);
1717 	if (status != DDI_SUCCESS) {
1718 		mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1719 		return (DDI_FAILURE);
1720 	}
1721 
1722 	priv = (tavor_umap_db_priv_t *)umapdb->tdbe_common.tdb_priv;
1723 	if (priv != NULL) {
1724 		kmem_free(priv, sizeof (tavor_umap_db_priv_t));
1725 		priv = NULL;
1726 	}
1727 
1728 	umapdb->tdbe_common.tdb_priv = (void *)priv;
1729 	mutex_exit(&tavor_userland_rsrc_db.tdl_umapdb_lock);
1730 	return (DDI_SUCCESS);
1731 }
1732 
1733 
1734 /*
1735  * tavor_umap_db_clear_onclose_cb()
1736  *    Context: Can be called from user or kernel context.
1737  */
1738 void
tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t * priv)1739 tavor_umap_db_handle_onclose_cb(tavor_umap_db_priv_t *priv)
1740 {
1741 	void	(*callback)(void *);
1742 
1743 	ASSERT(MUTEX_HELD(&tavor_userland_rsrc_db.tdl_umapdb_lock));
1744 
1745 	/*
1746 	 * Call the callback.
1747 	 *    Note: Currently there is only one callback (in "tdp_cb"), but
1748 	 *    in the future there may be more, depending on what other types
1749 	 *    of interaction there are between userland processes and the
1750 	 *    driver.
1751 	 */
1752 	callback = priv->tdp_cb;
1753 	callback(priv->tdp_arg);
1754 }
1755