1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright (c) 2002-2003, Network Appliance, Inc. All rights reserved.
24  */
25 
26 /*
27  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
28  * Use is subject to license terms.
29  */
30 
31 /*
32  *
33  * MODULE: dapl_evd_util.c
34  *
35  * PURPOSE: Manage EVD Info structure
36  *
37  * $Id: dapl_evd_util.c,v 1.41 2003/08/20 13:18:36 sjs2 Exp $
38  */
39 
40 #include <sys/time.h>
41 #include <strings.h>
42 #include "dapl_evd_util.h"
43 #include "dapl_ia_util.h"
44 #include "dapl_cno_util.h"
45 #include "dapl_ring_buffer_util.h"
46 #include "dapl_adapter_util.h"
47 #include "dapl_tavor_ibtf_impl.h"
48 #include "dapl_cookie.h"
49 #include "dapl.h"
50 
51 
52 #ifdef	DAPL_DBG	/* For debugging.  */
53 static void
54 dapli_evd_eh_print_cqe(
55 	IN  ib_work_completion_t	cqe);
56 #endif
57 
58 static DAT_BOOLEAN
59 dapli_evd_cqe_to_event(
60     IN DAPL_EVD			*evd_ptr,
61     IN ib_work_completion_t	*cqe_ptr,
62     IN DAT_BOOLEAN		process_premature_events,
63     OUT DAT_EVENT		*event_ptr);
64 
65 static DAT_RETURN
66 dapli_evd_event_alloc(
67 	IN  DAPL_EVD		*evd_ptr,
68 	IN  DAPL_CNO		*cno_ptr,
69 	IN  DAT_COUNT		qlen);
70 
71 
72 /*
73  * dapls_evd_internal_create
74  *
75  * actually create the evd.  this is called after all parameter checking
76  * has been performed in dapl_evd_create.  it is also called from dapl_ia_open
77  * to create the default async evd.
78  *
79  * Input:
80  *	ia_ptr
81  *	cno_ptr
82  *	qlen
83  *	evd_flags
84  *
85  * Output:
86  *	evd_ptr_ptr
87  *
88  * Returns:
89  *	none
90  *
91  */
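
/*
 * A minimal calling sketch (queue length and flags below are illustrative
 * only; the real callers are dapl_evd_create and dapl_ia_open):
 *
 *	DAPL_EVD	*evd_ptr;
 *	DAT_RETURN	dat_status;
 *
 *	dat_status = dapls_evd_internal_create(ia_ptr, NULL, 8,
 *	    DAT_EVD_DTO_FLAG, &evd_ptr);
 *	if (dat_status != DAT_SUCCESS)
 *		return (dat_status);
 */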
92 
93 DAT_RETURN
94 dapls_evd_internal_create(
95     DAPL_IA		*ia_ptr,
96     DAPL_CNO		*cno_ptr,
97     DAT_COUNT		min_qlen,
98     DAT_EVD_FLAGS	evd_flags,
99     DAPL_EVD		**evd_ptr_ptr)
100 {
101 	DAPL_EVD	*evd_ptr;
102 	DAT_COUNT	cq_len;
103 	DAT_RETURN	dat_status;
104 
105 	dat_status	= DAT_SUCCESS;
106 	*evd_ptr_ptr	= NULL;
107 	cq_len		= min_qlen;
108 
109 	evd_ptr = dapls_evd_alloc(ia_ptr,
110 	    cno_ptr,
111 	    evd_flags,
112 	    min_qlen);
113 	if (!evd_ptr) {
114 		dat_status = DAT_ERROR(DAT_INSUFFICIENT_RESOURCES,
115 		    DAT_RESOURCE_MEMORY);
116 		goto bail;
117 	}
118 
119 	/*
120 	 * If we are dealing with event streams besides a CQ event stream,
121 	 * be conservative and set producer side locking.  Otherwise, no.
122 	 */
123 	evd_ptr->evd_producer_locking_needed =
124 	    ((evd_flags & ~ (DAT_EVD_DTO_FLAG|DAT_EVD_RMR_BIND_FLAG)) != 0);
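
	/*
	 * For a pure CQ event stream (DTO and/or RMR bind completions only)
	 * the CQ is drained by the consumer itself (see dapls_evd_copy_cq),
	 * so there is no separate producer contending for the EVD lock;
	 * every other event stream may post from another context and
	 * therefore needs the lock.
	 */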
125 
126 	/* Before we setup any callbacks, transition state to OPEN.  */
127 	evd_ptr->evd_state = DAPL_EVD_STATE_OPEN;
128 
129 	/*
130 	 * we need to call cq_alloc even for connection/cr/async evds
131 	 * since all the allocation happens there.
132 	 */
133 	dat_status = dapls_ib_cq_alloc(ia_ptr,
134 	    evd_ptr, cno_ptr, &cq_len);
135 	if (dat_status != DAT_SUCCESS) {
136 		goto bail;
137 	}
138 
139 #if 0
140 	/*
141 	 * Current implementation of dapls_ib_setup_async_callback() does
142 	 * nothing and returns DAT_SUCCESS. However, it is declared to expect
143 	 * function pointers with different signatures.  We leave this code
144 	 * block out until dapls_ib_setup_async_callback() is implemented.
145 	 */
146 	dat_status = dapls_ib_setup_async_callback(
147 	    ia_ptr,
148 	    DAPL_ASYNC_CQ_COMPLETION,
149 	    (unsigned int *) evd_ptr->ib_cq_handle,
150 	    (ib_async_handler_t)dapl_evd_dto_callback,
151 	    evd_ptr);
152 	if (dat_status != DAT_SUCCESS) {
153 		goto bail;
154 	}
155 #endif
156 	/*
157 	 * cq_notify is not required here since the CQ is polled
158 	 * every time evd_wait is called anyway.
159 	 * dat_status = dapls_set_cq_notify(ia_ptr, evd_ptr);
160 	 */
161 
162 	/*
163 	 * We now have an accurate count of events, so allocate them into
164 	 * the EVD
165 	 */
166 	dat_status = dapli_evd_event_alloc(evd_ptr, cno_ptr, cq_len);
167 	if (dat_status != DAT_SUCCESS) {
168 		goto bail;
169 	}
170 
171 	/* We're assuming success in the following.   */
172 	dapl_os_assert(dat_status == DAT_SUCCESS);
173 	dapl_ia_link_evd(ia_ptr, evd_ptr);
174 	*evd_ptr_ptr = evd_ptr;
175 
176 bail:
177 	if (dat_status != DAT_SUCCESS) {
178 		if (evd_ptr) {
179 			(void) dapls_evd_dealloc(evd_ptr);
180 		}
181 	}
182 
183 	return (dat_status);
184 }
185 
186 /*
187  * dapls_evd_alloc
188  *
189  * alloc and initialize an EVD struct
190  *
191  * Input:
192  *	ia
193  *
194  * Output:
195  *	evd_ptr
196  *
197  * Returns:
198  *	none
199  *
200  */
201 DAPL_EVD *
202 dapls_evd_alloc(
203     IN DAPL_IA		*ia_ptr,
204     IN DAPL_CNO		*cno_ptr,
205     IN DAT_EVD_FLAGS	evd_flags,
206     IN DAT_COUNT	qlen) /* ARGSUSED */
207 {
208 	DAPL_EVD	*evd_ptr;
209 
210 	evd_ptr    = NULL;
211 
212 	/* Allocate EVD */
213 	evd_ptr = (DAPL_EVD *)dapl_os_alloc(sizeof (DAPL_EVD));
214 	if (!evd_ptr) {
215 		goto bail;
216 	}
217 
218 	/* zero the structure */
219 	(void) dapl_os_memzero(evd_ptr, sizeof (DAPL_EVD));
220 
221 	/*
222 	 * initialize the header
223 	 */
224 	evd_ptr->header.provider		= ia_ptr->header.provider;
225 	evd_ptr->header.magic			= DAPL_MAGIC_EVD;
226 	evd_ptr->header.handle_type		= DAT_HANDLE_TYPE_EVD;
227 	evd_ptr->header.owner_ia		= ia_ptr;
228 	evd_ptr->header.user_context.as_64	= 0;
229 	evd_ptr->header.user_context.as_ptr	= NULL;
230 	dapl_llist_init_entry(&evd_ptr->header.ia_list_entry);
231 	dapl_os_lock_init(&evd_ptr->header.lock);
232 
233 	/*
234 	 * Initialize the body
235 	 */
236 	evd_ptr->evd_state	= DAPL_EVD_STATE_INITIAL;
237 	evd_ptr->evd_flags	= evd_flags;
238 	evd_ptr->evd_enabled	= DAT_TRUE;
239 	evd_ptr->evd_waitable	= DAT_TRUE;
240 	evd_ptr->evd_producer_locking_needed = 1; /* Conservative value.  */
241 	evd_ptr->ib_cq_handle	= IB_INVALID_HANDLE;
242 	evd_ptr->evd_ref_count	= 0;
243 	evd_ptr->catastrophic_overflow = DAT_FALSE;
244 	evd_ptr->qlen		= qlen;
245 
246 	dapl_llist_init_entry(&evd_ptr->cno_list_entry);
247 	evd_ptr->completion_type = DAPL_EVD_STATE_THRESHOLD;
248 	(void) dapl_os_wait_object_init(&evd_ptr->wait_object);
249 
250 bail:
251 	return (evd_ptr);
252 }
253 
254 
255 /*
256  * dapli_evd_event_alloc
257  *
258  * alloc events into an EVD.
259  *
260  * Input:
261  *	evd_ptr
262  *	qlen
263  *
264  * Output:
265  *	NONE
266  *
267  * Returns:
268  *	DAT_SUCCESS
269  *	ERROR
270  *
271  */
272 DAT_RETURN
273 dapli_evd_event_alloc(
274     IN DAPL_EVD		*evd_ptr,
275     IN  DAPL_CNO	*cno_ptr,
276     IN DAT_COUNT	qlen)
277 {
278 	DAT_EVENT	*event_ptr;
279 	DAT_COUNT	i;
280 	DAT_RETURN	dat_status;
281 
282 	dat_status = DAT_SUCCESS;
283 	event_ptr  = NULL;
284 
285 	/* Allocate EVENTs */
286 	event_ptr = (DAT_EVENT *) dapl_os_alloc(qlen * sizeof (DAT_EVENT));
287 	if (!event_ptr) {
288 		goto bail;
289 	}
290 	evd_ptr->events = event_ptr;
291 	evd_ptr->qlen = qlen;
292 
293 	/* allocate free event queue */
294 	dat_status = dapls_rbuf_alloc(&evd_ptr->free_event_queue, qlen);
295 	if (dat_status != DAT_SUCCESS) {
296 		goto bail;
297 	}
298 
299 	/* allocate pending event queue */
300 	dat_status = dapls_rbuf_alloc(&evd_ptr->pending_event_queue, qlen);
301 	if (dat_status != DAT_SUCCESS) {
302 		goto bail;
303 	}
304 
305 	/* add events to free event queue */
306 	for (i = 0; i < qlen; i++) {
307 		dat_status = dapls_rbuf_add(&evd_ptr->free_event_queue,
308 		    (void *)event_ptr);
309 		dapl_os_assert(dat_status == DAT_SUCCESS);
310 		event_ptr++;
311 	}
312 	evd_ptr->cq_notified = DAT_FALSE;
313 	evd_ptr->cq_notified_when = 0;
314 	evd_ptr->cno_active_count = 0;
315 	if (cno_ptr != NULL) {
316 		dapl_os_lock(&cno_ptr->header.lock);
317 		dapl_llist_add_head(&cno_ptr->evd_list_head,
318 		    &evd_ptr->cno_list_entry, evd_ptr);
319 		/* Take a reference count on the CNO */
320 		dapl_os_atomic_inc(&cno_ptr->cno_ref_count);
321 		dapl_os_unlock(&cno_ptr->header.lock);
322 	}
323 	evd_ptr->cno_ptr = cno_ptr;
324 	evd_ptr->threshold = 0;
325 
326 bail:
327 	return (dat_status);
328 }
329 
330 
331 /*
332  * dapls_evd_dealloc
333  *
334  * Free the passed in EVD structure. If an error occurs, this function
335  * will clean up all of the internal data structures and report the
336  * error.
337  *
338  * Input:
339  *	evd_ptr
340  *
341  * Output:
342  *	none
343  *
344  * Returns:
345  *	status
346  *
347  */
348 DAT_RETURN
349 dapls_evd_dealloc(
350     IN DAPL_EVD		*evd_ptr)
351 {
352 	DAT_RETURN	dat_status;
353 	DAPL_IA	*ia_ptr;
354 
355 	dat_status = DAT_SUCCESS;
356 
357 	dapl_os_assert(evd_ptr->header.magic == DAPL_MAGIC_EVD);
358 	dapl_os_assert(evd_ptr->evd_ref_count == 0);
359 
360 	/*
361 	 * Destroy the CQ first, to keep any more callbacks from coming
362 	 * up from it.
363 	 */
364 	if (evd_ptr->ib_cq_handle != IB_INVALID_HANDLE) {
365 		ia_ptr = evd_ptr->header.owner_ia;
366 
367 		dat_status = dapls_ib_cq_free(ia_ptr, evd_ptr);
368 		if (dat_status != DAT_SUCCESS) {
369 			goto bail;
370 		}
371 	}
372 
373 	/*
374 	 * We should now be safe to invalidate the EVD; reset the
375 	 * magic to prevent reuse.
376 	 */
377 	evd_ptr->header.magic = DAPL_MAGIC_INVALID;
378 
379 	/* Release reference on the CNO if it exists */
380 	if (evd_ptr->cno_ptr != NULL) {
381 		dapl_os_lock(&evd_ptr->cno_ptr->header.lock);
382 		(void) dapl_llist_remove_entry(&evd_ptr->cno_ptr->evd_list_head,
383 		    &evd_ptr->cno_list_entry);
384 		dapl_os_atomic_dec(&evd_ptr->cno_ptr->cno_ref_count);
385 		dapl_os_unlock(&evd_ptr->cno_ptr->header.lock);
386 	}
387 
388 	/*
389 	 * If the ring buffer allocation failed, then the dapls_rbuf_destroy
390 	 * function will detect that the ring buffer's internal data (e.g. base
391 	 * pointer) are invalid and will handle the situation appropriately
392 	 */
393 	dapls_rbuf_destroy(&evd_ptr->free_event_queue);
394 	dapls_rbuf_destroy(&evd_ptr->pending_event_queue);
395 
396 	if (evd_ptr->events) {
397 		dapl_os_free(evd_ptr->events,
398 		    evd_ptr->qlen * sizeof (DAT_EVENT));
399 	}
400 
401 	(void) dapl_os_wait_object_destroy(&evd_ptr->wait_object);
402 	dapl_os_free(evd_ptr, sizeof (DAPL_EVD));
403 
404 bail:
405 	return (dat_status);
406 }
407 
408 
409 /*
410  * dapli_evd_eh_print_cqe
411  *
412  * Input:
413  *	cqe
414  *
415  * Output:
416  *	none
417  *
418  * Prints out a CQE for debug purposes
419  *
420  */
421 
422 #ifdef	DAPL_DBG	/* For debugging.  */
423 void
424 dapli_evd_eh_print_cqe(IN ib_work_completion_t cqe)
425 {
426 	static char *optable[] = {
427 		"",
428 		"OP_SEND",
429 		"OP_RDMA_READ",
430 		"OP_RDMA_WRITE",
431 		"OP_COMP_AND_SWAP",
432 		"OP_FETCH_AND_ADD",
433 		"OP_BIND_MW",
434 		"OP_RECEIVE",
435 		"OP_RECEIVE_RDMAWI",
436 		0
437 	};
438 	DAPL_COOKIE		*dto_cookie;
439 
440 	dto_cookie = (DAPL_COOKIE *) (uintptr_t)DAPL_GET_CQE_WRID(&cqe);
441 
442 	dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
443 	    "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
444 	dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
445 	    "\t dapl_evd_dto_callback : CQE \n");
446 	dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
447 	    "\t\t work_req_id 0x%llx\n", DAPL_GET_CQE_WRID(&cqe));
448 	dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
449 	    "\t\t op_type: %s\n", optable[DAPL_GET_CQE_OPTYPE(&cqe)]);
450 	if ((DAPL_GET_CQE_OPTYPE(&cqe) == OP_SEND) ||
451 	    (DAPL_GET_CQE_OPTYPE(&cqe) == OP_RDMA_WRITE)) {
452 		dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
453 		    "\t\t bytes_num %d\n", dto_cookie->val.dto.size);
454 	} else {
455 		dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
456 		    "\t\t bytes_num %d\n", DAPL_GET_CQE_BYTESNUM(&cqe));
457 	}
458 	dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
459 	    "\t\t status %d\n", DAPL_GET_CQE_STATUS(&cqe));
460 	dapl_dbg_log(DAPL_DBG_TYPE_CALLBACK,
461 	    "\t >>>>>>>>>>>>>>>>>>>>>>><<<<<<<<<<<<<<<<<<<\n");
462 }
463 #endif
464 
465 /*
466  * Event posting code follows.
467  */
468 
469 /*
470  * These next two functions (dapli_evd_get_event and dapli_evd_post_event)
471  * are a pair.  They are always called together, from one of the functions
472  * at the end of this file (dapl_evd_post_*_event).
473  *
474  * Note that if producer side locking is enabled, the first one takes the
475  * EVD lock and the second releases it.
476  */
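
/*
 * A minimal usage sketch of the pair (the dapls_evd_post_*_event helpers
 * below all follow this pattern):
 *
 *	event = dapli_evd_get_event(evd_ptr);	    takes the EVD lock if needed
 *	if (event == NULL)
 *		dapli_evd_post_overflow_event(async_evd, evd_ptr);
 *	else {
 *		event->event_number = ...;	    fill in the event data
 *		dapli_evd_post_event(evd_ptr, event);   releases the EVD lock
 *	}
 */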
477 
478 /*
479  * dapli_evd_get_event
480  *
481  * Get an event struct from the evd.  The caller should fill in the event
482  * and call dapl_evd_post_event.
483  *
484  * If there are no events available, an overflow event is generated to the
485  * async EVD handler.
486  *
487  * If this EVD requires producer locking, a successful return implies
488  * that the lock is held.
489  *
490  * Input:
491  *	evd_ptr
492  *
493  * Output:
494  *	event
495  *
496  */
497 
498 static DAT_EVENT *
499 dapli_evd_get_event(
500     DAPL_EVD *evd_ptr)
501 {
502 	DAT_EVENT	*event;
503 
504 	if (evd_ptr->evd_producer_locking_needed) {
505 		dapl_os_lock(&evd_ptr->header.lock);
506 	}
507 
508 	event = (DAT_EVENT *)dapls_rbuf_remove(&evd_ptr->free_event_queue);
509 
510 	/* Release the lock if it was taken and the call failed.  */
511 	if (!event && evd_ptr->evd_producer_locking_needed) {
512 		dapl_os_unlock(&evd_ptr->header.lock);
513 	}
514 
515 	return (event);
516 }
517 
518 /*
519  * dapli_evd_post_event
520  *
521  * Post the <event> to the evd.  If possible, invoke the evd's CNO.
522  * Otherwise post the event on the pending queue.
523  *
524  * If producer side locking is required, the EVD lock must be held upon
525  * entry to this function.
526  *
527  * Input:
528  *	evd_ptr
529  *	event
530  *
531  * Output:
532  *	none
533  *
534  */
535 
536 static void
537 dapli_evd_post_event(
538     IN	DAPL_EVD	*evd_ptr,
539     IN	const DAT_EVENT	*event_ptr)
540 {
541 	DAT_RETURN	dat_status;
542 	DAPL_CNO	*cno_to_trigger = NULL;
543 
544 	dapl_dbg_log(DAPL_DBG_TYPE_EVD,
545 	    "dapli_evd_post_event: Called with event # %x\n",
546 	    event_ptr->event_number);
547 
548 	dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
549 	    (void *)event_ptr);
550 	dapl_os_assert(dat_status == DAT_SUCCESS);
551 
552 	dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
553 	    evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);
554 
555 	if (evd_ptr->evd_state == DAPL_EVD_STATE_OPEN) {
556 		/* No waiter.  Arrange to trigger a CNO if it exists.  */
557 
558 		if (evd_ptr->evd_enabled) {
559 			cno_to_trigger = evd_ptr->cno_ptr;
560 		}
561 		if (evd_ptr->evd_producer_locking_needed) {
562 			dapl_os_unlock(&evd_ptr->header.lock);
563 		}
564 	} else {
565 		/*
566 		 * This routine gets called
567 		 *  - In the context of the waiting thread when CQ, CM or ASYNC
568 		 *    events need to be put on to the EVD ring buffer.
569 		 *  - Due to a post of a software event.
570 		 *
571 		 * In the first case the waiting thread is pulling the events
572 		 * from various streams into the evd so there is no need to
573 		 * wake any thread. In the second case if the evd is in waited
574 		 * state then we need to wakeup the waiting thread.
575 		 */
576 		if (event_ptr->event_number == DAT_SOFTWARE_EVENT) {
577 			/*
578 			 * We're in DAPL_EVD_STATE_WAITED.  Take the lock if
579 			 * we don't have it, recheck, and signal.
580 			 */
581 
582 			if (!evd_ptr->evd_producer_locking_needed) {
583 				dapl_os_lock(&evd_ptr->header.lock);
584 			}
585 
586 			if (evd_ptr->evd_state == DAPL_EVD_STATE_WAITED) {
587 				dapl_os_unlock(&evd_ptr->header.lock);
588 				(void) dapls_ib_event_wakeup(evd_ptr);
589 			} else {
590 				dapl_os_unlock(&evd_ptr->header.lock);
591 			}
592 		} else {
593 			if (evd_ptr->evd_producer_locking_needed) {
594 				dapl_os_unlock(&evd_ptr->header.lock);
595 			}
596 		}
597 	}
598 
599 	if (cno_to_trigger != NULL) {
600 		dapl_cno_trigger(cno_to_trigger, evd_ptr);
601 	}
602 }
603 
604 /*
605  * dapli_evd_post_event_nosignal
606  *
607  * Post the <event> to the evd.  Do not do any wakeup processing.
608  * This function should only be called if it is known that there are
609  * no waiters on this EVD that it is appropriate to wake up.  An example
610  * of such a situation is during internal dat_evd_wait() processing.
611  *
612  * If producer side locking is required, the EVD lock must be held upon
613  * entry to this function.
614  *
615  * Input:
616  *	evd_ptr
617  *	event
618  *
619  * Output:
620  *	none
621  *
622  */
623 
624 static void
625 dapli_evd_post_event_nosignal(
626     IN	DAPL_EVD	*evd_ptr,
627     IN	const DAT_EVENT	*event_ptr)
628 {
629 	DAT_RETURN	dat_status;
630 
631 	dapl_dbg_log(DAPL_DBG_TYPE_EVD,
632 	    "dapli_evd_post_event_nosignal: Called with event # %x\n",
633 	    event_ptr->event_number);
634 
635 	dat_status = dapls_rbuf_add(&evd_ptr->pending_event_queue,
636 	    (void *)event_ptr);
637 	dapl_os_assert(dat_status == DAT_SUCCESS);
638 
639 	dapl_os_assert(evd_ptr->evd_state == DAPL_EVD_STATE_WAITED ||
640 	    evd_ptr->evd_state == DAPL_EVD_STATE_OPEN);
641 
642 	if (evd_ptr->evd_producer_locking_needed) {
643 		dapl_os_unlock(&evd_ptr->header.lock);
644 	}
645 }
646 
647 /*
648  * dapli_evd_format_overflow_event
649  *
650  * format an overflow event for posting
651  *
652  * Input:
653  *	evd_ptr
654  *	event_ptr
655  *
656  * Output:
657  *	none
658  *
659  */
660 static void
661 dapli_evd_format_overflow_event(
662 	IN  DAPL_EVD  *evd_ptr,
663 	OUT DAT_EVENT *event_ptr)
664 {
665 	DAPL_IA *ia_ptr;
666 
667 	ia_ptr = evd_ptr->header.owner_ia;
668 
669 	event_ptr->evd_handle   = (DAT_EVD_HANDLE)evd_ptr;
670 	event_ptr->event_number = DAT_ASYNC_ERROR_EVD_OVERFLOW;
671 	event_ptr->event_data.asynch_error_event_data.dat_handle =
672 	    (DAT_HANDLE)ia_ptr;
673 }
674 
675 /*
676  * dapli_evd_post_overflow_event
677  *
678  * post an overflow event
679  *
680  * Input:
681  *	async_evd_ptr
682  *	evd_ptr
683  *
684  * Output:
685  *	none
686  *
687  */
688 static void
689 dapli_evd_post_overflow_event(
690     IN  DAPL_EVD  *async_evd_ptr,
691     IN  DAPL_EVD  *overflow_evd_ptr)
692 {
693 	DAT_EVENT *overflow_event;
694 
695 	/*
696 	 * The overflow_evd_ptr might be the same as the async evd.
697 	 * In that case we've got a catastrophic overflow.
698 	 */
699 	if (async_evd_ptr == overflow_evd_ptr) {
700 		async_evd_ptr->catastrophic_overflow = DAT_TRUE;
701 		async_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
702 		return;
703 	}
704 
705 	overflow_event = dapli_evd_get_event(overflow_evd_ptr);
706 	if (!overflow_event) {
707 		/* this is not good */
708 		overflow_evd_ptr->catastrophic_overflow = DAT_TRUE;
709 		overflow_evd_ptr->evd_state = DAPL_EVD_STATE_DEAD;
710 		return;
711 	}
712 	dapli_evd_format_overflow_event(overflow_evd_ptr, overflow_event);
713 	dapli_evd_post_event(overflow_evd_ptr, overflow_event);
714 }
715 
716 static DAT_EVENT *
717 dapli_evd_get_and_init_event(
718     IN DAPL_EVD				*evd_ptr,
719     IN DAT_EVENT_NUMBER			event_number)
720 {
721 	DAT_EVENT		*event_ptr;
722 
723 	event_ptr = dapli_evd_get_event(evd_ptr);
724 	if (NULL == event_ptr) {
725 		dapli_evd_post_overflow_event(
726 		    evd_ptr->header.owner_ia->async_error_evd, evd_ptr);
727 	} else {
728 		event_ptr->evd_handle = (DAT_EVD_HANDLE) evd_ptr;
729 		event_ptr->event_number = event_number;
730 	}
731 
732 	return (event_ptr);
733 }
734 
735 DAT_RETURN
736 dapls_evd_post_cr_arrival_event(
737     IN DAPL_EVD				*evd_ptr,
738     IN DAT_EVENT_NUMBER			event_number,
739     IN DAT_SP_HANDLE			sp_handle,
740     DAT_IA_ADDRESS_PTR			ia_address_ptr,
741     DAT_CONN_QUAL			conn_qual,
742     DAT_CR_HANDLE			cr_handle)
743 {
744 	DAT_EVENT		*event_ptr;
745 	event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
746 	/*
747 	 * Note event lock may be held on successful return
748 	 * to be released by dapli_evd_post_event(), if producer side locking
749 	 * is needed.
750 	 */
751 
752 	if (!event_ptr) {
753 		return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
754 	}
755 
756 	event_ptr->event_data.cr_arrival_event_data.sp_handle = sp_handle;
757 	event_ptr->event_data.cr_arrival_event_data.local_ia_address_ptr
758 	    = ia_address_ptr;
759 	event_ptr->event_data.cr_arrival_event_data.conn_qual = conn_qual;
760 	event_ptr->event_data.cr_arrival_event_data.cr_handle = cr_handle;
761 
762 	dapli_evd_post_event(evd_ptr, event_ptr);
763 	return (DAT_SUCCESS);
764 }
765 
766 
767 DAT_RETURN
768 dapls_evd_post_connection_event(
769     IN DAPL_EVD				*evd_ptr,
770     IN DAT_EVENT_NUMBER			event_number,
771     IN DAT_EP_HANDLE			ep_handle,
772     IN DAT_COUNT			private_data_size,
773     IN DAT_PVOID			private_data)
774 {
775 	DAT_EVENT		*event_ptr;
776 	event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
777 	/*
778 	 * Note event lock may be held on successful return
779 	 * to be released by dapli_evd_post_event(), if producer side locking
780 	 * is needed.
781 	 */
782 
783 	if (!event_ptr) {
784 		return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
785 	}
786 
787 	event_ptr->event_data.connect_event_data.ep_handle = ep_handle;
788 	event_ptr->event_data.connect_event_data.private_data_size
789 	    = private_data_size;
790 	event_ptr->event_data.connect_event_data.private_data = private_data;
791 
792 	dapli_evd_post_event(evd_ptr, event_ptr);
793 	return (DAT_SUCCESS);
794 }
795 
796 
797 DAT_RETURN
798 dapls_evd_post_async_error_event(
799     IN DAPL_EVD				*evd_ptr,
800     IN DAT_EVENT_NUMBER			event_number,
801     IN DAT_IA_HANDLE			ia_handle)
802 {
803 	DAT_EVENT		*event_ptr;
804 	event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
805 	/*
806 	 * Note event lock may be held on successful return
807 	 * to be released by dapli_evd_post_event(), if producer side locking
808 	 * is needed.
809 	 */
810 
811 	if (!event_ptr) {
812 		return (DAT_INSUFFICIENT_RESOURCES | DAT_RESOURCE_MEMORY);
813 	}
814 
815 	event_ptr->event_data.asynch_error_event_data.dat_handle = ia_handle;
816 
817 	dapli_evd_post_event(evd_ptr, event_ptr);
818 	return (DAT_SUCCESS);
819 }
820 
821 
822 DAT_RETURN
823 dapls_evd_post_software_event(
824     IN DAPL_EVD				*evd_ptr,
825     IN DAT_EVENT_NUMBER			event_number,
826     IN DAT_PVOID			pointer)
827 {
828 	DAT_EVENT		*event_ptr;
829 	event_ptr = dapli_evd_get_and_init_event(evd_ptr, event_number);
830 	/*
831 	 * Note event lock may be held on successful return
832 	 * to be released by dapli_evd_post_event(), if producer side locking
833 	 * is needed.
834 	 */
835 
836 	if (!event_ptr) {
837 		return (DAT_QUEUE_FULL);
838 	}
839 
840 	event_ptr->event_data.software_event_data.pointer = pointer;
841 
842 	dapli_evd_post_event(evd_ptr, event_ptr);
843 	return (DAT_SUCCESS);
844 }
845 
846 void
847 dapls_evd_post_premature_events(IN DAPL_EP *ep_ptr)
848 {
849 	DAPL_EVD		*evd_ptr;
850 	DAT_EVENT		*event;
851 	ib_work_completion_t	*cqe;
852 	uint32_t		qpn;
853 	int			prm_idx;
854 	int			nevents;
855 	int			i;
856 
857 	dapls_ib_poll_premature_events(ep_ptr, &cqe, &nevents);
858 	/* premature events are always recv events */
859 	evd_ptr = ep_ptr->param.recv_evd_handle;
860 	qpn = ep_ptr->qpn;
861 
862 	i = 0;
863 	prm_idx = 0;
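	/*
	 * 'i' counts the premature CQEs returned for this EP by
	 * dapls_ib_poll_premature_events() above, while 'prm_idx' walks the
	 * (possibly sparse) SRQ premature event list so that holes and
	 * entries belonging to other QPs can be skipped, and entries for
	 * this QP can be recycled.
	 */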
864 	while (i < nevents) {
865 		/*
866 		 * If srq_attached, premature events cannot exceed max_recv_dtos
867 		 */
868 		dapl_os_assert(!ep_ptr->srq_attached ||
869 		    (prm_idx <= ((DAPL_SRQ *)ep_ptr->param.srq_handle)->
870 		    param.max_recv_dtos));
871 
872 		/*
873 		 * The SRQ premature event list could potentially have
874 		 * holes (ie. free entries in the middle) or premature
875 		 * events for other QPs. These need to be skipped.
876 		 */
877 		if (ep_ptr->srq_attached &&
878 		    (!DAPL_CQE_IS_VALID(&cqe[prm_idx]) ||
879 		    (DAPL_GET_CQE_QPN(&cqe[prm_idx]) != qpn))) {
880 			prm_idx++;
881 			continue;
882 		}
883 
884 		dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
885 		    " Premature DTO processing\n");
886 
887 #ifdef	DAPL_DBG	/* For debugging.  */
888 		dapli_evd_eh_print_cqe(cqe[i]);
889 #endif
890 		/*
891 		 * Can use DAT_DTO_COMPLETION_EVENT because
892 		 * dapli_evd_cqe_to_event will overwrite.
893 		 */
894 		event = dapli_evd_get_and_init_event(evd_ptr,
895 		    DAT_DTO_COMPLETION_EVENT);
896 		if (event == NULL) {
897 			/* We've already attempted the overflow post, return */
898 			return;
899 		}
900 		(void) dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_TRUE,
901 		    event);
902 		dapli_evd_post_event_nosignal(evd_ptr, event);
903 		/*
904 		 * For SRQ attached QPs recycle the premature event
905 		 */
906 		if (ep_ptr->srq_attached) {
907 			dapls_ib_free_premature_events(ep_ptr, prm_idx);
908 			prm_idx++;
909 		}
910 		i++;
911 	}
912 }
913 
914 /*
915  * dapli_evd_cqe_to_event
916  *
917  * Convert a CQE into an event structure.
918  *
919  * Input:
920  *	evd_ptr
921  *	cqe_ptr
922  *
923  * Output:
924  *	event_ptr
925  *
926  * Returns:
927  *	none
928  *
929  */
930 static DAT_BOOLEAN
931 dapli_evd_cqe_to_event(
932     IN DAPL_EVD			*evd_ptr,
933     IN ib_work_completion_t	*cqe_ptr,
934     IN DAT_BOOLEAN		process_premature_events,
935     OUT DAT_EVENT		*event_ptr)
936 {
937 	DAPL_EP			*ep_ptr;
938 	DAPL_SRQ		*srq_ptr;
939 	DAPL_COOKIE		*cookie;
940 	DAT_EP_STATE		ep_state;
941 	ib_qp_handle_t		qp;
942 	ib_uint32_t		ib_status;
943 	ib_uint32_t		ibtype;
944 	int			srq_enabled;
945 	int			dto_error = 0;
946 
947 
948 	/*
949 	 * All that can be relied on if the status is bad is the status
950 	 * and WRID.
951 	 */
952 	ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
953 
954 	cookie = (DAPL_COOKIE *)((uintptr_t)DAPL_GET_CQE_WRID(cqe_ptr));
955 	dapl_os_assert((NULL != cookie));
956 
957 	if (cookie->queue_type == DAPL_COOKIE_QUEUE_EP) {
958 		srq_enabled = 0;
959 		ep_ptr = cookie->queue.ep;
960 	} else {
961 		srq_enabled = 1;
962 		srq_ptr = cookie->queue.srq;
963 		dapl_os_assert(NULL != srq_ptr);
964 		dapl_os_assert(srq_ptr->header.magic == DAPL_MAGIC_SRQ);
965 		ib_status = DAPL_GET_CQE_STATUS(cqe_ptr);
966 		ep_ptr = dapls_ib_srq_lookup_ep(srq_ptr, cqe_ptr);
967 	}
968 
969 	dapl_os_assert((NULL != ep_ptr));
970 	dapl_os_assert((ep_ptr->header.magic == DAPL_MAGIC_EP) ||
971 	    (ep_ptr->header.magic == DAPL_MAGIC_EP_EXIT));
972 
973 	event_ptr->evd_handle = (DAT_EVD_HANDLE) evd_ptr;
974 
975 	/*
976 	 * Check if the DTO completion arrived before CONNECTION_ESTABLISHED
977 	 * event -
978 	 *
979 	 * Send DTOs can occur only if ep state is CONNECTED/DISCONNECTED
980 	 * therefore it cannot occur before connection established event.
981 	 * Receive DTO can potentially complete before connection established
982 	 * event has been delivered to the client. In this case if the
983 	 * ep state is ACTIVE_CONNECTION_PENDING (active side) or
984 	 * COMPLETION_PENDING (passive side) the event is put in a special
985 	 * event queue in the qp_handle.
986 	 *
987 	 */
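	/*
	 * For example, the remote peer may post a send as soon as its side
	 * of the connection is up, so the matching receive completion can
	 * reach this CQ before the local connection established event has
	 * been delivered; such a completion is parked below until the EP
	 * has left the pending states.
	 */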
988 	if (!process_premature_events &&
989 	    (cookie->type == DAPL_COOKIE_TYPE_DTO) &&
990 	    (ib_status == IB_COMP_ST_SUCCESS)) {
991 		ep_state = ep_ptr->param.ep_state;
992 		qp = ep_ptr->qp_handle;
993 		if ((ep_state == DAT_EP_STATE_ACTIVE_CONNECTION_PENDING) ||
994 		    (ep_state == DAT_EP_STATE_COMPLETION_PENDING) ||
995 		    (qp->qp_num_premature_events > 0)) {
996 			/*
997 			 * not yet ready to put the event in the evd ring
998 			 * buffer
999 			 */
1000 			dapls_ib_store_premature_events(qp, cqe_ptr);
1001 			return (DAT_FALSE);
1002 		}
1003 	}
1004 
1005 	switch (cookie->type) {
1006 	case DAPL_COOKIE_TYPE_DTO:
1007 	{
1008 		DAPL_COOKIE_BUFFER	*buffer;
1009 
1010 		if (DAPL_DTO_TYPE_RECV == cookie->val.dto.type) {
1011 			if (srq_enabled) {
1012 				dapl_os_atomic_dec(&srq_ptr->recv_count);
1013 				buffer = &srq_ptr->recv_buffer;
1014 			} else {
1015 				dapl_os_atomic_dec(&ep_ptr->recv_count);
1016 				buffer = &ep_ptr->recv_buffer;
1017 			}
1018 		} else {
1019 			dapl_os_atomic_dec(&ep_ptr->req_count);
1020 			buffer = &ep_ptr->req_buffer;
1021 		}
1022 
1023 		event_ptr->event_number = DAT_DTO_COMPLETION_EVENT;
1024 		event_ptr->event_data.dto_completion_event_data.ep_handle =
1025 		    ep_ptr;
1026 		event_ptr->event_data.dto_completion_event_data.user_cookie =
1027 		    cookie->val.dto.cookie;
1028 
1029 		switch (ib_status) {
1030 		case IB_COMP_ST_SUCCESS:
1031 		{
1032 			ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
1033 
1034 			event_ptr->event_data.dto_completion_event_data.status =
1035 			    DAT_DTO_SUCCESS;
1036 			dapl_os_assert((ibtype == OP_SEND &&
1037 			    cookie->val.dto.type == DAPL_DTO_TYPE_SEND) ||
1038 			    (ibtype == OP_RECEIVE &&
1039 			    cookie->val.dto.type == DAPL_DTO_TYPE_RECV) ||
1040 			    (ibtype == OP_RDMA_WRITE &&
1041 			    cookie->val.dto.type ==
1042 			    DAPL_DTO_TYPE_RDMA_WRITE) ||
1043 			    (ibtype == OP_RDMA_READ &&
1044 			    cookie->val.dto.type ==
1045 			    DAPL_DTO_TYPE_RDMA_READ));
1046 			break;
1047 		}
1048 		case IB_COMP_ST_LOCAL_LEN_ERR:
1049 		{
1050 			event_ptr->event_data.dto_completion_event_data.status =
1051 			    DAT_DTO_ERR_LOCAL_LENGTH;
1052 			break;
1053 		}
1054 		case IB_COMP_ST_LOCAL_PROTECT_ERR:
1055 		{
1056 			event_ptr->event_data.dto_completion_event_data.status =
1057 			    DAT_DTO_ERR_LOCAL_PROTECTION;
1058 			break;
1059 		}
1060 		case IB_COMP_ST_WR_FLUSHED_ERR:
1061 		{
1062 			event_ptr->event_data.dto_completion_event_data.status =
1063 			    DAT_DTO_ERR_FLUSHED;
1064 			break;
1065 		}
1066 		case IB_COMP_ST_BAD_RESPONSE_ERR:
1067 		{
1068 			event_ptr->event_data.dto_completion_event_data.status =
1069 			    DAT_DTO_ERR_BAD_RESPONSE;
1070 			break;
1071 		}
1072 		case IB_COMP_ST_REM_REQ_ERR:
1073 		case IB_COMP_ST_REM_OP_ERR:
1074 		{
1075 			event_ptr->event_data.dto_completion_event_data.status =
1076 			    DAT_DTO_ERR_REMOTE_RESPONDER;
1077 			break;
1078 		}
1079 		case IB_COMP_ST_REM_ACC_ERR:
1080 		{
1081 			event_ptr->event_data.dto_completion_event_data.status =
1082 			    DAT_DTO_ERR_REMOTE_ACCESS;
1083 			break;
1084 		}
1085 		/*
1086 		 * Unsupported RD errors
1087 		 * case IB_COMP_ST_EE_STATE_ERR:
1088 		 * case IB_COMP_ST_EE_CTX_NO_ERR:
1089 		 */
1090 		case IB_COMP_ST_TRANSP_COUNTER:
1091 		{
1092 			event_ptr->event_data.dto_completion_event_data.status =
1093 			    DAT_DTO_ERR_TRANSPORT;
1094 			break;
1095 		}
1096 		case IB_COMP_ST_RNR_COUNTER:
1097 		{
1098 			event_ptr->event_data.dto_completion_event_data.status =
1099 			    DAT_DTO_ERR_RECEIVER_NOT_READY;
1100 			break;
1101 		}
1102 		case IB_COMP_ST_MW_BIND_ERR:
1103 		{
1104 			event_ptr->event_data.dto_completion_event_data.status =
1105 			    DAT_RMR_OPERATION_FAILED;
1106 			break;
1107 		}
1108 		case IB_COMP_ST_LOCAL_OP_ERR:
1109 		{
1110 			event_ptr->event_data.dto_completion_event_data.status =
1111 			    DAT_DTO_ERR_LOCAL_EP;
1112 			break;
1113 		}
1114 		default:
1115 		{
1116 			dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
1117 			    " DTO completion ERROR: %d: op %#x\n",
1118 			    DAPL_GET_CQE_STATUS(cqe_ptr),
1119 			    DAPL_GET_CQE_OPTYPE(cqe_ptr));
1120 			event_ptr->event_data.dto_completion_event_data.status =
1121 			    DAT_DTO_FAILURE;
1122 			break;
1123 		}
1124 		}
1125 
1126 		/* Most error DTO ops result in disconnecting the EP */
1127 		if ((event_ptr->event_data.dto_completion_event_data.status !=
1128 		    DAT_DTO_SUCCESS) &&
1129 		    (event_ptr->event_data.dto_completion_event_data.status !=
1130 		    DAT_RMR_OPERATION_FAILED)) {
1131 			dto_error = 1;
1132 			dapl_dbg_log(DAPL_DBG_TYPE_DTO_COMP_ERR,
1133 			    " DTO completion ERROR: %d: op %#x\n",
1134 			    DAPL_GET_CQE_STATUS(cqe_ptr),
1135 			    DAPL_GET_CQE_OPTYPE(cqe_ptr));
1136 		}
1137 
1138 		if (cookie->val.dto.type == DAPL_DTO_TYPE_SEND ||
1139 		    cookie->val.dto.type == DAPL_DTO_TYPE_RDMA_WRITE) {
1140 			/* Get size from DTO; CQE value may be off.  */
1141 			event_ptr->event_data.dto_completion_event_data.
1142 			    transfered_length = cookie->val.dto.size;
1143 		} else {
1144 			event_ptr->event_data.dto_completion_event_data.
1145 			    transfered_length = DAPL_GET_CQE_BYTESNUM(cqe_ptr);
1146 		}
1147 
1148 		dapls_cookie_dealloc(buffer, cookie);
1149 		break;
1150 	}
1151 
1152 	case DAPL_COOKIE_TYPE_RMR:
1153 	{
1154 		dapl_os_atomic_dec(&ep_ptr->req_count);
1155 
1156 		event_ptr->event_number = DAT_RMR_BIND_COMPLETION_EVENT;
1157 
1158 		event_ptr->event_data.rmr_completion_event_data.rmr_handle =
1159 		    cookie->val.rmr.rmr;
1160 		event_ptr->event_data.rmr_completion_event_data.user_cookie =
1161 		    cookie->val.rmr.cookie;
1162 		if (ib_status == IB_COMP_ST_SUCCESS) {
1163 			ibtype = DAPL_GET_CQE_OPTYPE(cqe_ptr);
1164 
1165 			event_ptr->event_data.rmr_completion_event_data.status =
1166 			    DAT_RMR_BIND_SUCCESS;
1167 			dapl_os_assert(ibtype == OP_BIND_MW);
1168 		} else {
1169 			event_ptr->event_data.rmr_completion_event_data.status =
1170 			    DAT_RMR_BIND_FAILURE;
1171 			dto_error = 1;
1172 		}
1173 
1174 		dapls_cookie_dealloc(&ep_ptr->req_buffer, cookie);
1175 		break;
1176 	}
1177 	default:
1178 	{
1179 		dapl_os_assert(!"Invalid Operation type");
1180 		break;
1181 	}
1182 	}
1183 
1184 	/*
1185 	 * A DTO failure will cause the connection to be broken
1186 	 */
1187 	if ((dto_error) && (ep_ptr->param.ep_state == DAT_EP_STATE_CONNECTED)) {
1188 		ep_ptr->param.ep_state = DAT_EP_STATE_DISCONNECTED;
1189 		/*
1190 		 * Disconnect at the IB level.
1191 		 */
1192 		dapls_ib_disconnect_clean(ep_ptr, DAT_TRUE, IB_CME_CONNECTED);
1193 	}
1194 	/* convert premature recv to error flush on disconnect */
1195 	if (process_premature_events && (ep_ptr->param.ep_state ==
1196 	    DAT_EP_STATE_DISCONNECTED) && (ib_status == IB_COMP_ST_SUCCESS)) {
1197 		dapl_os_assert(ibtype == OP_RECEIVE &&
1198 		    cookie->val.dto.type == DAPL_DTO_TYPE_RECV);
1199 		event_ptr->event_data.dto_completion_event_data.status =
1200 		    DAT_DTO_ERR_FLUSHED;
1201 	}
1202 	return (DAT_TRUE);
1203 }
1204 
1205 /*
1206  * dapls_evd_copy_cq
1207  *
1208  * Copy all entries on a CQ associated with the EVD onto that EVD
1209  * Up to caller to handle races, if any.  Note that no EVD waiters will
1210  * be awoken by this copy.
1211  *
1212  * Input:
1213  *	evd_ptr
1214  *
1215  * Output:
1216  *	nevents
1217  *
1218  * Returns:
1219  *	none
1220  *
1221  */
1222 void
1223 dapls_evd_copy_cq(
1224 	DAPL_EVD	*evd_ptr,
1225 	int		*nevents)
1226 {
1227 	ib_work_completion_t	cqe[MAX_CQES_PER_POLL];
1228 	DAT_RETURN		dat_status;
1229 	ib_cq_handle_t		cq_handle;
1230 	DAT_EVENT		*event;
1231 	uint_t			num_cqes_polled = 0;
1232 	int			cqe_events;
1233 	int			i;
1234 
1235 	cq_handle = evd_ptr->ib_cq_handle;
1236 
1237 	*nevents = 0;
1238 
1239 	if (cq_handle == IB_INVALID_HANDLE) {
1240 		/* Nothing to do if no CQ.  */
1241 		return;
1242 	}
1243 	dat_status = DAPL_POLL(evd_ptr)(cq_handle,
1244 	    cqe, MAX_CQES_PER_POLL, &num_cqes_polled);
1245 
1246 	if (dat_status == DAT_SUCCESS) {
1247 		dapl_dbg_log(DAPL_DBG_TYPE_EVD, "dapls_evd_copy_cq: %u\n",
1248 		    num_cqes_polled);
1249 		cqe_events = 0;
1250 		for (i = 0; i < num_cqes_polled; i++) {
1251 #ifdef	DAPL_DBG	/* For debugging.  */
1252 			dapli_evd_eh_print_cqe(cqe[i]);
1253 #endif
1254 
1255 			/*
1256 			 * Can use DAT_DTO_COMPLETION_EVENT because
1257 			 * dapli_evd_cqe_to_event will overwrite.
1258 			 */
1259 
1260 			event = dapli_evd_get_and_init_event(
1261 			    evd_ptr, DAT_DTO_COMPLETION_EVENT);
1262 			if (event == NULL) {
1263 			/*
1264 			 * We've already attempted the overflow post; return.
1265 			 */
1266 				return;
1267 			}
1268 			if (dapli_evd_cqe_to_event(evd_ptr, &cqe[i], DAT_FALSE,
1269 			    event)) {
1270 				dapli_evd_post_event_nosignal(evd_ptr, event);
1271 				cqe_events++;
1272 			} else {
1273 				dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1274 				    "dapls_evd_copy_cq: premature event\n");
1275 				/*
1276 				 * We've deferred processing the CQE, so add
1277 				 * the event_ptr back to free queue
1278 				 */
1279 				dat_status = dapls_rbuf_add(&evd_ptr->
1280 				    free_event_queue, (void *)event);
1281 				dapl_os_assert(dat_status == DAT_SUCCESS);
1282 				if (evd_ptr->evd_producer_locking_needed) {
1283 					dapl_os_unlock(&evd_ptr->header.lock);
1284 				}
1285 			}
1286 		}
1287 		*nevents = cqe_events;
1288 	} else if (DAT_GET_TYPE(dat_status) != DAT_QUEUE_EMPTY) {
1289 		dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1290 		    "dapls_evd_copy_cq: dapls_ib_completion_poll "
1291 		    "returned 0x%x\n", dat_status);
1292 		dapl_os_assert(!"Bad return from dapls_ib_completion_poll");
1293 	}
1294 }
1295 
1296 /*
1297  * dapls_evd_copy_events
1298  *
1299  * Copy all events associated with the EVD onto that EVD
1300  *
1301  * Input:
1302  *	evd_ptr
1303  *	timeout
1304  *
1305  * Output:
1306  *	return status
1307  *
1308  * Returns:
1309  *	none
1310  *
1311  */
1312 DAT_RETURN
1313 dapls_evd_copy_events(DAPL_EVD *evd_ptr, DAT_TIMEOUT timeout)
1314 {
1315 	dapl_ib_event_t	evp_arr[NUM_EVENTS_PER_POLL];
1316 	dapl_ib_event_t	*evpp_start;
1317 	dapl_ib_event_t	*evpp;
1318 	DAPL_IA		*ia_ptr;
1319 	DAT_RETURN	dat_status;
1320 	int		waited;
1321 	uint64_t	curr_time;
1322 	uint64_t	final_time;
1323 	uint64_t	time_left;
1324 	int		events_needed = 0;
1325 	int		nevents = 0;
1326 	int		num_cqe = 0;
1327 	int		num_ke = 0; /* kernel events - CM or ASYNC events */
1328 	int		i;
1329 
1330 	/* rbuf count is zero on entry */
1331 
1332 	if (evd_ptr->evd_flags & (DAT_EVD_CONNECTION_FLAG |
1333 	    DAT_EVD_CR_FLAG | DAT_EVD_ASYNC_FLAG)) {
1334 		if (evd_ptr->threshold <= NUM_EVENTS_PER_POLL) {
1335 			evpp = evp_arr;
1336 		} else {
1337 			/* need to allocate on the heap */
1338 			evpp = (dapl_ib_event_t *)dapl_os_alloc(
1339 			    evd_ptr->threshold * sizeof (dapl_ib_event_t));
1340 			if (evpp == NULL) {
1341 				return (DAT_INSUFFICIENT_RESOURCES);
1342 			}
1343 		}
1344 		evpp_start = evpp;
1345 		/* for evd_dequeue, check for ke before returning Q_EMPTY */
1346 		if (evd_ptr->threshold == 0 && timeout == 0)
1347 			evd_ptr->threshold = 1;
1348 	} else {
1349 		evpp = NULL;
1350 		evpp_start = NULL;
1351 	}
1352 	ia_ptr = evd_ptr->header.owner_ia;
1353 	waited = 0;
1354 	dat_status = DAT_SUCCESS;
1355 
1356 	/* calculate various time wait elements */
1357 	if (timeout == 0) {
1358 		final_time = 0;
1359 		time_left = 0;
1360 	} else if (timeout == DAT_TIMEOUT_INFINITE) {
1361 		/*
1362 		 * The real value of DAT_TIMEOUT_INFINITE is fairly small
1363 		 * ~71 mins, to prevent premature timeouts map it to
1364 		 * 1 year.  NOTE: 64-bit integers are needed here
1365 		 * because 32 bits is not enough.  Other types,
1366 		 * such as clock_t are not 64-bit, so are not
1367 		 * sufficient for this.  Similarly, hrtime_t is
1368 		 * defined as a "nanosecond counter", which does not
1369 		 * match our need for time in microseconds, so we
1370 		 * just use the more general uint64_t here.
1371 		 */
1372 #define	DAPL_ONE_YEAR_IN_USEC	((365 * 24 * 3600) * 1000000LL)
1373 		curr_time = gethrtime();
1374 		time_left = DAPL_ONE_YEAR_IN_USEC;
1375 		final_time = curr_time + DAPL_ONE_YEAR_IN_USEC * 1000;
1376 	} else {
1377 		/*
1378 		 * maximum time by which the routine needs to return
1379 		 * DAT_TIMEOUT_INFINITE is defined as ~0 but its of type int
1380 		 * so mask the MSB to avoid overflow
1381 		 */
1382 		curr_time = gethrtime();
1383 		final_time = curr_time + (uint64_t)(timeout&0x7fffffff)*1000;
1384 		time_left = (final_time - curr_time)/1000;
1385 	}
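
	/*
	 * Note on units: gethrtime() and final_time are in nanoseconds,
	 * while time_left is kept in microseconds for dapls_ib_event_poll(),
	 * hence the divisions by 1000 above and below.
	 */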
1386 
1387 	do {
1388 		/*
1389 		 * If this evd has a CQ event stream check the CQs first
1390 		 */
1391 		if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
1392 		    DAT_EVD_RMR_BIND_FLAG)) {
1393 			/*
1394 			 * Poll CQ for events, update the total number of CQEs
1395 			 * so far
1396 			 */
1397 			nevents = 0;
1398 			dapls_evd_copy_cq(evd_ptr, &nevents);
1399 			num_cqe += nevents;
1400 			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1401 			    "dapls_evd_copy_event: copy_cq num_cqe(%d)\n",
1402 			    num_cqe);
1403 		}
1404 
1405 		/*
1406 		 * We use the dapls_rbuf_count since it includes
1407 		 *  - CQ events pulled by dapls_evd_copy_cq
1408 		 *  - events added by dat_evd_post_se()
1409 		 */
1410 		events_needed = evd_ptr->threshold - num_ke -
1411 		    dapls_rbuf_count(&evd_ptr->pending_event_queue);
1412 
1413 		/*
1414 		 * check for pending events
1415 		 * note: threshold=0 implies dapl_evd_dequeue
1416 		 */
1417 		if (events_needed < 0) {
1418 			/* There are more than sufficient events */
1419 			break;
1420 		} else if (events_needed == 0) {
1421 			/* report queue empty on dat_evd_dequeue */
1422 			/* non CQ events are expected to be polled */
1423 			/* by dat_evd_wait */
1424 			if (evd_ptr->threshold == 0)
1425 				dat_status =  DAT_ERROR(DAT_QUEUE_EMPTY, 0);
1426 			/*
1427 			 * when threshold > 0, we have sufficient events
1428 			 */
1429 			break;
1430 		} else {
1431 			/*
1432 			 * reaching here implies dat_evd_wait; return on
1433 			 * any dto completion since a threshold > 1 is
1434 			 * taken as a hint only
1435 			 */
1436 			if (num_cqe)
1437 				break;
1438 		}
1439 
1440 		/* check we've already waited */
1441 		if (waited > 0) {
1442 			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1443 			    "dapls_evd_copy_event: waited[%d]\n", waited);
1444 			if (dat_status != DAT_SUCCESS)
1445 				break;
1446 			curr_time = gethrtime();
1447 			/* exit on time expired */
1448 			if (curr_time >= final_time)
1449 				break;
1450 			time_left = (final_time - curr_time)/1000;
1451 		}
1452 
1453 		/* check for DTO type evd's */
1454 		if (evd_ptr->evd_flags & (DAT_EVD_DTO_FLAG |
1455 		    DAT_EVD_RMR_BIND_FLAG)) {
1456 			if (events_needed == 1) {
1457 				/*
1458 				 * Need only one event so enable cq
1459 				 * notification
1460 				 */
1461 				/*
1462 				 * XXX: Things need to be modified here to
1463 				 * implement the NOTIFICATION suppression
1464 				 * correctly - relies on THRESHOLD flag
1465 				 * and UNSIGNALLED flag to be stored
1466 				 * in the evd.
1467 				 */
1468 				dat_status = dapls_set_cq_notify(ia_ptr,
1469 				    evd_ptr);
1470 				if (dat_status != DAT_SUCCESS) {
1471 					dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1472 					    "dapls_evd_copy_event:"
1473 					    " set_cq_notify(%d)\n", dat_status);
1474 					return (dat_status);
1475 				}
1476 			} else if (events_needed > 1) {
1477 				/*
1478 				 * We need multiple events so lets enable CQ for
1479 				 * notification on N events.
1480 				 * dat_status = dapls_set_cqN_notify(ia_ptr,
1481 				 * evd_ptr, (uint32_t)events_needed);
1482 				 */
1483 				dat_status = dapls_set_cq_notify(ia_ptr,
1484 				    evd_ptr);
1485 				if (dat_status != DAT_SUCCESS) {
1486 					dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1487 					    "dapls_evd_copy_event:"
1488 					    " set_cqN_notify:%d\n", dat_status);
1489 					return (dat_status);
1490 				}
1491 			}
1492 
1493 			/*
1494 			 * Per the Tavor PRM, if completions occur after
1495 			 * polling the CQ and before arming it, the CQ
1496 			 * handler fires immediately upon arming. Hence the
1497 			 * PRM recommends that a re-poll of the CQ be skipped
1498 			 * as an optimization.
1499 			 */
1500 		}
1501 
1502 		nevents = 0;
1503 
1504 		/*
1505 		 * non-NULL evpp_start denotes either
1506 		 * DAT_EVD_CONNECTION_FLAG, DAT_EVD_CR_FLAG, DAT_EVD_ASYNC_FLAG
1507 		 * is set and thus needs to check events from kernel
1508 		 */
1509 		if (evpp_start) {
1510 			/*
1511 			 * Even if dat_status is not DAT_SUCCESS, nevents
1512 			 * could be non-zero.
1513 			 */
1514 			dat_status = dapls_ib_event_poll(evd_ptr, time_left,
1515 			    (evd_ptr->threshold - (num_cqe + num_ke)), evpp,
1516 			    &nevents);
1517 			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1518 			    "dapls_evd_copy_event: poll returned 0x%x(%d)\n",
1519 			    dat_status, nevents);
1520 
1521 			num_ke += nevents;
1522 			evpp += nevents;
1523 		} else {
1524 			/* perform a timewait */
1525 			dat_status = dapls_ib_event_poll(evd_ptr, time_left,
1526 			    0, NULL, &nevents);
1527 			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1528 			    "dapls_evd_copy_event: poll(cq_notification) "
1529 			    "returned 0x%x\n", dat_status);
1530 			if (DAT_GET_TYPE(dat_status) == DAT_INTERRUPTED_CALL)
1531 				return (dat_status);
1532 		}
1533 
1534 		waited++;
1535 	} while (dapls_rbuf_count(&evd_ptr->pending_event_queue) + num_ke <
1536 	    evd_ptr->threshold);
1537 
1538 	/* process the cm events now */
1539 	for (i = 0; i < num_ke; i++) {
1540 		switch (evpp_start[i].ibe_ev_family) {
1541 		case DAPL_CR_EVENTS: /* PASSIVE side events */
1542 		case DAPL_PASSIVE_CONNECTION_EVENTS:
1543 			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1544 			    "dapls_evd_copy_event: Passive side Event %d\n",
1545 			    evpp_start[i].ibe_ce.ibce_event);
1546 			dapls_cr_callback((ib_cm_handle_t)
1547 			    evpp_start[i].ibe_ce.ibce_psep_cookie,
1548 			    evpp_start[i].ibe_ce.ibce_event,
1549 			    evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
1550 			    (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
1551 			break;
1552 		case DAPL_ACTIVE_CONNECTION_EVENTS: /* ACTIVE side events */
1553 			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1554 			    "dapls_evd_copy_event: Active Conn Event %d\n",
1555 			    evpp_start[i].ibe_ce.ibce_event);
1556 			dapl_evd_connection_callback((ib_cm_handle_t)
1557 			    IB_INVALID_HANDLE,
1558 			    evpp_start[i].ibe_ce.ibce_event,
1559 			    evpp_start[i].ibe_ce.ibce_priv_data_ptr, (void *)
1560 			    (uintptr_t)evpp_start[i].ibe_ce.ibce_cookie);
1561 			break;
1562 		case DAPL_ASYNC_EVENTS:
1563 			dapl_dbg_log(DAPL_DBG_TYPE_EVD,
1564 			    "dapls_evd_copy_event: Async Event %d\n",
1565 			    evpp_start[i].ibe_async.ibae_type);
1566 			dapls_ib_async_callback(evd_ptr,
1567 			    ia_ptr->hca_ptr->ib_hca_handle,
1568 			    &(evpp_start[i].ibe_async), ia_ptr);
1569 			break;
1570 		default:
1571 			dapl_dbg_log(DAPL_DBG_TYPE_ERR,
1572 			    "dapls_evd_copy_event: dapls_ib_event_poll %d "
1573 			    "returned 0x%x\n", i, evpp_start[i].ibe_ev_family);
1574 			dapl_os_assert(!"Bad return from dapls_ib_event_poll");
1575 			break;
1576 		}
1577 	}
1578 
1579 	return (dat_status);
1580 }
1581 
1582 /*
1583  * dapls_evd_cq_poll_to_event
1584  *
1585  * Attempt to dequeue a single CQE from a CQ and turn it into
1586  * an event.
1587  *
1588  * Input:
1589  *	evd_ptr
1590  *
1591  * Output:
1592  *	event
1593  *
1594  * Returns:
1595  *	Status of operation
1596  *
1597  */
1598 DAT_RETURN
1599 dapls_evd_cq_poll_to_event(IN DAPL_EVD *evd_ptr, OUT DAT_EVENT *event)
1600 {
1601 	DAT_RETURN		dat_status;
1602 	ib_work_completion_t	cur_cqe;
1603 
1604 	/* skip one layer of do-nothing function */
1605 	dat_status = DAPL_POLL1(evd_ptr)(evd_ptr->ib_cq_handle, &cur_cqe);
1606 
1607 	if (dat_status == DAT_SUCCESS) {
1608 #ifdef	DAPL_DBG	/* For debugging.  */
1609 		dapli_evd_eh_print_cqe(cur_cqe);
1610 #endif
1611 		(void) dapli_evd_cqe_to_event(evd_ptr, &cur_cqe, DAT_FALSE,
1612 		    event);
1613 	}
1614 
1615 	return (dat_status);
1616 }
1617 
1618 /*
1619  * Local variables:
1620  *  c-indent-level: 4
1621  *  c-basic-offset: 4
1622  *  tab-width: 8
1623  * End:
1624  */
1625