xref: /illumos-gate/usr/src/uts/common/io/idm/idm.c (revision 4142b486)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/cpuvar.h>
27 #include <sys/conf.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 
33 #include <sys/socket.h>
34 #include <sys/strsubr.h>
35 #include <sys/sysmacros.h>
36 
37 #include <sys/socketvar.h>
38 #include <netinet/in.h>
39 
40 #include <sys/idm/idm.h>
41 #include <sys/idm/idm_so.h>
42 
43 #define	IDM_NAME_VERSION	"iSCSI Data Mover"
44 
/*
 * Linkage for a miscellaneous (service) module.  This declaration was
 * previously duplicated; a single extern is sufficient.
 */
extern struct mod_ops mod_miscops;
47 
/* Loadable module linkage record: IDM is a "misc" (service) module */
static struct modlmisc modlmisc = {
	&mod_miscops,	/* Type of module */
	IDM_NAME_VERSION
};
52 
/* Module linkage list passed to mod_install()/mod_remove()/mod_info() */
static struct modlinkage modlinkage = {
	MODREV_1, (void *)&modlmisc, NULL
};
56 
/* Helpers defined elsewhere in the IDM module */
extern int idm_task_compare(const void *t1, const void *t2);
extern void idm_wd_thread(void *arg);

/* Module-private init/fini and buffer/task helpers defined below */
static int _idm_init(void);
static int _idm_fini(void);
static void idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf);
static void idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt,
    idm_abort_type_t abort_type);
static void idm_task_aborted(idm_task_t *idt, idm_status_t status);
static idm_pdu_t *idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen,
    int sleepflag);

/* Debug logging switches for connection and service code */
boolean_t idm_conn_logging = 0;
boolean_t idm_svc_logging = 0;
/* Buffer pattern fill/verify; enabled by default only on DEBUG builds */
#ifdef DEBUG
boolean_t idm_pattern_checking = 1;
#else
boolean_t idm_pattern_checking = 0;
#endif

/*
 * Potential tuneable for the maximum number of tasks.  Default to
 * IDM_TASKIDS_MAX
 */

uint32_t	idm_max_taskids = IDM_TASKIDS_MAX;
86 
87 /*
88  * Global list of transport handles
89  *   These are listed in preferential order, so we can simply take the
90  *   first "it_conn_is_capable" hit. Note also that the order maps to
91  *   the order of the idm_transport_type_t list.
92  */
idm_transport_t idm_transport_list[] = {

	/* iSER on InfiniBand transport handle */
	{IDM_TRANSPORT_TYPE_ISER,	/* type */
	"/devices/ib/iser@0:iser",	/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops (filled at register) */
	NULL},				/* transport caps (filled at register) */

	/* IDM native sockets transport handle */
	{IDM_TRANSPORT_TYPE_SOCKETS,	/* type */
	NULL,				/* device path */
	NULL,				/* LDI handle */
	NULL,				/* transport ops */
	NULL}				/* transport caps */

};
110 
111 int
112 _init(void)
113 {
114 	int rc;
115 
116 	if ((rc = _idm_init()) != 0) {
117 		return (rc);
118 	}
119 
120 	return (mod_install(&modlinkage));
121 }
122 
/*
 * _fini()
 *
 * Loadable module teardown.  _idm_fini() presumably fails (non-zero)
 * while IDM is still in use, leaving the module loaded.
 *
 * NOTE(review): if mod_remove() fails after _idm_fini() has already
 * succeeded, IDM global state has been torn down while the module
 * remains loaded -- confirm that ordering is safe.
 */
int
_fini(void)
{
	int rc;

	if ((rc = _idm_fini()) != 0) {
		return (rc);
	}

	if ((rc = mod_remove(&modlinkage)) != 0) {
		return (rc);
	}

	return (rc);
}
138 
/*
 * _info()
 *
 * Report module information (name/version) via the module linkage.
 */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
144 
145 /*
146  * idm_transport_register()
147  *
148  * Provides a mechanism for an IDM transport driver to register its
149  * transport ops and caps with the IDM kernel module. Invoked during
150  * a transport driver's attach routine.
151  */
152 idm_status_t
153 idm_transport_register(idm_transport_attr_t *attr)
154 {
155 	ASSERT(attr->it_ops != NULL);
156 	ASSERT(attr->it_caps != NULL);
157 
158 	switch (attr->type) {
159 	/* All known non-native transports here; for now, iSER */
160 	case IDM_TRANSPORT_TYPE_ISER:
161 		idm_transport_list[attr->type].it_ops	= attr->it_ops;
162 		idm_transport_list[attr->type].it_caps	= attr->it_caps;
163 		return (IDM_STATUS_SUCCESS);
164 
165 	default:
166 		cmn_err(CE_NOTE, "idm: unknown transport type (0x%x) in "
167 		    "idm_transport_register", attr->type);
168 		return (IDM_STATUS_SUCCESS);
169 	}
170 }
171 
/*
 * idm_ini_conn_create
 *
 * This function is invoked by the iSCSI layer to create a connection context.
 * This does not actually establish the socket connection.
 *
 * cr - Connection request parameters
 * new_con - Output parameter that contains the new request if successful
 *
 * Returns IDM_STATUS_SUCCESS with *new_con set, or IDM_STATUS_FAIL if
 * no transport could create the connection.
 */
idm_status_t
idm_ini_conn_create(idm_conn_req_t *cr, idm_conn_t **new_con)
{
	idm_transport_t		*it;
	idm_conn_t		*ic;
	int			rc;

	/* Pick the preferred transport for this connection request */
	it = idm_transport_lookup(cr);

retry:
	ic = idm_conn_create_common(CONN_TYPE_INI, it->it_type,
	    &cr->icr_conn_ops);

	bcopy(&cr->cr_ini_dst_addr, &ic->ic_ini_dst_addr,
	    sizeof (cr->cr_ini_dst_addr));

	/* create the transport-specific connection components */
	rc = it->it_ops->it_ini_conn_create(cr, ic);
	if (rc != IDM_STATUS_SUCCESS) {
		/* cleanup the failed connection */
		idm_conn_destroy_common(ic);

		/*
		 * It is possible for an IB client to connect to
		 * an ethernet-only client via an IB-eth gateway.
		 * Therefore, if we are attempting to use iSER and
		 * fail, retry with sockets before ultimately
		 * failing the connection.
		 */
		if (it->it_type == IDM_TRANSPORT_TYPE_ISER) {
			it = &idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS];
			goto retry;
		}

		return (IDM_STATUS_FAIL);
	}

	*new_con = ic;

	/* Track the new connection on the global initiator list */
	mutex_enter(&idm.idm_global_mutex);
	list_insert_tail(&idm.idm_ini_conn_list, ic);
	mutex_exit(&idm.idm_global_mutex);

	return (IDM_STATUS_SUCCESS);
}
227 
/*
 * idm_ini_conn_destroy
 *
 * Releases any resources associated with the connection.  This is the
 * complement to idm_ini_conn_create.
 * ic - idm_conn_t structure representing the relevant connection
 *
 * idm_ini_conn_destroy_task below is the taskq-dispatched portion of
 * that teardown; see idm_ini_conn_destroy for why it is deferred.
 */
void
idm_ini_conn_destroy_task(void *ic_void)
{
	idm_conn_t *ic = ic_void;

	/* Transport-specific teardown first, then the common conn state */
	ic->ic_transport_ops->it_ini_conn_destroy(ic);
	idm_conn_destroy_common(ic);
}
244 
void
idm_ini_conn_destroy(idm_conn_t *ic)
{
	/*
	 * It's reasonable for the initiator to call idm_ini_conn_destroy
	 * from within the context of the CN_CONNECT_DESTROY notification.
	 * That's a problem since we want to destroy the taskq for the
	 * state machine associated with the connection.  Remove the
	 * connection from the list right away then handle the remaining
	 * work via the idm_global_taskq.
	 */
	mutex_enter(&idm.idm_global_mutex);
	list_remove(&idm.idm_ini_conn_list, ic);
	mutex_exit(&idm.idm_global_mutex);

	/*
	 * TQ_SLEEP dispatch is not expected to fail; warn if it somehow
	 * does (the deferred teardown is then lost).
	 */
	if (taskq_dispatch(idm.idm_global_taskq,
	    &idm_ini_conn_destroy_task, ic, TQ_SLEEP) == NULL) {
		cmn_err(CE_WARN,
		    "idm_ini_conn_destroy: Couldn't dispatch task");
	}
}
266 
/*
 * idm_ini_conn_connect
 *
 * Establish connection to the remote system identified in idm_conn_t.
 * The connection parameters including the remote IP address were established
 * in the call to idm_ini_conn_create.  The IDM state machine will
 * perform client notifications as necessary to prompt the initiator through
 * the login process.  IDM also keeps a timer running so that if the login
 * process doesn't complete in a timely manner it will fail.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 * Returns success if the connection was established, otherwise some kind
 * of meaningful error code.
 *
 * Upon return the login has either failed or is ready to proceed (ffp)
 */
idm_status_t
idm_ini_conn_connect(idm_conn_t *ic)
{
	idm_status_t	rc = IDM_STATUS_SUCCESS;

	rc = idm_conn_sm_init(ic);
	if (rc != IDM_STATUS_SUCCESS) {
		return (ic->ic_conn_sm_status);
	}

	/* Hold connection until we return */
	idm_conn_hold(ic);

	/* Kick state machine */
	idm_conn_event(ic, CE_CONNECT_REQ, NULL);

	/* Wait for login flag */
	mutex_enter(&ic->ic_state_mutex);
	while (!(ic->ic_state_flags & CF_LOGIN_READY) &&
	    !(ic->ic_state_flags & CF_ERROR)) {
		cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}
	mutex_exit(&ic->ic_state_mutex);

	/*
	 * NOTE(review): ic_state_flags is re-read below without the state
	 * mutex; presumably safe because these flags stay set once raised
	 * -- confirm against the connection state machine.
	 */
	if (ic->ic_state_flags & CF_ERROR) {
		/* ic->ic_conn_sm_status contains the failure status */
		idm_conn_rele(ic);
		return (ic->ic_conn_sm_status);
	}

	/* Ready to login */
	ASSERT(ic->ic_state_flags & CF_LOGIN_READY);
	(void) idm_notify_client(ic, CN_READY_FOR_LOGIN, NULL);

	idm_conn_rele(ic);

	return (rc);
}
322 
/*
 * idm_ini_conn_disconnect
 *
 * Forces a connection (previously established using idm_ini_conn_connect)
 * to perform a controlled shutdown, cleaning up any outstanding requests.
 *
 * ic - idm_conn_t structure representing the relevant connection
 *
 * This is asynchronous and will return before the connection is properly
 * shutdown
 */
/* ARGSUSED */
void
idm_ini_conn_disconnect(idm_conn_t *ic)
{
	/* The state machine performs the actual teardown asynchronously */
	idm_conn_event(ic, CE_TRANSPORT_FAIL, NULL);
}
340 
/*
 * idm_ini_conn_disconnect_sync
 *
 * Forces a connection (previously established using idm_ini_conn_connect)
 * to perform a controlled shutdown.  Blocks until the connection is
 * disconnected, i.e. reaches CS_S9_INIT_ERROR or CS_S11_COMPLETE.
 *
 * ic - idm_conn_t structure representing the relevant connection
 */
/* ARGSUSED */
void
idm_ini_conn_disconnect_sync(idm_conn_t *ic)
{
	mutex_enter(&ic->ic_state_mutex);
	if ((ic->ic_state != CS_S9_INIT_ERROR) &&
	    (ic->ic_state != CS_S11_COMPLETE)) {
		/* Queue the failure event, then wait for a terminal state */
		idm_conn_event_locked(ic, CE_TRANSPORT_FAIL, NULL, CT_NONE);
		while ((ic->ic_state != CS_S9_INIT_ERROR) &&
		    (ic->ic_state != CS_S11_COMPLETE))
			cv_wait(&ic->ic_state_cv, &ic->ic_state_mutex);
	}
	mutex_exit(&ic->ic_state_mutex);
}
364 
/*
 * idm_tgt_svc_create
 *
 * The target calls this service to obtain a service context for each available
 * transport, starting a service of each type related to the IP address and port
 * passed. The idm_svc_req_t contains the service parameters.
 *
 * Returns IDM_STATUS_SUCCESS with *new_svc set, or the transport's
 * failure status with *new_svc left NULL.
 */
idm_status_t
idm_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t **new_svc)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;
	idm_svc_t		*is;
	int			rc;

	*new_svc = NULL;
	is = kmem_zalloc(sizeof (idm_svc_t), KM_SLEEP);

	/* Initialize transport-agnostic components of the service handle */
	is->is_svc_req = *sr;
	mutex_init(&is->is_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&is->is_cv, NULL, CV_DEFAULT, NULL);
	mutex_init(&is->is_count_mutex, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&is->is_count_cv, NULL, CV_DEFAULT, NULL);
	idm_refcnt_init(&is->is_refcnt, is);

	/*
	 * Make sure all available transports are setup.  We call this now
	 * instead of at initialization time in case IB has become available
	 * since we started (hotplug, etc).
	 */
	idm_transport_setup(sr->sr_li);

	/*
	 * Loop through the transports, configuring the transport-specific
	 * components of each one.
	 */
	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {

		it = &idm_transport_list[type];
		/*
		 * If it_ops is NULL then the transport is unconfigured
		 * and we shouldn't try to start the service.
		 */
		if (it->it_ops == NULL) {
			continue;
		}

		rc = it->it_ops->it_tgt_svc_create(sr, is);
		if (rc != IDM_STATUS_SUCCESS) {
			/*
			 * Teardown any configured services; type-- walks
			 * back over only the transports created earlier
			 * in this loop.
			 */
			while (type--) {
				it = &idm_transport_list[type];
				if (it->it_ops == NULL) {
					continue;
				}
				it->it_ops->it_tgt_svc_destroy(is);
			}
			/*
			 * Free the svc context and return.
			 * NOTE(review): this path frees "is" without the
			 * idm_refcnt_destroy/cv_destroy/mutex_destroy calls
			 * done in idm_tgt_svc_destroy -- confirm whether
			 * those primitives need explicit teardown here.
			 */
			kmem_free(is, sizeof (idm_svc_t));
			return (rc);
		}
	}

	*new_svc = is;

	/* Publish the new service on the global target service list */
	mutex_enter(&idm.idm_global_mutex);
	list_insert_tail(&idm.idm_tgt_svc_list, is);
	mutex_exit(&idm.idm_global_mutex);

	return (IDM_STATUS_SUCCESS);
}
437 
/*
 * idm_tgt_svc_destroy
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Cleanup any resources associated with the idm_svc_t.
 */
void
idm_tgt_svc_destroy(idm_svc_t *is)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;

	mutex_enter(&idm.idm_global_mutex);
	/* remove this service from the global list */
	list_remove(&idm.idm_tgt_svc_list, is);
	/* wakeup any waiters for service change (see idm_tgt_svc_lookup) */
	cv_broadcast(&idm.idm_tgt_svc_cv);
	mutex_exit(&idm.idm_global_mutex);

	/* teardown each transport-specific service */
	for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
		it = &idm_transport_list[type];
		if (it->it_ops == NULL) {
			/* transport was never configured; nothing to do */
			continue;
		}

		it->it_ops->it_tgt_svc_destroy(is);
	}

	/* tear down the svc resources */
	idm_refcnt_destroy(&is->is_refcnt);
	cv_destroy(&is->is_count_cv);
	mutex_destroy(&is->is_count_mutex);
	cv_destroy(&is->is_cv);
	mutex_destroy(&is->is_mutex);

	/* free the svc handle */
	kmem_free(is, sizeof (idm_svc_t));
}
478 
/* Take a reference on a target service handle */
void
idm_tgt_svc_hold(idm_svc_t *is)
{
	idm_refcnt_hold(&is->is_refcnt);
}
484 
/*
 * Drop a reference on a target service handle; when the refcount
 * reaches zero the refcnt framework calls idm_tgt_svc_destroy on it.
 */
void
idm_tgt_svc_rele_and_destroy(idm_svc_t *is)
{
	idm_refcnt_rele_and_destroy(&is->is_refcnt,
	    (idm_refcnt_cb_t *)&idm_tgt_svc_destroy);
}
491 
/*
 * idm_tgt_svc_online
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Online each transport service, as we want this target to be accessible
 * via any configured transport.
 *
 * When the initiator establishes a new connection to the target, IDM will
 * call the "new connect" callback defined in the idm_svc_req_t structure
 * and it will pass an idm_conn_t structure representing that new connection.
 *
 * Returns IDM_STATUS_SUCCESS, or the first transport online failure
 * (in which case any transports already onlined are offlined again).
 */
idm_status_t
idm_tgt_svc_online(idm_svc_t *is)
{

	idm_transport_type_t	type, last_type;
	idm_transport_t		*it;
	int			rc = IDM_STATUS_SUCCESS;

	mutex_enter(&is->is_mutex);
	if (is->is_online == 0) {
		/* Walk through each of the transports and online them */
		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
			it = &idm_transport_list[type];
			if (it->it_ops == NULL) {
				/* transport is not registered */
				continue;
			}

			/*
			 * Drop is_mutex across the transport call; the
			 * transport online may block.
			 */
			mutex_exit(&is->is_mutex);
			rc = it->it_ops->it_tgt_svc_online(is);
			mutex_enter(&is->is_mutex);
			if (rc != IDM_STATUS_SUCCESS) {
				/* last_type is only read on this path */
				last_type = type;
				break;
			}
		}
		if (rc != IDM_STATUS_SUCCESS) {
			/*
			 * The last transport failed to online.
			 * Offline any transport onlined above and
			 * do not online the target.
			 */
			for (type = 0; type < last_type; type++) {
				it = &idm_transport_list[type];
				if (it->it_ops == NULL) {
					/* transport is not registered */
					continue;
				}

				mutex_exit(&is->is_mutex);
				it->it_ops->it_tgt_svc_offline(is);
				mutex_enter(&is->is_mutex);
			}
		} else {
			/* Target service now online */
			is->is_online = 1;
		}
	} else {
		/* Target service already online, just bump the count */
		is->is_online++;
	}
	mutex_exit(&is->is_mutex);

	return (rc);
}
559 
/*
 * idm_tgt_svc_offline
 *
 * is - idm_svc_t returned by the call to idm_tgt_svc_create
 *
 * Shutdown any online target services.  Each call balances one prior
 * idm_tgt_svc_online; the transports are only offlined when the count
 * drops to zero.
 *
 * NOTE(review): is_online is decremented without checking it is > 0;
 * an unbalanced call would drive the count negative -- confirm callers
 * always pair online/offline.
 */
void
idm_tgt_svc_offline(idm_svc_t *is)
{
	idm_transport_type_t	type;
	idm_transport_t		*it;

	mutex_enter(&is->is_mutex);
	is->is_online--;
	if (is->is_online == 0) {
		/* Walk through each of the transports and offline them */
		for (type = 0; type < IDM_TRANSPORT_NUM_TYPES; type++) {
			it = &idm_transport_list[type];
			if (it->it_ops == NULL) {
				/* transport is not registered */
				continue;
			}

			/* drop is_mutex across the potentially-blocking call */
			mutex_exit(&is->is_mutex);
			it->it_ops->it_tgt_svc_offline(is);
			mutex_enter(&is->is_mutex);
		}
	}
	mutex_exit(&is->is_mutex);
}
591 
/*
 * idm_tgt_svc_lookup
 *
 * Lookup a service instance listening on the specified port.  On success
 * a hold is taken on the returned service (caller must release it);
 * returns NULL if no service is bound to the port.
 */

idm_svc_t *
idm_tgt_svc_lookup(uint16_t port)
{
	idm_svc_t *result;

retry:
	mutex_enter(&idm.idm_global_mutex);
	for (result = list_head(&idm.idm_tgt_svc_list);
	    result != NULL;
	    result = list_next(&idm.idm_tgt_svc_list, result)) {
		if (result->is_svc_req.sr_port == port) {
			if (result->is_online == 0) {
				/*
				 * A service exists on this port, but it
				 * is going away, wait for it to cleanup.
				 * (idm_tgt_svc_destroy broadcasts on
				 * idm_tgt_svc_cv.)  The list may have
				 * changed while we slept, so restart the
				 * scan from the top.
				 */
				cv_wait(&idm.idm_tgt_svc_cv,
				    &idm.idm_global_mutex);
				mutex_exit(&idm.idm_global_mutex);
				goto retry;
			}
			/* hold the service for the caller before unlocking */
			idm_tgt_svc_hold(result);
			mutex_exit(&idm.idm_global_mutex);
			return (result);
		}
	}
	mutex_exit(&idm.idm_global_mutex);

	return (NULL);
}
628 
/*
 * idm_negotiate_key_values()
 * Give IDM level a chance to negotiate any login parameters it should own.
 *  -- leave unhandled parameters alone on request_nvl
 *  -- move all handled parameters to response_nvl with an appropriate response
 *  -- also add an entry to negotiated_nvl for any accepted parameters
 *
 * Delegates entirely to the transport's negotiate entry point.
 */
kv_status_t
idm_negotiate_key_values(idm_conn_t *ic, nvlist_t *request_nvl,
    nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
{
	ASSERT(ic->ic_transport_ops != NULL);
	return (ic->ic_transport_ops->it_negotiate_key_values(ic,
	    request_nvl, response_nvl, negotiated_nvl));
}
644 
/*
 * idm_notice_key_values()
 * Activate at the IDM level any parameters that have been negotiated.
 * Passes the set of key value pairs to the transport for activation.
 * This will be invoked as the connection is entering full-feature mode.
 */
void
idm_notice_key_values(idm_conn_t *ic, nvlist_t *negotiated_nvl)
{
	ASSERT(ic->ic_transport_ops != NULL);
	ic->ic_transport_ops->it_notice_key_values(ic, negotiated_nvl);
}
657 
/*
 * idm_buf_tx_to_ini
 *
 * This is IDM's implementation of the 'Put_Data' operational primitive.
 *
 * This function is invoked by a target iSCSI layer to request its local
 * Datamover layer to transmit the Data-In PDU to the peer iSCSI layer
 * on the remote iSCSI node. The I/O buffer represented by 'idb' is
 * transferred to the initiator associated with task 'idt'. The connection
 * info, contents of the Data-In PDU header, the DataDescriptorIn, BHS,
 * and the callback (idb->idb_buf_cb) at transfer completion are
 * provided as input.
 *
 * This data transfer takes place transparently to the remote iSCSI layer,
 * i.e. without its participation.
 *
 * Using sockets, IDM implements the data transfer by segmenting the data
 * buffer into appropriately sized iSCSI PDUs and transmitting them to the
 * initiator. iSER performs the transfer using RDMA write.
 *
 * Returns IDM_STATUS_SUCCESS (or the transport's status in the
 * TASK_ACTIVE case), IDM_STATUS_FAIL for an unexpected task state.
 */
idm_status_t
idm_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	/* Record the transfer window and completion callback on the buffer */
	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * Buffer should not contain the pattern.  If the pattern is
	 * present then we've been asked to transmit initialized data
	 */
	IDM_BUFPAT_CHECK(idb, xfer_len, BP_CHECK_ASSERT);

	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_tx_to_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_in_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		/*
		 * NOTE(review): no mutex_exit on this path -- the transport
		 * it_buf_tx_to_ini op is presumably called with idt_mutex
		 * held and releases it; confirm against the transport
		 * interface contract.
		 */
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_tx_to_ini)
		    (idt, idb);
		return (rc);

	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended
		 */
		idm_buf_bind_in_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Once the task is aborted, any buffers added to the
		 * idt_inbufv will never get cleaned up, so just return
		 * SUCCESS.  The buffer should get cleaned up by the
		 * client or framework once task_aborted has completed.
		 */
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);

	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_FAIL);
}
738 
/*
 * idm_buf_rx_from_ini
 *
 * This is IDM's implementation of the 'Get_Data' operational primitive.
 *
 * This function is invoked by a target iSCSI layer to request its local
 * Datamover layer to retrieve certain data identified by the R2T PDU from the
 * peer iSCSI layer on the remote node. The retrieved Data-Out PDU will be
 * mapped to the respective buffer by the task tags (ITT & TTT).
 * The connection information, contents of an R2T PDU, DataDescriptor, BHS, and
 * the callback (idb->idb_buf_cb) notification for data transfer completion
 * are provided as input.
 *
 * When an iSCSI node sends an R2T PDU to its local Datamover layer, the local
 * and remote Datamover layers transparently bring about the data transfer
 * requested by the R2T PDU, without the participation of the iSCSI layers.
 *
 * Using sockets, IDM transmits an R2T PDU for each buffer and the rx_data_out()
 * assembles the Data-Out PDUs into the buffer. iSER uses RDMA read.
 *
 * Returns IDM_STATUS_SUCCESS (or the transport's status in the
 * TASK_ACTIVE case), IDM_STATUS_FAIL for an unexpected task state.
 */
idm_status_t
idm_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb,
    uint32_t offset, uint32_t xfer_len,
    idm_buf_cb_t idb_buf_cb, void *cb_arg)
{
	idm_status_t rc;

	/* Record the transfer window and completion callback on the buffer */
	idb->idb_bufoffset = offset;
	idb->idb_xfer_len = xfer_len;
	idb->idb_buf_cb = idb_buf_cb;
	idb->idb_cb_arg = cb_arg;
	gethrestime(&idb->idb_xfer_start);

	/*
	 * "In" buf list is for "Data In" PDU's, "Out" buf list is for
	 * "Data Out" PDU's
	 */
	mutex_enter(&idt->idt_mutex);
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		idt->idt_rx_from_ini_start++;
		idm_task_hold(idt);
		idm_buf_bind_out_locked(idt, idb);
		idb->idb_in_transport = B_TRUE;
		/*
		 * NOTE(review): as with idm_buf_tx_to_ini, the transport op
		 * is presumably responsible for releasing idt_mutex.
		 */
		rc = (*idt->idt_ic->ic_transport_ops->it_buf_rx_from_ini)
		    (idt, idb);
		return (rc);
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * Bind buffer but don't start a transfer since the task
		 * is suspended (or aborting; cleanup is then left to the
		 * client/framework, as in idm_buf_tx_to_ini)
		 */
		idm_buf_bind_out_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		return (IDM_STATUS_SUCCESS);
	default:
		ASSERT(0);
		break;
	}
	mutex_exit(&idt->idt_mutex);

	return (IDM_STATUS_FAIL);
}
807 
/*
 * idm_buf_tx_to_ini_done
 *
 * The transport calls this after it has completed a transfer requested by
 * a call to transport_buf_tx_to_ini
 *
 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 * idt may be freed after the call to idb->idb_buf_cb.
 */
void
idm_buf_tx_to_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
	ASSERT(mutex_owned(&idt->idt_mutex));
	idb->idb_in_transport = B_FALSE;
	idb->idb_tx_thread = B_FALSE;
	idt->idt_tx_to_ini_done++;
	gethrestime(&idb->idb_xfer_done);

	/*
	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
	 * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
	 * to 0.
	 */
	idm_task_rele(idt);
	idb->idb_status = status;

	switch (idt->idt_state) {
	case TASK_ACTIVE:
		/* normal completion: unbind and fire the client's callback */
		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
		idm_buf_unbind_in_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		(*idb->idb_buf_cb)(idb, status);
		return;
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * To keep things simple we will ignore the case where the
		 * transfer was successful and leave all buffers bound to the
		 * task.  This allows us to also ignore the case where we've
		 * been asked to abort a task but the last transfer of the
		 * task has completed.  IDM has no idea whether this was, in
		 * fact, the last transfer of the task so it would be difficult
		 * to handle this case.  Everything should get sorted out again
		 * after task reassignment is complete.
		 *
		 * In the case of TASK_ABORTING we could conceivably call the
		 * buffer callback here but the timing of when the client's
		 * client_task_aborted callback is invoked vs. when the client's
		 * buffer callback gets invoked gets sticky.  We don't want
		 * the client to hear from us again after the call to
		 * client_task_aborted() but we don't want to give it a bunch
		 * of failed buffer transfers until we've called
		 * client_task_aborted().  Instead we'll just leave all the
		 * buffers bound and allow the client to cleanup.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}
871 
/*
 * idm_buf_rx_from_ini_done
 *
 * The transport calls this after it has completed a transfer requested by
 * a call to transport_buf_rx_from_ini
 *
 * Caller holds idt->idt_mutex, idt->idt_mutex is released before returning.
 * idt may be freed after the call to idb->idb_buf_cb.
 */
void
idm_buf_rx_from_ini_done(idm_task_t *idt, idm_buf_t *idb, idm_status_t status)
{
	ASSERT(mutex_owned(&idt->idt_mutex));
	idb->idb_in_transport = B_FALSE;
	idt->idt_rx_from_ini_done++;
	gethrestime(&idb->idb_xfer_done);

	/*
	 * idm_refcnt_rele may cause TASK_SUSPENDING --> TASK_SUSPENDED or
	 * TASK_ABORTING --> TASK_ABORTED transition if the refcount goes
	 * to 0.
	 */
	idm_task_rele(idt);
	idb->idb_status = status;

	if (status == IDM_STATUS_SUCCESS) {
		/*
		 * Buffer should not contain the pattern.  If it does then
		 * we did not get the data from the remote host.
		 */
		IDM_BUFPAT_CHECK(idb, idb->idb_xfer_len, BP_CHECK_ASSERT);
	}

	switch (idt->idt_state) {
	case TASK_ACTIVE:
		/* normal completion: unbind and fire the client's callback */
		idt->idt_ic->ic_timestamp = ddi_get_lbolt();
		idm_buf_unbind_out_locked(idt, idb);
		mutex_exit(&idt->idt_mutex);
		(*idb->idb_buf_cb)(idb, status);
		return;
	case TASK_SUSPENDING:
	case TASK_SUSPENDED:
	case TASK_ABORTING:
	case TASK_ABORTED:
		/*
		 * To keep things simple we will ignore the case where the
		 * transfer was successful and leave all buffers bound to the
		 * task.  This allows us to also ignore the case where we've
		 * been asked to abort a task but the last transfer of the
		 * task has completed.  IDM has no idea whether this was, in
		 * fact, the last transfer of the task so it would be difficult
		 * to handle this case.  Everything should get sorted out again
		 * after task reassignment is complete.
		 *
		 * In the case of TASK_ABORTING we could conceivably call the
		 * buffer callback here but the timing of when the client's
		 * client_task_aborted callback is invoked vs. when the client's
		 * buffer callback gets invoked gets sticky.  We don't want
		 * the client to hear from us again after the call to
		 * client_task_aborted() but we don't want to give it a bunch
		 * of failed buffer transfers until we've called
		 * client_task_aborted().  Instead we'll just leave all the
		 * buffers bound and allow the client to cleanup.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}
942 
/*
 * idm_buf_alloc
 *
 * Allocates a buffer handle and registers it for use with the transport
 * layer. If a buffer is not passed on bufptr, the buffer will be allocated
 * as well as the handle.
 *
 * ic		- connection on which the buffer will be transferred
 * bufptr	- allocate memory for buffer if NULL, else assign to buffer
 * buflen	- length of buffer
 *
 * Returns idm_buf_t handle if successful, otherwise NULL.  A connection
 * hold is taken for the lifetime of the buffer (released in idm_buf_free).
 */
idm_buf_t *
idm_buf_alloc(idm_conn_t *ic, void *bufptr, uint64_t buflen)
{
	idm_buf_t	*buf = NULL;
	int		rc;

	ASSERT(ic != NULL);
	ASSERT(idm.idm_buf_cache != NULL);
	ASSERT(buflen > 0);

	/* Don't allocate new buffers if we are not in FFP */
	mutex_enter(&ic->ic_state_mutex);
	if (!ic->ic_ffp) {
		mutex_exit(&ic->ic_state_mutex);
		return (NULL);
	}


	idm_conn_hold(ic);
	mutex_exit(&ic->ic_state_mutex);

	/* NOSLEEP: failure here is recoverable, caller sees NULL */
	buf = kmem_cache_alloc(idm.idm_buf_cache, KM_NOSLEEP);
	if (buf == NULL) {
		idm_conn_rele(ic);
		return (NULL);
	}

	buf->idb_ic		= ic;
	buf->idb_buflen		= buflen;
	buf->idb_exp_offset	= 0;
	buf->idb_bufoffset	= 0;
	buf->idb_xfer_len 	= 0;
	buf->idb_magic		= IDM_BUF_MAGIC;
	buf->idb_in_transport	= B_FALSE;
	buf->idb_bufbcopy	= B_FALSE;

	/*
	 * If bufptr is NULL, we have an implicit request to allocate
	 * memory for this IDM buffer handle and register it for use
	 * with the transport. To simplify this, and to give more freedom
	 * to the transport layer for it's own buffer management, both of
	 * these actions will take place in the transport layer.
	 * If bufptr is set, then the caller has allocated memory (or more
	 * likely it's been passed from an upper layer), and we need only
	 * register the buffer for use with the transport layer.
	 */
	if (bufptr == NULL) {
		/*
		 * Allocate a buffer from the transport layer (which
		 * will also register the buffer for use).
		 */
		rc = ic->ic_transport_ops->it_buf_alloc(buf, buflen);
		if (rc != 0) {
			idm_conn_rele(ic);
			kmem_cache_free(idm.idm_buf_cache, buf);
			return (NULL);
		}
		/* Set the bufalloc'd flag */
		buf->idb_bufalloc = B_TRUE;
	} else {
		/*
		 * For large transfers, Set the passed bufptr into
		 * the buf handle, and register the handle with the
		 * transport layer. As memory registration with the
		 * transport layer is a time/cpu intensive operation,
		 * for small transfers (up to a pre-defined bcopy
		 * threshold), use pre-registered memory buffers
		 * and bcopy data at the appropriate time.
		 */
		buf->idb_buf = bufptr;

		rc = ic->ic_transport_ops->it_buf_setup(buf);
		if (rc != 0) {
			idm_conn_rele(ic);
			kmem_cache_free(idm.idm_buf_cache, buf);
			return (NULL);
		}
		/*
		 * The transport layer is now expected to set the idb_bufalloc
		 * correctly to indicate if resources have been allocated.
		 */
	}

	IDM_BUFPAT_SET(buf);

	return (buf);
}
1043 
1044 /*
1045  * idm_buf_free
1046  *
1047  * Release a buffer handle along with the associated buffer that was allocated
1048  * or assigned with idm_buf_alloc
1049  */
1050 void
1051 idm_buf_free(idm_buf_t *buf)
1052 {
1053 	idm_conn_t *ic = buf->idb_ic;
1054 
1055 
1056 	buf->idb_task_binding	= NULL;
1057 
1058 	if (buf->idb_bufalloc) {
1059 		ic->ic_transport_ops->it_buf_free(buf);
1060 	} else {
1061 		ic->ic_transport_ops->it_buf_teardown(buf);
1062 	}
1063 	kmem_cache_free(idm.idm_buf_cache, buf);
1064 	idm_conn_rele(ic);
1065 }
1066 
1067 /*
1068  * idm_buf_bind_in
1069  *
1070  * This function associates a buffer with a task. This is only for use by the
1071  * iSCSI initiator that will have only one buffer per transfer direction
1072  *
1073  */
void
idm_buf_bind_in(idm_task_t *idt, idm_buf_t *buf)
{
	/* idt_mutex serializes binding against unbind/cleanup of the task */
	mutex_enter(&idt->idt_mutex);
	idm_buf_bind_in_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}
1081 
/* Bind an inbound buffer to a task; caller must hold idt->idt_mutex. */
static void
idm_buf_bind_in_locked(idm_task_t *idt, idm_buf_t *buf)
{
	buf->idb_task_binding = idt;
	buf->idb_ic = idt->idt_ic;
	idm_listbuf_insert(&idt->idt_inbufv, buf);
}
1089 
/* Bind an outbound (SCSI Write data) buffer to a task. */
void
idm_buf_bind_out(idm_task_t *idt, idm_buf_t *buf)
{
	/*
	 * For small transfers, the iSER transport delegates the IDM
	 * layer to bcopy the SCSI Write data for faster IOPS.
	 */
	if (buf->idb_bufbcopy == B_TRUE) {

		bcopy(buf->idb_bufptr, buf->idb_buf, buf->idb_buflen);
	}
	mutex_enter(&idt->idt_mutex);
	idm_buf_bind_out_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}
1105 
/* Bind an outbound buffer to a task; caller must hold idt->idt_mutex. */
static void
idm_buf_bind_out_locked(idm_task_t *idt, idm_buf_t *buf)
{
	buf->idb_task_binding = idt;
	buf->idb_ic = idt->idt_ic;
	idm_listbuf_insert(&idt->idt_outbufv, buf);
}
1113 
/* Unbind an inbound buffer from a task. */
void
idm_buf_unbind_in(idm_task_t *idt, idm_buf_t *buf)
{
	/*
	 * For small transfers, the iSER transport delegates the IDM
	 * layer to bcopy the SCSI Read data into the read buffer
	 * for faster IOPS.
	 */
	if (buf->idb_bufbcopy == B_TRUE) {
		bcopy(buf->idb_buf, buf->idb_bufptr, buf->idb_buflen);
	}
	mutex_enter(&idt->idt_mutex);
	idm_buf_unbind_in_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}
1129 
/* Remove buf from the task's inbound list; caller holds idt->idt_mutex. */
static void
idm_buf_unbind_in_locked(idm_task_t *idt, idm_buf_t *buf)
{
	list_remove(&idt->idt_inbufv, buf);
}
1135 
/* Unbind an outbound buffer from a task. */
void
idm_buf_unbind_out(idm_task_t *idt, idm_buf_t *buf)
{
	mutex_enter(&idt->idt_mutex);
	idm_buf_unbind_out_locked(idt, buf);
	mutex_exit(&idt->idt_mutex);
}
1143 
/* Remove buf from the task's outbound list; caller holds idt->idt_mutex. */
static void
idm_buf_unbind_out_locked(idm_task_t *idt, idm_buf_t *buf)
{
	list_remove(&idt->idt_outbufv, buf);
}
1149 
1150 /*
1151  * idm_buf_find() will lookup the idm_buf_t based on the relative offset in the
1152  * iSCSI PDU
1153  */
1154 idm_buf_t *
1155 idm_buf_find(void *lbuf, size_t data_offset)
1156 {
1157 	idm_buf_t	*idb;
1158 	list_t		*lst = (list_t *)lbuf;
1159 
1160 	/* iterate through the list to find the buffer */
1161 	for (idb = list_head(lst); idb != NULL; idb = list_next(lst, idb)) {
1162 
1163 		ASSERT((idb->idb_ic->ic_conn_type == CONN_TYPE_TGT) ||
1164 		    (idb->idb_bufoffset == 0));
1165 
1166 		if ((data_offset >= idb->idb_bufoffset) &&
1167 		    (data_offset < (idb->idb_bufoffset + idb->idb_buflen))) {
1168 
1169 			return (idb);
1170 		}
1171 	}
1172 
1173 	return (NULL);
1174 }
1175 
1176 void
1177 idm_bufpat_set(idm_buf_t *idb)
1178 {
1179 	idm_bufpat_t	*bufpat;
1180 	int		len, i;
1181 
1182 	len = idb->idb_buflen;
1183 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1184 
1185 	bufpat = idb->idb_buf;
1186 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1187 		bufpat->bufpat_idb = idb;
1188 		bufpat->bufpat_bufmagic = IDM_BUF_MAGIC;
1189 		bufpat->bufpat_offset = i;
1190 		bufpat++;
1191 	}
1192 }
1193 
1194 boolean_t
1195 idm_bufpat_check(idm_buf_t *idb, int check_len, idm_bufpat_check_type_t type)
1196 {
1197 	idm_bufpat_t	*bufpat;
1198 	int		len, i;
1199 
1200 	len = (type == BP_CHECK_QUICK) ? sizeof (idm_bufpat_t) : check_len;
1201 	len = (len / sizeof (idm_bufpat_t)) * sizeof (idm_bufpat_t);
1202 	ASSERT(len <= idb->idb_buflen);
1203 	bufpat = idb->idb_buf;
1204 
1205 	/*
1206 	 * Don't check the pattern in buffers that came from outside IDM
1207 	 * (these will be buffers from the initiator that we opted not
1208 	 * to double-buffer)
1209 	 */
1210 	if (!idb->idb_bufalloc)
1211 		return (B_FALSE);
1212 
1213 	/*
1214 	 * Return true if we find the pattern anywhere in the buffer
1215 	 */
1216 	for (i = 0; i < len; i += sizeof (idm_bufpat_t)) {
1217 		if (BUFPAT_MATCH(bufpat, idb)) {
1218 			IDM_CONN_LOG(CE_WARN, "idm_bufpat_check found: "
1219 			    "idb %p bufpat %p "
1220 			    "bufpat_idb=%p bufmagic=%08x offset=%08x",
1221 			    (void *)idb, (void *)bufpat, bufpat->bufpat_idb,
1222 			    bufpat->bufpat_bufmagic, bufpat->bufpat_offset);
1223 			DTRACE_PROBE2(bufpat__pattern__found,
1224 			    idm_buf_t *, idb, idm_bufpat_t *, bufpat);
1225 			if (type == BP_CHECK_ASSERT) {
1226 				ASSERT(0);
1227 			}
1228 			return (B_TRUE);
1229 		}
1230 		bufpat++;
1231 	}
1232 
1233 	return (B_FALSE);
1234 }
1235 
1236 /*
1237  * idm_task_alloc
1238  *
1239  * This function will allocate a idm_task_t structure. A task tag is also
1240  * generated and saved in idt_tt. The task is not active.
1241  */
idm_task_t *
idm_task_alloc(idm_conn_t *ic)
{
	idm_task_t	*idt;

	ASSERT(ic != NULL);

	/* Don't allocate new tasks if we are not in FFP */
	mutex_enter(&ic->ic_state_mutex);
	if (!ic->ic_ffp) {
		mutex_exit(&ic->ic_state_mutex);
		return (NULL);
	}
	/* KM_NOSLEEP allocation is safe while holding ic_state_mutex */
	idt = kmem_cache_alloc(idm.idm_task_cache, KM_NOSLEEP);
	if (idt == NULL) {
		mutex_exit(&ic->ic_state_mutex);
		return (NULL);
	}

	/* Cached tasks must come back with empty buffer lists */
	ASSERT(list_is_empty(&idt->idt_inbufv));
	ASSERT(list_is_empty(&idt->idt_outbufv));

	/*
	 * Hold the connection for the life of the task (released in
	 * idm_task_free()) while still under ic_state_mutex so the FFP
	 * check above remains valid.
	 */
	idm_conn_hold(ic);
	mutex_exit(&ic->ic_state_mutex);

	idt->idt_state		= TASK_IDLE;
	idt->idt_ic		= ic;
	idt->idt_private 	= NULL;
	idt->idt_exp_datasn	= 0;
	idt->idt_exp_rttsn	= 0;

	return (idt);
}
1275 
1276 /*
1277  * idm_task_start
1278  *
1279  * Mark the task active and initialize some stats. The caller
1280  * sets up the idm_task_t structure with a prior call to idm_task_alloc().
1281  * The task service does not function as a task/work engine, it is the
1282  * responsibility of the initiator to start the data transfer and free the
1283  * resources.
1284  */
1285 void
1286 idm_task_start(idm_task_t *idt, uintptr_t handle)
1287 {
1288 	ASSERT(idt != NULL);
1289 
1290 	/* mark the task as ACTIVE */
1291 	idt->idt_state = TASK_ACTIVE;
1292 	idt->idt_client_handle = handle;
1293 	idt->idt_tx_to_ini_start = idt->idt_tx_to_ini_done =
1294 	    idt->idt_rx_from_ini_start = idt->idt_rx_from_ini_done =
1295 	    idt->idt_tx_bytes = idt->idt_rx_bytes = 0;
1296 }
1297 
1298 /*
1299  * idm_task_done
1300  *
1301  * This function sets the state to indicate that the task is no longer active.
1302  */
void
idm_task_done(idm_task_t *idt)
{
	ASSERT(idt != NULL);

	/* Back to IDLE so task lookups (idm_task_find*) no longer match */
	mutex_enter(&idt->idt_mutex);
	idt->idt_state = TASK_IDLE;
	mutex_exit(&idt->idt_mutex);

	/*
	 * Although unlikely it is possible for a reference to come in after
	 * the client has decided the task is over but before we've marked
	 * the task idle.  One specific unavoidable scenario is the case where
	 * received PDU with the matching ITT/TTT results in a successful
	 * lookup of this task.  We are at the mercy of the remote node in
	 * that case so we need to handle it.  Now that the task state
	 * has changed no more references will occur so a simple call to
	 * idm_refcnt_wait_ref should deal with the situation.
	 */
	idm_refcnt_wait_ref(&idt->idt_refcnt);
	idm_refcnt_reset(&idt->idt_refcnt);
}
1325 
1326 /*
1327  * idm_task_free
1328  *
1329  * This function will free the Task Tag and the memory allocated for the task
1330  * idm_task_done should be called prior to this call
1331  */
void
idm_task_free(idm_task_t *idt)
{
	idm_conn_t *ic;

	ASSERT(idt != NULL);
	/* idm_task_done must have drained and reset the refcnt already */
	ASSERT(idt->idt_refcnt.ir_refcnt == 0);
	ASSERT(idt->idt_state == TASK_IDLE);

	ic = idt->idt_ic;

	/*
	 * It's possible for items to still be in the idt_inbufv list if
	 * they were added after idm_task_cleanup was called.  We rely on
	 * STMF to free all buffers associated with the task however STMF
	 * doesn't know that we have this reference to the buffers.
	 * Use list_create so that we don't end up with stale references
	 * to these buffers.
	 */
	list_create(&idt->idt_inbufv, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));
	list_create(&idt->idt_outbufv, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));

	kmem_cache_free(idm.idm_task_cache, idt);

	/* Drop the connection hold taken in idm_task_alloc() */
	idm_conn_rele(ic);
}
1360 
1361 /*
1362  * idm_task_find_common
1363  *	common code for idm_task_find() and idm_task_find_and_complete()
1364  */
/*ARGSUSED*/
static idm_task_t *
idm_task_find_common(idm_conn_t *ic, uint32_t itt, uint32_t ttt,
    boolean_t complete)
{
	uint32_t	tt, client_handle;
	idm_task_t	*idt;

	/*
	 * Must match both itt and ttt.  The table is indexed by itt
	 * for initiator connections and ttt for target connections.
	 */
	if (IDM_CONN_ISTGT(ic)) {
		tt = ttt;
		client_handle = itt;
	} else {
		tt = itt;
		client_handle = ttt;
	}

	rw_enter(&idm.idm_taskid_table_lock, RW_READER);
	if (tt >= idm.idm_taskid_max) {
		rw_exit(&idm.idm_taskid_table_lock);
		return (NULL);
	}

	idt = idm.idm_taskid_table[tt];

	if (idt != NULL) {
		mutex_enter(&idt->idt_mutex);
		/*
		 * Reject the lookup unless the task is ACTIVE, belongs to
		 * this connection, and (for targets) the client handle
		 * (ITT) also matches.
		 */
		if ((idt->idt_state != TASK_ACTIVE) ||
		    (idt->idt_ic != ic) ||
		    (IDM_CONN_ISTGT(ic) &&
		    (idt->idt_client_handle != client_handle))) {
			/*
			 * Task doesn't match or task is aborting and
			 * we don't want any more references.
			 */
			if ((idt->idt_ic != ic) &&
			    (idt->idt_state == TASK_ACTIVE) &&
			    (IDM_CONN_ISINI(ic) || idt->idt_client_handle ==
			    client_handle)) {
				/* Tag matched but on a different connection */
				IDM_CONN_LOG(CE_WARN,
				"idm_task_find: wrong connection %p != %p",
				    (void *)ic, (void *)idt->idt_ic);
			}
			mutex_exit(&idt->idt_mutex);
			rw_exit(&idm.idm_taskid_table_lock);
			return (NULL);
		}
		/* Hold taken on behalf of the caller; released by them */
		idm_task_hold(idt);
		/*
		 * Set the task state to TASK_COMPLETE so it can no longer
		 * be found or aborted.
		 */
		if (B_TRUE == complete)
			idt->idt_state = TASK_COMPLETE;
		mutex_exit(&idt->idt_mutex);
	}
	rw_exit(&idm.idm_taskid_table_lock);

	return (idt);
}
1428 
1429 /*
1430  * This function looks up a task by task tag.
1431  */
idm_task_t *
idm_task_find(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
	/* Lookup only; returns with a hold the caller must release */
	return (idm_task_find_common(ic, itt, ttt, B_FALSE));
}
1437 
1438 /*
1439  * This function looks up a task by task tag. If found, the task state
 * is atomically set to TASK_COMPLETE so it can no longer be found or aborted.
1441  */
idm_task_t *
idm_task_find_and_complete(idm_conn_t *ic, uint32_t itt, uint32_t ttt)
{
	/* Lookup plus atomic transition to TASK_COMPLETE */
	return (idm_task_find_common(ic, itt, ttt, B_TRUE));
}
1447 
1448 /*
1449  * idm_task_find_by_handle
1450  *
1451  * This function looks up a task by the client-private idt_client_handle.
1452  *
1453  * This function should NEVER be called in the performance path.  It is
1454  * intended strictly for error recovery/task management.
1455  */
/*ARGSUSED*/
void *
idm_task_find_by_handle(idm_conn_t *ic, uintptr_t handle)
{
	idm_task_t	*idt = NULL;
	int		idx = 0;

	/* Linear scan of the whole task table -- not a performance path */
	rw_enter(&idm.idm_taskid_table_lock, RW_READER);

	for (idx = 0; idx < idm.idm_taskid_max; idx++) {
		idt = idm.idm_taskid_table[idx];

		if (idt == NULL)
			continue;

		mutex_enter(&idt->idt_mutex);

		if (idt->idt_state != TASK_ACTIVE) {
			/*
			 * Task is either in suspend, abort, or already
			 * complete.
			 */
			mutex_exit(&idt->idt_mutex);
			continue;
		}

		if (idt->idt_client_handle == handle) {
			/* Found: hold taken on behalf of the caller */
			idm_task_hold(idt);
			mutex_exit(&idt->idt_mutex);
			break;
		}

		mutex_exit(&idt->idt_mutex);
	}

	rw_exit(&idm.idm_taskid_table_lock);

	/* idx == idm_taskid_max means the loop finished without a match */
	if ((idt == NULL) || (idx == idm.idm_taskid_max))
		return (NULL);

	return (idt->idt_private);
}
1498 
/* Take a reference on the task (see idm_refcnt_hold). */
void
idm_task_hold(idm_task_t *idt)
{
	idm_refcnt_hold(&idt->idt_refcnt);
}
1504 
/* Release a reference previously taken with idm_task_hold(). */
void
idm_task_rele(idm_task_t *idt)
{
	idm_refcnt_rele(&idt->idt_refcnt);
}
1510 
void
idm_task_abort(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	idm_task_t	*task;
	int		idx;

	/*
	 * Passing NULL as the task indicates that all tasks
	 * for this connection should be aborted.
	 */
	if (idt == NULL) {
		/*
		 * Only the connection state machine should ask for
		 * all tasks to abort and this should never happen in FFP.
		 */
		ASSERT(!ic->ic_ffp);
		rw_enter(&idm.idm_taskid_table_lock, RW_READER);
		for (idx = 0; idx < idm.idm_taskid_max; idx++) {
			task = idm.idm_taskid_table[idx];
			if (task == NULL)
				continue;
			mutex_enter(&task->idt_mutex);
			if ((task->idt_state != TASK_IDLE) &&
			    (task->idt_state != TASK_COMPLETE) &&
			    (task->idt_ic == ic)) {
				/*
				 * Drop the table lock across the abort;
				 * idm_task_abort_one releases idt_mutex
				 * before it returns.
				 */
				rw_exit(&idm.idm_taskid_table_lock);
				idm_task_abort_one(ic, task, abort_type);
				rw_enter(&idm.idm_taskid_table_lock, RW_READER);
			} else
				mutex_exit(&task->idt_mutex);
		}
		rw_exit(&idm.idm_taskid_table_lock);
	} else {
		/* idm_task_abort_one exits idt_mutex on our behalf */
		mutex_enter(&idt->idt_mutex);
		idm_task_abort_one(ic, idt, abort_type);
	}
}
1548 
1549 static void
1550 idm_task_abort_unref_cb(void *ref)
1551 {
1552 	idm_task_t *idt = ref;
1553 
1554 	mutex_enter(&idt->idt_mutex);
1555 	switch (idt->idt_state) {
1556 	case TASK_SUSPENDING:
1557 		idt->idt_state = TASK_SUSPENDED;
1558 		mutex_exit(&idt->idt_mutex);
1559 		idm_task_aborted(idt, IDM_STATUS_SUSPENDED);
1560 		return;
1561 	case TASK_ABORTING:
1562 		idt->idt_state = TASK_ABORTED;
1563 		mutex_exit(&idt->idt_mutex);
1564 		idm_task_aborted(idt, IDM_STATUS_ABORTED);
1565 		return;
1566 	default:
1567 		mutex_exit(&idt->idt_mutex);
1568 		ASSERT(0);
1569 		break;
1570 	}
1571 }
1572 
1573 /*
1574  * Abort the idm task.
1575  *    Caller must hold the task mutex, which will be released before return
1576  */
static void
idm_task_abort_one(idm_conn_t *ic, idm_task_t *idt, idm_abort_type_t abort_type)
{
	/* Caller must hold the task mutex; it is released before return */
	ASSERT(mutex_owned(&idt->idt_mutex));
	switch (idt->idt_state) {
	case TASK_ACTIVE:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Call transport to release any resources */
			idt->idt_state = TASK_SUSPENDING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references.  When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);
			ic->ic_transport_ops->it_free_task_rsrc(idt);

			/*
			 * Wait for outstanding references.  When all
			 * references are released the callback will call
			 * idm_task_aborted().
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDING:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGT_ABORT:
			/* Upgrade the in-progress suspend to an abort */
			idt->idt_state = TASK_ABORTING;
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_SUSPENDED:
		/* Already called transport_free_task_rsrc(); */
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* Already doing it */
			break;
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGT_ABORT:
			idt->idt_state = TASK_ABORTING;
			mutex_exit(&idt->idt_mutex);

			/*
			 * We could probably call idm_task_aborted directly
			 * here but we may be holding the conn lock. It's
			 * easier to just switch contexts.  Even though
			 * we shouldn't really have any references we'll
			 * set the state to TASK_ABORTING instead of
			 * TASK_ABORTED so we can use the same code path.
			 */
			idm_refcnt_async_wait_ref(&idt->idt_refcnt,
			    &idm_task_abort_unref_cb);
			return;
		default:
			ASSERT(0);
		}
		break;
	case TASK_ABORTING:
	case TASK_ABORTED:
		switch (abort_type) {
		case AT_INTERNAL_SUSPEND:
			/* We're already past this point... */
		case AT_INTERNAL_ABORT:
		case AT_TASK_MGT_ABORT:
			/* Already doing it */
			break;
		default:
			ASSERT(0);
		}
		break;
	case TASK_COMPLETE:
		/*
		 * In this case, let it go.  The status has already been
		 * sent (which may or may not get successfully transmitted)
		 * and we don't want to end up in a race between completing
		 * the status PDU and marking the task suspended.
		 */
		break;
	default:
		ASSERT(0);
	}
	mutex_exit(&idt->idt_mutex);
}
1683 
/* Notify the connection's client of the abort via its registered callback. */
static void
idm_task_aborted(idm_task_t *idt, idm_status_t status)
{
	(*idt->idt_ic->ic_conn_ops.icb_task_aborted)(idt, status);
}
1689 
/*
 * idm_task_cleanup
 *
 * Unbind all buffers from a suspended/aborted task and complete each of
 * them with IDM_STATUS_ABORTED via its idb_buf_cb callback.
 */
void
idm_task_cleanup(idm_task_t *idt)
{
	idm_buf_t *idb, *next_idb;
	list_t		tmp_buflist;
	ASSERT((idt->idt_state == TASK_SUSPENDED) ||
	    (idt->idt_state == TASK_ABORTED));

	list_create(&tmp_buflist, sizeof (idm_buf_t),
	    offsetof(idm_buf_t, idb_buflink));

	/*
	 * Remove all the buffers from the task and add them to a
	 * temporary local list -- we do this so that we can hold
	 * the task lock and prevent the task from going away if
	 * the client decides to call idm_task_done/idm_task_free.
	 * This could happen during abort in iscsit.
	 */
	mutex_enter(&idt->idt_mutex);
	for (idb = list_head(&idt->idt_inbufv);
	    idb != NULL;
	    idb = next_idb) {
		next_idb = list_next(&idt->idt_inbufv, idb);
		idm_buf_unbind_in_locked(idt, idb);
		list_insert_tail(&tmp_buflist, idb);
	}

	for (idb = list_head(&idt->idt_outbufv);
	    idb != NULL;
	    idb = next_idb) {
		next_idb = list_next(&idt->idt_outbufv, idb);
		idm_buf_unbind_out_locked(idt, idb);
		list_insert_tail(&tmp_buflist, idb);
	}
	mutex_exit(&idt->idt_mutex);

	/* Complete the buffers outside the task lock */
	for (idb = list_head(&tmp_buflist); idb != NULL; idb = next_idb) {
		next_idb = list_next(&tmp_buflist, idb);
		list_remove(&tmp_buflist, idb);
		(*idb->idb_buf_cb)(idb, IDM_STATUS_ABORTED);
	}
	list_destroy(&tmp_buflist);
}
1733 
1734 
1735 /*
1736  * idm_pdu_tx
1737  *
1738  * This is IDM's implementation of the 'Send_Control' operational primitive.
1739  * This function is invoked by an initiator iSCSI layer requesting the transfer
1740  * of a iSCSI command PDU or a target iSCSI layer requesting the transfer of a
1741  * iSCSI response PDU. The PDU will be transmitted as-is by the local Datamover
1742  * layer to the peer iSCSI layer in the remote iSCSI node. The connection info
1743  * and iSCSI PDU-specific qualifiers namely BHS, AHS, DataDescriptor and Size
1744  * are provided as input.
1745  *
1746  */
1747 void
1748 idm_pdu_tx(idm_pdu_t *pdu)
1749 {
1750 	idm_conn_t		*ic = pdu->isp_ic;
1751 	iscsi_async_evt_hdr_t	*async_evt;
1752 
1753 	/*
1754 	 * If we are in full-featured mode then route SCSI-related
1755 	 * commands to the appropriate function vector without checking
1756 	 * the connection state.  We will only be in full-feature mode
1757 	 * when we are in an acceptable state for SCSI PDU's.
1758 	 *
1759 	 * We also need to ensure that there are no PDU events outstanding
1760 	 * on the state machine.  Any non-SCSI PDU's received in full-feature
1761 	 * mode will result in PDU events and until these have been handled
1762 	 * we need to route all PDU's through the state machine as PDU
1763 	 * events to maintain ordering.
1764 	 *
1765 	 * Note that IDM cannot enter FFP mode until it processes in
1766 	 * its state machine the last xmit of the login process.
1767 	 * Hence, checking the IDM_PDU_LOGIN_TX flag here would be
1768 	 * superfluous.
1769 	 */
1770 	mutex_enter(&ic->ic_state_mutex);
1771 	if (ic->ic_ffp && (ic->ic_pdu_events == 0)) {
1772 		mutex_exit(&ic->ic_state_mutex);
1773 		switch (IDM_PDU_OPCODE(pdu)) {
1774 		case ISCSI_OP_SCSI_RSP:
1775 			/* Target only */
1776 			idm_pdu_tx_forward(ic, pdu);
1777 			return;
1778 		case ISCSI_OP_SCSI_TASK_MGT_RSP:
1779 			/* Target only */
1780 			idm_pdu_tx_forward(ic, pdu);
1781 			return;
1782 		case ISCSI_OP_SCSI_DATA_RSP:
1783 			/* Target only */
1784 			idm_pdu_tx_forward(ic, pdu);
1785 			return;
1786 		case ISCSI_OP_RTT_RSP:
1787 			/* Target only */
1788 			idm_pdu_tx_forward(ic, pdu);
1789 			return;
1790 		case ISCSI_OP_NOOP_IN:
1791 			/* Target only */
1792 			idm_pdu_tx_forward(ic, pdu);
1793 			return;
1794 		case ISCSI_OP_TEXT_RSP:
1795 			/* Target only */
1796 			idm_pdu_tx_forward(ic, pdu);
1797 			return;
1798 		case ISCSI_OP_TEXT_CMD:
1799 		case ISCSI_OP_NOOP_OUT:
1800 		case ISCSI_OP_SCSI_CMD:
1801 		case ISCSI_OP_SCSI_DATA:
1802 		case ISCSI_OP_SCSI_TASK_MGT_MSG:
1803 			/* Initiator only */
1804 			idm_pdu_tx_forward(ic, pdu);
1805 			return;
1806 		default:
1807 			break;
1808 		}
1809 
1810 		mutex_enter(&ic->ic_state_mutex);
1811 	}
1812 
1813 	/*
1814 	 * Any PDU's processed outside of full-feature mode and non-SCSI
1815 	 * PDU's in full-feature mode are handled by generating an
1816 	 * event to the connection state machine.  The state machine
1817 	 * will validate the PDU against the current state and either
1818 	 * transmit the PDU if the opcode is allowed or handle an
1819 	 * error if the PDU is not allowed.
1820 	 *
1821 	 * This code-path will also generate any events that are implied
1822 	 * by the PDU opcode.  For example a "login response" with success
1823 	 * status generates a CE_LOGOUT_SUCCESS_SND event.
1824 	 */
1825 	switch (IDM_PDU_OPCODE(pdu)) {
1826 	case ISCSI_OP_LOGIN_CMD:
1827 		idm_conn_tx_pdu_event(ic, CE_LOGIN_SND, (uintptr_t)pdu);
1828 		break;
1829 	case ISCSI_OP_LOGIN_RSP:
1830 		idm_parse_login_rsp(ic, pdu, /* Is RX */ B_FALSE);
1831 		break;
1832 	case ISCSI_OP_LOGOUT_CMD:
1833 		idm_parse_logout_req(ic, pdu, /* Is RX */ B_FALSE);
1834 		break;
1835 	case ISCSI_OP_LOGOUT_RSP:
1836 		idm_parse_logout_rsp(ic, pdu, /* Is RX */ B_FALSE);
1837 		break;
1838 	case ISCSI_OP_ASYNC_EVENT:
1839 		async_evt = (iscsi_async_evt_hdr_t *)pdu->isp_hdr;
1840 		switch (async_evt->async_event) {
1841 		case ISCSI_ASYNC_EVENT_REQUEST_LOGOUT:
1842 			idm_conn_tx_pdu_event(ic, CE_ASYNC_LOGOUT_SND,
1843 			    (uintptr_t)pdu);
1844 			break;
1845 		case ISCSI_ASYNC_EVENT_DROPPING_CONNECTION:
1846 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_CONN_SND,
1847 			    (uintptr_t)pdu);
1848 			break;
1849 		case ISCSI_ASYNC_EVENT_DROPPING_ALL_CONNECTIONS:
1850 			idm_conn_tx_pdu_event(ic, CE_ASYNC_DROP_ALL_CONN_SND,
1851 			    (uintptr_t)pdu);
1852 			break;
1853 		case ISCSI_ASYNC_EVENT_SCSI_EVENT:
1854 		case ISCSI_ASYNC_EVENT_PARAM_NEGOTIATION:
1855 		default:
1856 			idm_conn_tx_pdu_event(ic, CE_MISC_TX,
1857 			    (uintptr_t)pdu);
1858 			break;
1859 		}
1860 		break;
1861 	case ISCSI_OP_SCSI_RSP:
1862 		/* Target only */
1863 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1864 		break;
1865 	case ISCSI_OP_SCSI_TASK_MGT_RSP:
1866 		/* Target only */
1867 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1868 		break;
1869 	case ISCSI_OP_SCSI_DATA_RSP:
1870 		/* Target only */
1871 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1872 		break;
1873 	case ISCSI_OP_RTT_RSP:
1874 		/* Target only */
1875 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1876 		break;
1877 	case ISCSI_OP_NOOP_IN:
1878 		/* Target only */
1879 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1880 		break;
1881 	case ISCSI_OP_TEXT_RSP:
1882 		/* Target only */
1883 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1884 		break;
1885 		/* Initiator only */
1886 	case ISCSI_OP_SCSI_CMD:
1887 	case ISCSI_OP_SCSI_TASK_MGT_MSG:
1888 	case ISCSI_OP_SCSI_DATA:
1889 	case ISCSI_OP_NOOP_OUT:
1890 	case ISCSI_OP_TEXT_CMD:
1891 	case ISCSI_OP_SNACK_CMD:
1892 	case ISCSI_OP_REJECT_MSG:
1893 	default:
1894 		/*
1895 		 * Connection state machine will validate these PDU's against
1896 		 * the current state.  A PDU not allowed in the current
1897 		 * state will cause a protocol error.
1898 		 */
1899 		idm_conn_tx_pdu_event(ic, CE_MISC_TX, (uintptr_t)pdu);
1900 		break;
1901 	}
1902 	mutex_exit(&ic->ic_state_mutex);
1903 }
1904 
1905 /*
1906  * Common allocation of a PDU along with memory for header and data.
1907  */
1908 static idm_pdu_t *
1909 idm_pdu_alloc_common(uint_t hdrlen, uint_t datalen, int sleepflag)
1910 {
1911 	idm_pdu_t *result;
1912 
1913 	/*
1914 	 * IDM clients should cache these structures for performance
1915 	 * critical paths.  We can't cache effectively in IDM because we
1916 	 * don't know the correct header and data size.
1917 	 *
1918 	 * Valid header length is assumed to be hdrlen and valid data
1919 	 * length is assumed to be datalen.  isp_hdrlen and isp_datalen
1920 	 * can be adjusted after the PDU is returned if necessary.
1921 	 */
1922 	result = kmem_zalloc(sizeof (idm_pdu_t) + hdrlen + datalen, sleepflag);
1923 	if (result != NULL) {
1924 		/* For idm_pdu_free sanity check */
1925 		result->isp_flags |= IDM_PDU_ALLOC;
1926 		/* pointer arithmetic */
1927 		result->isp_hdr = (iscsi_hdr_t *)(result + 1);
1928 		result->isp_hdrlen = hdrlen;
1929 		result->isp_hdrbuflen = hdrlen;
1930 		result->isp_transport_hdrlen = 0;
1931 		if (datalen != 0)
1932 			result->isp_data = (uint8_t *)result->isp_hdr + hdrlen;
1933 		result->isp_datalen = datalen;
1934 		result->isp_databuflen = datalen;
1935 		result->isp_magic = IDM_PDU_MAGIC;
1936 	}
1937 
1938 	return (result);
1939 }
1940 
1941 /*
1942  * Typical idm_pdu_alloc invocation, will block for resources.
1943  */
idm_pdu_t *
idm_pdu_alloc(uint_t hdrlen, uint_t datalen)
{
	/* KM_SLEEP: never returns NULL, may block */
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_SLEEP));
}
1949 
1950 /*
1951  * Non-blocking idm_pdu_alloc implementation, returns NULL if resources
1952  * are not available.  Needed for transport-layer allocations which may
1953  * be invoking in interrupt context.
1954  */
idm_pdu_t *
idm_pdu_alloc_nosleep(uint_t hdrlen, uint_t datalen)
{
	/* KM_NOSLEEP: may return NULL; safe from interrupt context */
	return (idm_pdu_alloc_common(hdrlen, datalen, KM_NOSLEEP));
}
1960 
1961 /*
1962  * Free a PDU previously allocated with idm_pdu_alloc() including any
1963  * header and data space allocated as part of the original request.
1964  * Additional memory regions referenced by subsequent modification of
1965  * the isp_hdr and/or isp_data fields will not be freed.
1966  */
void
idm_pdu_free(idm_pdu_t *pdu)
{
	/* Make sure the structure was allocated using idm_pdu_alloc() */
	ASSERT(pdu->isp_flags & IDM_PDU_ALLOC);
	/* Free the single allocation covering struct + header + data */
	kmem_free(pdu,
	    sizeof (idm_pdu_t) + pdu->isp_hdrbuflen + pdu->isp_databuflen);
}
1975 
1976 /*
1977  * Initialize the connection, private and callback fields in a PDU.
1978  */
void
idm_pdu_init(idm_pdu_t *pdu, idm_conn_t *ic, void *private, idm_pdu_cb_t *cb)
{
	/*
	 * idm_pdu_complete() will call idm_pdu_free if the callback is
	 * NULL.  This will only work if the PDU was originally allocated
	 * with idm_pdu_alloc().
	 */
	ASSERT((pdu->isp_flags & IDM_PDU_ALLOC) ||
	    (cb != NULL));
	pdu->isp_magic = IDM_PDU_MAGIC;
	pdu->isp_ic = ic;
	pdu->isp_private = private;
	pdu->isp_callback = cb;
}
1994 
1995 /*
1996  * Initialize the header and header length field.  This function should
1997  * not be used to adjust the header length in a buffer allocated via
 * idm_pdu_alloc since it overwrites the existing header pointer.
1999  */
void
idm_pdu_init_hdr(idm_pdu_t *pdu, uint8_t *hdr, uint_t hdrlen)
{
	/* Double cast through void* avoids an alignment cast warning */
	pdu->isp_hdr = (iscsi_hdr_t *)((void *)hdr);
	pdu->isp_hdrlen = hdrlen;
}
2006 
2007 /*
2008  * Initialize the data and data length fields.  This function should
2009  * not be used to adjust the data length of a buffer allocated via
2010  * idm_pdu_alloc since it overwrites the existing data pointer.
2011  */
void
idm_pdu_init_data(idm_pdu_t *pdu, uint8_t *data, uint_t datalen)
{
	/* Overwrites any data pointer set up by idm_pdu_alloc() */
	pdu->isp_data = data;
	pdu->isp_datalen = datalen;
}
2018 
2019 void
2020 idm_pdu_complete(idm_pdu_t *pdu, idm_status_t status)
2021 {
2022 	if (pdu->isp_callback) {
2023 		pdu->isp_status = status;
2024 		(*pdu->isp_callback)(pdu, status);
2025 	} else {
2026 		idm_pdu_free(pdu);
2027 	}
2028 }
2029 
2030 /*
2031  * State machine auditing
2032  */
2033 
void
idm_sm_audit_init(sm_audit_buf_t *audit_buf)
{
	bzero(audit_buf, sizeof (sm_audit_buf_t));
	/* Power-of-two record count assumed: max_index doubles as a mask */
	audit_buf->sab_max_index = SM_AUDIT_BUF_MAX_REC - 1;
}
2040 
2041 static
2042 sm_audit_record_t *
2043 idm_sm_audit_common(sm_audit_buf_t *audit_buf, sm_audit_record_type_t r_type,
2044     sm_audit_sm_type_t sm_type,
2045     int current_state)
2046 {
2047 	sm_audit_record_t *sar;
2048 
2049 	sar = audit_buf->sab_records;
2050 	sar += audit_buf->sab_index;
2051 	audit_buf->sab_index++;
2052 	audit_buf->sab_index &= audit_buf->sab_max_index;
2053 
2054 	sar->sar_type = r_type;
2055 	gethrestime(&sar->sar_timestamp);
2056 	sar->sar_sm_type = sm_type;
2057 	sar->sar_state = current_state;
2058 
2059 	return (sar);
2060 }
2061 
2062 void
2063 idm_sm_audit_event(sm_audit_buf_t *audit_buf,
2064     sm_audit_sm_type_t sm_type, int current_state,
2065     int event, uintptr_t event_info)
2066 {
2067 	sm_audit_record_t *sar;
2068 
2069 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_EVENT,
2070 	    sm_type, current_state);
2071 	sar->sar_event = event;
2072 	sar->sar_event_info = event_info;
2073 }
2074 
2075 void
2076 idm_sm_audit_state_change(sm_audit_buf_t *audit_buf,
2077     sm_audit_sm_type_t sm_type, int current_state, int new_state)
2078 {
2079 	sm_audit_record_t *sar;
2080 
2081 	sar = idm_sm_audit_common(audit_buf, SAR_STATE_CHANGE,
2082 	    sm_type, current_state);
2083 	sar->sar_new_state = new_state;
2084 }
2085 
2086 
2087 /*
2088  * Object reference tracking
2089  */
2090 
2091 void
2092 idm_refcnt_init(idm_refcnt_t *refcnt, void *referenced_obj)
2093 {
2094 	bzero(refcnt, sizeof (*refcnt));
2095 	idm_refcnt_reset(refcnt);
2096 	refcnt->ir_referenced_obj = referenced_obj;
2097 	bzero(&refcnt->ir_audit_buf, sizeof (refcnt_audit_buf_t));
2098 	refcnt->ir_audit_buf.anb_max_index = REFCNT_AUDIT_BUF_MAX_REC - 1;
2099 	mutex_init(&refcnt->ir_mutex, NULL, MUTEX_DEFAULT, NULL);
2100 	cv_init(&refcnt->ir_cv, NULL, CV_DEFAULT, NULL);
2101 }
2102 
2103 void
2104 idm_refcnt_destroy(idm_refcnt_t *refcnt)
2105 {
2106 	/*
2107 	 * Grab the mutex to there are no other lingering threads holding
2108 	 * the mutex before we destroy it (e.g. idm_refcnt_rele just after
2109 	 * the refcnt goes to zero if ir_waiting == REF_WAIT_ASYNC)
2110 	 */
2111 	mutex_enter(&refcnt->ir_mutex);
2112 	ASSERT(refcnt->ir_refcnt == 0);
2113 	cv_destroy(&refcnt->ir_cv);
2114 	mutex_destroy(&refcnt->ir_mutex);
2115 }
2116 
2117 void
2118 idm_refcnt_reset(idm_refcnt_t *refcnt)
2119 {
2120 	refcnt->ir_waiting = REF_NOWAIT;
2121 	refcnt->ir_refcnt = 0;
2122 }
2123 
2124 void
2125 idm_refcnt_hold(idm_refcnt_t *refcnt)
2126 {
2127 	/*
2128 	 * Nothing should take a hold on an object after a call to
2129 	 * idm_refcnt_wait_ref or idm_refcnd_async_wait_ref
2130 	 */
2131 	ASSERT(refcnt->ir_waiting == REF_NOWAIT);
2132 
2133 	mutex_enter(&refcnt->ir_mutex);
2134 	refcnt->ir_refcnt++;
2135 	REFCNT_AUDIT(refcnt);
2136 	mutex_exit(&refcnt->ir_mutex);
2137 }
2138 
2139 static void
2140 idm_refcnt_unref_task(void *refcnt_void)
2141 {
2142 	idm_refcnt_t *refcnt = refcnt_void;
2143 
2144 	REFCNT_AUDIT(refcnt);
2145 	(*refcnt->ir_cb)(refcnt->ir_referenced_obj);
2146 }
2147 
2148 void
2149 idm_refcnt_rele(idm_refcnt_t *refcnt)
2150 {
2151 	mutex_enter(&refcnt->ir_mutex);
2152 	ASSERT(refcnt->ir_refcnt > 0);
2153 	refcnt->ir_refcnt--;
2154 	REFCNT_AUDIT(refcnt);
2155 	if (refcnt->ir_waiting == REF_NOWAIT) {
2156 		/* No one is waiting on this object */
2157 		mutex_exit(&refcnt->ir_mutex);
2158 		return;
2159 	}
2160 
2161 	/*
2162 	 * Someone is waiting for this object to go idle so check if
2163 	 * refcnt is 0.  Waiting on an object then later grabbing another
2164 	 * reference is not allowed so we don't need to handle that case.
2165 	 */
2166 	if (refcnt->ir_refcnt == 0) {
2167 		if (refcnt->ir_waiting == REF_WAIT_ASYNC) {
2168 			if (taskq_dispatch(idm.idm_global_taskq,
2169 			    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2170 				cmn_err(CE_WARN,
2171 				    "idm_refcnt_rele: Couldn't dispatch task");
2172 			}
2173 		} else if (refcnt->ir_waiting == REF_WAIT_SYNC) {
2174 			cv_signal(&refcnt->ir_cv);
2175 		}
2176 	}
2177 	mutex_exit(&refcnt->ir_mutex);
2178 }
2179 
2180 void
2181 idm_refcnt_rele_and_destroy(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2182 {
2183 	mutex_enter(&refcnt->ir_mutex);
2184 	ASSERT(refcnt->ir_refcnt > 0);
2185 	refcnt->ir_refcnt--;
2186 	REFCNT_AUDIT(refcnt);
2187 
2188 	/*
2189 	 * Someone is waiting for this object to go idle so check if
2190 	 * refcnt is 0.  Waiting on an object then later grabbing another
2191 	 * reference is not allowed so we don't need to handle that case.
2192 	 */
2193 	if (refcnt->ir_refcnt == 0) {
2194 		refcnt->ir_cb = cb_func;
2195 		refcnt->ir_waiting = REF_WAIT_ASYNC;
2196 		if (taskq_dispatch(idm.idm_global_taskq,
2197 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2198 			cmn_err(CE_WARN,
2199 			    "idm_refcnt_rele: Couldn't dispatch task");
2200 		}
2201 	}
2202 	mutex_exit(&refcnt->ir_mutex);
2203 }
2204 
2205 void
2206 idm_refcnt_wait_ref(idm_refcnt_t *refcnt)
2207 {
2208 	mutex_enter(&refcnt->ir_mutex);
2209 	refcnt->ir_waiting = REF_WAIT_SYNC;
2210 	REFCNT_AUDIT(refcnt);
2211 	while (refcnt->ir_refcnt != 0)
2212 		cv_wait(&refcnt->ir_cv, &refcnt->ir_mutex);
2213 	mutex_exit(&refcnt->ir_mutex);
2214 }
2215 
2216 void
2217 idm_refcnt_async_wait_ref(idm_refcnt_t *refcnt, idm_refcnt_cb_t *cb_func)
2218 {
2219 	mutex_enter(&refcnt->ir_mutex);
2220 	refcnt->ir_waiting = REF_WAIT_ASYNC;
2221 	refcnt->ir_cb = cb_func;
2222 	REFCNT_AUDIT(refcnt);
2223 	/*
2224 	 * It's possible we don't have any references.  To make things easier
2225 	 * on the caller use a taskq to call the callback instead of
2226 	 * calling it synchronously
2227 	 */
2228 	if (refcnt->ir_refcnt == 0) {
2229 		if (taskq_dispatch(idm.idm_global_taskq,
2230 		    &idm_refcnt_unref_task, refcnt, TQ_SLEEP) == NULL) {
2231 			cmn_err(CE_WARN,
2232 			    "idm_refcnt_async_wait_ref: "
2233 			    "Couldn't dispatch task");
2234 		}
2235 	}
2236 	mutex_exit(&refcnt->ir_mutex);
2237 }
2238 
2239 void
2240 idm_refcnt_destroy_unref_obj(idm_refcnt_t *refcnt,
2241     idm_refcnt_cb_t *cb_func)
2242 {
2243 	mutex_enter(&refcnt->ir_mutex);
2244 	if (refcnt->ir_refcnt == 0) {
2245 		mutex_exit(&refcnt->ir_mutex);
2246 		(*cb_func)(refcnt->ir_referenced_obj);
2247 		return;
2248 	}
2249 	mutex_exit(&refcnt->ir_mutex);
2250 }
2251 
/*
 * Take a hold on a connection via its embedded reference count.
 */
void
idm_conn_hold(idm_conn_t *ic)
{
	idm_refcnt_hold(&ic->ic_refcnt);
}
2257 
/*
 * Release a hold on a connection via its embedded reference count.
 */
void
idm_conn_rele(idm_conn_t *ic)
{
	idm_refcnt_rele(&ic->ic_refcnt);
}
2263 
2264 
/*
 * One-time module initialization.  Sets up, in order: the task ID table
 * lock, the global mutex and CVs, the global taskq, the watchdog thread,
 * the task ID table, the buffer/task kmem caches, the service and
 * connection lists, the native sockets transport, and the connection ID
 * pool.  Returns DDI_SUCCESS, or ENOMEM if the taskq or watchdog thread
 * could not be created (anything created earlier is torn down first).
 */
static int
_idm_init(void)
{
	/* Initialize the rwlock for the taskid table */
	rw_init(&idm.idm_taskid_table_lock, NULL, RW_DRIVER, NULL);

	/* Initialize the global mutex and taskq */
	mutex_init(&idm.idm_global_mutex, NULL, MUTEX_DEFAULT, NULL);

	cv_init(&idm.idm_tgt_svc_cv, NULL, CV_DEFAULT, NULL);
	cv_init(&idm.idm_wd_cv, NULL, CV_DEFAULT, NULL);

	/*
	 * The maximum allocation needs to be high here since there can be
	 * many concurrent tasks using the global taskq.
	 */
	idm.idm_global_taskq = taskq_create("idm_global_taskq", 1, minclsyspri,
	    128, 16384, TASKQ_PREPOPULATE);
	if (idm.idm_global_taskq == NULL) {
		/* Unwind everything initialized above, newest first. */
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Start watchdog thread */
	idm.idm_wd_thread = thread_create(NULL, 0,
	    idm_wd_thread, NULL, 0, &p0, TS_RUN, minclsyspri);
	if (idm.idm_wd_thread == NULL) {
		/* Couldn't create the watchdog thread */
		taskq_destroy(idm.idm_global_taskq);
		cv_destroy(&idm.idm_wd_cv);
		cv_destroy(&idm.idm_tgt_svc_cv);
		mutex_destroy(&idm.idm_global_mutex);
		rw_destroy(&idm.idm_taskid_table_lock);
		return (ENOMEM);
	}

	/* Pause until the watchdog thread is running */
	mutex_enter(&idm.idm_global_mutex);
	while (!idm.idm_wd_thread_running)
		cv_wait(&idm.idm_wd_cv, &idm.idm_global_mutex);
	mutex_exit(&idm.idm_global_mutex);

	/*
	 * Allocate the task ID table and set "next" to 0.
	 */

	idm.idm_taskid_max = idm_max_taskids;
	idm.idm_taskid_table = (idm_task_t **)
	    kmem_zalloc(idm.idm_taskid_max * sizeof (idm_task_t *), KM_SLEEP);
	idm.idm_taskid_next = 0;

	/* Create the global buffer and task kmem caches */
	idm.idm_buf_cache = kmem_cache_create("idm_buf_cache",
	    sizeof (idm_buf_t), 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);

	/*
	 * Note, we're explicitly allocating an additional iSER header-
	 * sized chunk for each of these elements. See idm_task_constructor().
	 */
	idm.idm_task_cache = kmem_cache_create("idm_task_cache",
	    sizeof (idm_task_t) + IDM_TRANSPORT_HEADER_LENGTH, 8,
	    &idm_task_constructor, &idm_task_destructor,
	    NULL, NULL, NULL, KM_SLEEP);

	/* Create the service and connection context lists */
	list_create(&idm.idm_tgt_svc_list, sizeof (idm_svc_t),
	    offsetof(idm_svc_t, is_list_node));
	list_create(&idm.idm_tgt_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));
	list_create(&idm.idm_ini_conn_list, sizeof (idm_conn_t),
	    offsetof(idm_conn_t, ic_list_node));

	/* Initialize the native sockets transport */
	idm_so_init(&idm_transport_list[IDM_TRANSPORT_TYPE_SOCKETS]);

	/* Create connection ID pool */
	(void) idm_idpool_create(&idm.idm_conn_id_pool);

	return (DDI_SUCCESS);
}
2348 
/*
 * Module teardown, the mirror of _idm_init.  Refuses with EBUSY while
 * any connection or service contexts still exist.  Otherwise stops the
 * watchdog thread, then destroys the connection ID pool, LDI transport
 * handles, sockets transport, lists, kmem caches, task ID table, and
 * global synchronization objects.  Returns 0 on success.
 */
static int
_idm_fini(void)
{
	/* Fail if any consumer still holds connections or services. */
	if (!list_is_empty(&idm.idm_ini_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_conn_list) ||
	    !list_is_empty(&idm.idm_tgt_svc_list)) {
		return (EBUSY);
	}

	/* Tell the watchdog thread to exit, then wait for it. */
	mutex_enter(&idm.idm_global_mutex);
	idm.idm_wd_thread_running = B_FALSE;
	cv_signal(&idm.idm_wd_cv);
	mutex_exit(&idm.idm_global_mutex);

	thread_join(idm.idm_wd_thread_did);

	idm_idpool_destroy(&idm.idm_conn_id_pool);

	/* Close any LDI handles we have open on transport drivers */
	mutex_enter(&idm.idm_global_mutex);
	idm_transport_teardown();
	mutex_exit(&idm.idm_global_mutex);

	/* Teardown the native sockets transport */
	idm_so_fini();

	list_destroy(&idm.idm_ini_conn_list);
	list_destroy(&idm.idm_tgt_conn_list);
	list_destroy(&idm.idm_tgt_svc_list);
	kmem_cache_destroy(idm.idm_task_cache);
	kmem_cache_destroy(idm.idm_buf_cache);
	kmem_free(idm.idm_taskid_table,
	    idm.idm_taskid_max * sizeof (idm_task_t *));
	mutex_destroy(&idm.idm_global_mutex);
	cv_destroy(&idm.idm_wd_cv);
	cv_destroy(&idm.idm_tgt_svc_cv);
	rw_destroy(&idm.idm_taskid_table_lock);

	return (0);
}
2389