xref: /illumos-gate/usr/src/uts/common/io/idm/idm_so.c (revision f99db78f)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /*
26  * Copyright (c) 2013 by Delphix. All rights reserved.
27  */
28 
29 #include <sys/conf.h>
30 #include <sys/stat.h>
31 #include <sys/file.h>
32 #include <sys/ddi.h>
33 #include <sys/sunddi.h>
34 #include <sys/modctl.h>
35 #include <sys/priv.h>
36 #include <sys/cpuvar.h>
37 #include <sys/socket.h>
38 #include <sys/strsubr.h>
39 #include <sys/sysmacros.h>
40 #include <sys/sdt.h>
41 #include <netinet/tcp.h>
42 #include <inet/tcp.h>
43 #include <sys/socketvar.h>
44 #include <sys/pathname.h>
45 #include <sys/fs/snode.h>
46 #include <sys/fs/dv_node.h>
47 #include <sys/vnode.h>
48 #include <netinet/in.h>
49 #include <net/if.h>
50 #include <sys/sockio.h>
51 #include <sys/ksocket.h>
52 #include <sys/filio.h>		/* FIONBIO */
53 #include <sys/iscsi_protocol.h>
54 #include <sys/idm/idm.h>
55 #include <sys/idm/idm_so.h>
56 #include <sys/idm/idm_text.h>
57 
/*
 * Seconds to wait before retrying while a non-blocking TCP connect is
 * still in progress (converted with SEC_TO_TICK in
 * idm_so_ini_conn_connect()).
 */
#define	IN_PROGRESS_DELAY	1

/*
 * in6addr_any is currently all zeroes, but use the macro in case this
 * ever changes.
 */
static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
65 
/*
 * PDU callbacks (idm_sorx_addl_pdu_cb is installed as pdu->isp_callback
 * by idm_sorecvhdr() when an oversized header had to be allocated).
 */
static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);

/* Connection setup/teardown helpers shared by initiator and target paths */
static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
static void idm_so_conn_destroy_common(idm_conn_t *ic);
static void idm_so_conn_connect_common(idm_conn_t *ic);

/* Socket option helpers and low-level transmit */
static void idm_set_ini_preconnect_options(idm_so_conn_t *sc,
    boolean_t boot_conn);
static void idm_set_postconnect_options(ksocket_t so);
static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);

/* Data receive / R2T data transmit helpers */
static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
    idm_buf_t *idb, uint32_t offset, uint32_t length);
static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
    idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);

static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
    uint32_t ro, uint32_t dlength);

static idm_status_t idm_so_handle_digest(idm_conn_t *it,
    nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);

/* Toggle blocking mode on the sonode behind a ksocket */
static void idm_so_socket_set_nonblock(struct sonode *node);
static void idm_so_socket_set_block(struct sonode *node);

/*
 * Transport ops prototypes
 */
static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
    nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
static void idm_so_notice_key_values(idm_conn_t *it,
    nvlist_t *negotiated_nvl);
static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
    nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
    idm_transport_caps_t *caps);
static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
static void idm_so_buf_free(idm_buf_t *idb);
static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
static void idm_so_buf_teardown(idm_buf_t *idb);
static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
static void idm_so_tgt_svc_destroy(idm_svc_t *is);
static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
static void idm_so_tgt_svc_offline(idm_svc_t *is);
static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
static void idm_so_conn_disconnect(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
static void idm_so_ini_conn_destroy(idm_conn_t *ic);
static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
127 
/*
 * IDM Native Sockets transport operations
 *
 * The initializer is positional, so the entries must stay in the same
 * order as the members of idm_transport_ops_t (named in the trailing
 * comments).  NULL entries are operations this transport leaves unset.
 * The same idm_so_conn_disconnect() routine serves both the target and
 * the initiator disconnect slots.
 */
static
idm_transport_ops_t idm_so_transport_ops = {
	idm_so_tx,			/* it_tx_pdu */
	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
	idm_so_rx_datain,		/* it_rx_datain */
	idm_so_rx_rtt,			/* it_rx_rtt */
	idm_so_rx_dataout,		/* it_rx_dataout */
	NULL,				/* it_alloc_conn_rsrc */
	NULL,				/* it_free_conn_rsrc */
	NULL,				/* it_tgt_enable_datamover */
	NULL,				/* it_ini_enable_datamover */
	NULL,				/* it_conn_terminate */
	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
	idm_so_notice_key_values,	/* it_notice_key_values */
	idm_so_conn_is_capable,		/* it_conn_is_capable */
	idm_so_buf_alloc,		/* it_buf_alloc */
	idm_so_buf_free,		/* it_buf_free */
	idm_so_buf_setup,		/* it_buf_setup */
	idm_so_buf_teardown,		/* it_buf_teardown */
	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
	idm_so_ini_conn_create,		/* it_ini_conn_create */
	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
	idm_so_declare_key_values	/* it_declare_key_values */
};
165 
/* Initialized in idm_so_init() and destroyed in idm_so_fini() */
kmutex_t	idm_so_timed_socket_mutex;

/*
 * Socket buffer sizes applied with SO_SNDBUF/SO_RCVBUF by
 * idm_set_postconnect_options().
 */
int32_t idm_so_sndbuf = IDM_SNDBUF_SIZE;
int32_t idm_so_rcvbuf = IDM_RCVBUF_SIZE;
170 
171 /*
172  * idm_so_init()
173  * Sockets transport initialization
174  */
175 void
176 idm_so_init(idm_transport_t *it)
177 {
178 	/* Cache for IDM Data and R2T Transmit PDU's */
179 	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
180 	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
181 	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
182 
183 	/* Cache for IDM Receive PDU's */
184 	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
185 	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
186 	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
187 
188 	/* 128k buffer cache */
189 	idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
190 	    IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
191 
192 	/* Set the sockets transport ops */
193 	it->it_ops = &idm_so_transport_ops;
194 
195 	mutex_init(&idm_so_timed_socket_mutex, NULL, MUTEX_DEFAULT, NULL);
196 
197 }
198 
199 /*
200  * idm_so_fini()
201  * Sockets transport teardown
202  */
203 void
204 idm_so_fini(void)
205 {
206 	kmem_cache_destroy(idm.idm_so_128k_buf_cache);
207 	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
208 	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
209 	mutex_destroy(&idm_so_timed_socket_mutex);
210 }
211 
212 ksocket_t
213 idm_socreate(int domain, int type, int protocol)
214 {
215 	ksocket_t ks;
216 
217 	if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
218 	    CRED())) {
219 		return (ks);
220 	} else {
221 		return (NULL);
222 	}
223 }
224 
/*
 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 * reception and transmission.  The sonode still exists but its state
 * gets modified to indicate it is no longer connected.  Calls to
 * idm_sorecv/idm_iov_sorecv will return, so idm_soshutdown can be used
 * to regain control of a thread stuck in idm_sorecv.
 */
void
idm_soshutdown(ksocket_t so)
{
	/* Shut down both directions; the result is deliberately ignored */
	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
}
237 
/*
 * idm_sodestroy releases all resources associated with a socket previously
 * created with idm_socreate.  The socket must be shutdown using
 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 * otherwise undefined behavior will result.
 */
void
idm_sodestroy(ksocket_t ks)
{
	/* The result of ksocket_close() is deliberately ignored */
	(void) ksocket_close(ks, CRED());
}
249 
250 /*
251  * Function to compare two addresses in sockaddr_storage format
252  */
253 
254 int
255 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
256     const struct sockaddr_storage *cmp_ss2,
257     boolean_t v4_mapped_as_v4,
258     boolean_t compare_ports)
259 {
260 	struct sockaddr_storage			mapped_v4_ss1, mapped_v4_ss2;
261 	const struct sockaddr_storage		*ss1, *ss2;
262 	struct in_addr				*in1, *in2;
263 	struct in6_addr				*in61, *in62;
264 	int i;
265 
266 	/*
267 	 * Normalize V4-mapped IPv6 addresses into V4 format if
268 	 * v4_mapped_as_v4 is B_TRUE.
269 	 */
270 	ss1 = cmp_ss1;
271 	ss2 = cmp_ss2;
272 	if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
273 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
274 		if (IN6_IS_ADDR_V4MAPPED(in61)) {
275 			bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
276 			mapped_v4_ss1.ss_family = AF_INET;
277 			((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
278 			    ((struct sockaddr_in *)ss1)->sin_port;
279 			IN6_V4MAPPED_TO_INADDR(in61,
280 			    &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
281 			ss1 = &mapped_v4_ss1;
282 		}
283 	}
284 	ss2 = cmp_ss2;
285 	if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
286 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
287 		if (IN6_IS_ADDR_V4MAPPED(in62)) {
288 			bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
289 			mapped_v4_ss2.ss_family = AF_INET;
290 			((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
291 			    ((struct sockaddr_in *)ss2)->sin_port;
292 			IN6_V4MAPPED_TO_INADDR(in62,
293 			    &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
294 			ss2 = &mapped_v4_ss2;
295 		}
296 	}
297 
298 	/*
299 	 * Compare ports, then address family, then ip address
300 	 */
301 	if (compare_ports &&
302 	    (((struct sockaddr_in *)ss1)->sin_port !=
303 	    ((struct sockaddr_in *)ss2)->sin_port)) {
304 		if (((struct sockaddr_in *)ss1)->sin_port >
305 		    ((struct sockaddr_in *)ss2)->sin_port)
306 			return (1);
307 		else
308 			return (-1);
309 	}
310 
311 	/*
312 	 * ports are the same
313 	 */
314 	if (ss1->ss_family != ss2->ss_family) {
315 		if (ss1->ss_family == AF_INET)
316 			return (1);
317 		else
318 			return (-1);
319 	}
320 
321 	/*
322 	 * address families are the same
323 	 */
324 	if (ss1->ss_family == AF_INET) {
325 		in1 = &((struct sockaddr_in *)ss1)->sin_addr;
326 		in2 = &((struct sockaddr_in *)ss2)->sin_addr;
327 
328 		if (in1->s_addr > in2->s_addr)
329 			return (1);
330 		else if (in1->s_addr < in2->s_addr)
331 			return (-1);
332 		else
333 			return (0);
334 	} else if (ss1->ss_family == AF_INET6) {
335 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
336 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
337 
338 		for (i = 0; i < 4; i++) {
339 			if (in61->s6_addr32[i] > in62->s6_addr32[i])
340 				return (1);
341 			else if (in61->s6_addr32[i] < in62->s6_addr32[i])
342 				return (-1);
343 		}
344 		return (0);
345 	}
346 
347 	return (1);
348 }
349 
350 /*
351  * IP address filter functions to flag addresses that should not
352  * go out to initiators through discovery.
353  */
354 static boolean_t
355 idm_v4_addr_okay(struct in_addr *in_addr)
356 {
357 	in_addr_t addr = ntohl(in_addr->s_addr);
358 
359 	if ((INADDR_NONE == addr) ||
360 	    (IN_MULTICAST(addr)) ||
361 	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
362 	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
363 		return (B_FALSE);
364 	}
365 	return (B_TRUE);
366 }
367 
368 static boolean_t
369 idm_v6_addr_okay(struct in6_addr *addr6)
370 {
371 
372 	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
373 	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
374 	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
375 	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
376 	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
377 	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
378 		return (B_FALSE);
379 	}
380 	return (B_TRUE);
381 }
382 
/*
 * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
 * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
 *
 * On return *ipaddr_p points to a newly allocated idm_addr_list_t whose
 * al_out_cnt gives the number of valid entries in al_addrs[], or is NULL
 * if no usable address was found or an error occurred.
 *
 * The return value is the size in bytes of that allocation (0 when
 * *ipaddr_p is NULL).  The allocation is sized for every interface
 * reported by SIOCGLIFCONF, which may be more than al_out_cnt entries;
 * presumably the caller passes the returned size to kmem_free() - confirm
 * against callers.
 */
int
idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
{
	ksocket_t 		so4, so6;
	struct lifnum		lifn;
	struct lifconf		lifc;
	struct lifreq		*lp;
	int			rval;
	int			numifs;
	int			bufsize;
	void			*buf;
	int			i, j, n, rc;
	struct sockaddr_storage	ss;
	struct sockaddr_in	*sin;
	struct sockaddr_in6	*sin6;
	idm_addr_t		*ip;
	idm_addr_list_t		*ipaddr = NULL;
	int			size_ipaddr;

	*ipaddr_p = NULL;
	size_ipaddr = 0;
	buf = NULL;

	/* create an ipv4 and ipv6 UDP socket */
	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
		return (0);
	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
		idm_sodestroy(so6);
		return (0);
	}


retry_count:
	/* snapshot the current number of interfaces */
	lifn.lifn_family = PF_UNSPEC;
	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
	lifn.lifn_count = 0;
	/* use so6 for ioctls with unspecified families by default */
	if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
	    != 0) {
		goto cleanup;
	}

	numifs = lifn.lifn_count;
	if (numifs <= 0) {
		goto cleanup;
	}

	/* allocate extra room in case more interfaces appear */
	numifs += 10;

	/* get the interface names and ip addresses */
	bufsize = numifs * sizeof (struct lifreq);
	buf = kmem_alloc(bufsize, KM_SLEEP);

	lifc.lifc_family = AF_UNSPEC;
	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
	lifc.lifc_len = bufsize;
	lifc.lifc_buf = buf;
	rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
	if (rc != 0) {
		goto cleanup;
	}
	/*
	 * if our extra room is used up, try again (the buffer is released
	 * here and a fresh count is taken at retry_count)
	 */
	if (bufsize <= lifc.lifc_len) {
		kmem_free(buf, bufsize);
		buf = NULL;
		goto retry_count;
	}
	/* calc actual number of ifconfs */
	n = lifc.lifc_len / sizeof (struct lifreq);

	/*
	 * get ip address: the list is sized for n entries (the list
	 * structure itself accounts for the first entry, hence n - 1)
	 */
	if (n > 0) {
		size_ipaddr = sizeof (idm_addr_list_t) +
		    (n - 1) * sizeof (idm_addr_t);
		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
	} else {
		goto cleanup;
	}

	/*
	 * Examine the array of interfaces and filter uninteresting ones.
	 * i walks the interfaces reported; j counts the addresses kept.
	 */
	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {

		/*
		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
		 */
		ss = lp->lifr_addr;
		/*
		 * fetch the flags using the socket of the correct family
		 */
		switch (ss.ss_family) {
		case AF_INET:
			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
			    &rval, CRED());
			break;
		case AF_INET6:
			rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
			    &rval, CRED());
			break;
		default:
			continue;
		}
		if (rc == 0) {
			/*
			 * If we got the flags, skip uninteresting
			 * interfaces based on flags.  If the ioctl failed
			 * the address is still considered below.
			 */
			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
				continue;
			if (lp->lifr_flags &
			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
				continue;
		}

		/* save ip address (slot j is only consumed by the j++ below) */
		ip = &ipaddr->al_addrs[j];
		switch (ss.ss_family) {
		case AF_INET:
			sin = (struct sockaddr_in *)&ss;
			if (!idm_v4_addr_okay(&sin->sin_addr))
				continue;
			ip->a_addr.i_addr.in4 = sin->sin_addr;
			ip->a_addr.i_insize = sizeof (struct in_addr);
			break;
		case AF_INET6:
			sin6 = (struct sockaddr_in6 *)&ss;
			if (!idm_v6_addr_okay(&sin6->sin6_addr))
				continue;
			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
			ip->a_addr.i_insize = sizeof (struct in6_addr);
			break;
		default:
			continue;
		}
		j++;
	}

	if (j == 0) {
		/* no valid ifaddr */
		kmem_free(ipaddr, size_ipaddr);
		size_ipaddr = 0;
		ipaddr = NULL;
	} else {
		ipaddr->al_out_cnt = j;
	}


cleanup:
	/* common exit: close both sockets and release the lifconf buffer */
	idm_sodestroy(so6);
	idm_sodestroy(so4);

	if (buf != NULL)
		kmem_free(buf, bufsize);

	*ipaddr_p = ipaddr;
	return (size_ipaddr);
}
547 
548 int
549 idm_sorecv(ksocket_t so, void *msg, size_t len)
550 {
551 	iovec_t iov;
552 
553 	ASSERT(so != NULL);
554 	ASSERT(len != 0);
555 
556 	/*
557 	 * Fill in iovec and receive data
558 	 */
559 	iov.iov_base = msg;
560 	iov.iov_len = len;
561 
562 	return (idm_iov_sorecv(so, &iov, 1, len));
563 }
564 
565 /*
566  * idm_sosendto - Sends a buffered data on a non-connected socket.
567  *
568  * This function puts the data provided on the wire by calling sosendmsg.
569  * It will return only when all the data has been sent or if an error
570  * occurs.
571  *
572  * Returns 0 for success, the socket errno value if sosendmsg fails, and
573  * -1 if sosendmsg returns success but uio_resid != 0
574  */
575 int
576 idm_sosendto(ksocket_t so, void *buff, size_t len,
577     struct sockaddr *name, socklen_t namelen)
578 {
579 	struct msghdr		msg;
580 	struct iovec		iov[1];
581 	int			error;
582 	size_t			sent = 0;
583 
584 	iov[0].iov_base	= buff;
585 	iov[0].iov_len	= len;
586 
587 	/* Initialization of the message header. */
588 	bzero(&msg, sizeof (msg));
589 	msg.msg_iov	= iov;
590 	msg.msg_iovlen	= 1;
591 	msg.msg_name	= name;
592 	msg.msg_namelen	= namelen;
593 
594 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
595 		/* Data sent */
596 		if (sent == len) {
597 			/* All data sent.  Success. */
598 			return (0);
599 		} else {
600 			/* Not all data was sent.  Failure */
601 			return (-1);
602 		}
603 	}
604 
605 	/* Send failed */
606 	return (error);
607 }
608 
609 /*
610  * idm_iov_sosend - Sends an iovec on a connection.
611  *
612  * This function puts the data provided on the wire by calling sosendmsg.
613  * It will return only when all the data has been sent or if an error
614  * occurs.
615  *
616  * Returns 0 for success, the socket errno value if sosendmsg fails, and
617  * -1 if sosendmsg returns success but uio_resid != 0
618  */
619 int
620 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
621 {
622 	struct msghdr		msg;
623 	int			error;
624 	size_t 			sent = 0;
625 
626 	ASSERT(iop != NULL);
627 
628 	/* Initialization of the message header. */
629 	bzero(&msg, sizeof (msg));
630 	msg.msg_iov	= iop;
631 	msg.msg_iovlen	= iovlen;
632 
633 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
634 	    == 0) {
635 		/* Data sent */
636 		if (sent == total_len) {
637 			/* All data sent.  Success. */
638 			return (0);
639 		} else {
640 			/* Not all data was sent.  Failure */
641 			return (-1);
642 		}
643 	}
644 
645 	/* Send failed */
646 	return (error);
647 }
648 
649 /*
650  * idm_iov_sorecv - Receives an iovec from a connection
651  *
652  * This function gets the data asked for from the socket.  It will return
653  * only when all the requested data has been retrieved or if an error
654  * occurs.
655  *
656  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
657  * -1 if sorecvmsg returns success but uio_resid != 0
658  */
659 int
660 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
661 {
662 	struct msghdr		msg;
663 	int			error;
664 	size_t			recv;
665 	int 			flags;
666 
667 	ASSERT(iop != NULL);
668 
669 	/* Initialization of the message header. */
670 	bzero(&msg, sizeof (msg));
671 	msg.msg_iov	= iop;
672 	msg.msg_iovlen	= iovlen;
673 	flags		= MSG_WAITALL;
674 
675 	if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
676 	    == 0) {
677 		/* Received data */
678 		if (recv == total_len) {
679 			/* All requested data received.  Success */
680 			return (0);
681 		} else {
682 			/*
683 			 * Not all data was received.  The connection has
684 			 * probably failed.
685 			 */
686 			return (-1);
687 		}
688 	}
689 
690 	/* Receive failed */
691 	return (error);
692 }
693 
694 static void
695 idm_set_ini_preconnect_options(idm_so_conn_t *sc, boolean_t boot_conn)
696 {
697 	int	conn_abort = 10000;
698 	int	conn_notify = 2000;
699 	int	abort = 30000;
700 
701 	/* Pre-connect socket options */
702 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
703 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
704 	    CRED());
705 	if (boot_conn == B_FALSE) {
706 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
707 		    TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
708 		    CRED());
709 		(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
710 		    TCP_ABORT_THRESHOLD,
711 		    (char *)&abort, sizeof (int), CRED());
712 	}
713 }
714 
715 static void
716 idm_set_postconnect_options(ksocket_t ks)
717 {
718 	const int	on = 1;
719 
720 	/* Set connect options */
721 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
722 	    (char *)&idm_so_rcvbuf, sizeof (idm_so_rcvbuf), CRED());
723 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
724 	    (char *)&idm_so_sndbuf, sizeof (idm_so_sndbuf), CRED());
725 	(void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
726 	    (char *)&on, sizeof (on), CRED());
727 }
728 
729 static uint32_t
730 n2h24(const uchar_t *ptr)
731 {
732 	return ((ptr[0] << 16) | (ptr[1] << 8) | ptr[2]);
733 }
734 
735 
736 static idm_status_t
737 idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
738 {
739 	iscsi_hdr_t	*bhs;
740 	uint32_t	hdr_digest_crc;
741 	uint32_t	crc_calculated;
742 	void		*new_hdr;
743 	int		ahslen = 0;
744 	int		total_len = 0;
745 	int		iovlen = 0;
746 	struct iovec	iov[2];
747 	idm_so_conn_t	*so_conn;
748 	int		rc;
749 
750 	so_conn = ic->ic_transport_private;
751 
752 	/*
753 	 * Read BHS
754 	 */
755 	bhs = pdu->isp_hdr;
756 	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
757 	if (rc != IDM_STATUS_SUCCESS) {
758 		return (IDM_STATUS_FAIL);
759 	}
760 
761 	/*
762 	 * Check actual AHS length against the amount available in the buffer
763 	 */
764 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
765 	    (bhs->hlength * sizeof (uint32_t));
766 	pdu->isp_datalen = n2h24(bhs->dlength);
767 	if (ic->ic_conn_type == CONN_TYPE_TGT &&
768 	    pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
769 		IDM_CONN_LOG(CE_WARN,
770 		    "idm_sorecvhdr: exceeded the max data segment length");
771 		return (IDM_STATUS_FAIL);
772 	}
773 	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
774 		/* Allocate a new header segment and change the callback */
775 		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
776 		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
777 		pdu->isp_hdr = new_hdr;
778 		pdu->isp_flags |= IDM_PDU_ADDL_HDR;
779 
780 		/*
781 		 * This callback will restore the expected values after
782 		 * the RX PDU has been processed.
783 		 */
784 		pdu->isp_callback = idm_sorx_addl_pdu_cb;
785 	}
786 
787 	/*
788 	 * Setup receipt of additional header and header digest (if enabled).
789 	 */
790 	if (bhs->hlength > 0) {
791 		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
792 		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
793 		iov[iovlen].iov_len = ahslen;
794 		total_len += iov[iovlen].iov_len;
795 		iovlen++;
796 	}
797 
798 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
799 		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
800 		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
801 		total_len += iov[iovlen].iov_len;
802 		iovlen++;
803 	}
804 
805 	if ((iovlen != 0) &&
806 	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
807 	    total_len) != 0)) {
808 		return (IDM_STATUS_FAIL);
809 	}
810 
811 	/*
812 	 * Validate header digest if enabled
813 	 */
814 	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
815 		crc_calculated = idm_crc32c(pdu->isp_hdr,
816 		    sizeof (iscsi_hdr_t) + ahslen);
817 		if (crc_calculated != hdr_digest_crc) {
818 			/* Invalid Header Digest */
819 			return (IDM_STATUS_HEADER_DIGEST);
820 		}
821 	}
822 
823 	return (0);
824 }
825 
826 /*
827  * idm_so_ini_conn_create()
828  * Allocate the sockets transport connection resources.
829  */
830 static idm_status_t
831 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
832 {
833 	ksocket_t	so;
834 	idm_so_conn_t	*so_conn;
835 	idm_status_t	idmrc;
836 
837 	so = idm_socreate(cr->cr_domain, cr->cr_type,
838 	    cr->cr_protocol);
839 	if (so == NULL) {
840 		return (IDM_STATUS_FAIL);
841 	}
842 
843 	/* Bind the socket if configured to do so */
844 	if (cr->cr_bound) {
845 		if (ksocket_bind(so, &cr->cr_bound_addr.sin,
846 		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
847 			idm_sodestroy(so);
848 			return (IDM_STATUS_FAIL);
849 		}
850 	}
851 
852 	idmrc = idm_so_conn_create_common(ic, so);
853 	if (idmrc != IDM_STATUS_SUCCESS) {
854 		idm_soshutdown(so);
855 		idm_sodestroy(so);
856 		return (IDM_STATUS_FAIL);
857 	}
858 
859 	so_conn = ic->ic_transport_private;
860 	/* Set up socket options */
861 	idm_set_ini_preconnect_options(so_conn, cr->cr_boot_conn);
862 
863 	return (IDM_STATUS_SUCCESS);
864 }
865 
/*
 * idm_so_ini_conn_destroy()
 * Tear down the sockets transport connection resources.  All of the work
 * is done by idm_so_conn_destroy_common(), which the target-side destroy
 * routine uses as well.
 */
static void
idm_so_ini_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
875 
/*
 * idm_so_ini_conn_connect()
 * Establish the connection referred to by the handle previously allocated via
 * idm_so_ini_conn_create().
 *
 * When ic_conn_params.nonblock_socket is set, the socket is switched to
 * non-blocking mode and ksocket_connect() is retried until it succeeds,
 * fails with ETIMEDOUT/ECONNREFUSED/ECONNRESET, or the current lbolt
 * passes ic_conn_params.conn_login_max (compared directly against
 * ddi_get_lbolt(), i.e. treated as an absolute tick value).  Otherwise a
 * single blocking ksocket_connect() is issued.
 *
 * On success the TX/RX threads are started and the post-connect socket
 * options are applied.  On failure the socket is shut down and
 * IDM_STATUS_FAIL is returned.
 */
static idm_status_t
idm_so_ini_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;
	struct sonode	*node = NULL;
	int 		rc;
	clock_t		lbolt, conn_login_max, conn_login_interval;
	boolean_t	nonblock;

	so_conn = ic->ic_transport_private;
	nonblock = ic->ic_conn_params.nonblock_socket;
	conn_login_max = ic->ic_conn_params.conn_login_max;
	/* End of the current retry interval (the parameter is in seconds) */
	conn_login_interval = ddi_get_lbolt() +
	    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);

	if (nonblock == B_TRUE) {
		node = ((struct sonode *)(so_conn->ic_so));
		/* Switch the socket to non-blocking mode */
		idm_so_socket_set_nonblock(node);
		do {
			rc = ksocket_connect(so_conn->ic_so,
			    &ic->ic_ini_dst_addr.sin,
			    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
			    CRED());
			if (rc == 0 || rc == EISCONN) {
				/* connected, or already connected */
				rc = IDM_STATUS_SUCCESS;
				break;
			}
			if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
			    (rc == ECONNRESET)) {
				/* socket connection timeout or refuse */
				break;
			}
			lbolt = ddi_get_lbolt();
			if (lbolt > conn_login_max) {
				/*
				 * Connection retry timeout,
				 * failed connect to target.
				 */
				break;
			}
			if (lbolt < conn_login_interval) {
				if ((rc == EINPROGRESS) || (rc == EALREADY)) {
					/* TCP connect still in progress */
					delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
					continue;
				} else {
					/*
					 * Any other error: wait out the rest
					 * of the interval before retrying
					 */
					delay(conn_login_interval - lbolt);
				}
			}
			/* Start a new retry interval */
			conn_login_interval = ddi_get_lbolt() +
			    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
		} while (rc != 0);
		/* Connected: put the socket back into blocking mode */
		if (rc == IDM_STATUS_SUCCESS) {
			idm_so_socket_set_block(node);
		}
	} else {
		rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
		    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
	}

	if (rc != 0) {
		idm_soshutdown(so_conn->ic_so);
		return (IDM_STATUS_FAIL);
	}

	idm_so_conn_connect_common(ic);

	idm_set_postconnect_options(so_conn->ic_so);

	return (IDM_STATUS_SUCCESS);
}
955 
956 idm_status_t
957 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
958 {
959 	idm_status_t	idmrc;
960 
961 	idm_set_postconnect_options(new_so);
962 	idmrc = idm_so_conn_create_common(ic, new_so);
963 
964 	return (idmrc);
965 }
966 
/*
 * idm_so_tgt_conn_destroy()
 * Tear down the sockets transport connection resources of a target-side
 * connection; all of the work is done by idm_so_conn_destroy_common().
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
972 
/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
 *
 * No connect call is made here; idm_so_conn_connect_common() records the
 * socket's addresses and starts the TX/RX threads.  Always returns
 * IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_connect_common(ic);

	return (IDM_STATUS_SUCCESS);
}
985 
986 static idm_status_t
987 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
988 {
989 	idm_so_conn_t	*so_conn;
990 
991 	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
992 	so_conn->ic_so = new_so;
993 
994 	ic->ic_transport_private = so_conn;
995 	ic->ic_transport_hdrlen = 0;
996 
997 	/* Set the scoreboarding flag on this connection */
998 	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
999 	ic->ic_conn_params.max_recv_dataseglen =
1000 	    ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1001 	ic->ic_conn_params.max_xmit_dataseglen =
1002 	    ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1003 
1004 	/*
1005 	 * Initialize tx thread mutex and list
1006 	 */
1007 	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1008 	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1009 	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1010 	    offsetof(idm_pdu_t, idm_tx_link));
1011 
1012 	return (IDM_STATUS_SUCCESS);
1013 }
1014 
1015 static void
1016 idm_so_conn_destroy_common(idm_conn_t *ic)
1017 {
1018 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
1019 
1020 	ic->ic_transport_private = NULL;
1021 	idm_sodestroy(so_conn->ic_so);
1022 	list_destroy(&so_conn->ic_tx_list);
1023 	mutex_destroy(&so_conn->ic_tx_mutex);
1024 	cv_destroy(&so_conn->ic_tx_cv);
1025 
1026 	kmem_free(so_conn, sizeof (idm_so_conn_t));
1027 }
1028 
1029 static void
1030 idm_so_conn_connect_common(idm_conn_t *ic)
1031 {
1032 	idm_so_conn_t	*so_conn;
1033 	struct sockaddr_in6	t_addr;
1034 	socklen_t	t_addrlen = 0;
1035 
1036 	so_conn = ic->ic_transport_private;
1037 	bzero(&t_addr, sizeof (struct sockaddr_in6));
1038 	t_addrlen = sizeof (struct sockaddr_in6);
1039 
1040 	/* Set the local and remote addresses in the idm conn handle */
1041 	(void) ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1042 	    &t_addrlen, CRED());
1043 	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1044 	(void) ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1045 	    &t_addrlen, CRED());
1046 	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1047 
1048 	mutex_enter(&ic->ic_mutex);
1049 	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1050 	    &p0, TS_RUN, minclsyspri);
1051 	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1052 	    &p0, TS_RUN, minclsyspri);
1053 
1054 	while (so_conn->ic_rx_thread_did == 0 ||
1055 	    so_conn->ic_tx_thread_did == 0)
1056 		cv_wait(&ic->ic_cv, &ic->ic_mutex);
1057 	mutex_exit(&ic->ic_mutex);
1058 }
1059 
1060 /*
1061  * idm_so_conn_disconnect()
1062  * Shutdown the socket connection and stop the thread
1063  */
1064 static void
1065 idm_so_conn_disconnect(idm_conn_t *ic)
1066 {
1067 	idm_so_conn_t	*so_conn;
1068 
1069 	so_conn = ic->ic_transport_private;
1070 
1071 	mutex_enter(&ic->ic_mutex);
1072 	so_conn->ic_rx_thread_running = B_FALSE;
1073 	so_conn->ic_tx_thread_running = B_FALSE;
1074 	/* We need to wakeup the TX thread */
1075 	mutex_enter(&so_conn->ic_tx_mutex);
1076 	cv_signal(&so_conn->ic_tx_cv);
1077 	mutex_exit(&so_conn->ic_tx_mutex);
1078 	mutex_exit(&ic->ic_mutex);
1079 
1080 	/* This should wakeup the RX thread if it is sleeping */
1081 	idm_soshutdown(so_conn->ic_so);
1082 
1083 	thread_join(so_conn->ic_tx_thread_did);
1084 	thread_join(so_conn->ic_rx_thread_did);
1085 }
1086 
1087 /*
1088  * idm_so_tgt_svc_create()
1089  * Establish a service on an IP address and port.  idm_svc_req_t contains
1090  * the service parameters.
1091  */
1092 /*ARGSUSED*/
1093 static idm_status_t
1094 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1095 {
1096 	idm_so_svc_t		*so_svc;
1097 
1098 	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1099 
1100 	/* Set the new sockets service in svc handle */
1101 	is->is_so_svc = (void *)so_svc;
1102 
1103 	return (IDM_STATUS_SUCCESS);
1104 }
1105 
1106 /*
1107  * idm_so_tgt_svc_destroy()
1108  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1109  */
1110 static void
1111 idm_so_tgt_svc_destroy(idm_svc_t *is)
1112 {
1113 	/* the socket will have been torn down; free the service */
1114 	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1115 }
1116 
1117 /*
1118  * idm_so_tgt_svc_online()
1119  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1120  */
1121 
1122 static idm_status_t
1123 idm_so_tgt_svc_online(idm_svc_t *is)
1124 {
1125 	idm_so_svc_t		*so_svc;
1126 	idm_svc_req_t		*sr = &is->is_svc_req;
1127 	struct sockaddr_in6	sin6_ip;
1128 	const uint32_t		on = 1;
1129 	const uint32_t		off = 0;
1130 
1131 	mutex_enter(&is->is_mutex);
1132 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1133 
1134 	/*
1135 	 * Try creating an IPv6 socket first
1136 	 */
1137 	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1138 		mutex_exit(&is->is_mutex);
1139 		return (IDM_STATUS_FAIL);
1140 	} else {
1141 		bzero(&sin6_ip, sizeof (sin6_ip));
1142 		sin6_ip.sin6_family = AF_INET6;
1143 		sin6_ip.sin6_port = htons(sr->sr_port);
1144 		sin6_ip.sin6_addr = in6addr_any;
1145 
1146 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1147 		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1148 		/*
1149 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1150 		 */
1151 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1152 		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1153 
1154 		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1155 		    sizeof (sin6_ip), CRED()) != 0) {
1156 			mutex_exit(&is->is_mutex);
1157 			idm_sodestroy(so_svc->is_so);
1158 			return (IDM_STATUS_FAIL);
1159 		}
1160 	}
1161 
1162 	idm_set_postconnect_options(so_svc->is_so);
1163 
1164 	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1165 		mutex_exit(&is->is_mutex);
1166 		idm_soshutdown(so_svc->is_so);
1167 		idm_sodestroy(so_svc->is_so);
1168 		return (IDM_STATUS_FAIL);
1169 	}
1170 
1171 	/* Launch a watch thread */
1172 	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1173 	    is, 0, &p0, TS_RUN, minclsyspri);
1174 
1175 	if (so_svc->is_thread == NULL) {
1176 		/* Failure to launch; teardown the socket */
1177 		mutex_exit(&is->is_mutex);
1178 		idm_soshutdown(so_svc->is_so);
1179 		idm_sodestroy(so_svc->is_so);
1180 		return (IDM_STATUS_FAIL);
1181 	}
1182 	ksocket_hold(so_svc->is_so);
1183 	/* Wait for the port watcher thread to start */
1184 	while (!so_svc->is_thread_running)
1185 		cv_wait(&is->is_cv, &is->is_mutex);
1186 	mutex_exit(&is->is_mutex);
1187 
1188 	return (IDM_STATUS_SUCCESS);
1189 }
1190 
1191 /*
1192  * idm_so_tgt_svc_offline
1193  *
1194  * Stop listening on the IP address and port identified by idm_svc_t.
1195  */
1196 static void
1197 idm_so_tgt_svc_offline(idm_svc_t *is)
1198 {
1199 	idm_so_svc_t		*so_svc;
1200 	mutex_enter(&is->is_mutex);
1201 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1202 	so_svc->is_thread_running = B_FALSE;
1203 	mutex_exit(&is->is_mutex);
1204 
1205 	/*
1206 	 * Teardown socket
1207 	 */
1208 	idm_sodestroy(so_svc->is_so);
1209 
1210 	/*
1211 	 * Now we expect the port watcher thread to terminate
1212 	 */
1213 	thread_join(so_svc->is_thread_did);
1214 }
1215 
1216 /*
1217  * Watch thread for target service connection establishment.
1218  */
1219 void
1220 idm_so_svc_port_watcher(void *arg)
1221 {
1222 	idm_svc_t		*svc = arg;
1223 	ksocket_t		new_so;
1224 	idm_conn_t		*ic;
1225 	idm_status_t		idmrc;
1226 	idm_so_svc_t		*so_svc;
1227 	int			rc;
1228 	const uint32_t		off = 0;
1229 	struct sockaddr_in6 	t_addr;
1230 	socklen_t		t_addrlen;
1231 
1232 	bzero(&t_addr, sizeof (struct sockaddr_in6));
1233 	t_addrlen = sizeof (struct sockaddr_in6);
1234 	mutex_enter(&svc->is_mutex);
1235 
1236 	so_svc = svc->is_so_svc;
1237 	so_svc->is_thread_running = B_TRUE;
1238 	so_svc->is_thread_did = so_svc->is_thread->t_did;
1239 
1240 	cv_signal(&svc->is_cv);
1241 
1242 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
1243 	    svc->is_svc_req.sr_port);
1244 
1245 	while (so_svc->is_thread_running) {
1246 		mutex_exit(&svc->is_mutex);
1247 
1248 		if ((rc = ksocket_accept(so_svc->is_so,
1249 		    (struct sockaddr *)&t_addr, &t_addrlen,
1250 		    &new_so, CRED())) != 0) {
1251 			mutex_enter(&svc->is_mutex);
1252 			if (rc == ECONNABORTED)
1253 				continue;
1254 			/* Connection problem */
1255 			break;
1256 		}
1257 		/*
1258 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1259 		 */
1260 		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
1261 		    (char *)&off, sizeof (off), CRED());
1262 
1263 		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
1264 		    &ic);
1265 		if (idmrc != IDM_STATUS_SUCCESS) {
1266 			/* Drop connection */
1267 			idm_soshutdown(new_so);
1268 			idm_sodestroy(new_so);
1269 			mutex_enter(&svc->is_mutex);
1270 			continue;
1271 		}
1272 
1273 		idmrc = idm_so_tgt_conn_create(ic, new_so);
1274 		if (idmrc != IDM_STATUS_SUCCESS) {
1275 			idm_svc_conn_destroy(ic);
1276 			idm_soshutdown(new_so);
1277 			idm_sodestroy(new_so);
1278 			mutex_enter(&svc->is_mutex);
1279 			continue;
1280 		}
1281 
1282 		/*
1283 		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
1284 		 * will notify the client (target) about the new connection.
1285 		 */
1286 		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);
1287 
1288 		mutex_enter(&svc->is_mutex);
1289 	}
1290 	ksocket_rele(so_svc->is_so);
1291 	so_svc->is_thread_running = B_FALSE;
1292 	mutex_exit(&svc->is_mutex);
1293 
1294 	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
1295 	    svc->is_svc_req.sr_port);
1296 
1297 	thread_exit();
1298 }
1299 
1300 /*
1301  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1302  * frees resources associated with the task.
1303  *
1304  * It's not clear that this should return idm_status_t.  What do we do
1305  * if it fails?
1306  */
1307 static idm_status_t
1308 idm_so_free_task_rsrc(idm_task_t *idt)
1309 {
1310 	idm_buf_t	*idb, *next_idb;
1311 
1312 	/*
1313 	 * There is nothing to cleanup on initiator connections
1314 	 */
1315 	if (IDM_CONN_ISINI(idt->idt_ic))
1316 		return (IDM_STATUS_SUCCESS);
1317 
1318 	/*
1319 	 * If this is a target connection, call idm_buf_rx_from_ini_done for
1320 	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1321 	 *
1322 	 * In addition, remove any buffers associated with this task from
1323 	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1324 	 * items don't actually get removed from that list (and completion
1325 	 * routines called) until idm_task_cleanup.
1326 	 */
1327 	mutex_enter(&idt->idt_mutex);
1328 
1329 	for (idb = list_head(&idt->idt_outbufv); idb != NULL; idb = next_idb) {
1330 		next_idb = list_next(&idt->idt_outbufv, idb);
1331 		if (idb->idb_in_transport) {
1332 			/*
1333 			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1334 			 */
1335 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1336 			    uintptr_t, idb->idb_buf,
1337 			    uint32_t, idb->idb_bufoffset,
1338 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1339 			    uint32_t, idb->idb_xfer_len,
1340 			    int, XFER_BUF_RX_FROM_INI);
1341 			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1342 			mutex_enter(&idt->idt_mutex);
1343 		}
1344 	}
1345 
1346 	for (idb = list_head(&idt->idt_inbufv); idb != NULL; idb = next_idb) {
1347 		next_idb = list_next(&idt->idt_inbufv, idb);
1348 		/*
1349 		 * We want to remove these items from the tx_list as well,
1350 		 * but knowing it's in the idt_inbufv list is not a guarantee
1351 		 * that it's in the tx_list.  If it's on the tx list then
1352 		 * let idm_sotx_thread() clean it up.
1353 		 */
1354 		if (idb->idb_in_transport && !idb->idb_tx_thread) {
1355 			/*
1356 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1357 			 */
1358 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1359 			    uintptr_t, idb->idb_buf,
1360 			    uint32_t, idb->idb_bufoffset,
1361 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1362 			    uint32_t, idb->idb_xfer_len,
1363 			    int, XFER_BUF_TX_TO_INI);
1364 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1365 			mutex_enter(&idt->idt_mutex);
1366 		}
1367 	}
1368 
1369 	mutex_exit(&idt->idt_mutex);
1370 
1371 	return (IDM_STATUS_SUCCESS);
1372 }
1373 
1374 /*
1375  * idm_so_negotiate_key_values() validates the key values for this connection
1376  */
1377 /* ARGSUSED */
1378 static kv_status_t
1379 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1380     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1381 {
1382 	/* All parameters are negotiated at the iscsit level */
1383 	return (KV_HANDLED);
1384 }
1385 
1386 /*
1387  * idm_so_notice_key_values() activates the negotiated key values for
1388  * this connection.
1389  */
1390 static void
1391 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1392 {
1393 	char			*nvp_name;
1394 	nvpair_t		*nvp;
1395 	nvpair_t		*next_nvp;
1396 	int			nvrc;
1397 	idm_status_t		idm_status;
1398 	const idm_kv_xlate_t	*ikvx;
1399 	uint64_t		num_val;
1400 
1401 	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1402 	    nvp != NULL; nvp = next_nvp) {
1403 		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1404 		nvp_name = nvpair_name(nvp);
1405 
1406 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1407 		switch (ikvx->ik_key_id) {
1408 		case KI_HEADER_DIGEST:
1409 		case KI_DATA_DIGEST:
1410 			idm_status = idm_so_handle_digest(it, nvp, ikvx);
1411 			ASSERT(idm_status == 0);
1412 
1413 			/* Remove processed item from negotiated_nvl list */
1414 			nvrc = nvlist_remove_all(
1415 			    negotiated_nvl, ikvx->ik_key_name);
1416 			ASSERT(nvrc == 0);
1417 			break;
1418 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1419 			/*
1420 			 * Just pass the value down to idm layer.
1421 			 * No need to remove it from negotiated_nvl list here.
1422 			 */
1423 			nvrc = nvpair_value_uint64(nvp, &num_val);
1424 			ASSERT(nvrc == 0);
1425 			it->ic_conn_params.max_xmit_dataseglen =
1426 			    (uint32_t)num_val;
1427 			break;
1428 		default:
1429 			break;
1430 		}
1431 	}
1432 }
1433 
1434 /*
1435  * idm_so_declare_key_values() declares the key values for this connection
1436  */
1437 /* ARGSUSED */
1438 static kv_status_t
1439 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1440     nvlist_t *outgoing_nvl)
1441 {
1442 	char			*nvp_name;
1443 	nvpair_t		*nvp;
1444 	nvpair_t		*next_nvp;
1445 	kv_status_t		kvrc;
1446 	int			nvrc = 0;
1447 	const idm_kv_xlate_t	*ikvx;
1448 	uint64_t		num_val;
1449 
1450 	for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1451 	    nvp != NULL && nvrc == 0; nvp = next_nvp) {
1452 		next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1453 		nvp_name = nvpair_name(nvp);
1454 
1455 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1456 		switch (ikvx->ik_key_id) {
1457 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1458 			if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1459 				break;
1460 			}
1461 			if (outgoing_nvl &&
1462 			    (nvrc = nvlist_add_uint64(outgoing_nvl,
1463 			    nvp_name, num_val)) != 0) {
1464 				break;
1465 			}
1466 			it->ic_conn_params.max_recv_dataseglen =
1467 			    (uint32_t)num_val;
1468 			break;
1469 		default:
1470 			break;
1471 		}
1472 	}
1473 	kvrc = idm_nvstat_to_kvstat(nvrc);
1474 	return (kvrc);
1475 }
1476 
1477 static idm_status_t
1478 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1479     const idm_kv_xlate_t *ikvx)
1480 {
1481 	int			nvrc;
1482 	char			*digest_choice_string;
1483 
1484 	nvrc = nvpair_value_string(digest_choice,
1485 	    &digest_choice_string);
1486 	ASSERT(nvrc == 0);
1487 	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1488 		switch (ikvx->ik_key_id) {
1489 		case KI_HEADER_DIGEST:
1490 			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1491 			break;
1492 		case KI_DATA_DIGEST:
1493 			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1494 			break;
1495 		default:
1496 			ASSERT(0);
1497 			break;
1498 		}
1499 	} else if (strcasecmp(digest_choice_string, "none") == 0) {
1500 		switch (ikvx->ik_key_id) {
1501 		case KI_HEADER_DIGEST:
1502 			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1503 			break;
1504 		case KI_DATA_DIGEST:
1505 			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1506 			break;
1507 		default:
1508 			ASSERT(0);
1509 			break;
1510 		}
1511 	} else {
1512 		ASSERT(0);
1513 	}
1514 
1515 	return (IDM_STATUS_SUCCESS);
1516 }
1517 
1518 
1519 /*
1520  * idm_so_conn_is_capable() verifies that the passed connection is provided
1521  * for by the sockets interface.
1522  */
1523 /* ARGSUSED */
1524 static boolean_t
1525 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1526 {
1527 	return (B_TRUE);
1528 }
1529 
1530 /*
1531  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1532  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1533  * off the socket into the appropriate buffers.
1534  */
1535 static void
1536 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1537 {
1538 	iscsi_data_hdr_t	*bhs;
1539 	idm_task_t		*idt;
1540 	idm_buf_t		*idb;
1541 	uint32_t		datasn;
1542 	size_t			offset;
1543 	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1544 	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1545 
1546 	ASSERT(ic != NULL);
1547 	ASSERT(pdu != NULL);
1548 
1549 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1550 	datasn	= ntohl(bhs->datasn);
1551 	offset	= ntohl(bhs->offset);
1552 
1553 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1554 
1555 	/*
1556 	 * Look up the task corresponding to the initiator task tag
1557 	 * to get the buffers affiliated with the task.
1558 	 */
1559 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1560 	if (idt == NULL) {
1561 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1562 		idm_pdu_rx_protocol_error(ic, pdu);
1563 		return;
1564 	}
1565 
1566 	idb = pdu->isp_sorx_buf;
1567 	if (idb == NULL) {
1568 		IDM_CONN_LOG(CE_WARN,
1569 		    "idm_so_rx_datain: failed to find buffer");
1570 		idm_task_rele(idt);
1571 		idm_pdu_rx_protocol_error(ic, pdu);
1572 		return;
1573 	}
1574 
1575 	/*
1576 	 * DataSN values should be sequential and should not have any gaps or
1577 	 * repetitions. Check the DataSN with the one stored in the task.
1578 	 */
1579 	if (datasn == idt->idt_exp_datasn) {
1580 		idt->idt_exp_datasn++; /* keep track of DataSN received */
1581 	} else {
1582 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1583 		idm_task_rele(idt);
1584 		idm_pdu_rx_protocol_error(ic, pdu);
1585 		return;
1586 	}
1587 
1588 	/*
1589 	 * PDUs in a sequence should be in continuously increasing
1590 	 * address offset
1591 	 */
1592 	if (offset != idb->idb_exp_offset) {
1593 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1594 		idm_task_rele(idt);
1595 		idm_pdu_rx_protocol_error(ic, pdu);
1596 		return;
1597 	}
1598 	/* Expected next relative buffer offset */
1599 	idb->idb_exp_offset += n2h24(bhs->dlength);
1600 	idt->idt_rx_bytes += n2h24(bhs->dlength);
1601 
1602 	idm_task_rele(idt);
1603 
1604 	/*
1605 	 * For now call scsi_rsp which will process the data rsp
1606 	 * Revisit, need to provide an explicit client entry point for
1607 	 * phase collapse completions.
1608 	 */
1609 	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1610 	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1611 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1612 	}
1613 
1614 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1615 }
1616 
1617 /*
1618  * The idm_so_rx_dataout() function is used by the iSCSI target to read
1619  * data from the Data-Out PDU sent by the iSCSI initiator.
1620  *
1621  * This function gets the Initiator Task Tag from the PDU BHS and looks up the
1622  * task to get the buffers associated with the PDU. A PDU might span buffers.
1623  * The data is then read into the respective buffer.
1624  */
1625 static void
1626 idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
1627 {
1628 
1629 	iscsi_data_hdr_t	*bhs;
1630 	idm_task_t		*idt;
1631 	idm_buf_t		*idb;
1632 	size_t			offset;
1633 
1634 	ASSERT(ic != NULL);
1635 	ASSERT(pdu != NULL);
1636 
1637 	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
1638 	offset = ntohl(bhs->offset);
1639 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);
1640 
1641 	/*
1642 	 * Look up the task corresponding to the initiator task tag
1643 	 * to get the buffers affiliated with the task.
1644 	 */
1645 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1646 	if (idt == NULL) {
1647 		IDM_CONN_LOG(CE_WARN,
1648 		    "idm_so_rx_dataout: failed to find task");
1649 		idm_pdu_rx_protocol_error(ic, pdu);
1650 		return;
1651 	}
1652 
1653 	idb = pdu->isp_sorx_buf;
1654 	if (idb == NULL) {
1655 		IDM_CONN_LOG(CE_WARN,
1656 		    "idm_so_rx_dataout: failed to find buffer");
1657 		idm_task_rele(idt);
1658 		idm_pdu_rx_protocol_error(ic, pdu);
1659 		return;
1660 	}
1661 
1662 	/* Keep track of data transferred - check data offsets */
1663 	if (offset != idb->idb_exp_offset) {
1664 		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
1665 		    "%ld, %d", offset, idb->idb_exp_offset);
1666 		idm_task_rele(idt);
1667 		idm_pdu_rx_protocol_error(ic, pdu);
1668 		return;
1669 	}
1670 	/* Expected next relative offset */
1671 	idb->idb_exp_offset += ntoh24(bhs->dlength);
1672 	idt->idt_rx_bytes += n2h24(bhs->dlength);
1673 
1674 	/*
1675 	 * Call the buffer callback when the transfer is complete
1676 	 *
1677 	 * The connection state machine should only abort tasks after
1678 	 * shutting down the connection so we are assured that there
1679 	 * won't be a simultaneous attempt to abort this task at the
1680 	 * same time as we are processing this PDU (due to a connection
1681 	 * state change).
1682 	 */
1683 	if (bhs->flags & ISCSI_FLAG_FINAL) {
1684 		/*
1685 		 * We only want to call idm_buf_rx_from_ini_done once
1686 		 * per transfer.  It's possible that this task has
1687 		 * already been aborted in which case
1688 		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
1689 		 * for each buffer with idb_in_transport==B_TRUE.  To
1690 		 * close this window and ensure that this doesn't happen,
1691 		 * we'll clear idb->idb_in_transport now while holding
1692 		 * the task mutex.   This is only really an issue for
1693 		 * SCSI task abort -- if tasks were being aborted because
1694 		 * of a connection state change the state machine would
1695 		 * have already stopped the receive thread.
1696 		 */
1697 		mutex_enter(&idt->idt_mutex);
1698 
1699 		/*
1700 		 * Release the task hold here (obtained in idm_task_find)
1701 		 * because the task may complete synchronously during
1702 		 * idm_buf_rx_from_ini_done.  Since we still have an active
1703 		 * buffer we know there is at least one additional hold on idt.
1704 		 */
1705 		idm_task_rele(idt);
1706 
1707 		/*
1708 		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1709 		 */
1710 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1711 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
1712 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1713 		    uint32_t, idb->idb_xfer_len,
1714 		    int, XFER_BUF_RX_FROM_INI);
1715 		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
1716 		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1717 		return;
1718 	}
1719 
1720 	idm_task_rele(idt);
1721 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1722 }
1723 
1724 /*
1725  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1726  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1727  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1728  * and looks up the task in the task tree using the itt to get the output
1729  * buffers associated the task. The R2T PDU contains the offset of the
1730  * requested data and the data length. This function then constructs a
1731  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1732  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1733  */
1734 
1735 static void
1736 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1737 {
1738 	idm_task_t		*idt;
1739 	idm_buf_t		*idb;
1740 	iscsi_rtt_hdr_t		*rtt_hdr;
1741 	uint32_t		data_offset;
1742 	uint32_t		data_length;
1743 
1744 	ASSERT(ic != NULL);
1745 	ASSERT(pdu != NULL);
1746 
1747 	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1748 	data_offset = ntohl(rtt_hdr->data_offset);
1749 	data_length = ntohl(rtt_hdr->data_length);
1750 	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1751 
1752 	if (idt == NULL) {
1753 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1754 		idm_pdu_rx_protocol_error(ic, pdu);
1755 		return;
1756 	}
1757 
1758 	/* Find the buffer bound to the task by the iSCSI initiator */
1759 	mutex_enter(&idt->idt_mutex);
1760 	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1761 	if (idb == NULL) {
1762 		mutex_exit(&idt->idt_mutex);
1763 		idm_task_rele(idt);
1764 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1765 		idm_pdu_rx_protocol_error(ic, pdu);
1766 		return;
1767 	}
1768 
1769 	/* return buffer contains this data */
1770 	if (data_offset + data_length > idb->idb_buflen) {
1771 		/* Overflow */
1772 		mutex_exit(&idt->idt_mutex);
1773 		idm_task_rele(idt);
1774 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1775 		    "buffer");
1776 		idm_pdu_rx_protocol_error(ic, pdu);
1777 		return;
1778 	}
1779 
1780 	idt->idt_r2t_ttt = rtt_hdr->ttt;
1781 	idt->idt_exp_datasn = 0;
1782 
1783 	idm_so_send_rtt_data(ic, idt, idb, data_offset,
1784 	    ntohl(rtt_hdr->data_length));
1785 	/*
1786 	 * the idt_mutex is released in idm_so_send_rtt_data
1787 	 */
1788 
1789 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1790 	idm_task_rele(idt);
1791 
1792 }
1793 
1794 idm_status_t
1795 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1796 {
1797 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1798 	int		pad_len;
1799 	uint32_t	data_digest_crc;
1800 	uint32_t	crc_calculated;
1801 	int		total_len;
1802 	idm_so_conn_t	*so_conn;
1803 
1804 	so_conn = ic->ic_transport_private;
1805 
1806 	pad_len = ((ISCSI_PAD_WORD_LEN -
1807 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1808 	    (ISCSI_PAD_WORD_LEN - 1));
1809 
1810 	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1811 
1812 	total_len = pdu->isp_datalen;
1813 
1814 	if (pad_len) {
1815 		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
1816 		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
1817 		total_len		+= pad_len;
1818 		pdu->isp_iovlen++;
1819 	}
1820 
1821 	/* setup data digest */
1822 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1823 		pdu->isp_iov[pdu->isp_iovlen].iov_base =
1824 		    (char *)&data_digest_crc;
1825 		pdu->isp_iov[pdu->isp_iovlen].iov_len =
1826 		    sizeof (data_digest_crc);
1827 		total_len		+= sizeof (data_digest_crc);
1828 		pdu->isp_iovlen++;
1829 	}
1830 
1831 	pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1832 
1833 	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1834 	    pdu->isp_iovlen, total_len) != 0) {
1835 		return (IDM_STATUS_IO);
1836 	}
1837 
1838 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1839 		crc_calculated = idm_crc32c(pdu->isp_data,
1840 		    pdu->isp_datalen);
1841 		if (pad_len) {
1842 			crc_calculated = idm_crc32c_continued((char *)&pad,
1843 			    pad_len, crc_calculated);
1844 		}
1845 		if (crc_calculated != data_digest_crc) {
1846 			IDM_CONN_LOG(CE_WARN,
1847 			    "idm_sorecvdata: "
1848 			    "CRC error: actual 0x%x, calc 0x%x",
1849 			    data_digest_crc, crc_calculated);
1850 
1851 			/* Invalid Data Digest */
1852 			return (IDM_STATUS_DATA_DIGEST);
1853 		}
1854 	}
1855 
1856 	return (IDM_STATUS_SUCCESS);
1857 }
1858 
1859 /*
1860  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1861  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1862  * calling this function.
1863  */
1864 idm_status_t
1865 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1866 {
1867 	iscsi_data_hdr_t	*bhs;
1868 	idm_task_t		*task;
1869 	uint32_t		offset;
1870 	uint8_t			opcode;
1871 	uint32_t		dlength;
1872 	list_t			*buflst;
1873 	uint32_t		xfer_bytes;
1874 	idm_status_t		status;
1875 
1876 	ASSERT(ic != NULL);
1877 	ASSERT(pdu != NULL);
1878 
1879 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1880 
1881 	offset	= ntohl(bhs->offset);
1882 	opcode	= bhs->opcode;
1883 	dlength = n2h24(bhs->dlength);
1884 
1885 	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1886 	    (opcode == ISCSI_OP_SCSI_DATA));
1887 
1888 	/*
1889 	 * Successful lookup implicitly gets a "hold" on the task.  This
1890 	 * hold must be released before leaving this function.  At one
1891 	 * point we were caching this task context and retaining the hold
1892 	 * but it turned out to be very difficult to release the hold properly.
1893 	 * The task can be aborted and the connection shutdown between this
1894 	 * call and the subsequent expected call to idm_so_rx_datain/
1895 	 * idm_so_rx_dataout (in which case those functions are not called).
1896 	 * Releasing the hold in the PDU callback doesn't work well either
1897 	 * because the whole task may be completed by then at which point
1898 	 * it is too late to release the hold -- for better or worse this
1899 	 * code doesn't wait on the refcnts during normal operation.
1900 	 * idm_task_find() is very fast and it is not a huge burden if we
1901 	 * have to do it twice.
1902 	 */
1903 	task = idm_task_find(ic, bhs->itt, bhs->ttt);
1904 	if (task == NULL) {
1905 		IDM_CONN_LOG(CE_WARN,
1906 		    "idm_sorecv_scsidata: could not find task");
1907 		return (IDM_STATUS_FAIL);
1908 	}
1909 
1910 	mutex_enter(&task->idt_mutex);
1911 	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1912 	    &task->idt_inbufv : &task->idt_outbufv;
1913 	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1914 	mutex_exit(&task->idt_mutex);
1915 
1916 	if (pdu->isp_sorx_buf == NULL) {
1917 		idm_task_rele(task);
1918 		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1919 		    "buffer for offset %x opcode=%x",
1920 		    offset, opcode);
1921 		return (IDM_STATUS_FAIL);
1922 	}
1923 
1924 	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1925 	ASSERT(xfer_bytes != 0);
1926 	if (xfer_bytes != dlength) {
1927 		idm_task_rele(task);
1928 		/*
1929 		 * Buffer overflow, connection error.  The PDU data is still
1930 		 * sitting in the socket so we can't use the connection
1931 		 * again until that data is drained.
1932 		 */
1933 		return (IDM_STATUS_FAIL);
1934 	}
1935 
1936 	status = idm_sorecvdata(ic, pdu);
1937 
1938 	idm_task_rele(task);
1939 
1940 	return (status);
1941 }
1942 
1943 static uint32_t
1944 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1945 {
1946 	uint32_t	buf_ro = ro - idb->idb_bufoffset;
1947 	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1948 
1949 	ASSERT(ro >= idb->idb_bufoffset);
1950 
1951 	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
1952 	    (caddr_t)idb->idb_buf + buf_ro;
1953 	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
1954 	pdu->isp_iovlen++;
1955 
1956 	return (xfer_len);
1957 }
1958 
1959 int
1960 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1961 {
1962 	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1963 	ASSERT(pdu->isp_data != NULL);
1964 
1965 	pdu->isp_databuflen = pdu->isp_datalen;
1966 	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1967 	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1968 	pdu->isp_iovlen = 1;
1969 	/*
1970 	 * Since we are associating a new data buffer with this received
1971 	 * PDU we need to set a specific callback to free the data
1972 	 * after the PDU is processed.
1973 	 */
1974 	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1975 	pdu->isp_callback = idm_sorx_addl_pdu_cb;
1976 
1977 	return (idm_sorecvdata(ic, pdu));
1978 }
1979 
1980 void
1981 idm_sorx_thread(void *arg)
1982 {
1983 	boolean_t	conn_failure = B_FALSE;
1984 	idm_conn_t	*ic = (idm_conn_t *)arg;
1985 	idm_so_conn_t	*so_conn;
1986 	idm_pdu_t	*pdu;
1987 	idm_status_t	rc;
1988 
1989 	idm_conn_hold(ic);
1990 
1991 	mutex_enter(&ic->ic_mutex);
1992 
1993 	so_conn = ic->ic_transport_private;
1994 	so_conn->ic_rx_thread_running = B_TRUE;
1995 	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
1996 	cv_signal(&ic->ic_cv);
1997 
1998 	while (so_conn->ic_rx_thread_running) {
1999 		mutex_exit(&ic->ic_mutex);
2000 
2001 		/*
2002 		 * Get PDU with default header size (large enough for
2003 		 * BHS plus any anticipated AHS).  PDU from
2004 		 * the cache will have all values set correctly
2005 		 * for sockets RX including callback.
2006 		 */
2007 		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2008 		pdu->isp_ic = ic;
2009 		pdu->isp_flags = 0;
2010 		pdu->isp_transport_hdrlen = 0;
2011 
2012 		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2013 			/*
2014 			 * Call idm_pdu_complete so that we call the callback
2015 			 * and ensure any memory allocated in idm_sorecvhdr
2016 			 * gets freed up.
2017 			 */
2018 			idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2019 
2020 			/*
2021 			 * If ic_rx_thread_running is still set then
2022 			 * this is some kind of connection problem
2023 			 * on the socket.  In this case we want to
2024 			 * generate an event.  Otherwise some other
2025 			 * thread closed the socket due to another
2026 			 * issue in which case we don't need to
2027 			 * generate an event.
2028 			 */
2029 			mutex_enter(&ic->ic_mutex);
2030 			if (so_conn->ic_rx_thread_running) {
2031 				conn_failure = B_TRUE;
2032 				so_conn->ic_rx_thread_running = B_FALSE;
2033 			}
2034 
2035 			continue;
2036 		}
2037 
2038 		/*
2039 		 * Header has been read and validated.  Now we need
2040 		 * to read the PDU data payload (if present).  SCSI data
2041 		 * need to be transferred from the socket directly into
2042 		 * the associated transfer buffer for the SCSI task.
2043 		 */
2044 		if (pdu->isp_datalen != 0) {
2045 			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2046 			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2047 				rc = idm_sorecv_scsidata(ic, pdu);
2048 				/*
2049 				 * All SCSI errors are fatal to the
2050 				 * connection right now since we have no
2051 				 * place to put the data.  What we need
2052 				 * is some kind of sink to dispose of unwanted
2053 				 * SCSI data.  For example an invalid task tag
2054 				 * should not kill the connection (although
2055 				 * we may want to drop the connection).
2056 				 */
2057 			} else {
2058 				/*
2059 				 * Not data PDUs so allocate a buffer for the
2060 				 * data segment and read the remaining data.
2061 				 */
2062 				rc = idm_sorecv_nonscsidata(ic, pdu);
2063 			}
2064 			if (rc != 0) {
2065 				/*
2066 				 * Call idm_pdu_complete so that we call the
2067 				 * callback and ensure any memory allocated
2068 				 * in idm_sorecvhdr gets freed up.
2069 				 */
2070 				idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2071 
2072 				/*
2073 				 * If ic_rx_thread_running is still set then
2074 				 * this is some kind of connection problem
2075 				 * on the socket.  In this case we want to
2076 				 * generate an event.  Otherwise some other
2077 				 * thread closed the socket due to another
2078 				 * issue in which case we don't need to
2079 				 * generate an event.
2080 				 */
2081 				mutex_enter(&ic->ic_mutex);
2082 				if (so_conn->ic_rx_thread_running) {
2083 					conn_failure = B_TRUE;
2084 					so_conn->ic_rx_thread_running = B_FALSE;
2085 				}
2086 				continue;
2087 			}
2088 		}
2089 
2090 		/*
2091 		 * Process RX PDU
2092 		 */
2093 		idm_pdu_rx(ic, pdu);
2094 
2095 		mutex_enter(&ic->ic_mutex);
2096 	}
2097 
2098 	mutex_exit(&ic->ic_mutex);
2099 
2100 	/*
2101 	 * If we dropped out of the RX processing loop because of
2102 	 * a socket problem or other connection failure (including
2103 	 * digest errors) then we need to generate a state machine
2104 	 * event to shut the connection down.
2105 	 * If the state machine is already in, for example, INIT_ERROR, this
2106 	 * event will get dropped, and the TX thread will never be notified
2107 	 * to shut down.  To be safe, we'll just notify it here.
2108 	 */
2109 	if (conn_failure) {
2110 		if (so_conn->ic_tx_thread_running) {
2111 			so_conn->ic_tx_thread_running = B_FALSE;
2112 			mutex_enter(&so_conn->ic_tx_mutex);
2113 			cv_signal(&so_conn->ic_tx_cv);
2114 			mutex_exit(&so_conn->ic_tx_mutex);
2115 		}
2116 
2117 		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2118 	}
2119 
2120 	idm_conn_rele(ic);
2121 
2122 	thread_exit();
2123 }
2124 
2125 /*
2126  * idm_so_tx
2127  *
2128  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2129  * point.  By definition, it is supposed to be fast.  So, simply queue
2130  * the entry and return.  The real work is done by idm_i_so_tx() via
2131  * idm_sotx_thread().
2132  */
2133 
2134 static void
2135 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2136 {
2137 	idm_so_conn_t *so_conn = ic->ic_transport_private;
2138 
2139 	ASSERT(pdu->isp_ic == ic);
2140 	mutex_enter(&so_conn->ic_tx_mutex);
2141 
2142 	if (!so_conn->ic_tx_thread_running) {
2143 		mutex_exit(&so_conn->ic_tx_mutex);
2144 		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2145 		return;
2146 	}
2147 
2148 	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2149 	cv_signal(&so_conn->ic_tx_cv);
2150 	mutex_exit(&so_conn->ic_tx_mutex);
2151 }
2152 
2153 static idm_status_t
2154 idm_i_so_tx(idm_pdu_t *pdu)
2155 {
2156 	idm_conn_t	*ic = pdu->isp_ic;
2157 	idm_status_t	status = IDM_STATUS_SUCCESS;
2158 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
2159 	int		pad_len;
2160 	uint32_t	hdr_digest_crc;
2161 	uint32_t	data_digest_crc = 0;
2162 	int		total_len = 0;
2163 	int		iovlen = 0;
2164 	struct iovec	iov[6];
2165 	idm_so_conn_t	*so_conn;
2166 
2167 	so_conn = ic->ic_transport_private;
2168 
2169 	/* Setup BHS */
2170 	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
2171 	iov[iovlen].iov_len	= pdu->isp_hdrlen;
2172 	total_len		+= iov[iovlen].iov_len;
2173 	iovlen++;
2174 
2175 	/* Setup header digest */
2176 	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2177 	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2178 		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2179 
2180 		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
2181 		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
2182 		total_len		+= iov[iovlen].iov_len;
2183 		iovlen++;
2184 	}
2185 
2186 	/* Setup the data */
2187 	if (pdu->isp_datalen) {
2188 		idm_task_t		*idt;
2189 		idm_buf_t		*idb;
2190 		iscsi_data_hdr_t	*ihp;
2191 		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2192 		/* Write of immediate data */
2193 		if (ic->ic_ffp &&
2194 		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2195 		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2196 			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2197 			if (idt) {
2198 				mutex_enter(&idt->idt_mutex);
2199 				idb = idm_buf_find(&idt->idt_outbufv, 0);
2200 				mutex_exit(&idt->idt_mutex);
2201 				/*
2202 				 * If the initiator call to idm_buf_alloc
2203 				 * failed then we can get to this point
2204 				 * without a bound buffer.  The associated
2205 				 * connection failure will clean things up
2206 				 * later.  It would be nice to come up with
2207 				 * a cleaner way to handle this.  In
2208 				 * particular it seems absurd to look up
2209 				 * the task and the buffer just to update
2210 				 * this counter.
2211 				 */
2212 				if (idb)
2213 					idb->idb_xfer_len += pdu->isp_datalen;
2214 				idm_task_rele(idt);
2215 			}
2216 		}
2217 
2218 		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2219 		iov[iovlen].iov_len  = pdu->isp_datalen;
2220 		total_len += iov[iovlen].iov_len;
2221 		iovlen++;
2222 	}
2223 
2224 	/* Setup the data pad if necessary */
2225 	pad_len = ((ISCSI_PAD_WORD_LEN -
2226 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2227 	    (ISCSI_PAD_WORD_LEN - 1));
2228 
2229 	if (pad_len) {
2230 		bzero(pad, sizeof (pad));
2231 		iov[iovlen].iov_base = (void *)&pad;
2232 		iov[iovlen].iov_len  = pad_len;
2233 		total_len		+= iov[iovlen].iov_len;
2234 		iovlen++;
2235 	}
2236 
2237 	/*
2238 	 * Setup the data digest if enabled.  Data-digest is not sent
2239 	 * for login-phase PDUs.
2240 	 */
2241 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2242 	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2243 	    (pdu->isp_datalen || pad_len)) {
2244 		/*
2245 		 * RFC3720/10.2.3: A zero-length Data Segment also
2246 		 * implies a zero-length data digest.
2247 		 */
2248 		if (pdu->isp_datalen) {
2249 			data_digest_crc = idm_crc32c(pdu->isp_data,
2250 			    pdu->isp_datalen);
2251 		}
2252 		if (pad_len) {
2253 			data_digest_crc = idm_crc32c_continued(&pad,
2254 			    pad_len, data_digest_crc);
2255 		}
2256 
2257 		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
2258 		iov[iovlen].iov_len	= sizeof (data_digest_crc);
2259 		total_len		+= iov[iovlen].iov_len;
2260 		iovlen++;
2261 	}
2262 
2263 	/* Transmit the PDU */
2264 	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2265 	    total_len) != 0) {
2266 		/* Set error status */
2267 		IDM_CONN_LOG(CE_WARN,
2268 		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2269 		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
2270 		    (void *) pdu->isp_data);
2271 		status = IDM_STATUS_IO;
2272 	}
2273 
2274 	/*
2275 	 * Success does not mean that the PDU actually reached the
2276 	 * remote node since it could get dropped along the way.
2277 	 */
2278 	idm_pdu_complete(pdu, status);
2279 
2280 	return (status);
2281 }
2282 
2283 /*
2284  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2285  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2286  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2287  * A target can invoke this function multiple times for a single read command
2288  * (identified by the same ITT) to split the input into several sequences.
2289  *
2290  * DataSN starts with 0 for the first data PDU of an input command and advances
2291  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2292  * which is set to 1 for the last data PDU of a sequence.
2293  * If the initiator supports phase collapse, the status bit must be set along
2294  * with the F bit to indicate that the status is shipped together with the last
2295  * Data-In PDU.
2296  *
2297  * The data PDUs within a sequence will be sent in order with the buffer offset
2298  * in increasing order. i.e. initiator and target must have negotiated the
2299  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2300  *
2301  * Caller holds idt->idt_mutex
2302  */
2303 static idm_status_t
2304 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2305 {
2306 	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
2307 	idm_pdu_t	tmppdu;
2308 
2309 	ASSERT(mutex_owned(&idt->idt_mutex));
2310 
2311 	/*
2312 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2313 	 * idm_sotx_thread.
2314 	 */
2315 	mutex_enter(&so_conn->ic_tx_mutex);
2316 
2317 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2318 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2319 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2320 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2321 
2322 	if (!so_conn->ic_tx_thread_running) {
2323 		mutex_exit(&so_conn->ic_tx_mutex);
2324 		/*
2325 		 * Don't release idt->idt_mutex since we're supposed to hold
2326 		 * in when calling idm_buf_tx_to_ini_done
2327 		 */
2328 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2329 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2330 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2331 		    uint32_t, idb->idb_xfer_len,
2332 		    int, XFER_BUF_TX_TO_INI);
2333 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2334 		return (IDM_STATUS_FAIL);
2335 	}
2336 
2337 	/*
2338 	 * Build a template for the data PDU headers we will use so that
2339 	 * the SN values will stay consistent with other PDU's we are
2340 	 * transmitting like R2T and SCSI status.
2341 	 */
2342 	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2343 	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2344 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2345 	    ISCSI_OP_SCSI_DATA_RSP);
2346 	idb->idb_tx_thread = B_TRUE;
2347 	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2348 	cv_signal(&so_conn->ic_tx_cv);
2349 	mutex_exit(&so_conn->ic_tx_mutex);
2350 	mutex_exit(&idt->idt_mutex);
2351 
2352 	/*
2353 	 * Returning success here indicates the transfer was successfully
2354 	 * dispatched -- it does not mean that the transfer completed
2355 	 * successfully.
2356 	 */
2357 	return (IDM_STATUS_SUCCESS);
2358 }
2359 
2360 /*
2361  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2362  * data blocks it is ready to receive from the initiator in response to a WRITE
2363  * SCSI command. The target iSCSI layer passes the information about the desired
2364  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2365  * offset and datalen are passed via the 'idb' argument.
2366  *
2367  * Scope for Prototype build:
2368  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2369  * negotiated the "InitialR2T" to "Yes".
2370  *
2371  * Caller holds idt->idt_mutex
2372  */
2373 static idm_status_t
2374 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2375 {
2376 	idm_pdu_t		*pdu;
2377 	iscsi_rtt_hdr_t		*rtt;
2378 
2379 	ASSERT(mutex_owned(&idt->idt_mutex));
2380 
2381 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2382 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2383 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2384 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2385 
2386 	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2387 	pdu->isp_ic = idt->idt_ic;
2388 	pdu->isp_flags = IDM_PDU_SET_STATSN;
2389 	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2390 
2391 	/* iSCSI layer fills the TTT, ITT, ExpCmdSN, MaxCmdSN */
2392 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2393 
2394 	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2395 	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2396 
2397 	rtt->opcode		= ISCSI_OP_RTT_RSP;
2398 	rtt->flags		= ISCSI_FLAG_FINAL;
2399 	rtt->data_offset	= htonl(idb->idb_bufoffset);
2400 	rtt->data_length	= htonl(idb->idb_xfer_len);
2401 	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
2402 
2403 	/* Keep track of buffer offsets */
2404 	idb->idb_exp_offset	= idb->idb_bufoffset;
2405 	mutex_exit(&idt->idt_mutex);
2406 
2407 	/*
2408 	 * Transmit the PDU.
2409 	 */
2410 	idm_pdu_tx(pdu);
2411 
2412 	return (IDM_STATUS_SUCCESS);
2413 }
2414 
2415 static idm_status_t
2416 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2417 {
2418 	if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2419 		idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2420 		    KM_NOSLEEP);
2421 		idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2422 	} else {
2423 		idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2424 		idb->idb_buf_private = NULL;
2425 	}
2426 
2427 	if (idb->idb_buf == NULL) {
2428 		IDM_CONN_LOG(CE_NOTE,
2429 		    "idm_so_buf_alloc: failed buffer allocation");
2430 		return (IDM_STATUS_FAIL);
2431 	}
2432 
2433 	return (IDM_STATUS_SUCCESS);
2434 }
2435 
/*
 * idm_so_buf_setup
 *
 * Buffer setup step for the sockets transport.  The only work done here
 * is clearing idb_bufalloc on the buffer; always returns
 * IDM_STATUS_SUCCESS.
 */
/* ARGSUSED */
static idm_status_t
idm_so_buf_setup(idm_buf_t *idb)
{
	/* Ensure bufalloc'd flag is unset */
	idb->idb_bufalloc = B_FALSE;

	return (IDM_STATUS_SUCCESS);
}
2445 
/*
 * idm_so_buf_teardown
 *
 * Buffer teardown step for the sockets transport.  Intentionally empty:
 * idm_so_buf_setup() only clears a flag, so there is nothing to undo.
 */
/* ARGSUSED */
static void
idm_so_buf_teardown(idm_buf_t *idb)
{
	/* nothing to do here */
}
2452 
2453 static void
2454 idm_so_buf_free(idm_buf_t *idb)
2455 {
2456 	if (idb->idb_buf_private == NULL) {
2457 		kmem_free(idb->idb_buf, idb->idb_buflen);
2458 	} else {
2459 		kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2460 	}
2461 }
2462 
2463 static void
2464 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2465     uint32_t offset, uint32_t length)
2466 {
2467 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
2468 	idm_pdu_t	tmppdu;
2469 	idm_buf_t	*rtt_buf;
2470 
2471 	ASSERT(mutex_owned(&idt->idt_mutex));
2472 
2473 	/*
2474 	 * Allocate a buffer to represent the RTT transfer.  We could further
2475 	 * optimize this by allocating the buffers internally from an rtt
2476 	 * specific buffer cache since this is socket-specific code but for
2477 	 * now we will keep it simple.
2478 	 */
2479 	rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2480 	if (rtt_buf == NULL) {
2481 		/*
2482 		 * If we're in FFP then the failure was likely a resource
2483 		 * allocation issue and we should close the connection by
2484 		 * sending a CE_TRANSPORT_FAIL event.
2485 		 *
2486 		 * If we're not in FFP then idm_buf_alloc will always
2487 		 * fail and the state is transitioning to "complete" anyway
2488 		 * so we won't bother to send an event.
2489 		 */
2490 		mutex_enter(&ic->ic_state_mutex);
2491 		if (ic->ic_ffp)
2492 			idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2493 			    NULL, CT_NONE);
2494 		mutex_exit(&ic->ic_state_mutex);
2495 		mutex_exit(&idt->idt_mutex);
2496 		return;
2497 	}
2498 
2499 	rtt_buf->idb_buf_cb = NULL;
2500 	rtt_buf->idb_cb_arg = NULL;
2501 	rtt_buf->idb_bufoffset = offset;
2502 	rtt_buf->idb_xfer_len = length;
2503 	rtt_buf->idb_ic = idt->idt_ic;
2504 	rtt_buf->idb_task_binding = idt;
2505 
2506 	/*
2507 	 * The new buffer (if any) represents an additional
2508 	 * reference on the task
2509 	 */
2510 	idm_task_hold(idt);
2511 	mutex_exit(&idt->idt_mutex);
2512 
2513 	/*
2514 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2515 	 * idm_sotx_thread.
2516 	 */
2517 	mutex_enter(&so_conn->ic_tx_mutex);
2518 
2519 	if (!so_conn->ic_tx_thread_running) {
2520 		idm_buf_free(rtt_buf);
2521 		mutex_exit(&so_conn->ic_tx_mutex);
2522 		idm_task_rele(idt);
2523 		return;
2524 	}
2525 
2526 	/*
2527 	 * Build a template for the data PDU headers we will use so that
2528 	 * the SN values will stay consistent with other PDU's we are
2529 	 * transmitting like R2T and SCSI status.
2530 	 */
2531 	bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2532 	tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2533 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2534 	    ISCSI_OP_SCSI_DATA);
2535 	rtt_buf->idb_tx_thread = B_TRUE;
2536 	rtt_buf->idb_in_transport = B_TRUE;
2537 	list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2538 	cv_signal(&so_conn->ic_tx_cv);
2539 	mutex_exit(&so_conn->ic_tx_mutex);
2540 }
2541 
/*
 * idm_so_send_rtt_data_done
 *
 * Finish with a buffer that was queued by idm_so_send_rtt_data(): mark
 * it as no longer in transport, drop the task reference that was taken
 * when the buffer was queued, and free the buffer.
 */
static void
idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
{
	/*
	 * Don't worry about status -- we assume any error handling
	 * is performed by the caller (idm_sotx_thread).
	 */
	idb->idb_in_transport = B_FALSE;
	idm_task_rele(idt);
	idm_buf_free(idb);
}
2553 
2554 static idm_status_t
2555 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2556     uint32_t buf_region_offset, uint32_t buf_region_length)
2557 {
2558 	idm_conn_t		*ic;
2559 	uint32_t		max_dataseglen;
2560 	size_t			remainder, chunk;
2561 	uint32_t		data_offset = buf_region_offset;
2562 	iscsi_data_hdr_t	*bhs;
2563 	idm_pdu_t		*pdu;
2564 	idm_status_t		tx_status;
2565 
2566 	ASSERT(mutex_owned(&idt->idt_mutex));
2567 
2568 	ic = idt->idt_ic;
2569 
2570 	max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2571 	remainder = buf_region_length;
2572 
2573 	while (remainder) {
2574 		if (idt->idt_state != TASK_ACTIVE) {
2575 			ASSERT((idt->idt_state != TASK_IDLE) &&
2576 			    (idt->idt_state != TASK_COMPLETE));
2577 			return (IDM_STATUS_ABORTED);
2578 		}
2579 
2580 		/* check to see if we need to chunk the data */
2581 		if (remainder > max_dataseglen) {
2582 			chunk = max_dataseglen;
2583 		} else {
2584 			chunk = remainder;
2585 		}
2586 
2587 		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2588 		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2589 		pdu->isp_ic = ic;
2590 		pdu->isp_flags = 0;	/* initialize isp_flags */
2591 
2592 		/*
2593 		 * We've already built a build a header template
2594 		 * to use during the transfer.  Use this template so that
2595 		 * the SN values stay consistent with any unrelated PDU's
2596 		 * being transmitted.
2597 		 */
2598 		bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2599 		    sizeof (iscsi_hdr_t));
2600 
2601 		/*
2602 		 * Set DataSN, data offset, and flags in BHS
2603 		 * For the prototype build, A = 0, S = 0, U = 0
2604 		 */
2605 		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2606 
2607 		bhs->datasn		= htonl(idt->idt_exp_datasn++);
2608 
2609 		hton24(bhs->dlength, chunk);
2610 		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2611 
2612 		/* setup data */
2613 		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
2614 		pdu->isp_datalen = (uint_t)chunk;
2615 
2616 		if (chunk == remainder) {
2617 			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2618 			/* Piggyback the status with the last data PDU */
2619 			if (idt->idt_flags & IDM_TASK_PHASECOLLAPSE_REQ) {
2620 				pdu->isp_flags |= IDM_PDU_SET_STATSN |
2621 				    IDM_PDU_ADVANCE_STATSN;
2622 				(*idt->idt_ic->ic_conn_ops.icb_update_statsn)
2623 				    (idt, pdu);
2624 				idt->idt_flags |=
2625 				    IDM_TASK_PHASECOLLAPSE_SUCCESS;
2626 
2627 			}
2628 		}
2629 
2630 		remainder	-= chunk;
2631 		data_offset	+= chunk;
2632 
2633 		/* Instrument the data-send DTrace probe. */
2634 		if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2635 			DTRACE_ISCSI_2(data__send,
2636 			    idm_conn_t *, idt->idt_ic,
2637 			    iscsi_data_rsp_hdr_t *,
2638 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2639 		}
2640 
2641 		/*
2642 		 * Now that we're done working with idt_exp_datasn,
2643 		 * idt->idt_state and idb->idb_bufoffset we can release
2644 		 * the task lock -- don't want to hold it across the
2645 		 * call to idm_i_so_tx since we could block.
2646 		 */
2647 		mutex_exit(&idt->idt_mutex);
2648 
2649 		/*
2650 		 * Transmit the PDU.  Call the internal routine directly
2651 		 * as there is already implicit ordering.
2652 		 */
2653 		if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2654 			mutex_enter(&idt->idt_mutex);
2655 			return (tx_status);
2656 		}
2657 
2658 		mutex_enter(&idt->idt_mutex);
2659 		idt->idt_tx_bytes += chunk;
2660 	}
2661 
2662 	return (IDM_STATUS_SUCCESS);
2663 }
2664 
2665 /*
2666  * TX PDU cache
2667  */
2668 /* ARGSUSED */
2669 int
2670 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2671 {
2672 	idm_pdu_t	*pdu = hdl;
2673 
2674 	bzero(pdu, sizeof (idm_pdu_t));
2675 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2676 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2677 	pdu->isp_callback = idm_sotx_cache_pdu_cb;
2678 	pdu->isp_magic = IDM_PDU_MAGIC;
2679 	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2680 
2681 	return (0);
2682 }
2683 
/*
 * idm_sotx_cache_pdu_cb
 *
 * Completion callback installed on TX cache PDUs by
 * idm_sotx_pdu_constructor().  Clears the data length and returns the
 * PDU to idm.idm_sotx_pdu_cache.  The status argument is not examined.
 */
/* ARGSUSED */
void
idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
	/* reset values between use */
	pdu->isp_datalen = 0;

	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
}
2693 
2694 /*
2695  * RX PDU cache
2696  */
2697 /* ARGSUSED */
2698 int
2699 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2700 {
2701 	idm_pdu_t	*pdu = hdl;
2702 
2703 	bzero(pdu, sizeof (idm_pdu_t));
2704 	pdu->isp_magic = IDM_PDU_MAGIC;
2705 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2706 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2707 
2708 	return (0);
2709 }
2710 
/*
 * idm_sorx_cache_pdu_cb
 *
 * Completion callback installed on RX cache PDUs by
 * idm_sorx_pdu_constructor().  Clears isp_iovlen and isp_sorx_buf and
 * returns the PDU to idm.idm_sorx_pdu_cache.  The status argument is not
 * examined.
 */
/* ARGSUSED */
static void
idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
{
	/* reset values between use */
	pdu->isp_iovlen = 0;
	pdu->isp_sorx_buf = 0;
	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
}
2719 
2720 static void
2721 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2722 {
2723 	/*
2724 	 * We had to modify our cached RX PDU with a longer header buffer
2725 	 * and/or a longer data buffer.  Release the new buffers and fix
2726 	 * the fields back to what we would expect for a cached RX PDU.
2727 	 */
2728 	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2729 		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2730 	}
2731 	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2732 		kmem_free(pdu->isp_data, pdu->isp_datalen);
2733 	}
2734 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2735 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2736 	pdu->isp_data = NULL;
2737 	pdu->isp_datalen = 0;
2738 	pdu->isp_sorx_buf = 0;
2739 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2740 	idm_sorx_cache_pdu_cb(pdu, status);
2741 }
2742 
/*
 * idm_sotx_thread
 *
 * Transmit thread for a sockets-based connection.  The thread sleeps on
 * ic_tx_cv while ic_tx_list is empty; when objects are queued it removes
 * them one at a time from the head of the list and dispatches on the
 * object's magic number:
 *
 *   IDM_PDU_MAGIC - an idm_pdu_t, sent with idm_i_so_tx()
 *   IDM_BUF_MAGIC - an idm_buf_t, sent with idm_so_send_buf_region()
 *
 * Any status other than IDM_STATUS_SUCCESS clears ic_tx_thread_running
 * and raises a CE_TRANSPORT_FAIL connection event.  Once the thread is
 * told to stop, everything still on the TX list is aborted before the
 * thread exits.
 *
 * arg is the idm_conn_t for the connection; a hold is taken on it for
 * the lifetime of the thread.
 */
void
idm_sotx_thread(void *arg)
{
	idm_conn_t	*ic = arg;
	idm_tx_obj_t	*object, *next;
	idm_so_conn_t	*so_conn;
	idm_status_t	status = IDM_STATUS_SUCCESS;

	idm_conn_hold(ic);

	/* Record that we are running and signal ic_cv. */
	mutex_enter(&ic->ic_mutex);
	so_conn = ic->ic_transport_private;
	so_conn->ic_tx_thread_running = B_TRUE;
	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
	cv_signal(&ic->ic_cv);
	mutex_exit(&ic->ic_mutex);

	/* ic_tx_mutex is held whenever the TX list or run flag is examined */
	mutex_enter(&so_conn->ic_tx_mutex);

	while (so_conn->ic_tx_thread_running) {
		/* Sleep until there is work or we are told to stop */
		while (list_is_empty(&so_conn->ic_tx_list)) {
			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);

			if (!so_conn->ic_tx_thread_running) {
				goto tx_bail;
			}
		}

		/* Dequeue the head and drop the list lock while sending */
		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
		list_remove(&so_conn->ic_tx_list, object);
		mutex_exit(&so_conn->ic_tx_mutex);

		switch (object->idm_tx_obj_magic) {
		case IDM_PDU_MAGIC: {
			idm_pdu_t *pdu = (idm_pdu_t *)object;
			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
			    idm_pdu_t *, (idm_pdu_t *)object);

			if (pdu->isp_flags & IDM_PDU_SET_STATSN) {
				/* No IDM task */
				(ic->ic_conn_ops.icb_update_statsn)(NULL, pdu);
			}
			status = idm_i_so_tx((idm_pdu_t *)object);
			break;
		}
		case IDM_BUF_MAGIC: {
			idm_buf_t *idb = (idm_buf_t *)object;
			idm_task_t *idt = idb->idb_task_binding;

			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
			    idm_buf_t *, idb);

			/*
			 * idm_so_send_buf_region is entered with
			 * idt->idt_mutex held and returns with it held.
			 */
			mutex_enter(&idt->idt_mutex);
			status = idm_so_send_buf_region(idt,
			    idb, 0, idb->idb_xfer_len);

			/*
			 * TX thread owns the buffer so we expect it to
			 * be "in transport"
			 */
			ASSERT(idb->idb_in_transport);
			if (IDM_CONN_ISTGT(ic)) {
				/*
				 * idm_buf_tx_to_ini_done releases
				 * idt->idt_mutex
				 */
				DTRACE_ISCSI_8(xfer__done,
				    idm_conn_t *, idt->idt_ic,
				    uintptr_t, idb->idb_buf,
				    uint32_t, idb->idb_bufoffset,
				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
				    uint32_t, idb->idb_xfer_len,
				    int, XFER_BUF_TX_TO_INI);
				idm_buf_tx_to_ini_done(idt, idb, status);
			} else {
				/* Not a target: the buffer was an RTT region */
				idm_so_send_rtt_data_done(idt, idb);
				mutex_exit(&idt->idt_mutex);
			}
			break;
		}

		default:
			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
			    "(0x%08x)", object->idm_tx_obj_magic);
			status = IDM_STATUS_FAIL;
		}

		/* Retake the list lock before re-checking the run flag */
		mutex_enter(&so_conn->ic_tx_mutex);

		if (status != IDM_STATUS_SUCCESS) {
			/* Stop transmitting and report the failure */
			so_conn->ic_tx_thread_running = B_FALSE;
			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
		}
	}

	/*
	 * Before we leave, we need to abort every item remaining in the
	 * TX list.
	 */

tx_bail:
	/* ic_tx_mutex is held on every path that reaches this label */
	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);

	while (object != NULL) {
		/* Fetch the successor before this object is unlinked */
		next = list_next(&so_conn->ic_tx_list, object);

		list_remove(&so_conn->ic_tx_list, object);
		switch (object->idm_tx_obj_magic) {
		case IDM_PDU_MAGIC:
			idm_pdu_complete((idm_pdu_t *)object,
			    IDM_STATUS_ABORTED);
			break;

		case IDM_BUF_MAGIC: {
			idm_buf_t *idb = (idm_buf_t *)object;
			idm_task_t *idt = idb->idb_task_binding;
			/*
			 * Drop the list lock before taking the task lock;
			 * it is retaken once the buffer has been handled.
			 */
			mutex_exit(&so_conn->ic_tx_mutex);
			mutex_enter(&idt->idt_mutex);
			/*
			 * TX thread owns the buffer so we expect it to
			 * be "in transport"
			 */
			ASSERT(idb->idb_in_transport);
			if (IDM_CONN_ISTGT(ic)) {
				/*
				 * idm_buf_tx_to_ini_done releases
				 * idt->idt_mutex
				 */
				DTRACE_ISCSI_8(xfer__done,
				    idm_conn_t *, idt->idt_ic,
				    uintptr_t, idb->idb_buf,
				    uint32_t, idb->idb_bufoffset,
				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
				    uint32_t, idb->idb_xfer_len,
				    int, XFER_BUF_TX_TO_INI);
				idm_buf_tx_to_ini_done(idt, idb,
				    IDM_STATUS_ABORTED);
			} else {
				idm_so_send_rtt_data_done(idt, idb);
				mutex_exit(&idt->idt_mutex);
			}
			mutex_enter(&so_conn->ic_tx_mutex);
			break;
		}
		default:
			IDM_CONN_LOG(CE_WARN,
			    "idm_sotx_thread: Unexpected magic "
			    "(0x%08x)", object->idm_tx_obj_magic);
		}

		object = next;
	}

	mutex_exit(&so_conn->ic_tx_mutex);
	idm_conn_rele(ic);
	thread_exit();
	/*NOTREACHED*/
}
2907 
/*
 * idm_so_socket_set_nonblock
 *
 * Calls VOP_SETFL() on the sonode's vnode, passing so_flag and
 * (so_state | FNONBLOCK).  The return value is deliberately discarded.
 *
 * NOTE(review): presumably the two flag arguments are the old and new
 * file flags -- confirm against the VOP_SETFL definition.
 */
static void
idm_so_socket_set_nonblock(struct sonode *node)
{
	(void) VOP_SETFL(node->so_vnode, node->so_flag,
	    (node->so_state | FNONBLOCK), CRED(), NULL);
}
2914 
/*
 * idm_so_socket_set_block
 *
 * Counterpart of idm_so_socket_set_nonblock(): calls VOP_SETFL() on the
 * sonode's vnode, passing so_flag and so_state with FNONBLOCK cleared.
 * The return value is deliberately discarded.
 *
 * NOTE(review): presumably the two flag arguments are the old and new
 * file flags -- confirm against the VOP_SETFL definition.
 */
static void
idm_so_socket_set_block(struct sonode *node)
{
	(void) VOP_SETFL(node->so_vnode, node->so_flag,
	    (node->so_state & (~FNONBLOCK)), CRED(), NULL);
}
2921 
2922 
2923 /*
2924  * Called by kernel sockets when the connection has been accepted or
2925  * rejected. In early volo, a "disconnect" callback was sent instead of
2926  * "connectfailed", so we check for both.
2927  */
2928 /* ARGSUSED */
2929 void
2930 idm_so_timed_socket_connect_cb(ksocket_t ks,
2931     ksocket_callback_event_t ev, void *arg, uintptr_t info)
2932 {
2933 	idm_so_timed_socket_t	*itp = arg;
2934 	ASSERT(itp != NULL);
2935 	ASSERT(ev == KSOCKET_EV_CONNECTED ||
2936 	    ev == KSOCKET_EV_CONNECTFAILED ||
2937 	    ev == KSOCKET_EV_DISCONNECTED);
2938 
2939 	mutex_enter(&idm_so_timed_socket_mutex);
2940 	itp->it_callback_called = B_TRUE;
2941 	if (ev == KSOCKET_EV_CONNECTED) {
2942 		itp->it_socket_error_code = 0;
2943 	} else {
2944 		/* Make sure the error code is non-zero on error */
2945 		if (info == 0)
2946 			info = ECONNRESET;
2947 		itp->it_socket_error_code = (int)info;
2948 	}
2949 	cv_signal(&itp->it_cv);
2950 	mutex_exit(&idm_so_timed_socket_mutex);
2951 }
2952 
2953 int
2954 idm_so_timed_socket_connect(ksocket_t ks,
2955     struct sockaddr_storage *sa, int sa_sz, int login_max_usec)
2956 {
2957 	clock_t			conn_login_max;
2958 	int			rc, nonblocking, rval;
2959 	idm_so_timed_socket_t	it;
2960 	ksocket_callbacks_t	ks_cb;
2961 
2962 	conn_login_max = ddi_get_lbolt() + drv_usectohz(login_max_usec);
2963 
2964 	/*
2965 	 * Set to non-block socket mode, with callback on connect
2966 	 * Early volo used "disconnected" instead of "connectfailed",
2967 	 * so set callback to look for both.
2968 	 */
2969 	bzero(&it, sizeof (it));
2970 	ks_cb.ksock_cb_flags = KSOCKET_CB_CONNECTED |
2971 	    KSOCKET_CB_CONNECTFAILED | KSOCKET_CB_DISCONNECTED;
2972 	ks_cb.ksock_cb_connected = idm_so_timed_socket_connect_cb;
2973 	ks_cb.ksock_cb_connectfailed = idm_so_timed_socket_connect_cb;
2974 	ks_cb.ksock_cb_disconnected = idm_so_timed_socket_connect_cb;
2975 	cv_init(&it.it_cv, NULL, CV_DEFAULT, NULL);
2976 	rc = ksocket_setcallbacks(ks, &ks_cb, &it, CRED());
2977 	if (rc != 0)
2978 		return (rc);
2979 
2980 	/* Set to non-blocking mode */
2981 	nonblocking = 1;
2982 	rc = ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
2983 	    CRED());
2984 	if (rc != 0)
2985 		goto cleanup;
2986 
2987 	bzero(&it, sizeof (it));
2988 	for (;;) {
2989 		/*
2990 		 * Warning -- in a loopback scenario, the call to
2991 		 * the connect_cb can occur inside the call to
2992 		 * ksocket_connect. Do not hold the mutex around the
2993 		 * call to ksocket_connect.
2994 		 */
2995 		rc = ksocket_connect(ks, (struct sockaddr *)sa, sa_sz, CRED());
2996 		if (rc == 0 || rc == EISCONN) {
2997 			/* socket success or already success */
2998 			rc = 0;
2999 			break;
3000 		}
3001 		if ((rc != EINPROGRESS) && (rc != EALREADY)) {
3002 			break;
3003 		}
3004 
3005 		/* TCP connect still in progress. See if out of time. */
3006 		if (ddi_get_lbolt() > conn_login_max) {
3007 			/*
3008 			 * Connection retry timeout,
3009 			 * failed connect to target.
3010 			 */
3011 			rc = ETIMEDOUT;
3012 			break;
3013 		}
3014 
3015 		/*
3016 		 * TCP connect still in progress.  Sleep until callback.
3017 		 * Do NOT go to sleep if the callback already occurred!
3018 		 */
3019 		mutex_enter(&idm_so_timed_socket_mutex);
3020 		if (!it.it_callback_called) {
3021 			(void) cv_timedwait(&it.it_cv,
3022 			    &idm_so_timed_socket_mutex, conn_login_max);
3023 		}
3024 		if (it.it_callback_called) {
3025 			rc = it.it_socket_error_code;
3026 			mutex_exit(&idm_so_timed_socket_mutex);
3027 			break;
3028 		}
3029 		/* If timer expires, go call ksocket_connect one last time. */
3030 		mutex_exit(&idm_so_timed_socket_mutex);
3031 	}
3032 
3033 	/* resume blocking mode */
3034 	nonblocking = 0;
3035 	(void) ksocket_ioctl(ks, FIONBIO, (intptr_t)&nonblocking, &rval,
3036 	    CRED());
3037 cleanup:
3038 	(void) ksocket_setcallbacks(ks, NULL, NULL, CRED());
3039 	cv_destroy(&it.it_cv);
3040 	if (rc != 0) {
3041 		idm_soshutdown(ks);
3042 	}
3043 	return (rc);
3044 }
3045 
3046 
3047 void
3048 idm_addr_to_sa(idm_addr_t *dportal, struct sockaddr_storage *sa)
3049 {
3050 	int			dp_addr_size;
3051 	struct sockaddr_in	*sin;
3052 	struct sockaddr_in6	*sin6;
3053 
3054 	/* Build sockaddr_storage for this portal (idm_addr_t) */
3055 	bzero(sa, sizeof (*sa));
3056 	dp_addr_size = dportal->a_addr.i_insize;
3057 	if (dp_addr_size == sizeof (struct in_addr)) {
3058 		/* IPv4 */
3059 		sa->ss_family = AF_INET;
3060 		sin = (struct sockaddr_in *)sa;
3061 		sin->sin_port = htons(dportal->a_port);
3062 		bcopy(&dportal->a_addr.i_addr.in4,
3063 		    &sin->sin_addr, sizeof (struct in_addr));
3064 	} else if (dp_addr_size == sizeof (struct in6_addr)) {
3065 		/* IPv6 */
3066 		sa->ss_family = AF_INET6;
3067 		sin6 = (struct sockaddr_in6 *)sa;
3068 		sin6->sin6_port = htons(dportal->a_port);
3069 		bcopy(&dportal->a_addr.i_addr.in6,
3070 		    &sin6->sin6_addr, sizeof (struct in6_addr));
3071 	} else {
3072 		ASSERT(0);
3073 	}
3074 }
3075 
3076 
/*
 * Render a sockaddr_storage as printable text of the form
 * "[ip-address].port" for use in log messages.
 *
 * The text is written into the caller-supplied buf (of size bytes) and
 * buf is returned.  Because the result lives in buf, a caller that
 * formats several addresses within one logging call must hand each
 * idm_sa_ntop() invocation a separate buffer.
 *
 * The placeholder "[0].-1" is produced instead when the address family
 * is neither AF_INET nor AF_INET6, when inet_ntop() fails, or when buf
 * is too small to hold the address plus the widest possible port.
 */
const char *
idm_sa_ntop(const struct sockaddr_storage *sa,
    char *buf, size_t size)
{
	static const char	bogus_ip[] = "[0].-1";
	char			addrstr[INET6_ADDRSTRLEN];
	const void		*rawaddr = NULL;
	int			family = 0;
	unsigned int		port = 0;

	/* Pick out the family-specific address and port fields. */
	if (sa->ss_family == AF_INET6) {
		const struct sockaddr_in6 *v6 =
		    (const struct sockaddr_in6 *)sa;

		family = v6->sin6_family;
		rawaddr = &v6->sin6_addr;
		port = ntohs(v6->sin6_port);
	} else if (sa->ss_family == AF_INET) {
		const struct sockaddr_in *v4 =
		    (const struct sockaddr_in *)sa;

		family = v4->sin_family;
		rawaddr = &v4->sin_addr;
		port = ntohs(v4->sin_port);
	}

	/*
	 * Format only if the family was recognized, the address converted
	 * cleanly, and buf has room for the brackets, the separator, a
	 * five-digit port and the terminating NUL.
	 */
	if (rawaddr != NULL &&
	    inet_ntop(family, rawaddr, addrstr, sizeof (addrstr)) != NULL &&
	    strlen(addrstr) + sizeof ("[].65535") <= size) {
		(void) snprintf(buf, size, "[%s].%u", addrstr, port);
		return (buf);
	}

	(void) snprintf(buf, size, "%s", bogus_ip);
	return (buf);
}
3131