xref: /illumos-gate/usr/src/uts/common/io/idm/idm_so.c (revision 56261083)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #include <sys/conf.h>
27 #include <sys/stat.h>
28 #include <sys/file.h>
29 #include <sys/ddi.h>
30 #include <sys/sunddi.h>
31 #include <sys/modctl.h>
32 #include <sys/priv.h>
33 #include <sys/cpuvar.h>
34 #include <sys/socket.h>
35 #include <sys/strsubr.h>
36 #include <sys/sysmacros.h>
37 #include <sys/sdt.h>
38 #include <netinet/tcp.h>
39 #include <inet/tcp.h>
40 #include <sys/socketvar.h>
41 #include <sys/pathname.h>
42 #include <sys/fs/snode.h>
43 #include <sys/fs/dv_node.h>
44 #include <sys/vnode.h>
45 #include <netinet/in.h>
46 #include <net/if.h>
47 #include <sys/sockio.h>
48 #include <sys/ksocket.h>
49 #include <sys/iscsi_protocol.h>
50 #include <sys/idm/idm.h>
51 #include <sys/idm/idm_so.h>
52 #include <sys/idm/idm_text.h>
53 
54 #define	IN_PROGRESS_DELAY	1
55 
56 /*
57  * in6addr_any is currently all zeroes, but use the macro in case this
58  * ever changes.
59  */
60 static const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
61 
62 static void idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
63 static void idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
64 static void idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status);
65 
66 static idm_status_t idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so);
67 static void idm_so_conn_destroy_common(idm_conn_t *ic);
68 static void idm_so_conn_connect_common(idm_conn_t *ic);
69 
70 static void idm_set_ini_preconnect_options(idm_so_conn_t *sc);
71 static void idm_set_ini_postconnect_options(idm_so_conn_t *sc);
72 static void idm_set_tgt_connect_options(ksocket_t so);
73 static idm_status_t idm_i_so_tx(idm_pdu_t *pdu);
74 
75 static idm_status_t idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu);
76 static void idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt,
77     idm_buf_t *idb, uint32_t offset, uint32_t length);
78 static void idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb);
79 static idm_status_t idm_so_send_buf_region(idm_task_t *idt,
80     idm_buf_t *idb, uint32_t buf_region_offset, uint32_t buf_region_length);
81 
82 static uint32_t idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb,
83     uint32_t ro, uint32_t dlength);
84 
85 static idm_status_t idm_so_handle_digest(idm_conn_t *it,
86     nvpair_t *digest_choice, const idm_kv_xlate_t *ikvx);
87 
88 static void idm_so_socket_set_nonblock(struct sonode *node);
89 static void idm_so_socket_set_block(struct sonode *node);
90 
91 /*
92  * Transport ops prototypes
93  */
94 static void idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu);
95 static idm_status_t idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb);
96 static idm_status_t idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb);
97 static void idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu);
98 static void idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu);
99 static void idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu);
100 static idm_status_t idm_so_free_task_rsrc(idm_task_t *idt);
101 static kv_status_t idm_so_negotiate_key_values(idm_conn_t *it,
102     nvlist_t *request_nvl, nvlist_t *response_nvl, nvlist_t *negotiated_nvl);
103 static void idm_so_notice_key_values(idm_conn_t *it,
104     nvlist_t *negotiated_nvl);
105 static kv_status_t idm_so_declare_key_values(idm_conn_t *it,
106     nvlist_t *config_nvl, nvlist_t *outgoing_nvl);
107 static boolean_t idm_so_conn_is_capable(idm_conn_req_t *ic,
108     idm_transport_caps_t *caps);
109 static idm_status_t idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen);
110 static void idm_so_buf_free(idm_buf_t *idb);
111 static idm_status_t idm_so_buf_setup(idm_buf_t *idb);
112 static void idm_so_buf_teardown(idm_buf_t *idb);
113 static idm_status_t idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is);
114 static void idm_so_tgt_svc_destroy(idm_svc_t *is);
115 static idm_status_t idm_so_tgt_svc_online(idm_svc_t *is);
116 static void idm_so_tgt_svc_offline(idm_svc_t *is);
117 static void idm_so_tgt_conn_destroy(idm_conn_t *ic);
118 static idm_status_t idm_so_tgt_conn_connect(idm_conn_t *ic);
119 static void idm_so_conn_disconnect(idm_conn_t *ic);
120 static idm_status_t idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic);
121 static void idm_so_ini_conn_destroy(idm_conn_t *ic);
122 static idm_status_t idm_so_ini_conn_connect(idm_conn_t *ic);
123 
/*
 * IDM Native Sockets transport operations
 *
 * Function table for the sockets transport; idm_so_init() installs it as
 * it->it_ops.  The initializers are positional, and the trailing comment on
 * each line names the idm_transport_ops_t slot it is meant to fill.  Slots
 * set to NULL have no sockets-transport handler.  The target-side and
 * initiator-side disconnect slots share idm_so_conn_disconnect().
 */
static
idm_transport_ops_t idm_so_transport_ops = {
	idm_so_tx,			/* it_tx_pdu */
	idm_so_buf_tx_to_ini,		/* it_buf_tx_to_ini */
	idm_so_buf_rx_from_ini,		/* it_buf_rx_from_ini */
	idm_so_rx_datain,		/* it_rx_datain */
	idm_so_rx_rtt,			/* it_rx_rtt */
	idm_so_rx_dataout,		/* it_rx_dataout */
	NULL,				/* it_alloc_conn_rsrc */
	NULL,				/* it_free_conn_rsrc */
	NULL,				/* it_tgt_enable_datamover */
	NULL,				/* it_ini_enable_datamover */
	NULL,				/* it_conn_terminate */
	idm_so_free_task_rsrc,		/* it_free_task_rsrc */
	idm_so_negotiate_key_values,	/* it_negotiate_key_values */
	idm_so_notice_key_values,	/* it_notice_key_values */
	idm_so_conn_is_capable,		/* it_conn_is_capable */
	idm_so_buf_alloc,		/* it_buf_alloc */
	idm_so_buf_free,		/* it_buf_free */
	idm_so_buf_setup,		/* it_buf_setup */
	idm_so_buf_teardown,		/* it_buf_teardown */
	idm_so_tgt_svc_create,		/* it_tgt_svc_create */
	idm_so_tgt_svc_destroy,		/* it_tgt_svc_destroy */
	idm_so_tgt_svc_online,		/* it_tgt_svc_online */
	idm_so_tgt_svc_offline,		/* it_tgt_svc_offline */
	idm_so_tgt_conn_destroy,	/* it_tgt_conn_destroy */
	idm_so_tgt_conn_connect,	/* it_tgt_conn_connect */
	idm_so_conn_disconnect,		/* it_tgt_conn_disconnect */
	idm_so_ini_conn_create,		/* it_ini_conn_create */
	idm_so_ini_conn_destroy,	/* it_ini_conn_destroy */
	idm_so_ini_conn_connect,	/* it_ini_conn_connect */
	idm_so_conn_disconnect,		/* it_ini_conn_disconnect */
	idm_so_declare_key_values	/* it_declare_key_values */
};
161 
162 /*
163  * idm_so_init()
164  * Sockets transport initialization
165  */
166 void
167 idm_so_init(idm_transport_t *it)
168 {
169 	/* Cache for IDM Data and R2T Transmit PDU's */
170 	idm.idm_sotx_pdu_cache = kmem_cache_create("idm_tx_pdu_cache",
171 	    sizeof (idm_pdu_t) + sizeof (iscsi_hdr_t), 8,
172 	    &idm_sotx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
173 
174 	/* Cache for IDM Receive PDU's */
175 	idm.idm_sorx_pdu_cache = kmem_cache_create("idm_rx_pdu_cache",
176 	    sizeof (idm_pdu_t) + IDM_SORX_CACHE_HDRLEN, 8,
177 	    &idm_sorx_pdu_constructor, NULL, NULL, NULL, NULL, KM_SLEEP);
178 
179 	/* 128k buffer cache */
180 	idm.idm_so_128k_buf_cache = kmem_cache_create("idm_128k_buf_cache",
181 	    IDM_SO_BUF_CACHE_UB, 8, NULL, NULL, NULL, NULL, NULL, KM_SLEEP);
182 
183 	/* Set the sockets transport ops */
184 	it->it_ops = &idm_so_transport_ops;
185 }
186 
187 /*
188  * idm_so_fini()
189  * Sockets transport teardown
190  */
191 void
192 idm_so_fini(void)
193 {
194 	kmem_cache_destroy(idm.idm_so_128k_buf_cache);
195 	kmem_cache_destroy(idm.idm_sotx_pdu_cache);
196 	kmem_cache_destroy(idm.idm_sorx_pdu_cache);
197 }
198 
199 ksocket_t
200 idm_socreate(int domain, int type, int protocol)
201 {
202 	ksocket_t ks;
203 
204 	if (!ksocket_socket(&ks, domain, type, protocol, KSOCKET_NOSLEEP,
205 	    CRED())) {
206 		return (ks);
207 	} else {
208 		return (NULL);
209 	}
210 }
211 
/*
 * idm_soshutdown will disconnect the socket and prevent subsequent PDU
 * reception and transmission.  The sonode still exists but its state
 * gets modified to indicate it is no longer connected.  Calls to
 * idm_sorecv/idm_iov_sorecv will return, so idm_soshutdown can be used
 * to regain control of a thread stuck in idm_sorecv.
 *
 * Both directions are shut down (SHUT_RDWR); the result of
 * ksocket_shutdown() is deliberately ignored.
 */
void
idm_soshutdown(ksocket_t so)
{
	(void) ksocket_shutdown(so, SHUT_RDWR, CRED());
}
224 
/*
 * idm_sodestroy releases all resources associated with a socket previously
 * created with idm_socreate.  The socket must be shutdown using
 * idm_soshutdown before the socket is destroyed with idm_sodestroy,
 * otherwise undefined behavior will result.
 *
 * The result of ksocket_close() is deliberately ignored.
 */
void
idm_sodestroy(ksocket_t ks)
{
	(void) ksocket_close(ks, CRED());
}
236 
237 /*
238  * Function to compare two addresses in sockaddr_storage format
239  */
240 
241 int
242 idm_ss_compare(const struct sockaddr_storage *cmp_ss1,
243     const struct sockaddr_storage *cmp_ss2,
244     boolean_t v4_mapped_as_v4)
245 {
246 	struct sockaddr_storage			mapped_v4_ss1, mapped_v4_ss2;
247 	const struct sockaddr_storage		*ss1, *ss2;
248 	struct in_addr				*in1, *in2;
249 	struct in6_addr				*in61, *in62;
250 	int i;
251 
252 	/*
253 	 * Normalize V4-mapped IPv6 addresses into V4 format if
254 	 * v4_mapped_as_v4 is B_TRUE.
255 	 */
256 	ss1 = cmp_ss1;
257 	ss2 = cmp_ss2;
258 	if (v4_mapped_as_v4 && (ss1->ss_family == AF_INET6)) {
259 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
260 		if (IN6_IS_ADDR_V4MAPPED(in61)) {
261 			bzero(&mapped_v4_ss1, sizeof (mapped_v4_ss1));
262 			mapped_v4_ss1.ss_family = AF_INET;
263 			((struct sockaddr_in *)&mapped_v4_ss1)->sin_port =
264 			    ((struct sockaddr_in *)ss1)->sin_port;
265 			IN6_V4MAPPED_TO_INADDR(in61,
266 			    &((struct sockaddr_in *)&mapped_v4_ss1)->sin_addr);
267 			ss1 = &mapped_v4_ss1;
268 		}
269 	}
270 	ss2 = cmp_ss2;
271 	if (v4_mapped_as_v4 && (ss2->ss_family == AF_INET6)) {
272 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
273 		if (IN6_IS_ADDR_V4MAPPED(in62)) {
274 			bzero(&mapped_v4_ss2, sizeof (mapped_v4_ss2));
275 			mapped_v4_ss2.ss_family = AF_INET;
276 			((struct sockaddr_in *)&mapped_v4_ss2)->sin_port =
277 			    ((struct sockaddr_in *)ss2)->sin_port;
278 			IN6_V4MAPPED_TO_INADDR(in62,
279 			    &((struct sockaddr_in *)&mapped_v4_ss2)->sin_addr);
280 			ss2 = &mapped_v4_ss2;
281 		}
282 	}
283 
284 	/*
285 	 * Compare ports, then address family, then ip address
286 	 */
287 	if (((struct sockaddr_in *)ss1)->sin_port !=
288 	    ((struct sockaddr_in *)ss2)->sin_port) {
289 		if (((struct sockaddr_in *)ss1)->sin_port >
290 		    ((struct sockaddr_in *)ss2)->sin_port)
291 			return (1);
292 		else
293 			return (-1);
294 	}
295 
296 	/*
297 	 * ports are the same
298 	 */
299 	if (ss1->ss_family != ss2->ss_family) {
300 		if (ss1->ss_family == AF_INET)
301 			return (1);
302 		else
303 			return (-1);
304 	}
305 
306 	/*
307 	 * address families are the same
308 	 */
309 	if (ss1->ss_family == AF_INET) {
310 		in1 = &((struct sockaddr_in *)ss1)->sin_addr;
311 		in2 = &((struct sockaddr_in *)ss2)->sin_addr;
312 
313 		if (in1->s_addr > in2->s_addr)
314 			return (1);
315 		else if (in1->s_addr < in2->s_addr)
316 			return (-1);
317 		else
318 			return (0);
319 	} else if (ss1->ss_family == AF_INET6) {
320 		in61 = &((struct sockaddr_in6 *)ss1)->sin6_addr;
321 		in62 = &((struct sockaddr_in6 *)ss2)->sin6_addr;
322 
323 		for (i = 0; i < 4; i++) {
324 			if (in61->s6_addr32[i] > in62->s6_addr32[i])
325 				return (1);
326 			else if (in61->s6_addr32[i] < in62->s6_addr32[i])
327 				return (-1);
328 		}
329 		return (0);
330 	}
331 
332 	return (1);
333 }
334 
335 /*
336  * IP address filter functions to flag addresses that should not
337  * go out to initiators through discovery.
338  */
339 static boolean_t
340 idm_v4_addr_okay(struct in_addr *in_addr)
341 {
342 	in_addr_t addr = ntohl(in_addr->s_addr);
343 
344 	if ((INADDR_NONE == addr) ||
345 	    (IN_MULTICAST(addr)) ||
346 	    ((addr >> IN_CLASSA_NSHIFT) == 0) ||
347 	    ((addr >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET)) {
348 		return (B_FALSE);
349 	}
350 	return (B_TRUE);
351 }
352 
353 static boolean_t
354 idm_v6_addr_okay(struct in6_addr *addr6)
355 {
356 
357 	if ((IN6_IS_ADDR_UNSPECIFIED(addr6)) ||
358 	    (IN6_IS_ADDR_LOOPBACK(addr6)) ||
359 	    (IN6_IS_ADDR_MULTICAST(addr6)) ||
360 	    (IN6_IS_ADDR_V4MAPPED(addr6)) ||
361 	    (IN6_IS_ADDR_V4COMPAT(addr6)) ||
362 	    (IN6_IS_ADDR_LINKLOCAL(addr6))) {
363 		return (B_FALSE);
364 	}
365 	return (B_TRUE);
366 }
367 
368 /*
369  * idm_get_ipaddr will retrieve a list of IP Addresses which the host is
370  * configured with by sending down a sequence of kernel ioctl to IP STREAMS.
371  */
372 int
373 idm_get_ipaddr(idm_addr_list_t **ipaddr_p)
374 {
375 	ksocket_t 		so4, so6;
376 	struct lifnum		lifn;
377 	struct lifconf		lifc;
378 	struct lifreq		*lp;
379 	int			rval;
380 	int			numifs;
381 	int			bufsize;
382 	void			*buf;
383 	int			i, j, n, rc;
384 	struct sockaddr_storage	ss;
385 	struct sockaddr_in	*sin;
386 	struct sockaddr_in6	*sin6;
387 	idm_addr_t		*ip;
388 	idm_addr_list_t		*ipaddr;
389 	int			size_ipaddr;
390 
391 	*ipaddr_p = NULL;
392 	size_ipaddr = 0;
393 	buf = NULL;
394 
395 	/* create an ipv4 and ipv6 UDP socket */
396 	if ((so6 = idm_socreate(PF_INET6, SOCK_DGRAM, 0)) == NULL)
397 		return (0);
398 	if ((so4 = idm_socreate(PF_INET, SOCK_DGRAM, 0)) == NULL) {
399 		idm_sodestroy(so6);
400 		return (0);
401 	}
402 
403 
404 retry_count:
405 	/* snapshot the current number of interfaces */
406 	lifn.lifn_family = PF_UNSPEC;
407 	lifn.lifn_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
408 	lifn.lifn_count = 0;
409 	/* use vp6 for ioctls with unspecified families by default */
410 	if (ksocket_ioctl(so6, SIOCGLIFNUM, (intptr_t)&lifn, &rval, CRED())
411 	    != 0) {
412 		goto cleanup;
413 	}
414 
415 	numifs = lifn.lifn_count;
416 	if (numifs <= 0) {
417 		goto cleanup;
418 	}
419 
420 	/* allocate extra room in case more interfaces appear */
421 	numifs += 10;
422 
423 	/* get the interface names and ip addresses */
424 	bufsize = numifs * sizeof (struct lifreq);
425 	buf = kmem_alloc(bufsize, KM_SLEEP);
426 
427 	lifc.lifc_family = AF_UNSPEC;
428 	lifc.lifc_flags = LIFC_NOXMIT | LIFC_TEMPORARY | LIFC_ALLZONES;
429 	lifc.lifc_len = bufsize;
430 	lifc.lifc_buf = buf;
431 	rc = ksocket_ioctl(so6, SIOCGLIFCONF, (intptr_t)&lifc, &rval, CRED());
432 	if (rc != 0) {
433 		goto cleanup;
434 	}
435 	/* if our extra room is used up, try again */
436 	if (bufsize <= lifc.lifc_len) {
437 		kmem_free(buf, bufsize);
438 		buf = NULL;
439 		goto retry_count;
440 	}
441 	/* calc actual number of ifconfs */
442 	n = lifc.lifc_len / sizeof (struct lifreq);
443 
444 	/* get ip address */
445 	if (n > 0) {
446 		size_ipaddr = sizeof (idm_addr_list_t) +
447 		    (n - 1) * sizeof (idm_addr_t);
448 		ipaddr = kmem_zalloc(size_ipaddr, KM_SLEEP);
449 	} else {
450 		goto cleanup;
451 	}
452 
453 	/*
454 	 * Examine the array of interfaces and filter uninteresting ones
455 	 */
456 	for (i = 0, j = 0, lp = lifc.lifc_req; i < n; i++, lp++) {
457 
458 		/*
459 		 * Copy the address as the SIOCGLIFFLAGS ioctl is destructive
460 		 */
461 		ss = lp->lifr_addr;
462 		/*
463 		 * fetch the flags using the socket of the correct family
464 		 */
465 		switch (ss.ss_family) {
466 		case AF_INET:
467 			rc = ksocket_ioctl(so4, SIOCGLIFFLAGS, (intptr_t)lp,
468 			    &rval, CRED());
469 			break;
470 		case AF_INET6:
471 			rc = ksocket_ioctl(so6, SIOCGLIFFLAGS, (intptr_t)lp,
472 			    &rval, CRED());
473 			break;
474 		default:
475 			continue;
476 		}
477 		if (rc == 0) {
478 			/*
479 			 * If we got the flags, skip uninteresting
480 			 * interfaces based on flags
481 			 */
482 			if ((lp->lifr_flags & IFF_UP) != IFF_UP)
483 				continue;
484 			if (lp->lifr_flags &
485 			    (IFF_ANYCAST|IFF_NOLOCAL|IFF_DEPRECATED))
486 				continue;
487 		}
488 
489 		/* save ip address */
490 		ip = &ipaddr->al_addrs[j];
491 		switch (ss.ss_family) {
492 		case AF_INET:
493 			sin = (struct sockaddr_in *)&ss;
494 			if (!idm_v4_addr_okay(&sin->sin_addr))
495 				continue;
496 			ip->a_addr.i_addr.in4 = sin->sin_addr;
497 			ip->a_addr.i_insize = sizeof (struct in_addr);
498 			break;
499 		case AF_INET6:
500 			sin6 = (struct sockaddr_in6 *)&ss;
501 			if (!idm_v6_addr_okay(&sin6->sin6_addr))
502 				continue;
503 			ip->a_addr.i_addr.in6 = sin6->sin6_addr;
504 			ip->a_addr.i_insize = sizeof (struct in6_addr);
505 			break;
506 		default:
507 			continue;
508 		}
509 		j++;
510 	}
511 
512 	if (j == 0) {
513 		/* no valid ifaddr */
514 		kmem_free(ipaddr, size_ipaddr);
515 		size_ipaddr = 0;
516 		ipaddr = NULL;
517 	} else {
518 		ipaddr->al_out_cnt = j;
519 	}
520 
521 
522 cleanup:
523 	idm_sodestroy(so6);
524 	idm_sodestroy(so4);
525 
526 	if (buf != NULL)
527 		kmem_free(buf, bufsize);
528 
529 	*ipaddr_p = ipaddr;
530 	return (size_ipaddr);
531 }
532 
533 int
534 idm_sorecv(ksocket_t so, void *msg, size_t len)
535 {
536 	iovec_t iov;
537 
538 	ASSERT(so != NULL);
539 	ASSERT(len != 0);
540 
541 	/*
542 	 * Fill in iovec and receive data
543 	 */
544 	iov.iov_base = msg;
545 	iov.iov_len = len;
546 
547 	return (idm_iov_sorecv(so, &iov, 1, len));
548 }
549 
550 /*
551  * idm_sosendto - Sends a buffered data on a non-connected socket.
552  *
553  * This function puts the data provided on the wire by calling sosendmsg.
554  * It will return only when all the data has been sent or if an error
555  * occurs.
556  *
557  * Returns 0 for success, the socket errno value if sosendmsg fails, and
558  * -1 if sosendmsg returns success but uio_resid != 0
559  */
560 int
561 idm_sosendto(ksocket_t so, void *buff, size_t len,
562     struct sockaddr *name, socklen_t namelen)
563 {
564 	struct msghdr		msg;
565 	struct iovec		iov[1];
566 	int			error;
567 	size_t			sent = 0;
568 
569 	iov[0].iov_base	= buff;
570 	iov[0].iov_len	= len;
571 
572 	/* Initialization of the message header. */
573 	bzero(&msg, sizeof (msg));
574 	msg.msg_iov	= iov;
575 	msg.msg_iovlen	= 1;
576 	msg.msg_name	= name;
577 	msg.msg_namelen	= namelen;
578 
579 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED())) == 0) {
580 		/* Data sent */
581 		if (sent == len) {
582 			/* All data sent.  Success. */
583 			return (0);
584 		} else {
585 			/* Not all data was sent.  Failure */
586 			return (-1);
587 		}
588 	}
589 
590 	/* Send failed */
591 	return (error);
592 }
593 
594 /*
595  * idm_iov_sosend - Sends an iovec on a connection.
596  *
597  * This function puts the data provided on the wire by calling sosendmsg.
598  * It will return only when all the data has been sent or if an error
599  * occurs.
600  *
601  * Returns 0 for success, the socket errno value if sosendmsg fails, and
602  * -1 if sosendmsg returns success but uio_resid != 0
603  */
604 int
605 idm_iov_sosend(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
606 {
607 	struct msghdr		msg;
608 	int			error;
609 	size_t 			sent = 0;
610 
611 	ASSERT(iop != NULL);
612 
613 	/* Initialization of the message header. */
614 	bzero(&msg, sizeof (msg));
615 	msg.msg_iov	= iop;
616 	msg.msg_iovlen	= iovlen;
617 
618 	if ((error = ksocket_sendmsg(so, &msg, 0, &sent, CRED()))
619 	    == 0) {
620 		/* Data sent */
621 		if (sent == total_len) {
622 			/* All data sent.  Success. */
623 			return (0);
624 		} else {
625 			/* Not all data was sent.  Failure */
626 			return (-1);
627 		}
628 	}
629 
630 	/* Send failed */
631 	return (error);
632 }
633 
634 /*
635  * idm_iov_sorecv - Receives an iovec from a connection
636  *
637  * This function gets the data asked for from the socket.  It will return
638  * only when all the requested data has been retrieved or if an error
639  * occurs.
640  *
641  * Returns 0 for success, the socket errno value if sorecvmsg fails, and
642  * -1 if sorecvmsg returns success but uio_resid != 0
643  */
644 int
645 idm_iov_sorecv(ksocket_t so, iovec_t *iop, int iovlen, size_t total_len)
646 {
647 	struct msghdr		msg;
648 	int			error;
649 	size_t			recv;
650 	int 			flags;
651 
652 	ASSERT(iop != NULL);
653 
654 	/* Initialization of the message header. */
655 	bzero(&msg, sizeof (msg));
656 	msg.msg_iov	= iop;
657 	msg.msg_iovlen	= iovlen;
658 	flags		= MSG_WAITALL;
659 
660 	if ((error = ksocket_recvmsg(so, &msg, flags, &recv, CRED()))
661 	    == 0) {
662 		/* Received data */
663 		if (recv == total_len) {
664 			/* All requested data received.  Success */
665 			return (0);
666 		} else {
667 			/*
668 			 * Not all data was received.  The connection has
669 			 * probably failed.
670 			 */
671 			return (-1);
672 		}
673 	}
674 
675 	/* Receive failed */
676 	return (error);
677 }
678 
679 static void
680 idm_set_ini_preconnect_options(idm_so_conn_t *sc)
681 {
682 	int	conn_abort = 10000;
683 	int	conn_notify = 2000;
684 	int	abort = 30000;
685 
686 	/* Pre-connect socket options */
687 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
688 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&conn_notify, sizeof (int),
689 	    CRED());
690 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP,
691 	    TCP_CONN_ABORT_THRESHOLD, (char *)&conn_abort, sizeof (int),
692 	    CRED());
693 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
694 	    (char *)&abort, sizeof (int), CRED());
695 }
696 
697 static void
698 idm_set_ini_postconnect_options(idm_so_conn_t *sc)
699 {
700 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
701 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
702 	const int	on = 1;
703 
704 	/* Set postconnect options */
705 	(void) ksocket_setsockopt(sc->ic_so, IPPROTO_TCP, TCP_NODELAY,
706 	    (char *)&on, sizeof (int), CRED());
707 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_RCVBUF,
708 	    (char *)&rcvbuf, sizeof (int), CRED());
709 	(void) ksocket_setsockopt(sc->ic_so, SOL_SOCKET, SO_SNDBUF,
710 	    (char *)&sndbuf, sizeof (int), CRED());
711 }
712 
713 static void
714 idm_set_tgt_connect_options(ksocket_t ks)
715 {
716 	int32_t		rcvbuf = IDM_RCVBUF_SIZE;
717 	int32_t		sndbuf = IDM_SNDBUF_SIZE;
718 	const int	on = 1;
719 
720 	/* Set connect options */
721 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVBUF,
722 	    (char *)&rcvbuf, sizeof (int), CRED());
723 	(void) ksocket_setsockopt(ks, SOL_SOCKET, SO_SNDBUF,
724 	    (char *)&sndbuf, sizeof (int), CRED());
725 	(void) ksocket_setsockopt(ks, IPPROTO_TCP, TCP_NODELAY,
726 	    (char *)&on, sizeof (on), CRED());
727 }
728 
/*
 * Assemble a 24-bit value from three bytes stored most-significant byte
 * first.
 */
static uint32_t
n2h24(const uchar_t *ptr)
{
	uint32_t	val;

	val = ptr[0] << 16;
	val |= ptr[1] << 8;
	val |= ptr[2];

	return (val);
}
734 
735 
/*
 * idm_sorecvhdr()
 * Read one PDU header from the connection's socket into pdu: the basic
 * header segment (BHS), then any additional header segments (AHS) and, when
 * header digests are enabled on the connection, the header digest.
 *
 * On the way it fills in pdu->isp_hdrlen and pdu->isp_datalen from the BHS.
 * If the AHS is larger than IDM_SORX_CACHE_AHSLEN, pdu->isp_hdr is replaced
 * by a freshly allocated header, IDM_PDU_ADDL_HDR is set in pdu->isp_flags
 * and pdu->isp_callback is switched to idm_sorx_addl_pdu_cb.
 *
 * Returns 0 on success, IDM_STATUS_FAIL if a socket read fails or (on
 * target connections only) the data segment length exceeds
 * max_recv_dataseglen, and IDM_STATUS_HEADER_DIGEST on a digest mismatch.
 */
static idm_status_t
idm_sorecvhdr(idm_conn_t *ic, idm_pdu_t *pdu)
{
	iscsi_hdr_t	*bhs;
	uint32_t	hdr_digest_crc;
	uint32_t	crc_calculated;
	void		*new_hdr;
	int		ahslen = 0;
	int		total_len = 0;
	int		iovlen = 0;
	struct iovec	iov[2];
	idm_so_conn_t	*so_conn;
	int		rc;

	so_conn = ic->ic_transport_private;

	/*
	 * Read BHS
	 *
	 * bhs keeps pointing at the header buffer the PDU arrived with, even
	 * if pdu->isp_hdr is swapped for a larger allocation below.
	 */
	bhs = pdu->isp_hdr;
	rc = idm_sorecv(so_conn->ic_so, pdu->isp_hdr, sizeof (iscsi_hdr_t));
	if (rc != IDM_STATUS_SUCCESS) {
		return (IDM_STATUS_FAIL);
	}

	/*
	 * Derive the total header length (hlength is scaled by
	 * sizeof (uint32_t) to get the AHS byte count) and the data segment
	 * length (three bytes, decoded by n2h24()) from the BHS, then check
	 * them against the connection limit and the space available in the
	 * PDU's header buffer.
	 */
	pdu->isp_hdrlen = sizeof (iscsi_hdr_t) +
	    (bhs->hlength * sizeof (uint32_t));
	pdu->isp_datalen = n2h24(bhs->dlength);
	/* The data segment limit is only enforced on target connections */
	if (ic->ic_conn_type == CONN_TYPE_TGT &&
	    pdu->isp_datalen > ic->ic_conn_params.max_recv_dataseglen) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_sorecvhdr: exceeded the max data segment length");
		return (IDM_STATUS_FAIL);
	}
	if (bhs->hlength > IDM_SORX_CACHE_AHSLEN) {
		/* Allocate a new header segment and change the callback */
		new_hdr = kmem_alloc(pdu->isp_hdrlen, KM_SLEEP);
		bcopy(pdu->isp_hdr, new_hdr, sizeof (iscsi_hdr_t));
		pdu->isp_hdr = new_hdr;
		pdu->isp_flags |= IDM_PDU_ADDL_HDR;

		/*
		 * This callback will restore the expected values after
		 * the RX PDU has been processed.
		 *
		 * NOTE(review): the failure returns below leave isp_hdr
		 * pointing at new_hdr; presumably the callback also covers
		 * that case -- confirm.
		 */
		pdu->isp_callback = idm_sorx_addl_pdu_cb;
	}

	/*
	 * Setup receipt of additional header and header digest (if enabled).
	 * Both are gathered with a single idm_iov_sorecv() call: the AHS
	 * lands directly behind the BHS in pdu->isp_hdr, the digest in a
	 * local.
	 */
	if (bhs->hlength > 0) {
		iov[iovlen].iov_base = (caddr_t)(pdu->isp_hdr + 1);
		ahslen = pdu->isp_hdrlen - sizeof (iscsi_hdr_t);
		iov[iovlen].iov_len = ahslen;
		total_len += iov[iovlen].iov_len;
		iovlen++;
	}

	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
		iov[iovlen].iov_base = (caddr_t)&hdr_digest_crc;
		iov[iovlen].iov_len = sizeof (hdr_digest_crc);
		total_len += iov[iovlen].iov_len;
		iovlen++;
	}

	/* Nothing more to read when there is neither AHS nor digest */
	if ((iovlen != 0) &&
	    (idm_iov_sorecv(so_conn->ic_so, &iov[0], iovlen,
	    total_len) != 0)) {
		return (IDM_STATUS_FAIL);
	}

	/*
	 * Validate header digest if enabled; the CRC covers the BHS plus
	 * the AHS.
	 */
	if (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST) {
		crc_calculated = idm_crc32c(pdu->isp_hdr,
		    sizeof (iscsi_hdr_t) + ahslen);
		if (crc_calculated != hdr_digest_crc) {
			/* Invalid Header Digest */
			return (IDM_STATUS_HEADER_DIGEST);
		}
	}

	return (0);
}
825 
826 /*
827  * idm_so_ini_conn_create()
828  * Allocate the sockets transport connection resources.
829  */
830 static idm_status_t
831 idm_so_ini_conn_create(idm_conn_req_t *cr, idm_conn_t *ic)
832 {
833 	ksocket_t	so;
834 	idm_so_conn_t	*so_conn;
835 	idm_status_t	idmrc;
836 
837 	so = idm_socreate(cr->cr_domain, cr->cr_type,
838 	    cr->cr_protocol);
839 	if (so == NULL) {
840 		return (IDM_STATUS_FAIL);
841 	}
842 
843 	/* Bind the socket if configured to do so */
844 	if (cr->cr_bound) {
845 		if (ksocket_bind(so, &cr->cr_bound_addr.sin,
846 		    SIZEOF_SOCKADDR(&cr->cr_bound_addr.sin), CRED()) != 0) {
847 			idm_sodestroy(so);
848 			return (IDM_STATUS_FAIL);
849 		}
850 	}
851 
852 	idmrc = idm_so_conn_create_common(ic, so);
853 	if (idmrc != IDM_STATUS_SUCCESS) {
854 		idm_soshutdown(so);
855 		idm_sodestroy(so);
856 		return (IDM_STATUS_FAIL);
857 	}
858 
859 	so_conn = ic->ic_transport_private;
860 	/* Set up socket options */
861 	idm_set_ini_preconnect_options(so_conn);
862 
863 	return (IDM_STATUS_SUCCESS);
864 }
865 
/*
 * idm_so_ini_conn_destroy()
 * Tear down the sockets transport connection resources.
 *
 * Initiator-side entry point; all of the work is done by
 * idm_so_conn_destroy_common().
 */
static void
idm_so_ini_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
875 
/*
 * idm_so_ini_conn_connect()
 * Establish the connection referred to by the handle previously allocated via
 * idm_so_ini_conn_create().
 *
 * With ic_conn_params.nonblock_socket set, the socket is switched to
 * non-blocking mode and ksocket_connect() is retried until it succeeds, the
 * peer refuses/resets/times out, or the current lbolt passes
 * ic_conn_params.conn_login_max (compared directly against
 * ddi_get_lbolt(), so it is treated as an absolute tick value).  Otherwise
 * a single blocking ksocket_connect() is issued.
 *
 * On success the common connect work is done and the post-connect socket
 * options are applied.  On failure the socket is shut down.
 *
 * Returns IDM_STATUS_SUCCESS or IDM_STATUS_FAIL.
 */
static idm_status_t
idm_so_ini_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_t	*so_conn;
	struct sonode	*node = NULL;
	int 		rc;
	clock_t		lbolt, conn_login_max, conn_login_interval;
	boolean_t	nonblock;

	so_conn = ic->ic_transport_private;
	nonblock = ic->ic_conn_params.nonblock_socket;
	conn_login_max = ic->ic_conn_params.conn_login_max;
	/* Deadline (in ticks) of the current retry interval */
	conn_login_interval = ddi_get_lbolt() +
	    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);

	if (nonblock == B_TRUE) {
		node = ((struct sonode *)(so_conn->ic_so));
		/* Switch the socket to non-blocking mode */
		idm_so_socket_set_nonblock(node);
		do {
			rc = ksocket_connect(so_conn->ic_so,
			    &ic->ic_ini_dst_addr.sin,
			    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)),
			    CRED());
			if (rc == 0 || rc == EISCONN) {
				/* connected now, or already connected */
				rc = IDM_STATUS_SUCCESS;
				break;
			}
			if ((rc == ETIMEDOUT) || (rc == ECONNREFUSED) ||
			    (rc == ECONNRESET)) {
				/* connection timed out, refused or reset */
				break;
			}
			lbolt = ddi_get_lbolt();
			if (lbolt > conn_login_max) {
				/*
				 * Connection retry timeout,
				 * failed connect to target.
				 */
				break;
			}
			if (lbolt < conn_login_interval) {
				if ((rc == EINPROGRESS) || (rc == EALREADY)) {
					/*
					 * TCP connect still in progress:
					 * poll again after a short delay,
					 * keeping the interval deadline.
					 */
					delay(SEC_TO_TICK(IN_PROGRESS_DELAY));
					continue;
				} else {
					/* sleep out the rest of the interval */
					delay(conn_login_interval - lbolt);
				}
			}
			/* start a new retry interval */
			conn_login_interval = ddi_get_lbolt() +
			    SEC_TO_TICK(ic->ic_conn_params.conn_login_interval);
		} while (rc != 0);
		/* restore blocking mode, but only once connected */
		if (rc == IDM_STATUS_SUCCESS) {
			idm_so_socket_set_block(node);
		}
	} else {
		rc = ksocket_connect(so_conn->ic_so, &ic->ic_ini_dst_addr.sin,
		    (SIZEOF_SOCKADDR(&ic->ic_ini_dst_addr.sin)), CRED());
	}

	if (rc != 0) {
		idm_soshutdown(so_conn->ic_so);
		return (IDM_STATUS_FAIL);
	}

	idm_so_conn_connect_common(ic);

	idm_set_ini_postconnect_options(so_conn);

	return (IDM_STATUS_SUCCESS);
}
955 
956 idm_status_t
957 idm_so_tgt_conn_create(idm_conn_t *ic, ksocket_t new_so)
958 {
959 	idm_status_t	idmrc;
960 
961 	idmrc = idm_so_conn_create_common(ic, new_so);
962 
963 	return (idmrc);
964 }
965 
/*
 * idm_so_tgt_conn_destroy()
 * Target-side teardown of the sockets transport connection resources; all
 * of the work is done by idm_so_conn_destroy_common().
 */
static void
idm_so_tgt_conn_destroy(idm_conn_t *ic)
{
	idm_so_conn_destroy_common(ic);
}
971 
/*
 * idm_so_tgt_conn_connect()
 * Establish the connection in ic, passed from idm_tgt_conn_finish(), which
 * is invoked from the SM as a result of an inbound connection request.
 *
 * Only the common connect work is done here (no ksocket_connect() call);
 * always returns IDM_STATUS_SUCCESS.
 */
static idm_status_t
idm_so_tgt_conn_connect(idm_conn_t *ic)
{
	idm_so_conn_connect_common(ic);

	return (IDM_STATUS_SUCCESS);
}
984 
985 static idm_status_t
986 idm_so_conn_create_common(idm_conn_t *ic, ksocket_t new_so)
987 {
988 	idm_so_conn_t	*so_conn;
989 
990 	so_conn = kmem_zalloc(sizeof (idm_so_conn_t), KM_SLEEP);
991 	so_conn->ic_so = new_so;
992 
993 	ic->ic_transport_private = so_conn;
994 	ic->ic_transport_hdrlen = 0;
995 
996 	/* Set the scoreboarding flag on this connection */
997 	ic->ic_conn_flags |= IDM_CONN_USE_SCOREBOARD;
998 	ic->ic_conn_params.max_recv_dataseglen =
999 	    ISCSI_DEFAULT_MAX_RECV_SEG_LEN;
1000 	ic->ic_conn_params.max_xmit_dataseglen =
1001 	    ISCSI_DEFAULT_MAX_XMIT_SEG_LEN;
1002 
1003 	/*
1004 	 * Initialize tx thread mutex and list
1005 	 */
1006 	mutex_init(&so_conn->ic_tx_mutex, NULL, MUTEX_DEFAULT, NULL);
1007 	cv_init(&so_conn->ic_tx_cv, NULL, CV_DEFAULT, NULL);
1008 	list_create(&so_conn->ic_tx_list, sizeof (idm_pdu_t),
1009 	    offsetof(idm_pdu_t, idm_tx_link));
1010 
1011 	return (IDM_STATUS_SUCCESS);
1012 }
1013 
1014 static void
1015 idm_so_conn_destroy_common(idm_conn_t *ic)
1016 {
1017 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
1018 
1019 	ic->ic_transport_private = NULL;
1020 	idm_sodestroy(so_conn->ic_so);
1021 	list_destroy(&so_conn->ic_tx_list);
1022 	mutex_destroy(&so_conn->ic_tx_mutex);
1023 	cv_destroy(&so_conn->ic_tx_cv);
1024 
1025 	kmem_free(so_conn, sizeof (idm_so_conn_t));
1026 }
1027 
1028 static void
1029 idm_so_conn_connect_common(idm_conn_t *ic)
1030 {
1031 	idm_so_conn_t	*so_conn;
1032 	struct sockaddr_in6	t_addr;
1033 	socklen_t	t_addrlen = 0;
1034 
1035 	so_conn = ic->ic_transport_private;
1036 	bzero(&t_addr, sizeof (struct sockaddr_in6));
1037 	t_addrlen = sizeof (struct sockaddr_in6);
1038 
1039 	/* Set the local and remote addresses in the idm conn handle */
1040 	ksocket_getsockname(so_conn->ic_so, (struct sockaddr *)&t_addr,
1041 	    &t_addrlen, CRED());
1042 	bcopy(&t_addr, &ic->ic_laddr, t_addrlen);
1043 	ksocket_getpeername(so_conn->ic_so, (struct sockaddr *)&t_addr,
1044 	    &t_addrlen, CRED());
1045 	bcopy(&t_addr, &ic->ic_raddr, t_addrlen);
1046 
1047 	mutex_enter(&ic->ic_mutex);
1048 	so_conn->ic_tx_thread = thread_create(NULL, 0, idm_sotx_thread, ic, 0,
1049 	    &p0, TS_RUN, minclsyspri);
1050 	so_conn->ic_rx_thread = thread_create(NULL, 0, idm_sorx_thread, ic, 0,
1051 	    &p0, TS_RUN, minclsyspri);
1052 
1053 	while (!so_conn->ic_rx_thread_running || !so_conn->ic_tx_thread_running)
1054 		cv_wait(&ic->ic_cv, &ic->ic_mutex);
1055 	mutex_exit(&ic->ic_mutex);
1056 }
1057 
1058 /*
1059  * idm_so_conn_disconnect()
1060  * Shutdown the socket connection and stop the thread
1061  */
1062 static void
1063 idm_so_conn_disconnect(idm_conn_t *ic)
1064 {
1065 	idm_so_conn_t	*so_conn;
1066 
1067 	so_conn = ic->ic_transport_private;
1068 
1069 	mutex_enter(&ic->ic_mutex);
1070 	so_conn->ic_rx_thread_running = B_FALSE;
1071 	so_conn->ic_tx_thread_running = B_FALSE;
1072 	/* We need to wakeup the TX thread */
1073 	mutex_enter(&so_conn->ic_tx_mutex);
1074 	cv_signal(&so_conn->ic_tx_cv);
1075 	mutex_exit(&so_conn->ic_tx_mutex);
1076 	mutex_exit(&ic->ic_mutex);
1077 
1078 	/* This should wakeup the RX thread if it is sleeping */
1079 	idm_soshutdown(so_conn->ic_so);
1080 
1081 	thread_join(so_conn->ic_tx_thread_did);
1082 	thread_join(so_conn->ic_rx_thread_did);
1083 }
1084 
1085 /*
1086  * idm_so_tgt_svc_create()
1087  * Establish a service on an IP address and port.  idm_svc_req_t contains
1088  * the service parameters.
1089  */
1090 /*ARGSUSED*/
1091 static idm_status_t
1092 idm_so_tgt_svc_create(idm_svc_req_t *sr, idm_svc_t *is)
1093 {
1094 	idm_so_svc_t		*so_svc;
1095 
1096 	so_svc = kmem_zalloc(sizeof (idm_so_svc_t), KM_SLEEP);
1097 
1098 	/* Set the new sockets service in svc handle */
1099 	is->is_so_svc = (void *)so_svc;
1100 
1101 	return (IDM_STATUS_SUCCESS);
1102 }
1103 
1104 /*
1105  * idm_so_tgt_svc_destroy()
1106  * Teardown sockets resources allocated in idm_so_tgt_svc_create()
1107  */
1108 static void
1109 idm_so_tgt_svc_destroy(idm_svc_t *is)
1110 {
1111 	/* the socket will have been torn down; free the service */
1112 	kmem_free(is->is_so_svc, sizeof (idm_so_svc_t));
1113 }
1114 
1115 /*
1116  * idm_so_tgt_svc_online()
1117  * Launch a watch thread on the svc allocated in idm_so_tgt_svc_create()
1118  */
1119 
1120 static idm_status_t
1121 idm_so_tgt_svc_online(idm_svc_t *is)
1122 {
1123 	idm_so_svc_t		*so_svc;
1124 	idm_svc_req_t		*sr = &is->is_svc_req;
1125 	struct sockaddr_in6	sin6_ip;
1126 	const uint32_t		on = 1;
1127 	const uint32_t		off = 0;
1128 
1129 	mutex_enter(&is->is_mutex);
1130 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1131 
1132 	/*
1133 	 * Try creating an IPv6 socket first
1134 	 */
1135 	if ((so_svc->is_so = idm_socreate(PF_INET6, SOCK_STREAM, 0)) == NULL) {
1136 		mutex_exit(&is->is_mutex);
1137 		return (IDM_STATUS_FAIL);
1138 	} else {
1139 		bzero(&sin6_ip, sizeof (sin6_ip));
1140 		sin6_ip.sin6_family = AF_INET6;
1141 		sin6_ip.sin6_port = htons(sr->sr_port);
1142 		sin6_ip.sin6_addr = in6addr_any;
1143 
1144 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1145 		    SO_REUSEADDR, (char *)&on, sizeof (on), CRED());
1146 		/*
1147 		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
1148 		 */
1149 		(void) ksocket_setsockopt(so_svc->is_so, SOL_SOCKET,
1150 		    SO_MAC_EXEMPT, (char *)&off, sizeof (off), CRED());
1151 
1152 		if (ksocket_bind(so_svc->is_so, (struct sockaddr *)&sin6_ip,
1153 		    sizeof (sin6_ip), CRED()) != 0) {
1154 			mutex_exit(&is->is_mutex);
1155 			idm_sodestroy(so_svc->is_so);
1156 			return (IDM_STATUS_FAIL);
1157 		}
1158 	}
1159 
1160 	idm_set_tgt_connect_options(so_svc->is_so);
1161 
1162 	if (ksocket_listen(so_svc->is_so, 5, CRED()) != 0) {
1163 		mutex_exit(&is->is_mutex);
1164 		idm_soshutdown(so_svc->is_so);
1165 		idm_sodestroy(so_svc->is_so);
1166 		return (IDM_STATUS_FAIL);
1167 	}
1168 
1169 	/* Launch a watch thread */
1170 	so_svc->is_thread = thread_create(NULL, 0, idm_so_svc_port_watcher,
1171 	    is, 0, &p0, TS_RUN, minclsyspri);
1172 
1173 	if (so_svc->is_thread == NULL) {
1174 		/* Failure to launch; teardown the socket */
1175 		mutex_exit(&is->is_mutex);
1176 		idm_soshutdown(so_svc->is_so);
1177 		idm_sodestroy(so_svc->is_so);
1178 		return (IDM_STATUS_FAIL);
1179 	}
1180 	ksocket_hold(so_svc->is_so);
1181 	/* Wait for the port watcher thread to start */
1182 	while (!so_svc->is_thread_running)
1183 		cv_wait(&is->is_cv, &is->is_mutex);
1184 	mutex_exit(&is->is_mutex);
1185 
1186 	return (IDM_STATUS_SUCCESS);
1187 }
1188 
1189 /*
1190  * idm_so_tgt_svc_offline
1191  *
1192  * Stop listening on the IP address and port identified by idm_svc_t.
1193  */
1194 static void
1195 idm_so_tgt_svc_offline(idm_svc_t *is)
1196 {
1197 	idm_so_svc_t		*so_svc;
1198 	mutex_enter(&is->is_mutex);
1199 	so_svc = (idm_so_svc_t *)is->is_so_svc;
1200 	so_svc->is_thread_running = B_FALSE;
1201 	mutex_exit(&is->is_mutex);
1202 
1203 	/*
1204 	 * Teardown socket
1205 	 */
1206 	idm_sodestroy(so_svc->is_so);
1207 
1208 	/*
1209 	 * Now we expect the port watcher thread to terminate
1210 	 */
1211 	thread_join(so_svc->is_thread_did);
1212 }
1213 
/*
 * Watch thread for target service connection establishment.
 *
 * arg is the idm_svc_t for the service.  The thread announces itself by
 * setting is_thread_running and is_thread_did under is_mutex and signalling
 * is_cv, then loops accepting connections on the listening socket for as
 * long as is_thread_running stays set.  Each accepted socket gets an IDM
 * connection and a CE_CONNECT_ACCEPT event.
 *
 * is_mutex is dropped around the blocking accept and the connection setup,
 * and is re-acquired on every path that returns to the loop test or leaves
 * the loop, so the code after the loop always runs with the mutex held.
 */
void
idm_so_svc_port_watcher(void *arg)
{
	idm_svc_t		*svc = arg;
	ksocket_t		new_so;
	idm_conn_t		*ic;
	idm_status_t		idmrc;
	idm_so_svc_t		*so_svc;
	int			rc;
	const uint32_t		off = 0;
	struct sockaddr_in6 	t_addr;
	socklen_t		t_addrlen;

	bzero(&t_addr, sizeof (struct sockaddr_in6));
	t_addrlen = sizeof (struct sockaddr_in6);
	mutex_enter(&svc->is_mutex);

	so_svc = svc->is_so_svc;
	so_svc->is_thread_running = B_TRUE;
	so_svc->is_thread_did = so_svc->is_thread->t_did;

	/* Wake idm_so_tgt_svc_online(), which waits on is_cv for the flag */
	cv_signal(&svc->is_cv);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) online", (void *)svc,
	    svc->is_svc_req.sr_port);

	while (so_svc->is_thread_running) {
		mutex_exit(&svc->is_mutex);

		/*
		 * NOTE(review): t_addrlen is passed by address and is not
		 * reset between iterations -- confirm ksocket_accept() cannot
		 * leave it smaller than sizeof (struct sockaddr_in6).
		 */
		if ((rc = ksocket_accept(so_svc->is_so,
		    (struct sockaddr *)&t_addr, &t_addrlen,
		    &new_so, CRED())) != 0) {
			mutex_enter(&svc->is_mutex);
			/* An aborted connection is retried via the loop test */
			if (rc == ECONNABORTED)
				continue;
			/* Connection problem */
			break;
		}
		/*
		 * Turn off SO_MAC_EXEMPT so future sobinds succeed
		 */
		(void) ksocket_setsockopt(new_so, SOL_SOCKET, SO_MAC_EXEMPT,
		    (char *)&off, sizeof (off), CRED());

		idmrc = idm_svc_conn_create(svc, IDM_TRANSPORT_TYPE_SOCKETS,
		    &ic);
		if (idmrc != IDM_STATUS_SUCCESS) {
			/* Drop connection */
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		idmrc = idm_so_tgt_conn_create(ic, new_so);
		if (idmrc != IDM_STATUS_SUCCESS) {
			/* Undo the connection handle, then drop the socket */
			idm_svc_conn_destroy(ic);
			idm_soshutdown(new_so);
			idm_sodestroy(new_so);
			mutex_enter(&svc->is_mutex);
			continue;
		}

		/*
		 * Kick the state machine.  At CS_S3_XPT_UP the state machine
		 * will notify the client (target) about the new connection.
		 */
		idm_conn_event(ic, CE_CONNECT_ACCEPT, NULL);

		mutex_enter(&svc->is_mutex);
	}
	/* Drop the socket hold taken in idm_so_tgt_svc_online() */
	ksocket_rele(so_svc->is_so);
	so_svc->is_thread_running = B_FALSE;
	mutex_exit(&svc->is_mutex);

	IDM_SVC_LOG(CE_NOTE, "iSCSI service (%p/%d) offline", (void *)svc,
	    svc->is_svc_req.sr_port);

	thread_exit();
}
1297 
1298 /*
1299  * idm_so_free_task_rsrc() stops any ongoing processing of the task and
1300  * frees resources associated with the task.
1301  *
1302  * It's not clear that this should return idm_status_t.  What do we do
1303  * if it fails?
1304  */
1305 static idm_status_t
1306 idm_so_free_task_rsrc(idm_task_t *idt)
1307 {
1308 	idm_buf_t	*idb;
1309 
1310 	/*
1311 	 * There is nothing to cleanup on initiator connections
1312 	 */
1313 	if (IDM_CONN_ISINI(idt->idt_ic))
1314 		return (IDM_STATUS_SUCCESS);
1315 
1316 	/*
1317 	 * If this is a target connection, call idm_buf_rx_from_ini_done for
1318 	 * any buffer on the "outbufv" list with idb->idb_in_transport==B_TRUE.
1319 	 *
1320 	 * In addition, remove any buffers associated with this task from
1321 	 * the ic_tx_list.  We'll do this by walking the idt_inbufv list, but
1322 	 * items don't actually get removed from that list (and completion
1323 	 * routines called) until idm_task_cleanup.
1324 	 */
1325 	mutex_enter(&idt->idt_mutex);
1326 
1327 	for (idb = list_head(&idt->idt_outbufv); idb != NULL;
1328 	    idb = list_next(&idt->idt_outbufv, idb)) {
1329 		if (idb->idb_in_transport) {
1330 			/*
1331 			 * idm_buf_rx_from_ini_done releases idt->idt_mutex
1332 			 */
1333 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1334 			    uintptr_t, idb->idb_buf,
1335 			    uint32_t, idb->idb_bufoffset,
1336 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1337 			    uint32_t, idb->idb_xfer_len,
1338 			    int, XFER_BUF_RX_FROM_INI);
1339 			idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_ABORTED);
1340 			mutex_enter(&idt->idt_mutex);
1341 		}
1342 	}
1343 
1344 	for (idb = list_head(&idt->idt_inbufv); idb != NULL;
1345 	    idb = list_next(&idt->idt_inbufv, idb)) {
1346 		/*
1347 		 * We want to remove these items from the tx_list as well,
1348 		 * but knowing it's in the idt_inbufv list is not a guarantee
1349 		 * that it's in the tx_list.  If it's on the tx list then
1350 		 * let idm_sotx_thread() clean it up.
1351 		 */
1352 		if (idb->idb_in_transport && !idb->idb_tx_thread) {
1353 			/*
1354 			 * idm_buf_tx_to_ini_done releases idt->idt_mutex
1355 			 */
1356 			DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
1357 			    uintptr_t, idb->idb_buf,
1358 			    uint32_t, idb->idb_bufoffset,
1359 			    uint64_t, 0, uint32_t, 0, uint32_t, 0,
1360 			    uint32_t, idb->idb_xfer_len,
1361 			    int, XFER_BUF_TX_TO_INI);
1362 			idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
1363 			mutex_enter(&idt->idt_mutex);
1364 		}
1365 	}
1366 
1367 	mutex_exit(&idt->idt_mutex);
1368 
1369 	return (IDM_STATUS_SUCCESS);
1370 }
1371 
1372 /*
1373  * idm_so_negotiate_key_values() validates the key values for this connection
1374  */
1375 /* ARGSUSED */
1376 static kv_status_t
1377 idm_so_negotiate_key_values(idm_conn_t *it, nvlist_t *request_nvl,
1378     nvlist_t *response_nvl, nvlist_t *negotiated_nvl)
1379 {
1380 	/* All parameters are negotiated at the iscsit level */
1381 	return (KV_HANDLED);
1382 }
1383 
1384 /*
1385  * idm_so_notice_key_values() activates the negotiated key values for
1386  * this connection.
1387  */
1388 static void
1389 idm_so_notice_key_values(idm_conn_t *it, nvlist_t *negotiated_nvl)
1390 {
1391 	char			*nvp_name;
1392 	nvpair_t		*nvp;
1393 	nvpair_t		*next_nvp;
1394 	int			nvrc;
1395 	idm_status_t		idm_status;
1396 	const idm_kv_xlate_t	*ikvx;
1397 	uint64_t		num_val;
1398 
1399 	for (nvp = nvlist_next_nvpair(negotiated_nvl, NULL);
1400 	    nvp != NULL; nvp = next_nvp) {
1401 		next_nvp = nvlist_next_nvpair(negotiated_nvl, nvp);
1402 		nvp_name = nvpair_name(nvp);
1403 
1404 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1405 		switch (ikvx->ik_key_id) {
1406 		case KI_HEADER_DIGEST:
1407 		case KI_DATA_DIGEST:
1408 			idm_status = idm_so_handle_digest(it, nvp, ikvx);
1409 			ASSERT(idm_status == 0);
1410 
1411 			/* Remove processed item from negotiated_nvl list */
1412 			nvrc = nvlist_remove_all(
1413 			    negotiated_nvl, ikvx->ik_key_name);
1414 			ASSERT(nvrc == 0);
1415 			break;
1416 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1417 			/*
1418 			 * Just pass the value down to idm layer.
1419 			 * No need to remove it from negotiated_nvl list here.
1420 			 */
1421 			nvrc = nvpair_value_uint64(nvp, &num_val);
1422 			ASSERT(nvrc == 0);
1423 			it->ic_conn_params.max_xmit_dataseglen =
1424 			    (uint32_t)num_val;
1425 			break;
1426 		default:
1427 			break;
1428 		}
1429 	}
1430 }
1431 
1432 /*
1433  * idm_so_declare_key_values() declares the key values for this connection
1434  */
1435 /* ARGSUSED */
1436 static kv_status_t
1437 idm_so_declare_key_values(idm_conn_t *it, nvlist_t *config_nvl,
1438     nvlist_t *outgoing_nvl)
1439 {
1440 	char			*nvp_name;
1441 	nvpair_t		*nvp;
1442 	nvpair_t		*next_nvp;
1443 	kv_status_t		kvrc;
1444 	int			nvrc = 0;
1445 	const idm_kv_xlate_t	*ikvx;
1446 	uint64_t		num_val;
1447 
1448 	for (nvp = nvlist_next_nvpair(config_nvl, NULL);
1449 	    nvp != NULL && nvrc == 0; nvp = next_nvp) {
1450 		next_nvp = nvlist_next_nvpair(config_nvl, nvp);
1451 		nvp_name = nvpair_name(nvp);
1452 
1453 		ikvx = idm_lookup_kv_xlate(nvp_name, strlen(nvp_name));
1454 		switch (ikvx->ik_key_id) {
1455 		case KI_MAX_RECV_DATA_SEGMENT_LENGTH:
1456 			if ((nvrc = nvpair_value_uint64(nvp, &num_val)) != 0) {
1457 				break;
1458 			}
1459 			if (outgoing_nvl &&
1460 			    (nvrc = nvlist_add_uint64(outgoing_nvl,
1461 			    nvp_name, num_val)) != 0) {
1462 				break;
1463 			}
1464 			it->ic_conn_params.max_recv_dataseglen =
1465 			    (uint32_t)num_val;
1466 			break;
1467 		default:
1468 			break;
1469 		}
1470 	}
1471 	kvrc = idm_nvstat_to_kvstat(nvrc);
1472 	return (kvrc);
1473 }
1474 
1475 static idm_status_t
1476 idm_so_handle_digest(idm_conn_t *it, nvpair_t *digest_choice,
1477     const idm_kv_xlate_t *ikvx)
1478 {
1479 	int			nvrc;
1480 	char			*digest_choice_string;
1481 
1482 	nvrc = nvpair_value_string(digest_choice,
1483 	    &digest_choice_string);
1484 	ASSERT(nvrc == 0);
1485 	if (strcasecmp(digest_choice_string, "crc32c") == 0) {
1486 		switch (ikvx->ik_key_id) {
1487 		case KI_HEADER_DIGEST:
1488 			it->ic_conn_flags |= IDM_CONN_HEADER_DIGEST;
1489 			break;
1490 		case KI_DATA_DIGEST:
1491 			it->ic_conn_flags |= IDM_CONN_DATA_DIGEST;
1492 			break;
1493 		default:
1494 			ASSERT(0);
1495 			break;
1496 		}
1497 	} else if (strcasecmp(digest_choice_string, "none") == 0) {
1498 		switch (ikvx->ik_key_id) {
1499 		case KI_HEADER_DIGEST:
1500 			it->ic_conn_flags &= ~IDM_CONN_HEADER_DIGEST;
1501 			break;
1502 		case KI_DATA_DIGEST:
1503 			it->ic_conn_flags &= ~IDM_CONN_DATA_DIGEST;
1504 			break;
1505 		default:
1506 			ASSERT(0);
1507 			break;
1508 		}
1509 	} else {
1510 		ASSERT(0);
1511 	}
1512 
1513 	return (IDM_STATUS_SUCCESS);
1514 }
1515 
1516 
1517 /*
1518  * idm_so_conn_is_capable() verifies that the passed connection is provided
1519  * for by the sockets interface.
1520  */
1521 /* ARGSUSED */
1522 static boolean_t
1523 idm_so_conn_is_capable(idm_conn_req_t *ic, idm_transport_caps_t *caps)
1524 {
1525 	return (B_TRUE);
1526 }
1527 
1528 /*
1529  * idm_so_rx_datain() validates the Data Sequence number of the PDU. The
1530  * idm_sorecv_scsidata() function invoked earlier actually reads the data
1531  * off the socket into the appropriate buffers.
1532  */
1533 static void
1534 idm_so_rx_datain(idm_conn_t *ic, idm_pdu_t *pdu)
1535 {
1536 	iscsi_data_hdr_t	*bhs;
1537 	idm_task_t		*idt;
1538 	idm_buf_t		*idb;
1539 	uint32_t		datasn;
1540 	size_t			offset;
1541 	iscsi_hdr_t		*ihp = (iscsi_hdr_t *)pdu->isp_hdr;
1542 	iscsi_data_rsp_hdr_t    *idrhp = (iscsi_data_rsp_hdr_t *)ihp;
1543 
1544 	ASSERT(ic != NULL);
1545 	ASSERT(pdu != NULL);
1546 
1547 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1548 	datasn	= ntohl(bhs->datasn);
1549 	offset	= ntohl(bhs->offset);
1550 
1551 	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA_RSP);
1552 
1553 	/*
1554 	 * Look up the task corresponding to the initiator task tag
1555 	 * to get the buffers affiliated with the task.
1556 	 */
1557 	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
1558 	if (idt == NULL) {
1559 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: failed to find task");
1560 		idm_pdu_rx_protocol_error(ic, pdu);
1561 		return;
1562 	}
1563 
1564 	idb = pdu->isp_sorx_buf;
1565 	if (idb == NULL) {
1566 		IDM_CONN_LOG(CE_WARN,
1567 		    "idm_so_rx_datain: failed to find buffer");
1568 		idm_task_rele(idt);
1569 		idm_pdu_rx_protocol_error(ic, pdu);
1570 		return;
1571 	}
1572 
1573 	/*
1574 	 * DataSN values should be sequential and should not have any gaps or
1575 	 * repetitions. Check the DataSN with the one stored in the task.
1576 	 */
1577 	if (datasn == idt->idt_exp_datasn) {
1578 		idt->idt_exp_datasn++; /* keep track of DataSN received */
1579 	} else {
1580 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: datasn out of order");
1581 		idm_task_rele(idt);
1582 		idm_pdu_rx_protocol_error(ic, pdu);
1583 		return;
1584 	}
1585 
1586 	/*
1587 	 * PDUs in a sequence should be in continuously increasing
1588 	 * address offset
1589 	 */
1590 	if (offset != idb->idb_exp_offset) {
1591 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_datain: unexpected offset");
1592 		idm_task_rele(idt);
1593 		idm_pdu_rx_protocol_error(ic, pdu);
1594 		return;
1595 	}
1596 	/* Expected next relative buffer offset */
1597 	idb->idb_exp_offset += n2h24(bhs->dlength);
1598 	idt->idt_rx_bytes += n2h24(bhs->dlength);
1599 
1600 	idm_task_rele(idt);
1601 
1602 	/*
1603 	 * For now call scsi_rsp which will process the data rsp
1604 	 * Revisit, need to provide an explicit client entry point for
1605 	 * phase collapse completions.
1606 	 */
1607 	if (((ihp->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_SCSI_DATA_RSP) &&
1608 	    (idrhp->flags & ISCSI_FLAG_DATA_STATUS)) {
1609 		(*ic->ic_conn_ops.icb_rx_scsi_rsp)(ic, pdu);
1610 	}
1611 
1612 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1613 }
1614 
/*
 * The idm_so_rx_dataout() function is used by the iSCSI target to process
 * a Data-Out PDU sent by the iSCSI initiator.
 *
 * This function gets the Initiator Task Tag from the PDU BHS and looks up the
 * task, then validates the PDU's offset against the expected offset of the
 * receive buffer recorded in pdu->isp_sorx_buf.  A missing task or buffer,
 * or an out-of-sequence offset, is reported as a protocol error.
 *
 * When the PDU has the final flag set, the buffer's completion routine is
 * invoked with IDM_STATUS_SUCCESS.  In every successful case the PDU is
 * completed with IDM_STATUS_SUCCESS.
 */
static void
idm_so_rx_dataout(idm_conn_t *ic, idm_pdu_t *pdu)
{

	iscsi_data_hdr_t	*bhs;
	idm_task_t		*idt;
	idm_buf_t		*idb;
	size_t			offset;

	ASSERT(ic != NULL);
	ASSERT(pdu != NULL);

	bhs = (iscsi_data_hdr_t *)pdu->isp_hdr;
	offset = ntohl(bhs->offset);
	ASSERT(bhs->opcode == ISCSI_OP_SCSI_DATA);

	/*
	 * Look up the task corresponding to the initiator task tag
	 * to get the buffers affiliated with the task.
	 */
	idt = idm_task_find(ic, bhs->itt, bhs->ttt);
	if (idt == NULL) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_so_rx_dataout: failed to find task");
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}

	/* Receive buffer recorded in the PDU (see idm_sorecv_scsidata) */
	idb = pdu->isp_sorx_buf;
	if (idb == NULL) {
		IDM_CONN_LOG(CE_WARN,
		    "idm_so_rx_dataout: failed to find buffer");
		idm_task_rele(idt);
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}

	/* Keep track of data transferred - check data offsets */
	if (offset != idb->idb_exp_offset) {
		IDM_CONN_LOG(CE_NOTE, "idm_so_rx_dataout: offset out of seq: "
		    "%ld, %d", offset, idb->idb_exp_offset);
		idm_task_rele(idt);
		idm_pdu_rx_protocol_error(ic, pdu);
		return;
	}
	/*
	 * Expected next relative offset
	 *
	 * NOTE(review): this line uses ntoh24() while the next one (and the
	 * rest of the file) uses n2h24() -- presumably equivalent; confirm.
	 */
	idb->idb_exp_offset += ntoh24(bhs->dlength);
	idt->idt_rx_bytes += n2h24(bhs->dlength);

	/*
	 * Call the buffer callback when the transfer is complete
	 *
	 * The connection state machine should only abort tasks after
	 * shutting down the connection so we are assured that there
	 * won't be a simultaneous attempt to abort this task at the
	 * same time as we are processing this PDU (due to a connection
	 * state change).
	 */
	if (bhs->flags & ISCSI_FLAG_FINAL) {
		/*
		 * We only want to call idm_buf_rx_from_ini_done once
		 * per transfer.  It's possible that this task has
		 * already been aborted in which case
		 * idm_so_free_task_rsrc will call idm_buf_rx_from_ini_done
		 * for each buffer with idb_in_transport==B_TRUE.  To
		 * close this window and ensure that this doesn't happen,
		 * we'll clear idb->idb_in_transport now while holding
		 * the task mutex.   This is only really an issue for
		 * SCSI task abort -- if tasks were being aborted because
		 * of a connection state change the state machine would
		 * have already stopped the receive thread.
		 */
		mutex_enter(&idt->idt_mutex);

		/*
		 * Release the task hold here (obtained in idm_task_find)
		 * because the task may complete synchronously during
		 * idm_buf_rx_from_ini_done.  Since we still have an active
		 * buffer we know there is at least one additional hold on idt.
		 */
		idm_task_rele(idt);

		/*
		 * idm_buf_rx_from_ini_done releases idt->idt_mutex
		 */
		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
		    uint32_t, idb->idb_xfer_len,
		    int, XFER_BUF_RX_FROM_INI);
		idm_buf_rx_from_ini_done(idt, idb, IDM_STATUS_SUCCESS);
		idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
		return;
	}

	/* Not the final PDU: drop the lookup hold and complete the PDU */
	idm_task_rele(idt);
	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
}
1721 
1722 /*
1723  * The idm_so_rx_rtt() function is used by the iSCSI initiator to handle
1724  * the R2T PDU sent by the iSCSI target indicating that it is ready to
1725  * accept data. This gets the Initiator Task Tag (itt) from the PDU BHS
1726  * and looks up the task in the task tree using the itt to get the output
1727  * buffers associated the task. The R2T PDU contains the offset of the
1728  * requested data and the data length. This function then constructs a
1729  * sequence of iSCSI PDUs and outputs the requested data. Each Data-Out
1730  * PDU is associated with the R2T by the Target Transfer Tag  (ttt).
1731  */
1732 
1733 static void
1734 idm_so_rx_rtt(idm_conn_t *ic, idm_pdu_t *pdu)
1735 {
1736 	idm_task_t		*idt;
1737 	idm_buf_t		*idb;
1738 	iscsi_rtt_hdr_t		*rtt_hdr;
1739 	uint32_t		data_offset;
1740 	uint32_t		data_length;
1741 
1742 	ASSERT(ic != NULL);
1743 	ASSERT(pdu != NULL);
1744 
1745 	rtt_hdr	= (iscsi_rtt_hdr_t *)pdu->isp_hdr;
1746 	data_offset = ntohl(rtt_hdr->data_offset);
1747 	data_length = ntohl(rtt_hdr->data_length);
1748 	idt	= idm_task_find(ic, rtt_hdr->itt, rtt_hdr->ttt);
1749 
1750 	if (idt == NULL) {
1751 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find task");
1752 		idm_pdu_rx_protocol_error(ic, pdu);
1753 		return;
1754 	}
1755 
1756 	/* Find the buffer bound to the task by the iSCSI initiator */
1757 	mutex_enter(&idt->idt_mutex);
1758 	idb = idm_buf_find(&idt->idt_outbufv, data_offset);
1759 	if (idb == NULL) {
1760 		mutex_exit(&idt->idt_mutex);
1761 		idm_task_rele(idt);
1762 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: could not find buffer");
1763 		idm_pdu_rx_protocol_error(ic, pdu);
1764 		return;
1765 	}
1766 
1767 	/* return buffer contains this data */
1768 	if (data_offset + data_length > idb->idb_buflen) {
1769 		/* Overflow */
1770 		mutex_exit(&idt->idt_mutex);
1771 		idm_task_rele(idt);
1772 		IDM_CONN_LOG(CE_WARN, "idm_so_rx_rtt: read from outside "
1773 		    "buffer");
1774 		idm_pdu_rx_protocol_error(ic, pdu);
1775 		return;
1776 	}
1777 
1778 	idt->idt_r2t_ttt = rtt_hdr->ttt;
1779 	idt->idt_exp_datasn = 0;
1780 
1781 	idm_so_send_rtt_data(ic, idt, idb, data_offset,
1782 	    ntohl(rtt_hdr->data_length));
1783 	mutex_exit(&idt->idt_mutex);
1784 
1785 	idm_pdu_complete(pdu, IDM_STATUS_SUCCESS);
1786 	idm_task_rele(idt);
1787 
1788 }
1789 
1790 idm_status_t
1791 idm_sorecvdata(idm_conn_t *ic, idm_pdu_t *pdu)
1792 {
1793 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
1794 	int		pad_len;
1795 	uint32_t	data_digest_crc;
1796 	uint32_t	crc_calculated;
1797 	int		total_len;
1798 	idm_so_conn_t	*so_conn;
1799 
1800 	so_conn = ic->ic_transport_private;
1801 
1802 	pad_len = ((ISCSI_PAD_WORD_LEN -
1803 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
1804 	    (ISCSI_PAD_WORD_LEN - 1));
1805 
1806 	ASSERT(pdu->isp_iovlen < (PDU_MAX_IOVLEN - 2)); /* pad + data digest */
1807 
1808 	total_len = pdu->isp_datalen;
1809 
1810 	if (pad_len) {
1811 		pdu->isp_iov[pdu->isp_iovlen].iov_base	= (char *)&pad;
1812 		pdu->isp_iov[pdu->isp_iovlen].iov_len	= pad_len;
1813 		total_len		+= pad_len;
1814 		pdu->isp_iovlen++;
1815 	}
1816 
1817 	/* setup data digest */
1818 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1819 		pdu->isp_iov[pdu->isp_iovlen].iov_base =
1820 		    (char *)&data_digest_crc;
1821 		pdu->isp_iov[pdu->isp_iovlen].iov_len =
1822 		    sizeof (data_digest_crc);
1823 		total_len		+= sizeof (data_digest_crc);
1824 		pdu->isp_iovlen++;
1825 	}
1826 
1827 	pdu->isp_data = (uint8_t *)(uintptr_t)pdu->isp_iov[0].iov_base;
1828 
1829 	if (idm_iov_sorecv(so_conn->ic_so, &pdu->isp_iov[0],
1830 	    pdu->isp_iovlen, total_len) != 0) {
1831 		return (IDM_STATUS_IO);
1832 	}
1833 
1834 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) != 0) {
1835 		crc_calculated = idm_crc32c(pdu->isp_data,
1836 		    pdu->isp_datalen);
1837 		if (pad_len) {
1838 			crc_calculated = idm_crc32c_continued((char *)&pad,
1839 			    pad_len, crc_calculated);
1840 		}
1841 		if (crc_calculated != data_digest_crc) {
1842 			IDM_CONN_LOG(CE_WARN,
1843 			    "idm_sorecvdata: "
1844 			    "CRC error: actual 0x%x, calc 0x%x",
1845 			    data_digest_crc, crc_calculated);
1846 
1847 			/* Invalid Data Digest */
1848 			return (IDM_STATUS_DATA_DIGEST);
1849 		}
1850 	}
1851 
1852 	return (IDM_STATUS_SUCCESS);
1853 }
1854 
1855 /*
1856  * idm_sorecv_scsidata() is used to receive scsi data from the socket. The
1857  * Data-type PDU header must be read into the idm_pdu_t structure prior to
1858  * calling this function.
1859  */
1860 idm_status_t
1861 idm_sorecv_scsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1862 {
1863 	iscsi_data_hdr_t	*bhs;
1864 	idm_task_t		*task;
1865 	uint32_t		offset;
1866 	uint8_t			opcode;
1867 	uint32_t		dlength;
1868 	list_t			*buflst;
1869 	uint32_t		xfer_bytes;
1870 	idm_status_t		status;
1871 
1872 	ASSERT(ic != NULL);
1873 	ASSERT(pdu != NULL);
1874 
1875 	bhs	= (iscsi_data_hdr_t *)pdu->isp_hdr;
1876 
1877 	offset	= ntohl(bhs->offset);
1878 	opcode	= bhs->opcode;
1879 	dlength = n2h24(bhs->dlength);
1880 
1881 	ASSERT((opcode == ISCSI_OP_SCSI_DATA_RSP) ||
1882 	    (opcode == ISCSI_OP_SCSI_DATA));
1883 
1884 	/*
1885 	 * Successful lookup implicitly gets a "hold" on the task.  This
1886 	 * hold must be released before leaving this function.  At one
1887 	 * point we were caching this task context and retaining the hold
1888 	 * but it turned out to be very difficult to release the hold properly.
1889 	 * The task can be aborted and the connection shutdown between this
1890 	 * call and the subsequent expected call to idm_so_rx_datain/
1891 	 * idm_so_rx_dataout (in which case those functions are not called).
1892 	 * Releasing the hold in the PDU callback doesn't work well either
1893 	 * because the whole task may be completed by then at which point
1894 	 * it is too late to release the hold -- for better or worse this
1895 	 * code doesn't wait on the refcnts during normal operation.
1896 	 * idm_task_find() is very fast and it is not a huge burden if we
1897 	 * have to do it twice.
1898 	 */
1899 	task = idm_task_find(ic, bhs->itt, bhs->ttt);
1900 	if (task == NULL) {
1901 		IDM_CONN_LOG(CE_WARN,
1902 		    "idm_sorecv_scsidata: could not find task");
1903 		return (IDM_STATUS_FAIL);
1904 	}
1905 
1906 	mutex_enter(&task->idt_mutex);
1907 	buflst	= (opcode == ISCSI_OP_SCSI_DATA_RSP) ?
1908 	    &task->idt_inbufv : &task->idt_outbufv;
1909 	pdu->isp_sorx_buf = idm_buf_find(buflst, offset);
1910 	mutex_exit(&task->idt_mutex);
1911 
1912 	if (pdu->isp_sorx_buf == NULL) {
1913 		idm_task_rele(task);
1914 		IDM_CONN_LOG(CE_WARN, "idm_sorecv_scsidata: could not find "
1915 		    "buffer for offset %x opcode=%x",
1916 		    offset, opcode);
1917 		return (IDM_STATUS_FAIL);
1918 	}
1919 
1920 	xfer_bytes = idm_fill_iov(pdu, pdu->isp_sorx_buf, offset, dlength);
1921 	ASSERT(xfer_bytes != 0);
1922 	if (xfer_bytes != dlength) {
1923 		idm_task_rele(task);
1924 		/*
1925 		 * Buffer overflow, connection error.  The PDU data is still
1926 		 * sitting in the socket so we can't use the connection
1927 		 * again until that data is drained.
1928 		 */
1929 		return (IDM_STATUS_FAIL);
1930 	}
1931 
1932 	status = idm_sorecvdata(ic, pdu);
1933 
1934 	idm_task_rele(task);
1935 
1936 	return (status);
1937 }
1938 
1939 static uint32_t
1940 idm_fill_iov(idm_pdu_t *pdu, idm_buf_t *idb, uint32_t ro, uint32_t dlength)
1941 {
1942 	uint32_t	buf_ro = ro - idb->idb_bufoffset;
1943 	uint32_t	xfer_len = min(dlength, idb->idb_buflen - buf_ro);
1944 
1945 	ASSERT(ro >= idb->idb_bufoffset);
1946 
1947 	pdu->isp_iov[pdu->isp_iovlen].iov_base	=
1948 	    (caddr_t)idb->idb_buf + buf_ro;
1949 	pdu->isp_iov[pdu->isp_iovlen].iov_len	= xfer_len;
1950 	pdu->isp_iovlen++;
1951 
1952 	return (xfer_len);
1953 }
1954 
1955 int
1956 idm_sorecv_nonscsidata(idm_conn_t *ic, idm_pdu_t *pdu)
1957 {
1958 	pdu->isp_data = kmem_alloc(pdu->isp_datalen, KM_SLEEP);
1959 	ASSERT(pdu->isp_data != NULL);
1960 
1961 	pdu->isp_databuflen = pdu->isp_datalen;
1962 	pdu->isp_iov[0].iov_base = (caddr_t)pdu->isp_data;
1963 	pdu->isp_iov[0].iov_len = pdu->isp_datalen;
1964 	pdu->isp_iovlen = 1;
1965 	/*
1966 	 * Since we are associating a new data buffer with this received
1967 	 * PDU we need to set a specific callback to free the data
1968 	 * after the PDU is processed.
1969 	 */
1970 	pdu->isp_flags |= IDM_PDU_ADDL_DATA;
1971 	pdu->isp_callback = idm_sorx_addl_pdu_cb;
1972 
1973 	return (idm_sorecvdata(ic, pdu));
1974 }
1975 
1976 void
1977 idm_sorx_thread(void *arg)
1978 {
1979 	boolean_t	conn_failure = B_FALSE;
1980 	idm_conn_t	*ic = (idm_conn_t *)arg;
1981 	idm_so_conn_t	*so_conn;
1982 	idm_pdu_t	*pdu;
1983 	idm_status_t	rc;
1984 
1985 	idm_conn_hold(ic);
1986 
1987 	mutex_enter(&ic->ic_mutex);
1988 
1989 	so_conn = ic->ic_transport_private;
1990 	so_conn->ic_rx_thread_running = B_TRUE;
1991 	so_conn->ic_rx_thread_did = so_conn->ic_rx_thread->t_did;
1992 	cv_signal(&ic->ic_cv);
1993 
1994 	while (so_conn->ic_rx_thread_running) {
1995 		mutex_exit(&ic->ic_mutex);
1996 
1997 		/*
1998 		 * Get PDU with default header size (large enough for
1999 		 * BHS plus any anticipated AHS).  PDU from
2000 		 * the cache will have all values set correctly
2001 		 * for sockets RX including callback.
2002 		 */
2003 		pdu = kmem_cache_alloc(idm.idm_sorx_pdu_cache, KM_SLEEP);
2004 		pdu->isp_ic = ic;
2005 		pdu->isp_flags = 0;
2006 		pdu->isp_transport_hdrlen = 0;
2007 
2008 		if ((rc = idm_sorecvhdr(ic, pdu)) != 0) {
2009 			/*
2010 			 * Call idm_pdu_complete so that we call the callback
2011 			 * and ensure any memory allocated in idm_sorecvhdr
2012 			 * gets freed up.
2013 			 */
2014 			idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2015 
2016 			/*
2017 			 * If ic_rx_thread_running is still set then
2018 			 * this is some kind of connection problem
2019 			 * on the socket.  In this case we want to
2020 			 * generate an event.  Otherwise some other
2021 			 * thread closed the socket due to another
2022 			 * issue in which case we don't need to
2023 			 * generate an event.
2024 			 */
2025 			mutex_enter(&ic->ic_mutex);
2026 			if (so_conn->ic_rx_thread_running) {
2027 				conn_failure = B_TRUE;
2028 				so_conn->ic_rx_thread_running = B_FALSE;
2029 			}
2030 
2031 			continue;
2032 		}
2033 
2034 		/*
2035 		 * Header has been read and validated.  Now we need
2036 		 * to read the PDU data payload (if present).  SCSI data
2037 		 * need to be transferred from the socket directly into
2038 		 * the associated transfer buffer for the SCSI task.
2039 		 */
2040 		if (pdu->isp_datalen != 0) {
2041 			if ((IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA) ||
2042 			    (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP)) {
2043 				rc = idm_sorecv_scsidata(ic, pdu);
2044 				/*
2045 				 * All SCSI errors are fatal to the
2046 				 * connection right now since we have no
2047 				 * place to put the data.  What we need
2048 				 * is some kind of sink to dispose of unwanted
2049 				 * SCSI data.  For example an invalid task tag
2050 				 * should not kill the connection (although
2051 				 * we may want to drop the connection).
2052 				 */
2053 			} else {
2054 				/*
2055 				 * Not data PDUs so allocate a buffer for the
2056 				 * data segment and read the remaining data.
2057 				 */
2058 				rc = idm_sorecv_nonscsidata(ic, pdu);
2059 			}
2060 			if (rc != 0) {
2061 				/*
2062 				 * Call idm_pdu_complete so that we call the
2063 				 * callback and ensure any memory allocated
2064 				 * in idm_sorecvhdr gets freed up.
2065 				 */
2066 				idm_pdu_complete(pdu, IDM_STATUS_FAIL);
2067 
2068 				/*
2069 				 * If ic_rx_thread_running is still set then
2070 				 * this is some kind of connection problem
2071 				 * on the socket.  In this case we want to
2072 				 * generate an event.  Otherwise some other
2073 				 * thread closed the socket due to another
2074 				 * issue in which case we don't need to
2075 				 * generate an event.
2076 				 */
2077 				mutex_enter(&ic->ic_mutex);
2078 				if (so_conn->ic_rx_thread_running) {
2079 					conn_failure = B_TRUE;
2080 					so_conn->ic_rx_thread_running = B_FALSE;
2081 				}
2082 				continue;
2083 			}
2084 		}
2085 
2086 		/*
2087 		 * Process RX PDU
2088 		 */
2089 		idm_pdu_rx(ic, pdu);
2090 
2091 		mutex_enter(&ic->ic_mutex);
2092 	}
2093 
2094 	mutex_exit(&ic->ic_mutex);
2095 
2096 	/*
2097 	 * If we dropped out of the RX processing loop because of
2098 	 * a socket problem or other connection failure (including
2099 	 * digest errors) then we need to generate a state machine
2100 	 * event to shut the connection down.
2101 	 * If the state machine is already in, for example, INIT_ERROR, this
2102 	 * event will get dropped, and the TX thread will never be notified
2103 	 * to shut down.  To be safe, we'll just notify it here.
2104 	 */
2105 	if (conn_failure) {
2106 		if (so_conn->ic_tx_thread_running) {
2107 			so_conn->ic_tx_thread_running = B_FALSE;
2108 			mutex_enter(&so_conn->ic_tx_mutex);
2109 			cv_signal(&so_conn->ic_tx_cv);
2110 			mutex_exit(&so_conn->ic_tx_mutex);
2111 		}
2112 
2113 		idm_conn_event(ic, CE_TRANSPORT_FAIL, rc);
2114 	}
2115 
2116 	idm_conn_rele(ic);
2117 
2118 	thread_exit();
2119 }
2120 
2121 /*
2122  * idm_so_tx
2123  *
2124  * This is the implementation of idm_transport_ops_t's it_tx_pdu entry
2125  * point.  By definition, it is supposed to be fast.  So, simply queue
2126  * the entry and return.  The real work is done by idm_i_so_tx() via
2127  * idm_sotx_thread().
2128  */
2129 
2130 static void
2131 idm_so_tx(idm_conn_t *ic, idm_pdu_t *pdu)
2132 {
2133 	idm_so_conn_t *so_conn = ic->ic_transport_private;
2134 
2135 	ASSERT(pdu->isp_ic == ic);
2136 	mutex_enter(&so_conn->ic_tx_mutex);
2137 
2138 	if (!so_conn->ic_tx_thread_running) {
2139 		mutex_exit(&so_conn->ic_tx_mutex);
2140 		idm_pdu_complete(pdu, IDM_STATUS_ABORTED);
2141 		return;
2142 	}
2143 
2144 	list_insert_tail(&so_conn->ic_tx_list, (void *)pdu);
2145 	cv_signal(&so_conn->ic_tx_cv);
2146 	mutex_exit(&so_conn->ic_tx_mutex);
2147 }
2148 
2149 static idm_status_t
2150 idm_i_so_tx(idm_pdu_t *pdu)
2151 {
2152 	idm_conn_t	*ic = pdu->isp_ic;
2153 	idm_status_t	status = IDM_STATUS_SUCCESS;
2154 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
2155 	int		pad_len;
2156 	uint32_t	hdr_digest_crc;
2157 	uint32_t	data_digest_crc = 0;
2158 	int		total_len = 0;
2159 	int		iovlen = 0;
2160 	struct iovec	iov[6];
2161 	idm_so_conn_t	*so_conn;
2162 
2163 	so_conn = ic->ic_transport_private;
2164 
2165 	/* Setup BHS */
2166 	iov[iovlen].iov_base	= (caddr_t)pdu->isp_hdr;
2167 	iov[iovlen].iov_len	= pdu->isp_hdrlen;
2168 	total_len		+= iov[iovlen].iov_len;
2169 	iovlen++;
2170 
2171 	/* Setup header digest */
2172 	if (((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2173 	    (ic->ic_conn_flags & IDM_CONN_HEADER_DIGEST)) {
2174 		hdr_digest_crc = idm_crc32c(pdu->isp_hdr, pdu->isp_hdrlen);
2175 
2176 		iov[iovlen].iov_base	= (caddr_t)&hdr_digest_crc;
2177 		iov[iovlen].iov_len	= sizeof (hdr_digest_crc);
2178 		total_len		+= iov[iovlen].iov_len;
2179 		iovlen++;
2180 	}
2181 
2182 	/* Setup the data */
2183 	if (pdu->isp_datalen) {
2184 		idm_task_t		*idt;
2185 		idm_buf_t		*idb;
2186 		iscsi_data_hdr_t	*ihp;
2187 		ihp = (iscsi_data_hdr_t *)pdu->isp_hdr;
2188 		/* Write of immediate data */
2189 		if (ic->ic_ffp &&
2190 		    (ihp->opcode == ISCSI_OP_SCSI_CMD ||
2191 		    ihp->opcode == ISCSI_OP_SCSI_DATA)) {
2192 			idt = idm_task_find(ic, ihp->itt, ihp->ttt);
2193 			if (idt) {
2194 				mutex_enter(&idt->idt_mutex);
2195 				idb = idm_buf_find(&idt->idt_outbufv, 0);
2196 				mutex_exit(&idt->idt_mutex);
2197 				/*
2198 				 * If the initiator call to idm_buf_alloc
2199 				 * failed then we can get to this point
2200 				 * without a bound buffer.  The associated
2201 				 * connection failure will clean things up
2202 				 * later.  It would be nice to come up with
2203 				 * a cleaner way to handle this.  In
2204 				 * particular it seems absurd to look up
2205 				 * the task and the buffer just to update
2206 				 * this counter.
2207 				 */
2208 				if (idb)
2209 					idb->idb_xfer_len += pdu->isp_datalen;
2210 				idm_task_rele(idt);
2211 			}
2212 		}
2213 
2214 		iov[iovlen].iov_base = (caddr_t)pdu->isp_data;
2215 		iov[iovlen].iov_len  = pdu->isp_datalen;
2216 		total_len += iov[iovlen].iov_len;
2217 		iovlen++;
2218 	}
2219 
2220 	/* Setup the data pad if necessary */
2221 	pad_len = ((ISCSI_PAD_WORD_LEN -
2222 	    (pdu->isp_datalen & (ISCSI_PAD_WORD_LEN - 1))) &
2223 	    (ISCSI_PAD_WORD_LEN - 1));
2224 
2225 	if (pad_len) {
2226 		bzero(pad, sizeof (pad));
2227 		iov[iovlen].iov_base = (void *)&pad;
2228 		iov[iovlen].iov_len  = pad_len;
2229 		total_len		+= iov[iovlen].iov_len;
2230 		iovlen++;
2231 	}
2232 
2233 	/*
2234 	 * Setup the data digest if enabled.  Data-digest is not sent
2235 	 * for login-phase PDUs.
2236 	 */
2237 	if ((ic->ic_conn_flags & IDM_CONN_DATA_DIGEST) &&
2238 	    ((pdu->isp_flags & IDM_PDU_LOGIN_TX) == 0) &&
2239 	    (pdu->isp_datalen || pad_len)) {
2240 		/*
2241 		 * RFC3720/10.2.3: A zero-length Data Segment also
2242 		 * implies a zero-length data digest.
2243 		 */
2244 		if (pdu->isp_datalen) {
2245 			data_digest_crc = idm_crc32c(pdu->isp_data,
2246 			    pdu->isp_datalen);
2247 		}
2248 		if (pad_len) {
2249 			data_digest_crc = idm_crc32c_continued(&pad,
2250 			    pad_len, data_digest_crc);
2251 		}
2252 
2253 		iov[iovlen].iov_base	= (caddr_t)&data_digest_crc;
2254 		iov[iovlen].iov_len	= sizeof (data_digest_crc);
2255 		total_len		+= iov[iovlen].iov_len;
2256 		iovlen++;
2257 	}
2258 
2259 	/* Transmit the PDU */
2260 	if (idm_iov_sosend(so_conn->ic_so, &iov[0], iovlen,
2261 	    total_len) != 0) {
2262 		/* Set error status */
2263 		IDM_CONN_LOG(CE_WARN,
2264 		    "idm_so_tx: failed to transmit the PDU, so: %p ic: %p "
2265 		    "data: %p", (void *) so_conn->ic_so, (void *) ic,
2266 		    (void *) pdu->isp_data);
2267 		status = IDM_STATUS_IO;
2268 	}
2269 
2270 	/*
2271 	 * Success does not mean that the PDU actually reached the
2272 	 * remote node since it could get dropped along the way.
2273 	 */
2274 	idm_pdu_complete(pdu, status);
2275 
2276 	return (status);
2277 }
2278 
2279 /*
2280  * The idm_so_buf_tx_to_ini() is used by the target iSCSI layer to transmit the
2281  * Data-In PDUs using sockets. Based on the negotiated MaxRecvDataSegmentLength,
2282  * the buffer is segmented into a sequence of Data-In PDUs, ordered by DataSN.
2283  * A target can invoke this function multiple times for a single read command
2284  * (identified by the same ITT) to split the input into several sequences.
2285  *
2286  * DataSN starts with 0 for the first data PDU of an input command and advances
2287  * by 1 for each subsequent data PDU. Each sequence will have its own F bit,
2288  * which is set to 1 for the last data PDU of a sequence.
2289  *
2290  * Scope for Prototype build:
2291  * The data PDUs within a sequence will be sent in order with the buffer offset
2292  * in increasing order. i.e. initiator and target must have negotiated the
2293  * "DataPDUInOrder" to "Yes". The order between sequences is not enforced.
2294  *
2295  * Caller holds idt->idt_mutex
2296  */
2297 static idm_status_t
2298 idm_so_buf_tx_to_ini(idm_task_t *idt, idm_buf_t *idb)
2299 {
2300 	idm_so_conn_t	*so_conn = idb->idb_ic->ic_transport_private;
2301 	idm_pdu_t	tmppdu;
2302 
2303 	ASSERT(mutex_owned(&idt->idt_mutex));
2304 
2305 	/*
2306 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2307 	 * idm_sotx_thread.
2308 	 */
2309 	mutex_enter(&so_conn->ic_tx_mutex);
2310 
2311 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2312 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2313 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2314 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_TX_TO_INI);
2315 
2316 	if (!so_conn->ic_tx_thread_running) {
2317 		mutex_exit(&so_conn->ic_tx_mutex);
2318 		/*
2319 		 * Don't release idt->idt_mutex since we're supposed to hold
2320 		 * in when calling idm_buf_tx_to_ini_done
2321 		 */
2322 		DTRACE_ISCSI_8(xfer__done, idm_conn_t *, idt->idt_ic,
2323 		    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2324 		    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2325 		    uint32_t, idb->idb_xfer_len,
2326 		    int, XFER_BUF_TX_TO_INI);
2327 		idm_buf_tx_to_ini_done(idt, idb, IDM_STATUS_ABORTED);
2328 		return (IDM_STATUS_FAIL);
2329 	}
2330 
2331 	/*
2332 	 * Build a template for the data PDU headers we will use so that
2333 	 * the SN values will stay consistent with other PDU's we are
2334 	 * transmitting like R2T and SCSI status.
2335 	 */
2336 	bzero(&idb->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2337 	tmppdu.isp_hdr = &idb->idb_data_hdr_tmpl;
2338 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2339 	    ISCSI_OP_SCSI_DATA_RSP);
2340 	idb->idb_tx_thread = B_TRUE;
2341 	list_insert_tail(&so_conn->ic_tx_list, (void *)idb);
2342 	cv_signal(&so_conn->ic_tx_cv);
2343 	mutex_exit(&so_conn->ic_tx_mutex);
2344 	mutex_exit(&idt->idt_mutex);
2345 
2346 	/*
2347 	 * Returning success here indicates the transfer was successfully
2348 	 * dispatched -- it does not mean that the transfer completed
2349 	 * successfully.
2350 	 */
2351 	return (IDM_STATUS_SUCCESS);
2352 }
2353 
2354 /*
2355  * The idm_so_buf_rx_from_ini() is used by the target iSCSI layer to specify the
2356  * data blocks it is ready to receive from the initiator in response to a WRITE
2357  * SCSI command. The target iSCSI layer passes the information about the desired
2358  * data blocks to the initiator in one R2T PDU. The receiving buffer, the buffer
2359  * offset and datalen are passed via the 'idb' argument.
2360  *
2361  * Scope for Prototype build:
2362  * R2Ts are required for any Data-Out PDU, i.e. initiator and target must have
2363  * negotiated the "InitialR2T" to "Yes".
2364  *
2365  * Caller holds idt->idt_mutex
2366  */
2367 static idm_status_t
2368 idm_so_buf_rx_from_ini(idm_task_t *idt, idm_buf_t *idb)
2369 {
2370 	idm_pdu_t		*pdu;
2371 	iscsi_rtt_hdr_t		*rtt;
2372 
2373 	ASSERT(mutex_owned(&idt->idt_mutex));
2374 
2375 	DTRACE_ISCSI_8(xfer__start, idm_conn_t *, idt->idt_ic,
2376 	    uintptr_t, idb->idb_buf, uint32_t, idb->idb_bufoffset,
2377 	    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2378 	    uint32_t, idb->idb_xfer_len, int, XFER_BUF_RX_FROM_INI);
2379 
2380 	pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2381 	pdu->isp_ic = idt->idt_ic;
2382 	bzero(pdu->isp_hdr, sizeof (iscsi_rtt_hdr_t));
2383 
2384 	/* iSCSI layer fills the TTT, ITT, StatSN, ExpCmdSN, MaxCmdSN */
2385 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, pdu, ISCSI_OP_RTT_RSP);
2386 
2387 	/* set the rttsn, rtt.flags, rtt.data_offset and rtt.data_length */
2388 	rtt = (iscsi_rtt_hdr_t *)(pdu->isp_hdr);
2389 
2390 	rtt->opcode		= ISCSI_OP_RTT_RSP;
2391 	rtt->flags		= ISCSI_FLAG_FINAL;
2392 	rtt->data_offset	= htonl(idb->idb_bufoffset);
2393 	rtt->data_length	= htonl(idb->idb_xfer_len);
2394 	rtt->rttsn		= htonl(idt->idt_exp_rttsn++);
2395 
2396 	/* Keep track of buffer offsets */
2397 	idb->idb_exp_offset	= idb->idb_bufoffset;
2398 	mutex_exit(&idt->idt_mutex);
2399 
2400 	/*
2401 	 * Transmit the PDU.
2402 	 */
2403 	idm_pdu_tx(pdu);
2404 
2405 	return (IDM_STATUS_SUCCESS);
2406 }
2407 
2408 static idm_status_t
2409 idm_so_buf_alloc(idm_buf_t *idb, uint64_t buflen)
2410 {
2411 	if ((buflen > IDM_SO_BUF_CACHE_LB) && (buflen <= IDM_SO_BUF_CACHE_UB)) {
2412 		idb->idb_buf = kmem_cache_alloc(idm.idm_so_128k_buf_cache,
2413 		    KM_NOSLEEP);
2414 		idb->idb_buf_private = idm.idm_so_128k_buf_cache;
2415 	} else {
2416 		idb->idb_buf = kmem_alloc(buflen, KM_NOSLEEP);
2417 		idb->idb_buf_private = NULL;
2418 	}
2419 
2420 	if (idb->idb_buf == NULL) {
2421 		IDM_CONN_LOG(CE_NOTE,
2422 		    "idm_so_buf_alloc: failed buffer allocation");
2423 		return (IDM_STATUS_FAIL);
2424 	}
2425 
2426 	return (IDM_STATUS_SUCCESS);
2427 }
2428 
2429 /* ARGSUSED */
2430 static idm_status_t
2431 idm_so_buf_setup(idm_buf_t *idb)
2432 {
2433 	/* Ensure bufalloc'd flag is unset */
2434 	idb->idb_bufalloc = B_FALSE;
2435 
2436 	return (IDM_STATUS_SUCCESS);
2437 }
2438 
2439 /* ARGSUSED */
2440 static void
2441 idm_so_buf_teardown(idm_buf_t *idb)
2442 {
2443 	/* nothing to do here */
2444 }
2445 
2446 static void
2447 idm_so_buf_free(idm_buf_t *idb)
2448 {
2449 	if (idb->idb_buf_private == NULL) {
2450 		kmem_free(idb->idb_buf, idb->idb_buflen);
2451 	} else {
2452 		kmem_cache_free(idb->idb_buf_private, idb->idb_buf);
2453 	}
2454 }
2455 
2456 static void
2457 idm_so_send_rtt_data(idm_conn_t *ic, idm_task_t *idt, idm_buf_t *idb,
2458     uint32_t offset, uint32_t length)
2459 {
2460 	idm_so_conn_t	*so_conn = ic->ic_transport_private;
2461 	idm_pdu_t	tmppdu;
2462 	idm_buf_t	*rtt_buf;
2463 
2464 	ASSERT(mutex_owned(&idt->idt_mutex));
2465 
2466 	/*
2467 	 * Allocate a buffer to represent the RTT transfer.  We could further
2468 	 * optimize this by allocating the buffers internally from an rtt
2469 	 * specific buffer cache since this is socket-specific code but for
2470 	 * now we will keep it simple.
2471 	 */
2472 	rtt_buf = idm_buf_alloc(ic, (uint8_t *)idb->idb_buf + offset, length);
2473 	if (rtt_buf == NULL) {
2474 		/*
2475 		 * If we're in FFP then the failure was likely a resource
2476 		 * allocation issue and we should close the connection by
2477 		 * sending a CE_TRANSPORT_FAIL event.
2478 		 *
2479 		 * If we're not in FFP then idm_buf_alloc will always
2480 		 * fail and the state is transitioning to "complete" anyway
2481 		 * so we won't bother to send an event.
2482 		 */
2483 		mutex_enter(&ic->ic_state_mutex);
2484 		if (ic->ic_ffp)
2485 			idm_conn_event_locked(ic, CE_TRANSPORT_FAIL,
2486 			    NULL, CT_NONE);
2487 		mutex_exit(&ic->ic_state_mutex);
2488 		return;
2489 	}
2490 
2491 	rtt_buf->idb_buf_cb = NULL;
2492 	rtt_buf->idb_cb_arg = NULL;
2493 	rtt_buf->idb_bufoffset = offset;
2494 	rtt_buf->idb_xfer_len = length;
2495 	rtt_buf->idb_ic = idt->idt_ic;
2496 	rtt_buf->idb_task_binding = idt;
2497 
2498 	/*
2499 	 * Put the idm_buf_t on the tx queue.  It will be transmitted by
2500 	 * idm_sotx_thread.
2501 	 */
2502 	mutex_enter(&so_conn->ic_tx_mutex);
2503 
2504 	if (!so_conn->ic_tx_thread_running) {
2505 		idm_buf_free(rtt_buf);
2506 		mutex_exit(&so_conn->ic_tx_mutex);
2507 		return;
2508 	}
2509 
2510 	/*
2511 	 * This new buffer represents an additional reference on the task
2512 	 */
2513 	idm_task_hold(idt);
2514 
2515 	/*
2516 	 * Build a template for the data PDU headers we will use so that
2517 	 * the SN values will stay consistent with other PDU's we are
2518 	 * transmitting like R2T and SCSI status.
2519 	 */
2520 	bzero(&rtt_buf->idb_data_hdr_tmpl, sizeof (iscsi_hdr_t));
2521 	tmppdu.isp_hdr = &rtt_buf->idb_data_hdr_tmpl;
2522 	(*idt->idt_ic->ic_conn_ops.icb_build_hdr)(idt, &tmppdu,
2523 	    ISCSI_OP_SCSI_DATA);
2524 	rtt_buf->idb_tx_thread = B_TRUE;
2525 	rtt_buf->idb_in_transport = B_TRUE;
2526 	list_insert_tail(&so_conn->ic_tx_list, (void *)rtt_buf);
2527 	cv_signal(&so_conn->ic_tx_cv);
2528 	mutex_exit(&so_conn->ic_tx_mutex);
2529 }
2530 
2531 static void
2532 idm_so_send_rtt_data_done(idm_task_t *idt, idm_buf_t *idb)
2533 {
2534 	/*
2535 	 * Don't worry about status -- we assume any error handling
2536 	 * is performed by the caller (idm_sotx_thread).
2537 	 */
2538 	idb->idb_in_transport = B_FALSE;
2539 	idm_task_rele(idt);
2540 	idm_buf_free(idb);
2541 }
2542 
2543 static idm_status_t
2544 idm_so_send_buf_region(idm_task_t *idt, idm_buf_t *idb,
2545     uint32_t buf_region_offset, uint32_t buf_region_length)
2546 {
2547 	idm_conn_t		*ic;
2548 	uint32_t		max_dataseglen;
2549 	size_t			remainder, chunk;
2550 	uint32_t		data_offset = buf_region_offset;
2551 	iscsi_data_hdr_t	*bhs;
2552 	idm_pdu_t		*pdu;
2553 	idm_status_t		tx_status;
2554 
2555 	ASSERT(mutex_owned(&idt->idt_mutex));
2556 
2557 	ic = idt->idt_ic;
2558 
2559 	max_dataseglen = ic->ic_conn_params.max_xmit_dataseglen;
2560 	remainder = buf_region_length;
2561 
2562 	while (remainder) {
2563 		if (idt->idt_state != TASK_ACTIVE) {
2564 			ASSERT((idt->idt_state != TASK_IDLE) &&
2565 			    (idt->idt_state != TASK_COMPLETE));
2566 			return (IDM_STATUS_ABORTED);
2567 		}
2568 
2569 		/* check to see if we need to chunk the data */
2570 		if (remainder > max_dataseglen) {
2571 			chunk = max_dataseglen;
2572 		} else {
2573 			chunk = remainder;
2574 		}
2575 
2576 		/* Data PDU headers will always be sizeof (iscsi_hdr_t) */
2577 		pdu = kmem_cache_alloc(idm.idm_sotx_pdu_cache, KM_SLEEP);
2578 		pdu->isp_ic = ic;
2579 
2580 		/*
2581 		 * We've already built a build a header template
2582 		 * to use during the transfer.  Use this template so that
2583 		 * the SN values stay consistent with any unrelated PDU's
2584 		 * being transmitted.
2585 		 */
2586 		bcopy(&idb->idb_data_hdr_tmpl, pdu->isp_hdr,
2587 		    sizeof (iscsi_hdr_t));
2588 
2589 		/*
2590 		 * Set DataSN, data offset, and flags in BHS
2591 		 * For the prototype build, A = 0, S = 0, U = 0
2592 		 */
2593 		bhs = (iscsi_data_hdr_t *)(pdu->isp_hdr);
2594 
2595 		bhs->datasn		= htonl(idt->idt_exp_datasn++);
2596 
2597 		hton24(bhs->dlength, chunk);
2598 		bhs->offset = htonl(idb->idb_bufoffset + data_offset);
2599 
2600 		if (chunk == remainder) {
2601 			bhs->flags = ISCSI_FLAG_FINAL; /* F bit set to 1 */
2602 		}
2603 
2604 		/* Instrument the data-send DTrace probe. */
2605 		if (IDM_PDU_OPCODE(pdu) == ISCSI_OP_SCSI_DATA_RSP) {
2606 			DTRACE_ISCSI_2(data__send,
2607 			    idm_conn_t *, idt->idt_ic,
2608 			    iscsi_data_rsp_hdr_t *,
2609 			    (iscsi_data_rsp_hdr_t *)pdu->isp_hdr);
2610 		}
2611 		/* setup data */
2612 		pdu->isp_data	=  (uint8_t *)idb->idb_buf + data_offset;
2613 		pdu->isp_datalen = (uint_t)chunk;
2614 		remainder	-= chunk;
2615 		data_offset	+= chunk;
2616 
2617 		/*
2618 		 * Now that we're done working with idt_exp_datasn,
2619 		 * idt->idt_state and idb->idb_bufoffset we can release
2620 		 * the task lock -- don't want to hold it across the
2621 		 * call to idm_i_so_tx since we could block.
2622 		 */
2623 		mutex_exit(&idt->idt_mutex);
2624 
2625 		/*
2626 		 * Transmit the PDU.  Call the internal routine directly
2627 		 * as there is already implicit ordering.
2628 		 */
2629 		if ((tx_status = idm_i_so_tx(pdu)) != IDM_STATUS_SUCCESS) {
2630 			mutex_enter(&idt->idt_mutex);
2631 			return (tx_status);
2632 		}
2633 
2634 		mutex_enter(&idt->idt_mutex);
2635 		idt->idt_tx_bytes += chunk;
2636 	}
2637 
2638 	return (IDM_STATUS_SUCCESS);
2639 }
2640 
2641 /*
2642  * TX PDU cache
2643  */
2644 /* ARGSUSED */
2645 int
2646 idm_sotx_pdu_constructor(void *hdl, void *arg, int flags)
2647 {
2648 	idm_pdu_t	*pdu = hdl;
2649 
2650 	bzero(pdu, sizeof (idm_pdu_t));
2651 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2652 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2653 	pdu->isp_callback = idm_sotx_cache_pdu_cb;
2654 	pdu->isp_magic = IDM_PDU_MAGIC;
2655 	bzero(pdu->isp_hdr, sizeof (iscsi_hdr_t));
2656 
2657 	return (0);
2658 }
2659 
2660 /* ARGSUSED */
2661 void
2662 idm_sotx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2663 {
2664 	/* reset values between use */
2665 	pdu->isp_datalen = 0;
2666 
2667 	kmem_cache_free(idm.idm_sotx_pdu_cache, pdu);
2668 }
2669 
2670 /*
2671  * RX PDU cache
2672  */
2673 /* ARGSUSED */
2674 int
2675 idm_sorx_pdu_constructor(void *hdl, void *arg, int flags)
2676 {
2677 	idm_pdu_t	*pdu = hdl;
2678 
2679 	bzero(pdu, sizeof (idm_pdu_t));
2680 	pdu->isp_magic = IDM_PDU_MAGIC;
2681 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1); /* Ptr arithmetic */
2682 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2683 
2684 	return (0);
2685 }
2686 
2687 /* ARGSUSED */
2688 static void
2689 idm_sorx_cache_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2690 {
2691 	pdu->isp_iovlen = 0;
2692 	pdu->isp_sorx_buf = 0;
2693 	kmem_cache_free(idm.idm_sorx_pdu_cache, pdu);
2694 }
2695 
2696 static void
2697 idm_sorx_addl_pdu_cb(idm_pdu_t *pdu, idm_status_t status)
2698 {
2699 	/*
2700 	 * We had to modify our cached RX PDU with a longer header buffer
2701 	 * and/or a longer data buffer.  Release the new buffers and fix
2702 	 * the fields back to what we would expect for a cached RX PDU.
2703 	 */
2704 	if (pdu->isp_flags & IDM_PDU_ADDL_HDR) {
2705 		kmem_free(pdu->isp_hdr, pdu->isp_hdrlen);
2706 	}
2707 	if (pdu->isp_flags & IDM_PDU_ADDL_DATA) {
2708 		kmem_free(pdu->isp_data, pdu->isp_datalen);
2709 	}
2710 	pdu->isp_hdr = (iscsi_hdr_t *)(pdu + 1);
2711 	pdu->isp_hdrlen = sizeof (iscsi_hdr_t);
2712 	pdu->isp_data = NULL;
2713 	pdu->isp_datalen = 0;
2714 	pdu->isp_sorx_buf = 0;
2715 	pdu->isp_callback = idm_sorx_cache_pdu_cb;
2716 	idm_sorx_cache_pdu_cb(pdu, status);
2717 }
2718 
2719 /*
2720  * This thread is only active when I/O is queued for transmit
2721  * because the socket is busy.
2722  */
2723 void
2724 idm_sotx_thread(void *arg)
2725 {
2726 	idm_conn_t	*ic = arg;
2727 	idm_tx_obj_t	*object, *next;
2728 	idm_so_conn_t	*so_conn;
2729 	idm_status_t	status = IDM_STATUS_SUCCESS;
2730 
2731 	idm_conn_hold(ic);
2732 
2733 	mutex_enter(&ic->ic_mutex);
2734 	so_conn = ic->ic_transport_private;
2735 	so_conn->ic_tx_thread_running = B_TRUE;
2736 	so_conn->ic_tx_thread_did = so_conn->ic_tx_thread->t_did;
2737 	cv_signal(&ic->ic_cv);
2738 	mutex_exit(&ic->ic_mutex);
2739 
2740 	mutex_enter(&so_conn->ic_tx_mutex);
2741 
2742 	while (so_conn->ic_tx_thread_running) {
2743 		while (list_is_empty(&so_conn->ic_tx_list)) {
2744 			DTRACE_PROBE1(soconn__tx__sleep, idm_conn_t *, ic);
2745 			cv_wait(&so_conn->ic_tx_cv, &so_conn->ic_tx_mutex);
2746 			DTRACE_PROBE1(soconn__tx__wakeup, idm_conn_t *, ic);
2747 
2748 			if (!so_conn->ic_tx_thread_running) {
2749 				goto tx_bail;
2750 			}
2751 		}
2752 
2753 		object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2754 		list_remove(&so_conn->ic_tx_list, object);
2755 		mutex_exit(&so_conn->ic_tx_mutex);
2756 
2757 		switch (object->idm_tx_obj_magic) {
2758 		case IDM_PDU_MAGIC:
2759 			DTRACE_PROBE2(soconn__tx__pdu, idm_conn_t *, ic,
2760 			    idm_pdu_t *, (idm_pdu_t *)object);
2761 
2762 			status = idm_i_so_tx((idm_pdu_t *)object);
2763 			break;
2764 
2765 		case IDM_BUF_MAGIC: {
2766 			idm_buf_t *idb = (idm_buf_t *)object;
2767 			idm_task_t *idt = idb->idb_task_binding;
2768 
2769 			DTRACE_PROBE2(soconn__tx__buf, idm_conn_t *, ic,
2770 			    idm_buf_t *, idb);
2771 
2772 			mutex_enter(&idt->idt_mutex);
2773 			status = idm_so_send_buf_region(idt,
2774 			    idb, 0, idb->idb_xfer_len);
2775 
2776 			/*
2777 			 * TX thread owns the buffer so we expect it to
2778 			 * be "in transport"
2779 			 */
2780 			ASSERT(idb->idb_in_transport);
2781 			if (IDM_CONN_ISTGT(ic)) {
2782 				/*
2783 				 * idm_buf_tx_to_ini_done releases
2784 				 * idt->idt_mutex
2785 				 */
2786 				DTRACE_ISCSI_8(xfer__done,
2787 				    idm_conn_t *, idt->idt_ic,
2788 				    uintptr_t, idb->idb_buf,
2789 				    uint32_t, idb->idb_bufoffset,
2790 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2791 				    uint32_t, idb->idb_xfer_len,
2792 				    int, XFER_BUF_TX_TO_INI);
2793 				idm_buf_tx_to_ini_done(idt, idb, status);
2794 			} else {
2795 				idm_so_send_rtt_data_done(idt, idb);
2796 				mutex_exit(&idt->idt_mutex);
2797 			}
2798 			break;
2799 		}
2800 
2801 		default:
2802 			IDM_CONN_LOG(CE_WARN, "idm_sotx_thread: Unknown magic "
2803 			    "(0x%08x)", object->idm_tx_obj_magic);
2804 			status = IDM_STATUS_FAIL;
2805 		}
2806 
2807 		mutex_enter(&so_conn->ic_tx_mutex);
2808 
2809 		if (status != IDM_STATUS_SUCCESS) {
2810 			so_conn->ic_tx_thread_running = B_FALSE;
2811 			idm_conn_event(ic, CE_TRANSPORT_FAIL, status);
2812 		}
2813 	}
2814 
2815 	/*
2816 	 * Before we leave, we need to abort every item remaining in the
2817 	 * TX list.
2818 	 */
2819 
2820 tx_bail:
2821 	object = (idm_tx_obj_t *)list_head(&so_conn->ic_tx_list);
2822 
2823 	while (object != NULL) {
2824 		next = list_next(&so_conn->ic_tx_list, object);
2825 
2826 		list_remove(&so_conn->ic_tx_list, object);
2827 		switch (object->idm_tx_obj_magic) {
2828 		case IDM_PDU_MAGIC:
2829 			idm_pdu_complete((idm_pdu_t *)object,
2830 			    IDM_STATUS_ABORTED);
2831 			break;
2832 
2833 		case IDM_BUF_MAGIC: {
2834 			idm_buf_t *idb = (idm_buf_t *)object;
2835 			idm_task_t *idt = idb->idb_task_binding;
2836 			mutex_exit(&so_conn->ic_tx_mutex);
2837 			mutex_enter(&idt->idt_mutex);
2838 			/*
2839 			 * TX thread owns the buffer so we expect it to
2840 			 * be "in transport"
2841 			 */
2842 			ASSERT(idb->idb_in_transport);
2843 			if (IDM_CONN_ISTGT(ic)) {
2844 				/*
2845 				 * idm_buf_tx_to_ini_done releases
2846 				 * idt->idt_mutex
2847 				 */
2848 				DTRACE_ISCSI_8(xfer__done,
2849 				    idm_conn_t *, idt->idt_ic,
2850 				    uintptr_t, idb->idb_buf,
2851 				    uint32_t, idb->idb_bufoffset,
2852 				    uint64_t, 0, uint32_t, 0, uint32_t, 0,
2853 				    uint32_t, idb->idb_xfer_len,
2854 				    int, XFER_BUF_TX_TO_INI);
2855 				idm_buf_tx_to_ini_done(idt, idb,
2856 				    IDM_STATUS_ABORTED);
2857 			} else {
2858 				idm_so_send_rtt_data_done(idt, idb);
2859 				mutex_exit(&idt->idt_mutex);
2860 			}
2861 			mutex_enter(&so_conn->ic_tx_mutex);
2862 			break;
2863 		}
2864 		default:
2865 			IDM_CONN_LOG(CE_WARN,
2866 			    "idm_sotx_thread: Unexpected magic "
2867 			    "(0x%08x)", object->idm_tx_obj_magic);
2868 		}
2869 
2870 		object = next;
2871 	}
2872 
2873 	mutex_exit(&so_conn->ic_tx_mutex);
2874 	idm_conn_rele(ic);
2875 	thread_exit();
2876 	/*NOTREACHED*/
2877 }
2878 
2879 static void
2880 idm_so_socket_set_nonblock(struct sonode *node)
2881 {
2882 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
2883 	    (node->so_state | FNONBLOCK), CRED(), NULL);
2884 }
2885 
2886 static void
2887 idm_so_socket_set_block(struct sonode *node)
2888 {
2889 	(void) VOP_SETFL(node->so_vnode, node->so_flag,
2890 	    (node->so_state & (~FNONBLOCK)), CRED(), NULL);
2891 }
2892