1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * iSCSI Software Initiator
26  */
27 
28 #include <sys/socket.h>		/* networking stuff */
29 #include <sys/strsubr.h>	/* networking stuff */
30 #include <netinet/tcp.h>	/* TCP_NODELAY */
31 #include <sys/socketvar.h>	/* _ALLOC_SLEEP */
32 #include <sys/pathname.h>	/* declares:	lookupname */
33 #include <sys/fs/snode.h>	/* defines:	VTOS */
34 #include <sys/fs/dv_node.h>	/* declares:	devfs_lookupname */
35 #include <sys/bootconf.h>
36 #include <sys/bootprops.h>
37 #include <netinet/in.h>
38 #include "iscsi.h"
39 #include <sys/ksocket.h>
40 
41 /*
42  * This is a high level description of the default
43  * iscsi_net transport interfaces.  These are used
44  * to create, send, recv, and close standard TCP/IP
45  * messages.  In addition there are extensions to send
46  * and recv iSCSI PDU data.
47  *
48  * NOTE: It would be very easy for an iSCSI HBA vendor
49  * to register their own functions over the top of
50  * the default interfaces.  This would allow an iSCSI
51  * HBA to use the same iscsiadm management interfaces
52  * and the Solaris iSCSI session / connection management.
53  * The current problem with this approach is we only
 * allow one registered transport table.  This
55  * would be pretty easy to correct although will require
56  * additional CLI changes to manage multiple interfaces.
57  * If a vendor can present compelling performance data,
58  * then Sun will be willing to enhance this support for
59  * multiple interface tables and better CLI management.
60  *
61  * The following listing describes the iscsi_net
62  * entry points:
63  *
64  *   socket	    - Creates TCP/IP socket connection.  In the
65  *		       default implementation creates a sonode
66  *		       via the sockfs kernel layer.
67  *   bind	      - Performs standard TCP/IP BSD operation.  In
 *		       the default implementation this only acts
69  *		       as a soft binding based on the IP and routing
70  *			 tables.  It would be preferred if this was
71  *			 a hard binding but that is currently not
72  *			 possible with Solaris's networking stack.
73  *   connect	   - Performs standard TCP/IP BSD operation.  This
74  *		       establishes the TCP SYN to the peer IP address.
75  *   listen	    - Performs standard TCP/IP BSD operation.  This
76  *		       listens for incoming peer connections.
77  *   accept	    - Performs standard TCP/IP BSD operation.  This
78  *		       accepts incoming peer connections.
79  *   shutdown	  - This disconnects the TCP/IP connection while
80  *		       maintaining the resources.
81  *   close	     - This disconnects the TCP/IP connection and
82  *		       releases the resources.
83  *
84  *   getsockopt	- Gets socket option for specified socket.
85  *   setsockopt	- Sets socket option for specified socket.
86  *
87  *      The current socket options that are used by the initiator
88  *      are listed below.
89  *
90  *	TCP_CONN_NOTIFY_THRESHOLD
91  *	TCP_CONN_ABORT_THRESHOLD
92  *	TCP_ABORT_THRESHOLD
93  *	TCP_NODELAY
94  *	SO_RCVBUF
95  *	SO_SNDBUF
96  *
97  *   iscsi_net_poll    - Poll socket interface for a specified amount
98  *		       of data.  If data not received in timeout
99  *		       period fail request.
100  *   iscsi_net_sendmsg - Send message on socket connection
101  *   iscsi_net_recvmsg - Receive message on socket connection
102  *
103  *   iscsi_net_sendpdu - Send iSCSI PDU on socket connection
104  *   iscsi_net_recvhdr - Receive iSCSI header on socket connection
105  *   iscsi_net_recvdata - Receive iSCSI data on socket connection
106  *
107  *     The iSCSI interfaces have the below optional flags.
108  *
109  *       ISCSI_NET_HEADER_DIGEST - The interface should either
110  *				generate or validate the iSCSI
111  *				header digest CRC.
 *       ISCSI_NET_DATA_DIGEST   - The interface should either
113  *			      generate or validate the iSCSI
114  *			      data digest CRC.
115  */
116 
117 
118 /* global */
119 iscsi_network_t *iscsi_net;
120 
121 /* consts */
122 
/*
 * This table is used for quick validation of incoming
 * iSCSI PDU opcodes.  It is indexed by the opcode byte of the
 * received header.  A non-zero value in the table below
 * indicates that the opcode is invalid for an iSCSI
 * initiator to receive (iscsi_net_recvhdr() fails such a PDU
 * with ISCSI_STATUS_PROTOCOL_ERROR); a value of '0' means the
 * opcode is accepted.
 */
const int   is_incoming_opcode_invalid[256] = {
	/*		0  1  2  3  4  5  6  7  8  9  A  B  C  D  E  F */
	/* 0x0X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x1X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x2X */	0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x3X */	1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
	/* 0x4X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x5X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x6X */	0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x7X */	1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
	/* 0x8X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x9X */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xAX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xBX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xCX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xDX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xEX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xFX */	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
};
148 
149 #define	IP_4_BITS	32
150 #define	IP_6_BITS	128
151 
152 extern int modrootloaded;
153 extern ib_boot_prop_t   *iscsiboot_prop;
154 
155 /* prototypes */
156 static void * iscsi_net_socket(int domain, int type, int protocol);
157 static int iscsi_net_bind(void *socket, struct sockaddr *
158     name, int name_len, int backlog, int flags);
159 static int iscsi_net_connect(void *socket, struct sockaddr *
160     name, int name_len, int fflag, int flags);
161 static int iscsi_net_listen(void *socket, int backlog);
162 static void * iscsi_net_accept(void *socket, struct sockaddr *addr,
163     int *addr_len);
164 static int iscsi_net_getsockname(void *socket, struct sockaddr *, socklen_t *);
165 static int iscsi_net_getsockopt(void *socket, int level,
166     int option_name, void *option_val, int *option_len, int flags);
167 static int iscsi_net_setsockopt(void *socket, int level,
168     int option_name, void *option_val, int option_len);
169 static int iscsi_net_shutdown(void *socket, int how);
170 static void iscsi_net_close(void *socket);
171 
172 static size_t iscsi_net_poll(void *socket, clock_t timeout);
173 static size_t iscsi_net_sendmsg(void *socket, struct msghdr *msg);
174 static size_t iscsi_net_recvmsg(void *socket,
175     struct msghdr *msg, int timeout);
176 
177 static iscsi_status_t iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp,
178     char *data, int flags);
179 static iscsi_status_t iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp,
180     char *data, int max_data_length, int timeout, int flags);
181 static iscsi_status_t iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp,
182     int header_length, int timeout, int flags);
183 
184 static void iscsi_net_set_connect_options(void *socket);
185 
186 /*
187  * +--------------------------------------------------------------------+
188  * | network interface registration functions			   |
189  * +--------------------------------------------------------------------+
190  */
191 
192 /*
193  * iscsi_net_init - initialize network interface
194  */
195 void
196 iscsi_net_init()
197 {
198 	iscsi_net = kmem_zalloc(sizeof (*iscsi_net), KM_SLEEP);
199 
200 	iscsi_net->socket	= iscsi_net_socket;
201 
202 	iscsi_net->bind		= iscsi_net_bind;
203 	iscsi_net->connect	= iscsi_net_connect;
204 	iscsi_net->listen	= iscsi_net_listen;
205 	iscsi_net->accept	= iscsi_net_accept;
206 	iscsi_net->shutdown	= iscsi_net_shutdown;
207 	iscsi_net->close	= iscsi_net_close;
208 
209 	iscsi_net->getsockname	= iscsi_net_getsockname;
210 	iscsi_net->getsockopt	= iscsi_net_getsockopt;
211 	iscsi_net->setsockopt	= iscsi_net_setsockopt;
212 
213 	iscsi_net->poll		= iscsi_net_poll;
214 	iscsi_net->sendmsg	= iscsi_net_sendmsg;
215 	iscsi_net->recvmsg	= iscsi_net_recvmsg;
216 
217 	iscsi_net->sendpdu	= iscsi_net_sendpdu;
218 	iscsi_net->recvhdr	= iscsi_net_recvhdr;
219 	iscsi_net->recvdata	= iscsi_net_recvdata;
220 }
221 
222 /*
223  * iscsi_net_fini - release network interface
224  */
225 void
226 iscsi_net_fini()
227 {
228 	kmem_free(iscsi_net, sizeof (*iscsi_net));
229 	iscsi_net = NULL;
230 }
231 
232 /*
233  * iscsi_net_set_connect_options -
234  */
235 static void
236 iscsi_net_set_connect_options(void *socket)
237 {
238 	int ret = 0;
239 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
240 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&iscsi_net->tweaks.
241 	    conn_notify_threshold, sizeof (int));
242 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
243 	    TCP_CONN_ABORT_THRESHOLD, (char *)&iscsi_net->tweaks.
244 	    conn_abort_threshold, sizeof (int));
245 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
246 	    (char *)&iscsi_net->tweaks.abort_threshold, sizeof (int));
247 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
248 	    (char *)&iscsi_net->tweaks.nodelay, sizeof (int));
249 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
250 	    (char *)&iscsi_net->tweaks.rcvbuf, sizeof (int));
251 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
252 	    (char *)&iscsi_net->tweaks.sndbuf, sizeof (int));
253 	if (ret != 0) {
254 		cmn_err(CE_NOTE, "iscsi connection failed to set socket option"
255 		    "TCP_CONN_NOTIFY_THRESHOLD, TCP_CONN_ABORT_THRESHOLD,"
256 		    "TCP_ABORT_THRESHOLD, TCP_NODELAY, SO_RCVBUF or SO_SNDBUF");
257 	}
258 }
259 
260 /*
261  * +--------------------------------------------------------------------+
262  * | register network interfaces					|
263  * +--------------------------------------------------------------------+
264  */
265 
266 /*
267  * iscsi_net_socket - create socket
268  */
269 static void *
270 iscsi_net_socket(int domain, int type, int protocol)
271 {
272 	ksocket_t	socket;
273 	int 		err	= 0;
274 
275 	err = ksocket_socket(&socket, domain, type, protocol, KSOCKET_SLEEP,
276 	    CRED());
277 	if (!err)
278 		return ((void *)socket);
279 	else
280 		return (NULL);
281 
282 }
283 
284 /*
285  * iscsi_net_bind - bind socket to a specific sockaddr
286  */
287 /* ARGSUSED */
288 static int
289 iscsi_net_bind(void *socket, struct sockaddr *name, int name_len,
290 	int backlog, int flags)
291 {
292 	ksocket_t ks = (ksocket_t)socket;
293 	int error;
294 	error = ksocket_bind(ks, name, name_len, CRED());
295 	if (error == 0 && backlog != 0)
296 		error = ksocket_listen(ks, backlog, CRED());
297 
298 	return (error);
299 }
300 
301 /*
302  * iscsi_net_connect - connect socket to peer sockaddr
303  */
304 /* ARGSUSED */
305 static int
306 iscsi_net_connect(void *socket, struct sockaddr *name, int name_len,
307 	int fflag, int flags)
308 {
309 	ksocket_t ks = (ksocket_t)socket;
310 	int rval;
311 
312 	iscsi_net_set_connect_options(socket);
313 	rval = ksocket_connect(ks, name, name_len, CRED());
314 
315 	return (rval);
316 }
317 
318 /*
319  * iscsi_net_listen - listen to socket for peer connections
320  */
321 static int
322 iscsi_net_listen(void *socket, int backlog)
323 {
324 	ksocket_t ks = (ksocket_t)socket;
325 	return (ksocket_listen(ks, backlog, CRED()));
326 }
327 
328 /*
329  * iscsi_net_accept - accept peer socket connections
330  */
331 static void *
332 iscsi_net_accept(void *socket, struct sockaddr *addr, int *addr_len)
333 {
334 	ksocket_t listen_ks;
335 	ksocket_t ks = (ksocket_t)socket;
336 
337 	ksocket_accept(ks, addr, (socklen_t *)addr_len, &listen_ks, CRED());
338 
339 	return ((void *)listen_ks);
340 }
341 
342 /*
343  * iscsi_net_getsockname -
344  */
345 static int
346 iscsi_net_getsockname(void *socket, struct sockaddr *addr, socklen_t *addrlen)
347 {
348 	ksocket_t ks = (ksocket_t)socket;
349 	return (ksocket_getsockname(ks, addr, addrlen, CRED()));
350 }
351 
352 /*
353  * iscsi_net_getsockopt - get value of option on socket
354  */
355 /* ARGSUSED */
356 static int
357 iscsi_net_getsockopt(void *socket, int level, int option_name,
358 	void *option_val, int *option_len, int flags)
359 {
360 	ksocket_t ks = (ksocket_t)socket;
361 	return (ksocket_getsockopt(ks, level, option_name, option_val,
362 	    option_len, CRED()));
363 }
364 
365 /*
366  * iscsi_net_setsockopt - set value for option on socket
367  */
368 static int
369 iscsi_net_setsockopt(void *socket, int level, int option_name,
370 	void *option_val, int option_len)
371 {
372 	ksocket_t ks = (ksocket_t)socket;
373 	return (ksocket_setsockopt(ks, level, option_name, option_val,
374 	    option_len, CRED()));
375 }
376 
377 /*
378  * iscsi_net_shutdown - shutdown socket connection
379  */
380 static int
381 iscsi_net_shutdown(void *socket, int how)
382 {
383 	ksocket_t ks = (ksocket_t)socket;
384 	return (ksocket_shutdown(ks, how, CRED()));
385 }
386 
387 /*
388  * iscsi_net_close - shutdown socket connection and release resources
389  */
390 static void
391 iscsi_net_close(void *socket)
392 {
393 	ksocket_t ks = (ksocket_t)socket;
394 	(void) ksocket_close(ks, CRED());
395 }
396 
397 /*
398  * iscsi_net_poll - poll socket for data
399  */
400 /* ARGSUSED */
401 static size_t
402 iscsi_net_poll(void *socket, clock_t timeout)
403 {
404 	int pflag;
405 	char msg[64];
406 	size_t recv = 0;
407 	ksocket_t ks = (ksocket_t)socket;
408 
409 	if (get_udatamodel() == DATAMODEL_NONE ||
410 	    get_udatamodel() == DATAMODEL_NATIVE) {
411 		struct timeval tl;
412 
413 		/* timeout is millisecond */
414 		tl.tv_sec = timeout / 1000;
415 		tl.tv_usec = (timeout % 1000) * 1000;
416 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
417 		    sizeof (struct timeval), CRED()))
418 			return (0);
419 	} else {
420 		struct timeval32 tl;
421 
422 		/* timeout is millisecond */
423 		tl.tv_sec = timeout / 1000;
424 		tl.tv_usec = (timeout % 1000) * 1000;
425 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
426 		    sizeof (struct timeval32), CRED()))
427 			return (0);
428 	}
429 
430 	pflag = MSG_ANY;
431 	bzero(msg, sizeof (msg));
432 	return (ksocket_recv(ks, msg, sizeof (msg), pflag, &recv, CRED()));
433 }
434 
435 /*
436  * iscsi_net_sendmsg - send message on socket
437  */
438 /* ARGSUSED */
439 static size_t
440 iscsi_net_sendmsg(void *socket, struct msghdr *msg)
441 {
442 	ksocket_t ks = (ksocket_t)socket;
443 	size_t sent = 0;
444 	int flag = msg->msg_flags;
445 	(void) ksocket_sendmsg(ks, msg, flag, &sent, CRED());
446 	DTRACE_PROBE1(ksocket_sendmsg, size_t, sent);
447 	return (sent);
448 }
449 
450 /*
451  * iscsi_net_recvmsg - receive message on socket
452  */
453 /* ARGSUSED */
454 static size_t
455 iscsi_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
456 {
457 	int		prflag	    = msg->msg_flags;
458 	ksocket_t	ks	    = (ksocket_t)socket;
459 	size_t 		recv	    = 0;
460 
461 	/* Set recv timeout */
462 	if (get_udatamodel() == DATAMODEL_NONE ||
463 	    get_udatamodel() == DATAMODEL_NATIVE) {
464 		struct timeval tl;
465 
466 		tl.tv_sec = timeout;
467 		tl.tv_usec = 0;
468 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
469 		    sizeof (struct timeval), CRED()))
470 			return (0);
471 	} else {
472 		struct timeval32 tl;
473 
474 		tl.tv_sec = timeout;
475 		tl.tv_usec = 0;
476 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
477 		    sizeof (struct timeval32), CRED()))
478 			return (0);
479 	}
480 	/*
481 	 * Receive the requested data.  Block until all
482 	 * data is received or timeout.
483 	 */
484 	ksocket_hold(ks);
485 	ksocket_recvmsg(ks, msg, prflag, &recv, CRED());
486 	ksocket_rele(ks);
487 	DTRACE_PROBE1(ksocket_recvmsg, size_t, recv);
488 	return (recv);
489 }
490 
491 /*
492  * iscsi_net_sendpdu - send iscsi pdu on socket
493  */
494 static iscsi_status_t
495 iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, char *data, int flags)
496 {
497 	uint32_t	pad;
498 	uint32_t	crc_hdr;
499 	uint32_t	crc_data;
500 	uint32_t	pad_len;
501 	uint32_t	data_len;
502 	iovec_t		iovec[ISCSI_MAX_IOVEC];
503 	int		iovlen = 0;
504 	size_t		total_len = 0;
505 	size_t		send_len;
506 	struct msghdr	msg;
507 
508 	ASSERT(socket != NULL);
509 	ASSERT(ihp != NULL);
510 
511 	/*
512 	 * Let's send the header first.  'hlength' is in 32-bit
513 	 * quantities, so we need to multiply by four to get bytes
514 	 */
515 	ASSERT(iovlen < ISCSI_MAX_IOVEC);
516 	iovec[iovlen].iov_base = (void *)ihp;
517 	iovec[iovlen].iov_len  = sizeof (*ihp) + ihp->hlength * 4;
518 	total_len += sizeof (*ihp) + ihp->hlength * 4;
519 	iovlen++;
520 
521 	/* Let's transmit the header digest if we have to. */
522 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
523 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
524 		/*
525 		 * Converting the calculated CRC via htonl is not
526 		 * necessary because iscsi_crc32c calculates
527 		 * the value as it expects to be written
528 		 */
529 		crc_hdr = iscsi_crc32c((char *)ihp,
530 		    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
531 
532 		iovec[iovlen].iov_base = (void *)&crc_hdr;
533 		iovec[iovlen].iov_len  = sizeof (crc_hdr);
534 		total_len += sizeof (crc_hdr);
535 		iovlen++;
536 	}
537 
538 	/* Let's transmit the data if any. */
539 	data_len = ntoh24(ihp->dlength);
540 
541 	if (data_len) {
542 
543 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
544 		iovec[iovlen].iov_base = (void *)data;
545 		iovec[iovlen].iov_len  = data_len;
546 		total_len += data_len;
547 		iovlen++;
548 
549 		pad_len = ((ISCSI_PAD_WORD_LEN -
550 		    (data_len & (ISCSI_PAD_WORD_LEN - 1))) &
551 		    (ISCSI_PAD_WORD_LEN - 1));
552 
553 		/* Let's transmit the data pad if any. */
554 		if (pad_len) {
555 
556 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
557 			pad = 0;
558 			iovec[iovlen].iov_base = (void *)&pad;
559 			iovec[iovlen].iov_len  = pad_len;
560 			total_len += pad_len;
561 			iovlen++;
562 		}
563 
564 		/* Let's transmit the data digest if we have to. */
565 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
566 
567 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
568 			/*
569 			 * Converting the calculated CRC via htonl is not
570 			 * necessary because iscsi_crc32c calculates the
571 			 * value as it expects to be written
572 			 */
573 			crc_data = iscsi_crc32c(data, data_len);
574 			crc_data = iscsi_crc32c_continued(
575 			    (char *)&pad, pad_len, crc_data);
576 
577 			iovec[iovlen].iov_base = (void *)&crc_data;
578 			iovec[iovlen].iov_len  = sizeof (crc_data);
579 			total_len += sizeof (crc_data);
580 			iovlen++;
581 		}
582 	}
583 
584 	DTRACE_PROBE4(tx, void *, socket, iovec_t *, &iovec[0],
585 	    int, iovlen, int, total_len);
586 
587 	/* Initialization of the message header. */
588 	bzero(&msg, sizeof (msg));
589 	msg.msg_iov	= &iovec[0];
590 	msg.msg_flags	= MSG_WAITALL;
591 	msg.msg_iovlen	= iovlen;
592 
593 	send_len = iscsi_net->sendmsg(socket, &msg);
594 	DTRACE_PROBE2(sendmsg, size_t, total_len, size_t, send_len);
595 	if (total_len != send_len) {
596 		return (ISCSI_STATUS_TCP_TX_ERROR);
597 	}
598 	return (ISCSI_STATUS_SUCCESS);
599 }
600 
601 /*
602  * iscsi_net_recvhdr - receive iscsi hdr on socket
603  */
604 static iscsi_status_t
605 iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp, int header_length,
606     int timeout, int flags)
607 {
608 	iovec_t		    iov[ISCSI_MAX_IOVEC];
609 	int		    iovlen		= 1;
610 	int		    total_len		= 0;
611 	uint32_t	    crc_actual		= 0;
612 	uint32_t	    crc_calculated	= 0;
613 	char		    *adhdr		= NULL;
614 	int		    adhdr_length	= 0;
615 	struct msghdr	    msg;
616 	size_t		    recv_len;
617 
618 	ASSERT(socket != NULL);
619 	ASSERT(ihp != NULL);
620 
621 	if (header_length < sizeof (iscsi_hdr_t)) {
622 		ASSERT(FALSE);
623 		return (ISCSI_STATUS_INTERNAL_ERROR);
624 	}
625 
626 	/*
627 	 * Receive primary header
628 	 */
629 	iov[0].iov_base = (char *)ihp;
630 	iov[0].iov_len = sizeof (iscsi_hdr_t);
631 
632 	bzero(&msg, sizeof (msg));
633 	msg.msg_iov	= iov;
634 	msg.msg_flags	= MSG_WAITALL;
635 	msg.msg_iovlen	= iovlen;
636 
637 	recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
638 	if (recv_len != sizeof (iscsi_hdr_t)) {
639 		return (ISCSI_STATUS_TCP_RX_ERROR);
640 	}
641 
642 	DTRACE_PROBE2(rx_hdr, void *, socket, iovec_t *iop, &iov[0]);
643 
644 	/* verify incoming opcode is a valid operation */
645 	if (is_incoming_opcode_invalid[ihp->opcode]) {
646 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
647 		    "received an unsupported opcode:0x%02x",
648 		    socket, ihp->opcode);
649 		return (ISCSI_STATUS_PROTOCOL_ERROR);
650 	}
651 
652 	/*
653 	 * Setup receipt of additional header
654 	 */
655 	if (ihp->hlength > 0) {
656 		adhdr = ((char *)ihp) + sizeof (iscsi_hdr_t);
657 		adhdr_length = header_length - sizeof (iscsi_hdr_t);
658 		/* make sure enough space is available for adhdr */
659 		if (ihp->hlength > adhdr_length) {
660 			ASSERT(FALSE);
661 			return (ISCSI_STATUS_INTERNAL_ERROR);
662 		}
663 
664 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
665 		iov[iovlen].iov_base = adhdr;
666 		iov[iovlen].iov_len = adhdr_length;
667 		total_len += adhdr_length;
668 		iovlen++;
669 	}
670 
671 	/*
672 	 * Setup receipt of header digest if enabled and connection
673 	 * is in full feature mode.
674 	 */
675 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
676 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
677 		iov[iovlen].iov_base = (char *)&crc_actual;
678 		iov[iovlen].iov_len = sizeof (uint32_t);
679 		total_len += sizeof (uint32_t);
680 		iovlen++;
681 	}
682 
683 	/*
684 	 * Read additional header and/or header digest if pieces
685 	 * are available
686 	 */
687 	if (iovlen > 1) {
688 
689 		bzero(&msg, sizeof (msg));
690 		msg.msg_iov	= iov;
691 		msg.msg_flags	= MSG_WAITALL;
692 		msg.msg_iovlen	= iovlen;
693 
694 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
695 		if (recv_len != total_len) {
696 			return (ISCSI_STATUS_TCP_RX_ERROR);
697 		}
698 
699 		DTRACE_PROBE4(rx_adhdr_digest, void *, socket,
700 		    iovec_t *iop, &iov[0], int, iovlen, int, total_len);
701 
702 		/*
703 		 * Verify header digest if enabled and connection
704 		 * is in full feature mode
705 		 */
706 		if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
707 			crc_calculated = iscsi_crc32c((uchar_t *)ihp,
708 			    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
709 
710 			/*
711 			 * Converting actual CRC read via ntohl is not
712 			 * necessary because iscsi_crc32c calculates the
713 			 * value as it expect to be read
714 			 */
715 			if (crc_calculated != crc_actual) {
716 				/* Invalid Header Digest */
717 				cmn_err(CE_WARN, "iscsi connection(%p) "
718 				    "protocol error - encountered a header "
719 				    "digest error expected:0x%08x "
720 				    "received:0x%08x", socket,
721 				    crc_calculated, crc_actual);
722 				return (ISCSI_STATUS_HEADER_DIGEST_ERROR);
723 			}
724 		}
725 	}
726 	return (ISCSI_STATUS_SUCCESS);
727 }
728 
729 
730 /*
731  * iscsi_net_recvdata - receive iscsi data payload from socket
732  */
733 static iscsi_status_t
734 iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp, char *data,
735     int max_data_length, int timeout, int flags)
736 {
737 	struct iovec	iov[3];
738 	int		iovlen			= 1;
739 	int		total_len		= 0;
740 	int		dlength			= 0;
741 	int		pad_len			= 0;
742 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
743 	uint32_t	crc_calculated		= 0;
744 	uint32_t	crc_actual		= 0;
745 	struct msghdr	msg;
746 	size_t		recv_len;
747 
748 	ASSERT(socket != NULL);
749 	ASSERT(ihp != NULL);
750 	ASSERT(data != NULL);
751 
752 	/* short hand dlength */
753 	dlength = ntoh24(ihp->dlength);
754 
755 	/* verify dlength is valid */
756 	if (dlength > max_data_length) {
757 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
758 		    "invalid data lengths itt:0x%x received:0x%x "
759 		    "max expected:0x%x", socket, ihp->itt,
760 		    dlength, max_data_length);
761 		return (ISCSI_STATUS_PROTOCOL_ERROR);
762 	}
763 
764 	if (dlength) {
765 		/* calculate pad */
766 		pad_len = ((ISCSI_PAD_WORD_LEN -
767 		    (dlength & (ISCSI_PAD_WORD_LEN - 1))) &
768 		    (ISCSI_PAD_WORD_LEN - 1));
769 
770 		/* setup data iovec */
771 		iov[0].iov_base	= (char *)data;
772 		iov[0].iov_len	= dlength;
773 		total_len	= dlength;
774 
775 		/* if pad setup pad iovec */
776 		if (pad_len) {
777 			iov[iovlen].iov_base	= (char *)&pad;
778 			iov[iovlen].iov_len	= pad_len;
779 			total_len		+= pad_len;
780 			iovlen++;
781 		}
782 
783 		/* setup data digest */
784 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
785 			iov[iovlen].iov_base	= (char *)&crc_actual;
786 			iov[iovlen].iov_len	= sizeof (crc_actual);
787 			total_len		+= sizeof (crc_actual);
788 			iovlen++;
789 		}
790 
791 		bzero(&msg, sizeof (msg));
792 		msg.msg_iov	= iov;
793 		msg.msg_flags	= MSG_WAITALL;
794 		msg.msg_iovlen	= iovlen;
795 
796 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
797 		if (recv_len != total_len) {
798 			return (ISCSI_STATUS_TCP_RX_ERROR);
799 		}
800 
801 		DTRACE_PROBE4(rx_data, void *, socket, iovec_t *iop,
802 		    &iov[0], int, iovlen, int, total_len);
803 
804 		/* verify data digest is present */
805 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
806 
807 			crc_calculated = iscsi_crc32c(data, dlength);
808 			crc_calculated = iscsi_crc32c_continued(
809 			    (char *)&pad, pad_len, crc_calculated);
810 
811 			/*
812 			 * Converting actual CRC read via ntohl is not
813 			 * necessary because iscsi_crc32c calculates the
814 			 * value as it expects to be read
815 			 */
816 			if (crc_calculated != crc_actual) {
817 				cmn_err(CE_WARN, "iscsi connection(%p) "
818 				    "protocol error - encountered a data "
819 				    "digest error itt:0x%x expected:0x%08x "
820 				    "received:0x%08x", socket,
821 				    ihp->itt, crc_calculated, crc_actual);
822 				return (ISCSI_STATUS_DATA_DIGEST_ERROR);
823 			}
824 		}
825 	}
826 	return (ISCSI_STATUS_SUCCESS);
827 }
828 
829 /*
830  * Convert a prefix length to a mask.
831  */
832 static iscsi_status_t
833 iscsi_prefixlentomask(int prefixlen, int maxlen, uchar_t *mask)
834 {
835 	if (prefixlen < 0 || prefixlen > maxlen || mask == NULL) {
836 		return (ISCSI_STATUS_INTERNAL_ERROR);
837 	}
838 
839 	while (prefixlen > 0) {
840 		if (prefixlen >= 8) {
841 			*mask = 0xff;
842 			mask++;
843 			prefixlen = prefixlen - 8;
844 			continue;
845 		}
846 		*mask = *mask | (1 << (8 - prefixlen));
847 		prefixlen--;
848 	}
849 	return (ISCSI_STATUS_SUCCESS);
850 }
851 
852 iscsi_status_t
853 iscsi_net_interface()
854 {
855 	struct in_addr	braddr;
856 	struct in_addr	subnet;
857 	struct in_addr	myaddr;
858 	struct in_addr	defgateway;
859 	struct in6_addr myaddr6;
860 	struct in6_addr subnet6;
861 	uchar_t		mask_prefix = 0;
862 	int		mask_bits   = 1;
863 	TIUSER		*tiptr;
864 	TIUSER		*tiptr6;
865 	char		ifname[16]	= {0};
866 	iscsi_status_t	status;
867 
868 	struct knetconfig dl_udp_netconf = {
869 	    NC_TPI_CLTS,
870 	    NC_INET,
871 	    NC_UDP,
872 	    0, };
873 	struct knetconfig dl_udp6_netconf = {
874 	    NC_TPI_CLTS,
875 	    NC_INET6,
876 	    NC_UDP,
877 	    0, };
878 
879 	(void) strlcpy(ifname, rootfs.bo_ifname, sizeof (ifname));
880 
881 	if (iscsiboot_prop->boot_nic.sin_family == AF_INET) {
882 		/*
883 		 * Assumes only one linkage array element.
884 		 */
885 		dl_udp_netconf.knc_rdev =
886 		    makedevice(clone_major, ddi_name_to_major("udp"));
887 
888 		myaddr.s_addr =
889 		    iscsiboot_prop->boot_nic.nic_ip_u.u_in4.s_addr;
890 
891 		mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix;
892 		(void) memset(&subnet.s_addr, 0, sizeof (subnet));
893 		status = iscsi_prefixlentomask(mask_prefix, IP_4_BITS,
894 		    (uchar_t *)&subnet.s_addr);
895 		if (status != ISCSI_STATUS_SUCCESS) {
896 			return (status);
897 		}
898 
899 		mask_bits = mask_bits << (IP_4_BITS - mask_prefix);
900 		mask_bits = mask_bits - 1;
901 		/*
902 		 * Set the last mask bits of the ip address with 1, then
903 		 * we can get the broadcast address.
904 		 */
905 		braddr.s_addr = myaddr.s_addr | mask_bits;
906 
907 		defgateway.s_addr =
908 		    iscsiboot_prop->boot_nic.nic_gw_u.u_in4.s_addr;
909 
910 		/* initialize interface */
911 		if (t_kopen((file_t *)NULL, dl_udp_netconf.knc_rdev,
912 		    FREAD|FWRITE, &tiptr, CRED()) == 0) {
913 			if (kdlifconfig(tiptr, AF_INET, &myaddr, &subnet,
914 			    &braddr, &defgateway, ifname)) {
915 				cmn_err(CE_WARN, "Failed to configure"
916 				    " iSCSI boot nic");
917 				(void) t_kclose(tiptr, 0);
918 				return (ISCSI_STATUS_INTERNAL_ERROR);
919 			}
920 		} else {
921 			cmn_err(CE_WARN, "Failed to configure"
922 			    " iSCSI boot nic");
923 			return (ISCSI_STATUS_INTERNAL_ERROR);
924 		}
925 		return (ISCSI_STATUS_SUCCESS);
926 	} else {
927 		dl_udp6_netconf.knc_rdev =
928 		    makedevice(clone_major, ddi_name_to_major("udp6"));
929 
930 		bcopy(&iscsiboot_prop->boot_nic.nic_ip_u.u_in6.s6_addr,
931 		    &myaddr6.s6_addr, 16);
932 
933 		(void) memset(&subnet6, 0, sizeof (subnet6));
934 		mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix;
935 		status = iscsi_prefixlentomask(mask_prefix, IP_6_BITS,
936 		    (uchar_t *)&subnet6.s6_addr);
937 		if (status != ISCSI_STATUS_SUCCESS) {
938 			return (status);
939 		}
940 
941 		if (t_kopen((file_t *)NULL, dl_udp6_netconf.knc_rdev,
942 		    FREAD|FWRITE, &tiptr6, CRED()) == 0) {
943 			if (kdlifconfig(tiptr6, AF_INET6, &myaddr6,
944 			    &subnet6, NULL, NULL, ifname)) {
945 				cmn_err(CE_WARN, "Failed to configure"
946 				    " iSCSI boot nic");
947 				(void) t_kclose(tiptr, 0);
948 				return (ISCSI_STATUS_INTERNAL_ERROR);
949 			}
950 		} else {
951 			cmn_err(CE_WARN, "Failed to configure"
952 			    " iSCSI boot nic");
953 			return (ISCSI_STATUS_INTERNAL_ERROR);
954 		}
955 		return (ISCSI_STATUS_SUCCESS);
956 	}
957 }
958