1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * iSCSI Software Initiator
26  */
27 
28 #include <sys/socket.h>		/* networking stuff */
29 #include <sys/strsubr.h>	/* networking stuff */
30 #include <netinet/tcp.h>	/* TCP_NODELAY */
31 #include <sys/socketvar.h>	/* _ALLOC_SLEEP */
32 #include <sys/pathname.h>	/* declares:	lookupname */
33 #include <sys/fs/snode.h>	/* defines:	VTOS */
34 #include <sys/fs/dv_node.h>	/* declares:	devfs_lookupname */
35 #include <sys/bootconf.h>
36 #include <sys/bootprops.h>
37 #include <netinet/in.h>
38 #include "iscsi.h"
39 #include <sys/ksocket.h>
40 
41 /*
42  * This is a high level description of the default
43  * iscsi_net transport interfaces.  These are used
44  * to create, send, recv, and close standard TCP/IP
45  * messages.  In addition there are extensions to send
46  * and recv iSCSI PDU data.
47  *
48  * NOTE: It would be very easy for an iSCSI HBA vendor
49  * to register their own functions over the top of
50  * the default interfaces.  This would allow an iSCSI
51  * HBA to use the same iscsiadm management interfaces
52  * and the Solaris iSCSI session / connection management.
 * The current problem with this approach is we only
 * allow one registered transport table.  This
55  * would be pretty easy to correct although will require
56  * additional CLI changes to manage multiple interfaces.
57  * If a vendor can present compelling performance data,
58  * then Sun will be willing to enhance this support for
59  * multiple interface tables and better CLI management.
60  *
61  * The following listing describes the iscsi_net
62  * entry points:
63  *
64  *   socket	    - Creates TCP/IP socket connection.  In the
65  *		       default implementation creates a sonode
66  *		       via the sockfs kernel layer.
 *   bind	      - Performs standard TCP/IP BSD operation.  In
 *		       the default implementation this only acts
69  *		       as a soft binding based on the IP and routing
70  *			 tables.  It would be preferred if this was
71  *			 a hard binding but that is currently not
72  *			 possible with Solaris's networking stack.
73  *   connect	   - Performs standard TCP/IP BSD operation.  This
74  *		       establishes the TCP SYN to the peer IP address.
75  *   listen	    - Performs standard TCP/IP BSD operation.  This
76  *		       listens for incoming peer connections.
77  *   accept	    - Performs standard TCP/IP BSD operation.  This
78  *		       accepts incoming peer connections.
79  *   shutdown	  - This disconnects the TCP/IP connection while
80  *		       maintaining the resources.
81  *   close	     - This disconnects the TCP/IP connection and
82  *		       releases the resources.
83  *
84  *   getsockopt	- Gets socket option for specified socket.
85  *   setsockopt	- Sets socket option for specified socket.
86  *
87  *      The current socket options that are used by the initiator
88  *      are listed below.
89  *
90  *	TCP_CONN_NOTIFY_THRESHOLD
91  *	TCP_CONN_ABORT_THRESHOLD
92  *	TCP_ABORT_THRESHOLD
93  *	TCP_NODELAY
94  *	SO_RCVBUF
95  *	SO_SNDBUF
96  *
97  *   iscsi_net_poll    - Poll socket interface for a specified amount
98  *		       of data.  If data not received in timeout
99  *		       period fail request.
100  *   iscsi_net_sendmsg - Send message on socket connection
101  *   iscsi_net_recvmsg - Receive message on socket connection
102  *
103  *   iscsi_net_sendpdu - Send iSCSI PDU on socket connection
104  *   iscsi_net_recvhdr - Receive iSCSI header on socket connection
105  *   iscsi_net_recvdata - Receive iSCSI data on socket connection
106  *
107  *     The iSCSI interfaces have the below optional flags.
108  *
109  *       ISCSI_NET_HEADER_DIGEST - The interface should either
110  *				generate or validate the iSCSI
111  *				header digest CRC.
 *       ISCSI_NET_DATA_DIGEST   - The interface should either
113  *			      generate or validate the iSCSI
114  *			      data digest CRC.
115  */
116 
117 
118 /* global */
119 iscsi_network_t *iscsi_net;
120 
121 /* consts */
122 
/*
 * This table is used for quick validation of incoming
 * iSCSI PDU opcodes.  A value of '1' in the table below
 * indicates that the opcode is invalid for an iSCSI
 * initiator to receive; a value of '0' marks an opcode
 * that is accepted.
 */
const int is_incoming_opcode_invalid[256] = {
	/* 0x00 - 0x0F: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 - 0x1F: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 - 0x2F: 0x20 through 0x26 accepted */
	0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x30 - 0x3F: 0x31, 0x32 and 0x3F accepted */
	1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
	/* 0x40 - 0x4F: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x50 - 0x5F: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x60 - 0x6F: 0x60, 0x62 and 0x66 accepted */
	0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x70 - 0x7F: 0x71 and 0x7F accepted */
	1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
	/* 0x80 - 0x8F: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90 - 0x9F: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xA0 - 0xAF: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xB0 - 0xBF: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xC0 - 0xCF: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xD0 - 0xDF: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xE0 - 0xEF: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xF0 - 0xFF: all rejected */
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};
148 
149 #define	IP_4_BITS	32
150 #define	IP_6_BITS	128
151 
152 extern int modrootloaded;
153 extern ib_boot_prop_t   *iscsiboot_prop;
154 
155 /* prototypes */
156 
157 /* for iSCSI boot */
158 static int net_up = 0;
159 static iscsi_status_t iscsi_net_interface();
160 /* boot prototypes end */
161 
162 static void * iscsi_net_socket(int domain, int type, int protocol);
163 static int iscsi_net_bind(void *socket, struct sockaddr *
164     name, int name_len, int backlog, int flags);
165 static int iscsi_net_connect(void *socket, struct sockaddr *
166     name, int name_len, int fflag, int flags);
167 static int iscsi_net_listen(void *socket, int backlog);
168 static void * iscsi_net_accept(void *socket, struct sockaddr *addr,
169     int *addr_len);
170 static int iscsi_net_getsockname(void *socket, struct sockaddr *, socklen_t *);
171 static int iscsi_net_getsockopt(void *socket, int level,
172     int option_name, void *option_val, int *option_len, int flags);
173 static int iscsi_net_setsockopt(void *socket, int level,
174     int option_name, void *option_val, int option_len);
175 static int iscsi_net_shutdown(void *socket, int how);
176 static void iscsi_net_close(void *socket);
177 
178 static size_t iscsi_net_poll(void *socket, clock_t timeout);
179 static size_t iscsi_net_sendmsg(void *socket, struct msghdr *msg);
180 static size_t iscsi_net_recvmsg(void *socket,
181     struct msghdr *msg, int timeout);
182 
183 static iscsi_status_t iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp,
184     char *data, int flags);
185 static iscsi_status_t iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp,
186     char *data, int max_data_length, int timeout, int flags);
187 static iscsi_status_t iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp,
188     int header_length, int timeout, int flags);
189 
190 static void iscsi_net_set_connect_options(void *socket);
191 
192 /*
193  * +--------------------------------------------------------------------+
194  * | network interface registration functions			   |
195  * +--------------------------------------------------------------------+
196  */
197 
198 /*
199  * iscsi_net_init - initialize network interface
200  */
201 void
202 iscsi_net_init()
203 {
204 	iscsi_net = kmem_zalloc(sizeof (*iscsi_net), KM_SLEEP);
205 
206 	iscsi_net->socket	= iscsi_net_socket;
207 
208 	iscsi_net->bind		= iscsi_net_bind;
209 	iscsi_net->connect	= iscsi_net_connect;
210 	iscsi_net->listen	= iscsi_net_listen;
211 	iscsi_net->accept	= iscsi_net_accept;
212 	iscsi_net->shutdown	= iscsi_net_shutdown;
213 	iscsi_net->close	= iscsi_net_close;
214 
215 	iscsi_net->getsockname	= iscsi_net_getsockname;
216 	iscsi_net->getsockopt	= iscsi_net_getsockopt;
217 	iscsi_net->setsockopt	= iscsi_net_setsockopt;
218 
219 	iscsi_net->poll		= iscsi_net_poll;
220 	iscsi_net->sendmsg	= iscsi_net_sendmsg;
221 	iscsi_net->recvmsg	= iscsi_net_recvmsg;
222 
223 	iscsi_net->sendpdu	= iscsi_net_sendpdu;
224 	iscsi_net->recvhdr	= iscsi_net_recvhdr;
225 	iscsi_net->recvdata	= iscsi_net_recvdata;
226 }
227 
228 /*
229  * iscsi_net_fini - release network interface
230  */
231 void
232 iscsi_net_fini()
233 {
234 	kmem_free(iscsi_net, sizeof (*iscsi_net));
235 	iscsi_net = NULL;
236 }
237 
238 /*
239  * iscsi_net_set_connect_options -
240  */
241 static void
242 iscsi_net_set_connect_options(void *socket)
243 {
244 	int ret = 0;
245 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
246 	    TCP_CONN_NOTIFY_THRESHOLD, (char *)&iscsi_net->tweaks.
247 	    conn_notify_threshold, sizeof (int));
248 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP,
249 	    TCP_CONN_ABORT_THRESHOLD, (char *)&iscsi_net->tweaks.
250 	    conn_abort_threshold, sizeof (int));
251 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_ABORT_THRESHOLD,
252 	    (char *)&iscsi_net->tweaks.abort_threshold, sizeof (int));
253 	ret += iscsi_net->setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
254 	    (char *)&iscsi_net->tweaks.nodelay, sizeof (int));
255 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
256 	    (char *)&iscsi_net->tweaks.rcvbuf, sizeof (int));
257 	ret += iscsi_net->setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
258 	    (char *)&iscsi_net->tweaks.sndbuf, sizeof (int));
259 	if (ret != 0) {
260 		cmn_err(CE_NOTE, "iscsi connection failed to set socket option"
261 		    "TCP_CONN_NOTIFY_THRESHOLD, TCP_CONN_ABORT_THRESHOLD,"
262 		    "TCP_ABORT_THRESHOLD, TCP_NODELAY, SO_RCVBUF or SO_SNDBUF");
263 	}
264 }
265 
266 /*
267  * +--------------------------------------------------------------------+
268  * | register network interfaces					|
269  * +--------------------------------------------------------------------+
270  */
271 
272 /*
273  * iscsi_net_socket - create socket
274  */
275 static void *
276 iscsi_net_socket(int domain, int type, int protocol)
277 {
278 	ksocket_t	socket;
279 	int 		err	= 0;
280 
281 	if (!modrootloaded && !net_up && iscsiboot_prop) {
282 		if (iscsi_net_interface() == ISCSI_STATUS_SUCCESS)
283 			net_up = 1;
284 	}
285 
286 	err = ksocket_socket(&socket, domain, type, protocol, KSOCKET_SLEEP,
287 	    CRED());
288 	if (!err)
289 		return ((void *)socket);
290 	else
291 		return (NULL);
292 
293 }
294 
295 /*
296  * iscsi_net_bind - bind socket to a specific sockaddr
297  */
298 /* ARGSUSED */
299 static int
300 iscsi_net_bind(void *socket, struct sockaddr *name, int name_len,
301 	int backlog, int flags)
302 {
303 	ksocket_t ks = (ksocket_t)socket;
304 	int error;
305 	error = ksocket_bind(ks, name, name_len, CRED());
306 	if (error == 0 && backlog != 0)
307 		error = ksocket_listen(ks, backlog, CRED());
308 
309 	return (error);
310 }
311 
312 /*
313  * iscsi_net_connect - connect socket to peer sockaddr
314  */
315 /* ARGSUSED */
316 static int
317 iscsi_net_connect(void *socket, struct sockaddr *name, int name_len,
318 	int fflag, int flags)
319 {
320 	ksocket_t ks = (ksocket_t)socket;
321 	int rval;
322 
323 	iscsi_net_set_connect_options(socket);
324 	rval = ksocket_connect(ks, name, name_len, CRED());
325 
326 	return (rval);
327 }
328 
329 /*
330  * iscsi_net_listen - listen to socket for peer connections
331  */
332 static int
333 iscsi_net_listen(void *socket, int backlog)
334 {
335 	ksocket_t ks = (ksocket_t)socket;
336 	return (ksocket_listen(ks, backlog, CRED()));
337 }
338 
339 /*
340  * iscsi_net_accept - accept peer socket connections
341  */
342 static void *
343 iscsi_net_accept(void *socket, struct sockaddr *addr, int *addr_len)
344 {
345 	ksocket_t listen_ks;
346 	ksocket_t ks = (ksocket_t)socket;
347 
348 	ksocket_accept(ks, addr, (socklen_t *)addr_len, &listen_ks, CRED());
349 
350 	return ((void *)listen_ks);
351 }
352 
353 /*
354  * iscsi_net_getsockname -
355  */
356 static int
357 iscsi_net_getsockname(void *socket, struct sockaddr *addr, socklen_t *addrlen)
358 {
359 	ksocket_t ks = (ksocket_t)socket;
360 	return (ksocket_getsockname(ks, addr, addrlen, CRED()));
361 }
362 
363 /*
364  * iscsi_net_getsockopt - get value of option on socket
365  */
366 /* ARGSUSED */
367 static int
368 iscsi_net_getsockopt(void *socket, int level, int option_name,
369 	void *option_val, int *option_len, int flags)
370 {
371 	ksocket_t ks = (ksocket_t)socket;
372 	return (ksocket_getsockopt(ks, level, option_name, option_val,
373 	    option_len, CRED()));
374 }
375 
376 /*
377  * iscsi_net_setsockopt - set value for option on socket
378  */
379 static int
380 iscsi_net_setsockopt(void *socket, int level, int option_name,
381 	void *option_val, int option_len)
382 {
383 	ksocket_t ks = (ksocket_t)socket;
384 	return (ksocket_setsockopt(ks, level, option_name, option_val,
385 	    option_len, CRED()));
386 }
387 
388 /*
389  * iscsi_net_shutdown - shutdown socket connection
390  */
391 static int
392 iscsi_net_shutdown(void *socket, int how)
393 {
394 	ksocket_t ks = (ksocket_t)socket;
395 	return (ksocket_shutdown(ks, how, CRED()));
396 }
397 
398 /*
399  * iscsi_net_close - shutdown socket connection and release resources
400  */
401 static void
402 iscsi_net_close(void *socket)
403 {
404 	ksocket_t ks = (ksocket_t)socket;
405 	(void) ksocket_close(ks, CRED());
406 }
407 
408 /*
409  * iscsi_net_poll - poll socket for data
410  */
411 /* ARGSUSED */
412 static size_t
413 iscsi_net_poll(void *socket, clock_t timeout)
414 {
415 	int pflag;
416 	char msg[64];
417 	size_t recv = 0;
418 	ksocket_t ks = (ksocket_t)socket;
419 
420 	if (get_udatamodel() == DATAMODEL_NONE ||
421 	    get_udatamodel() == DATAMODEL_NATIVE) {
422 		struct timeval tl;
423 
424 		/* timeout is millisecond */
425 		tl.tv_sec = timeout / 1000;
426 		tl.tv_usec = (timeout % 1000) * 1000;
427 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
428 		    sizeof (struct timeval), CRED()))
429 			return (0);
430 	} else {
431 		struct timeval32 tl;
432 
433 		/* timeout is millisecond */
434 		tl.tv_sec = timeout / 1000;
435 		tl.tv_usec = (timeout % 1000) * 1000;
436 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
437 		    sizeof (struct timeval32), CRED()))
438 			return (0);
439 	}
440 
441 	pflag = MSG_ANY;
442 	bzero(msg, sizeof (msg));
443 	return (ksocket_recv(ks, msg, sizeof (msg), pflag, &recv, CRED()));
444 }
445 
446 /*
447  * iscsi_net_sendmsg - send message on socket
448  */
449 /* ARGSUSED */
450 static size_t
451 iscsi_net_sendmsg(void *socket, struct msghdr *msg)
452 {
453 	ksocket_t ks = (ksocket_t)socket;
454 	size_t sent = 0;
455 	int flag = msg->msg_flags;
456 	(void) ksocket_sendmsg(ks, msg, flag, &sent, CRED());
457 	DTRACE_PROBE1(ksocket_sendmsg, size_t, sent);
458 	return (sent);
459 }
460 
461 /*
462  * iscsi_net_recvmsg - receive message on socket
463  */
464 /* ARGSUSED */
465 static size_t
466 iscsi_net_recvmsg(void *socket, struct msghdr *msg, int timeout)
467 {
468 	int		prflag	    = msg->msg_flags;
469 	ksocket_t	ks	    = (ksocket_t)socket;
470 	size_t 		recv	    = 0;
471 
472 	/* Set recv timeout */
473 	if (get_udatamodel() == DATAMODEL_NONE ||
474 	    get_udatamodel() == DATAMODEL_NATIVE) {
475 		struct timeval tl;
476 
477 		tl.tv_sec = timeout;
478 		tl.tv_usec = 0;
479 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
480 		    sizeof (struct timeval), CRED()))
481 			return (0);
482 	} else {
483 		struct timeval32 tl;
484 
485 		tl.tv_sec = timeout;
486 		tl.tv_usec = 0;
487 		if (ksocket_setsockopt(ks, SOL_SOCKET, SO_RCVTIMEO, &tl,
488 		    sizeof (struct timeval32), CRED()))
489 			return (0);
490 	}
491 	/*
492 	 * Receive the requested data.  Block until all
493 	 * data is received or timeout.
494 	 */
495 	ksocket_recvmsg(ks, msg, prflag, &recv, CRED());
496 	DTRACE_PROBE1(ksocket_recvmsg, size_t, recv);
497 	return (recv);
498 }
499 
500 /*
501  * iscsi_net_sendpdu - send iscsi pdu on socket
502  */
503 static iscsi_status_t
504 iscsi_net_sendpdu(void *socket, iscsi_hdr_t *ihp, char *data, int flags)
505 {
506 	uint32_t	pad;
507 	uint32_t	crc_hdr;
508 	uint32_t	crc_data;
509 	uint32_t	pad_len;
510 	uint32_t	data_len;
511 	iovec_t		iovec[ISCSI_MAX_IOVEC];
512 	int		iovlen = 0;
513 	size_t		total_len = 0;
514 	size_t		send_len;
515 	struct msghdr	msg;
516 
517 	ASSERT(socket != NULL);
518 	ASSERT(ihp != NULL);
519 
520 	/*
521 	 * Let's send the header first.  'hlength' is in 32-bit
522 	 * quantities, so we need to multiply by four to get bytes
523 	 */
524 	ASSERT(iovlen < ISCSI_MAX_IOVEC);
525 	iovec[iovlen].iov_base = (void *)ihp;
526 	iovec[iovlen].iov_len  = sizeof (*ihp) + ihp->hlength * 4;
527 	total_len += sizeof (*ihp) + ihp->hlength * 4;
528 	iovlen++;
529 
530 	/* Let's transmit the header digest if we have to. */
531 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
532 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
533 		/*
534 		 * Converting the calculated CRC via htonl is not
535 		 * necessary because iscsi_crc32c calculates
536 		 * the value as it expects to be written
537 		 */
538 		crc_hdr = iscsi_crc32c((char *)ihp,
539 		    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
540 
541 		iovec[iovlen].iov_base = (void *)&crc_hdr;
542 		iovec[iovlen].iov_len  = sizeof (crc_hdr);
543 		total_len += sizeof (crc_hdr);
544 		iovlen++;
545 	}
546 
547 	/* Let's transmit the data if any. */
548 	data_len = ntoh24(ihp->dlength);
549 
550 	if (data_len) {
551 
552 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
553 		iovec[iovlen].iov_base = (void *)data;
554 		iovec[iovlen].iov_len  = data_len;
555 		total_len += data_len;
556 		iovlen++;
557 
558 		pad_len = ((ISCSI_PAD_WORD_LEN -
559 		    (data_len & (ISCSI_PAD_WORD_LEN - 1))) &
560 		    (ISCSI_PAD_WORD_LEN - 1));
561 
562 		/* Let's transmit the data pad if any. */
563 		if (pad_len) {
564 
565 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
566 			pad = 0;
567 			iovec[iovlen].iov_base = (void *)&pad;
568 			iovec[iovlen].iov_len  = pad_len;
569 			total_len += pad_len;
570 			iovlen++;
571 		}
572 
573 		/* Let's transmit the data digest if we have to. */
574 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
575 
576 			ASSERT(iovlen < ISCSI_MAX_IOVEC);
577 			/*
578 			 * Converting the calculated CRC via htonl is not
579 			 * necessary because iscsi_crc32c calculates the
580 			 * value as it expects to be written
581 			 */
582 			crc_data = iscsi_crc32c(data, data_len);
583 			crc_data = iscsi_crc32c_continued(
584 			    (char *)&pad, pad_len, crc_data);
585 
586 			iovec[iovlen].iov_base = (void *)&crc_data;
587 			iovec[iovlen].iov_len  = sizeof (crc_data);
588 			total_len += sizeof (crc_data);
589 			iovlen++;
590 		}
591 	}
592 
593 	DTRACE_PROBE4(tx, void *, socket, iovec_t *, &iovec[0],
594 	    int, iovlen, int, total_len);
595 
596 	/* Initialization of the message header. */
597 	bzero(&msg, sizeof (msg));
598 	msg.msg_iov	= &iovec[0];
599 	msg.msg_flags	= MSG_WAITALL;
600 	msg.msg_iovlen	= iovlen;
601 
602 	send_len = iscsi_net->sendmsg(socket, &msg);
603 	DTRACE_PROBE2(sendmsg, size_t, total_len, size_t, send_len);
604 	if (total_len != send_len) {
605 		return (ISCSI_STATUS_TCP_TX_ERROR);
606 	}
607 	return (ISCSI_STATUS_SUCCESS);
608 }
609 
610 /*
611  * iscsi_net_recvhdr - receive iscsi hdr on socket
612  */
613 static iscsi_status_t
614 iscsi_net_recvhdr(void *socket, iscsi_hdr_t *ihp, int header_length,
615     int timeout, int flags)
616 {
617 	iovec_t		    iov[ISCSI_MAX_IOVEC];
618 	int		    iovlen		= 1;
619 	int		    total_len		= 0;
620 	uint32_t	    crc_actual		= 0;
621 	uint32_t	    crc_calculated	= 0;
622 	char		    *adhdr		= NULL;
623 	int		    adhdr_length	= 0;
624 	struct msghdr	    msg;
625 	size_t		    recv_len;
626 
627 	ASSERT(socket != NULL);
628 	ASSERT(ihp != NULL);
629 
630 	if (header_length < sizeof (iscsi_hdr_t)) {
631 		ASSERT(FALSE);
632 		return (ISCSI_STATUS_INTERNAL_ERROR);
633 	}
634 
635 	/*
636 	 * Receive primary header
637 	 */
638 	iov[0].iov_base = (char *)ihp;
639 	iov[0].iov_len = sizeof (iscsi_hdr_t);
640 
641 	bzero(&msg, sizeof (msg));
642 	msg.msg_iov	= iov;
643 	msg.msg_flags	= MSG_WAITALL;
644 	msg.msg_iovlen	= iovlen;
645 
646 	recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
647 	if (recv_len != sizeof (iscsi_hdr_t)) {
648 		return (ISCSI_STATUS_TCP_RX_ERROR);
649 	}
650 
651 	DTRACE_PROBE2(rx_hdr, void *, socket, iovec_t *iop, &iov[0]);
652 
653 	/* verify incoming opcode is a valid operation */
654 	if (is_incoming_opcode_invalid[ihp->opcode]) {
655 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
656 		    "received an unsupported opcode:0x%02x",
657 		    socket, ihp->opcode);
658 		return (ISCSI_STATUS_PROTOCOL_ERROR);
659 	}
660 
661 	/*
662 	 * Setup receipt of additional header
663 	 */
664 	if (ihp->hlength > 0) {
665 		adhdr = ((char *)ihp) + sizeof (iscsi_hdr_t);
666 		adhdr_length = header_length - sizeof (iscsi_hdr_t);
667 		/* make sure enough space is available for adhdr */
668 		if (ihp->hlength > adhdr_length) {
669 			ASSERT(FALSE);
670 			return (ISCSI_STATUS_INTERNAL_ERROR);
671 		}
672 
673 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
674 		iov[iovlen].iov_base = adhdr;
675 		iov[iovlen].iov_len = adhdr_length;
676 		total_len += adhdr_length;
677 		iovlen++;
678 	}
679 
680 	/*
681 	 * Setup receipt of header digest if enabled and connection
682 	 * is in full feature mode.
683 	 */
684 	if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
685 		ASSERT(iovlen < ISCSI_MAX_IOVEC);
686 		iov[iovlen].iov_base = (char *)&crc_actual;
687 		iov[iovlen].iov_len = sizeof (uint32_t);
688 		total_len += sizeof (uint32_t);
689 		iovlen++;
690 	}
691 
692 	/*
693 	 * Read additional header and/or header digest if pieces
694 	 * are available
695 	 */
696 	if (iovlen > 1) {
697 
698 		bzero(&msg, sizeof (msg));
699 		msg.msg_iov	= iov;
700 		msg.msg_flags	= MSG_WAITALL;
701 		msg.msg_iovlen	= iovlen;
702 
703 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
704 		if (recv_len != total_len) {
705 			return (ISCSI_STATUS_TCP_RX_ERROR);
706 		}
707 
708 		DTRACE_PROBE4(rx_adhdr_digest, void *, socket,
709 		    iovec_t *iop, &iov[0], int, iovlen, int, total_len);
710 
711 		/*
712 		 * Verify header digest if enabled and connection
713 		 * is in full feature mode
714 		 */
715 		if ((flags & ISCSI_NET_HEADER_DIGEST) != 0) {
716 			crc_calculated = iscsi_crc32c((uchar_t *)ihp,
717 			    sizeof (iscsi_hdr_t) + ihp->hlength * 4);
718 
719 			/*
720 			 * Converting actual CRC read via ntohl is not
721 			 * necessary because iscsi_crc32c calculates the
722 			 * value as it expect to be read
723 			 */
724 			if (crc_calculated != crc_actual) {
725 				/* Invalid Header Digest */
726 				cmn_err(CE_WARN, "iscsi connection(%p) "
727 				    "protocol error - encountered a header "
728 				    "digest error expected:0x%08x "
729 				    "received:0x%08x", socket,
730 				    crc_calculated, crc_actual);
731 				return (ISCSI_STATUS_HEADER_DIGEST_ERROR);
732 			}
733 		}
734 	}
735 	return (ISCSI_STATUS_SUCCESS);
736 }
737 
738 
739 /*
740  * iscsi_net_recvdata - receive iscsi data payload from socket
741  */
742 static iscsi_status_t
743 iscsi_net_recvdata(void *socket, iscsi_hdr_t *ihp, char *data,
744     int max_data_length, int timeout, int flags)
745 {
746 	struct iovec	iov[3];
747 	int		iovlen			= 1;
748 	int		total_len		= 0;
749 	int		dlength			= 0;
750 	int		pad_len			= 0;
751 	uint8_t		pad[ISCSI_PAD_WORD_LEN];
752 	uint32_t	crc_calculated		= 0;
753 	uint32_t	crc_actual		= 0;
754 	struct msghdr	msg;
755 	size_t		recv_len;
756 
757 	ASSERT(socket != NULL);
758 	ASSERT(ihp != NULL);
759 	ASSERT(data != NULL);
760 
761 	/* short hand dlength */
762 	dlength = ntoh24(ihp->dlength);
763 
764 	/* verify dlength is valid */
765 	if (dlength > max_data_length) {
766 		cmn_err(CE_WARN, "iscsi connection(%p) protocol error - "
767 		    "invalid data lengths itt:0x%x received:0x%x "
768 		    "max expected:0x%x", socket, ihp->itt,
769 		    dlength, max_data_length);
770 		return (ISCSI_STATUS_PROTOCOL_ERROR);
771 	}
772 
773 	if (dlength) {
774 		/* calculate pad */
775 		pad_len = ((ISCSI_PAD_WORD_LEN -
776 		    (dlength & (ISCSI_PAD_WORD_LEN - 1))) &
777 		    (ISCSI_PAD_WORD_LEN - 1));
778 
779 		/* setup data iovec */
780 		iov[0].iov_base	= (char *)data;
781 		iov[0].iov_len	= dlength;
782 		total_len	= dlength;
783 
784 		/* if pad setup pad iovec */
785 		if (pad_len) {
786 			iov[iovlen].iov_base	= (char *)&pad;
787 			iov[iovlen].iov_len	= pad_len;
788 			total_len		+= pad_len;
789 			iovlen++;
790 		}
791 
792 		/* setup data digest */
793 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
794 			iov[iovlen].iov_base	= (char *)&crc_actual;
795 			iov[iovlen].iov_len	= sizeof (crc_actual);
796 			total_len		+= sizeof (crc_actual);
797 			iovlen++;
798 		}
799 
800 		bzero(&msg, sizeof (msg));
801 		msg.msg_iov	= iov;
802 		msg.msg_flags	= MSG_WAITALL;
803 		msg.msg_iovlen	= iovlen;
804 
805 		recv_len = iscsi_net->recvmsg(socket, &msg, timeout);
806 		if (recv_len != total_len) {
807 			return (ISCSI_STATUS_TCP_RX_ERROR);
808 		}
809 
810 		DTRACE_PROBE4(rx_data, void *, socket, iovec_t *iop,
811 		    &iov[0], int, iovlen, int, total_len);
812 
813 		/* verify data digest is present */
814 		if ((flags & ISCSI_NET_DATA_DIGEST) != 0) {
815 
816 			crc_calculated = iscsi_crc32c(data, dlength);
817 			crc_calculated = iscsi_crc32c_continued(
818 			    (char *)&pad, pad_len, crc_calculated);
819 
820 			/*
821 			 * Converting actual CRC read via ntohl is not
822 			 * necessary because iscsi_crc32c calculates the
823 			 * value as it expects to be read
824 			 */
825 			if (crc_calculated != crc_actual) {
826 				cmn_err(CE_WARN, "iscsi connection(%p) "
827 				    "protocol error - encountered a data "
828 				    "digest error itt:0x%x expected:0x%08x "
829 				    "received:0x%08x", socket,
830 				    ihp->itt, crc_calculated, crc_actual);
831 				return (ISCSI_STATUS_DATA_DIGEST_ERROR);
832 			}
833 		}
834 	}
835 	return (ISCSI_STATUS_SUCCESS);
836 }
837 
838 /*
839  * Convert a prefix length to a mask.
840  */
841 static iscsi_status_t
842 iscsi_prefixlentomask(int prefixlen, int maxlen, uchar_t *mask)
843 {
844 	if (prefixlen < 0 || prefixlen > maxlen || mask == NULL) {
845 		return (ISCSI_STATUS_INTERNAL_ERROR);
846 	}
847 
848 	while (prefixlen > 0) {
849 		if (prefixlen >= 8) {
850 			*mask = 0xff;
851 			mask++;
852 			prefixlen = prefixlen - 8;
853 			continue;
854 		}
855 		*mask = *mask | (1 << (8 - prefixlen));
856 		prefixlen--;
857 	}
858 	return (ISCSI_STATUS_SUCCESS);
859 }
860 
861 static iscsi_status_t
862 iscsi_net_interface()
863 {
864 	struct in_addr	braddr;
865 	struct in_addr	subnet;
866 	struct in_addr	myaddr;
867 	struct in_addr	defgateway;
868 	struct in6_addr myaddr6;
869 	struct in6_addr subnet6;
870 	uchar_t		mask_prefix = 0;
871 	int		mask_bits   = 1;
872 	TIUSER		*tiptr;
873 	TIUSER		*tiptr6;
874 	char		ifname[16]	= {0};
875 	iscsi_status_t	status;
876 
877 	struct knetconfig dl_udp_netconf = {
878 	    NC_TPI_CLTS,
879 	    NC_INET,
880 	    NC_UDP,
881 	    0, };
882 	struct knetconfig dl_udp6_netconf = {
883 	    NC_TPI_CLTS,
884 	    NC_INET6,
885 	    NC_UDP,
886 	    0, };
887 
888 	(void) strlcpy(ifname, rootfs.bo_ifname, sizeof (ifname));
889 
890 	if (iscsiboot_prop->boot_nic.sin_family == AF_INET) {
891 		/*
892 		 * Assumes only one linkage array element.
893 		 */
894 		dl_udp_netconf.knc_rdev =
895 		    makedevice(clone_major, ddi_name_to_major("udp"));
896 
897 		myaddr.s_addr =
898 		    iscsiboot_prop->boot_nic.nic_ip_u.u_in4.s_addr;
899 
900 		mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix;
901 		(void) memset(&subnet.s_addr, 0, sizeof (subnet));
902 		status = iscsi_prefixlentomask(mask_prefix, IP_4_BITS,
903 		    (uchar_t *)&subnet.s_addr);
904 		if (status != ISCSI_STATUS_SUCCESS) {
905 			return (status);
906 		}
907 
908 		mask_bits = mask_bits << (IP_4_BITS - mask_prefix);
909 		mask_bits = mask_bits - 1;
910 		/*
911 		 * Set the last mask bits of the ip address with 1, then
912 		 * we can get the broadcast address.
913 		 */
914 		braddr.s_addr = myaddr.s_addr | mask_bits;
915 
916 		defgateway.s_addr =
917 		    iscsiboot_prop->boot_nic.nic_gw_u.u_in4.s_addr;
918 
919 		/* initialize interface */
920 		if (t_kopen((file_t *)NULL, dl_udp_netconf.knc_rdev,
921 		    FREAD|FWRITE, &tiptr, CRED()) == 0) {
922 			if (kdlifconfig(tiptr, AF_INET, &myaddr, &subnet,
923 			    &braddr, &defgateway, ifname)) {
924 				cmn_err(CE_WARN, "Failed to configure"
925 				    " iSCSI boot nic");
926 				(void) t_kclose(tiptr, 0);
927 				return (ISCSI_STATUS_INTERNAL_ERROR);
928 			}
929 		} else {
930 			cmn_err(CE_WARN, "Failed to configure"
931 			    " iSCSI boot nic");
932 			return (ISCSI_STATUS_INTERNAL_ERROR);
933 		}
934 		return (ISCSI_STATUS_SUCCESS);
935 	} else {
936 		dl_udp6_netconf.knc_rdev =
937 		    makedevice(clone_major, ddi_name_to_major("udp6"));
938 
939 		bcopy(&iscsiboot_prop->boot_nic.nic_ip_u.u_in6.s6_addr,
940 		    &myaddr6.s6_addr, 16);
941 
942 		(void) memset(&subnet6, 0, sizeof (subnet6));
943 		mask_prefix = iscsiboot_prop->boot_nic.sub_mask_prefix;
944 		status = iscsi_prefixlentomask(mask_prefix, IP_6_BITS,
945 		    (uchar_t *)&subnet6.s6_addr);
946 		if (status != ISCSI_STATUS_SUCCESS) {
947 			return (status);
948 		}
949 
950 		if (t_kopen((file_t *)NULL, dl_udp6_netconf.knc_rdev,
951 		    FREAD|FWRITE, &tiptr6, CRED()) == 0) {
952 			if (kdlifconfig(tiptr6, AF_INET6, &myaddr6,
953 			    &subnet6, NULL, NULL, ifname)) {
954 				cmn_err(CE_WARN, "Failed to configure"
955 				    " iSCSI boot nic");
956 				(void) t_kclose(tiptr, 0);
957 				return (ISCSI_STATUS_INTERNAL_ERROR);
958 			}
959 		} else {
960 			cmn_err(CE_WARN, "Failed to configure"
961 			    " iSCSI boot nic");
962 			return (ISCSI_STATUS_INTERNAL_ERROR);
963 		}
964 		return (ISCSI_STATUS_SUCCESS);
965 	}
966 }
967