xref: /illumos-gate/usr/src/uts/common/fs/sockfs/sockstr.c (revision 8e50dcc9f00b393d43e6aa42b820bcbf1d3e1ce4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/inttypes.h>
31 #include <sys/t_lock.h>
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/buf.h>
35 #include <sys/conf.h>
36 #include <sys/cred.h>
37 #include <sys/kmem.h>
38 #include <sys/sysmacros.h>
39 #include <sys/vfs.h>
40 #include <sys/vnode.h>
41 #include <sys/debug.h>
42 #include <sys/errno.h>
43 #include <sys/time.h>
44 #include <sys/file.h>
45 #include <sys/user.h>
46 #include <sys/stream.h>
47 #include <sys/strsubr.h>
48 #include <sys/esunddi.h>
49 #include <sys/flock.h>
50 #include <sys/modctl.h>
51 #include <sys/vtrace.h>
52 #include <sys/strsun.h>
53 #include <sys/cmn_err.h>
54 #include <sys/proc.h>
55 #include <sys/ddi.h>
56 #include <sys/kmem_impl.h>
57 
58 #include <sys/suntpi.h>
59 #include <sys/socket.h>
60 #include <sys/sockio.h>
61 #include <sys/socketvar.h>
62 #include <netinet/in.h>
63 
64 #include <sys/tiuser.h>
65 #define	_SUN_TPI_VERSION	2
66 #include <sys/tihdr.h>
67 
68 #include <inet/kssl/ksslapi.h>
69 
70 #include <c2/audit.h>
71 
72 #include <sys/dcopy.h>
73 
74 int so_default_version = SOV_SOCKSTREAM;
75 
76 #ifdef DEBUG
77 /* Set sockdebug to print debug messages when SO_DEBUG is set */
78 int sockdebug = 0;
79 
80 /* Set sockprinterr to print error messages when SO_DEBUG is set */
81 int sockprinterr = 0;
82 
83 /*
84  * Set so_default_options to SO_DEBUG is all sockets should be created
85  * with SO_DEBUG set. This is needed to get debug printouts from the
86  * socket() call itself.
87  */
88 int so_default_options = 0;
89 #endif /* DEBUG */
90 
91 #ifdef SOCK_TEST
92 /*
93  * Set to number of ticks to limit cv_waits for code coverage testing.
94  * Set to 1000 when SO_DEBUG is set to 2.
95  */
96 clock_t sock_test_timelimit = 0;
97 #endif /* SOCK_TEST */
98 
99 /*
100  * For concurrency testing of e.g. opening /dev/ip which does not
101  * handle T_INFO_REQ messages.
102  */
103 int so_no_tinfo = 0;
104 
105 /*
106  * Timeout for getting a T_CAPABILITY_ACK - it is possible for a provider
107  * to simply ignore the T_CAPABILITY_REQ.
108  */
109 clock_t	sock_capability_timeout	= 2;	/* seconds */
110 
111 static int	do_tcapability(struct sonode *so, t_uscalar_t cap_bits1);
112 static void	so_removehooks(struct sonode *so);
113 
114 static mblk_t *strsock_proto(vnode_t *vp, mblk_t *mp,
115 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
116 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
117 static mblk_t *strsock_misc(vnode_t *vp, mblk_t *mp,
118 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
119 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups);
120 
121 static int tlitosyserr(int terr);
122 
123 /*
124  * Sodirect kmem_cache and put/wakeup functions.
125  */
126 struct kmem_cache *socktpi_sod_cache;
127 static int sodput(sodirect_t *, mblk_t *);
128 static void sodwakeup(sodirect_t *);
129 
130 /*
131  * Called by sockinit() when sockfs is loaded.
132  *
133  * Check for uioasync dcopy support and if supported
134  * allocate the sodirect_t kmem_cache socktpi_sod_cache.
135  */
136 int
137 sostr_init()
138 {
139 	if (uioasync.enabled == B_TRUE && modload("misc", "dcopy") == -1) {
140 		/* No dcopy KAPI driver, disable uioa */
141 		uioasync.enabled = B_FALSE;
142 	}
143 
144 	if (uioasync.enabled == B_TRUE) {
145 		/* Uioasync enabled so sodirect will be used */
146 		socktpi_sod_cache = kmem_cache_create("socktpi_sod_cache",
147 		    sizeof (sodirect_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
148 	}
149 
150 	return (0);
151 }
152 
153 /*
154  * Convert a socket to a stream. Invoked when the illusory sockmod
155  * is popped from the stream.
156  * Change the stream head back to default operation without losing
157  * any messages (T_conn_ind's are moved to the stream head queue).
158  */
159 int
160 so_sock2stream(struct sonode *so)
161 {
162 	struct vnode		*vp = SOTOV(so);
163 	queue_t			*rq;
164 	mblk_t			*mp;
165 	int			error = 0;
166 
167 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
168 
169 	mutex_enter(&so->so_lock);
170 	so_lock_single(so);
171 
172 	ASSERT(so->so_version != SOV_STREAM);
173 
174 	if (so->so_state & SS_DIRECT) {
175 		mblk_t **mpp;
176 		int rval;
177 
178 		/*
179 		 * Tell the transport below that sockmod is being popped
180 		 */
181 		mutex_exit(&so->so_lock);
182 		error = strioctl(vp, _SIOCSOCKFALLBACK, 0, 0, K_TO_K, CRED(),
183 		    &rval);
184 		mutex_enter(&so->so_lock);
185 		if (error != 0) {
186 			dprintso(so, 0, ("so_sock2stream(%p): "
187 			    "_SIOCSOCKFALLBACK failed\n", so));
188 			goto exit;
189 		}
190 		so->so_state &= ~SS_DIRECT;
191 
192 		for (mpp = &so->so_conn_ind_head; (mp = *mpp) != NULL;
193 		    mpp = &mp->b_next) {
194 			struct T_conn_ind	*conn_ind;
195 
196 			/*
197 			 * strsock_proto() has already verified the length of
198 			 * this message block.
199 			 */
200 			ASSERT(MBLKL(mp) >= sizeof (struct T_conn_ind));
201 
202 			conn_ind = (struct T_conn_ind *)mp->b_rptr;
203 			if (conn_ind->OPT_length == 0 &&
204 			    conn_ind->OPT_offset == 0)
205 				continue;
206 
207 			if (DB_REF(mp) > 1) {
208 				mblk_t	*newmp;
209 				size_t	length;
210 				cred_t	*cr;
211 
212 				/*
213 				 * Copy the message block because it is used
214 				 * elsewhere, too.
215 				 */
216 				length = MBLKL(mp);
217 				newmp = soallocproto(length, _ALLOC_INTR);
218 				if (newmp == NULL) {
219 					error = EINTR;
220 					goto exit;
221 				}
222 				bcopy(mp->b_rptr, newmp->b_wptr, length);
223 				newmp->b_wptr += length;
224 				newmp->b_next = mp->b_next;
225 				cr = DB_CRED(mp);
226 				if (cr != NULL)
227 					mblk_setcred(newmp, cr);
228 				DB_CPID(newmp) = DB_CPID(mp);
229 
230 				/*
231 				 * Link the new message block into the queue
232 				 * and free the old one.
233 				 */
234 				*mpp = newmp;
235 				mp->b_next = NULL;
236 				freemsg(mp);
237 
238 				mp = newmp;
239 				conn_ind = (struct T_conn_ind *)mp->b_rptr;
240 			}
241 
242 			/*
243 			 * Remove options added by TCP for accept fast-path.
244 			 */
245 			conn_ind->OPT_length = 0;
246 			conn_ind->OPT_offset = 0;
247 		}
248 	}
249 
250 	so->so_version = SOV_STREAM;
251 	so->so_priv = NULL;
252 
253 	/*
254 	 * Remove the hooks in the stream head to avoid queuing more
255 	 * packets in sockfs.
256 	 */
257 	mutex_exit(&so->so_lock);
258 	so_removehooks(so);
259 	mutex_enter(&so->so_lock);
260 
261 	/*
262 	 * Clear any state related to urgent data. Leave any T_EXDATA_IND
263 	 * on the queue - the behavior of urgent data after a switch is
264 	 * left undefined.
265 	 */
266 	so->so_error = so->so_delayed_error = 0;
267 	freemsg(so->so_oobmsg);
268 	so->so_oobmsg = NULL;
269 	so->so_oobsigcnt = so->so_oobcnt = 0;
270 
271 	so->so_state &= ~(SS_RCVATMARK|SS_OOBPEND|SS_HAVEOOBDATA|SS_HADOOBDATA|
272 	    SS_HASCONNIND|SS_SAVEDEOR);
273 	ASSERT(so_verify_oobstate(so));
274 
275 	freemsg(so->so_ack_mp);
276 	so->so_ack_mp = NULL;
277 
278 	/*
279 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
280 	 */
281 	so_flush_discon_ind(so);
282 
283 	/*
284 	 * Move any queued T_CONN_IND messages to stream head queue.
285 	 */
286 	rq = RD(strvp2wq(vp));
287 	while ((mp = so->so_conn_ind_head) != NULL) {
288 		so->so_conn_ind_head = mp->b_next;
289 		mp->b_next = NULL;
290 		if (so->so_conn_ind_head == NULL) {
291 			ASSERT(so->so_conn_ind_tail == mp);
292 			so->so_conn_ind_tail = NULL;
293 		}
294 		dprintso(so, 0,
295 		    ("so_sock2stream(%p): moving T_CONN_IND\n",
296 		    so));
297 
298 		/* Drop lock across put() */
299 		mutex_exit(&so->so_lock);
300 		put(rq, mp);
301 		mutex_enter(&so->so_lock);
302 	}
303 
304 exit:
305 	ASSERT(MUTEX_HELD(&so->so_lock));
306 	so_unlock_single(so, SOLOCKED);
307 	mutex_exit(&so->so_lock);
308 	return (error);
309 }
310 
311 /*
312  * Covert a stream back to a socket. This is invoked when the illusory
313  * sockmod is pushed on a stream (where the stream was "created" by
314  * popping the illusory sockmod).
315  * This routine can not recreate the socket state (certain aspects of
316  * it like urgent data state and the bound/connected addresses for AF_UNIX
317  * sockets can not be recreated by asking the transport for information).
318  * Thus this routine implicitly assumes that the socket is in an initial
319  * state (as if it was just created). It flushes any messages queued on the
320  * read queue to avoid dealing with e.g. TPI acks or T_exdata_ind messages.
321  */
322 void
323 so_stream2sock(struct sonode *so)
324 {
325 	struct vnode *vp = SOTOV(so);
326 
327 	ASSERT(MUTEX_HELD(&so->so_plumb_lock));
328 
329 	mutex_enter(&so->so_lock);
330 	so_lock_single(so);
331 	ASSERT(so->so_version == SOV_STREAM);
332 	so->so_version = SOV_SOCKSTREAM;
333 	so->so_pushcnt = 0;
334 	mutex_exit(&so->so_lock);
335 
336 	/*
337 	 * Set a permenent error to force any thread in sorecvmsg to
338 	 * return (and drop SOREADLOCKED). Clear the error once
339 	 * we have SOREADLOCKED.
340 	 * This makes a read sleeping during the I_PUSH of sockmod return
341 	 * EIO.
342 	 */
343 	strsetrerror(SOTOV(so), EIO, 1, NULL);
344 
345 	/*
346 	 * Get the read lock before flushing data to avoid
347 	 * problems with the T_EXDATA_IND MSG_PEEK code in sorecvmsg.
348 	 */
349 	mutex_enter(&so->so_lock);
350 	(void) so_lock_read(so, 0);	/* Set SOREADLOCKED */
351 	mutex_exit(&so->so_lock);
352 
353 	strsetrerror(SOTOV(so), 0, 0, NULL);
354 	so_installhooks(so);
355 
356 	/*
357 	 * Flush everything on the read queue.
358 	 * This ensures that no T_CONN_IND remain and that no T_EXDATA_IND
359 	 * remain; those types of messages would confuse sockfs.
360 	 */
361 	strflushrq(vp, FLUSHALL);
362 	mutex_enter(&so->so_lock);
363 
364 	/*
365 	 * Flush the T_DISCON_IND on so_discon_ind_mp.
366 	 */
367 	so_flush_discon_ind(so);
368 	so_unlock_read(so);	/* Clear SOREADLOCKED */
369 
370 	so_unlock_single(so, SOLOCKED);
371 	mutex_exit(&so->so_lock);
372 }
373 
374 /*
375  * Install the hooks in the stream head.
376  */
377 void
378 so_installhooks(struct sonode *so)
379 {
380 	struct vnode *vp = SOTOV(so);
381 
382 	strsetrputhooks(vp, SH_SIGALLDATA | SH_IGN_ZEROLEN | SH_CONSOL_DATA,
383 	    strsock_proto, strsock_misc);
384 	strsetwputhooks(vp, SH_SIGPIPE | SH_RECHECK_ERR, 0);
385 }
386 
387 /*
388  * Remove the hooks in the stream head.
389  */
390 static void
391 so_removehooks(struct sonode *so)
392 {
393 	struct vnode *vp = SOTOV(so);
394 
395 	strsetrputhooks(vp, 0, NULL, NULL);
396 	strsetwputhooks(vp, 0, STRTIMOUT);
397 	/*
398 	 * Leave read behavior as it would have been for a normal
399 	 * stream i.e. a read of an M_PROTO will fail.
400 	 */
401 }
402 
403 /*
404  * Initialize the streams side of a socket including
405  * T_info_req/ack processing. If tso is not NULL its values are used thereby
406  * avoiding the T_INFO_REQ.
407  */
408 int
409 so_strinit(struct sonode *so, struct sonode *tso)
410 {
411 	struct vnode *vp = SOTOV(so);
412 	struct stdata *stp;
413 	mblk_t *mp;
414 	int error;
415 
416 	dprintso(so, 1, ("so_strinit(%p)\n", so));
417 
418 	/* Preallocate an unbind_req message */
419 	mp = soallocproto(sizeof (struct T_unbind_req), _ALLOC_SLEEP);
420 	mutex_enter(&so->so_lock);
421 	so->so_unbind_mp = mp;
422 #ifdef DEBUG
423 	so->so_options = so_default_options;
424 #endif /* DEBUG */
425 	mutex_exit(&so->so_lock);
426 
427 	so_installhooks(so);
428 
429 	/*
430 	 * The T_CAPABILITY_REQ should be the first message sent down because
431 	 * at least TCP has a fast-path for this which avoids timeouts while
432 	 * waiting for the T_CAPABILITY_ACK under high system load.
433 	 */
434 	if (tso == NULL) {
435 		error = do_tcapability(so, TC1_ACCEPTOR_ID | TC1_INFO);
436 		if (error)
437 			return (error);
438 	} else {
439 		mutex_enter(&so->so_lock);
440 		so->so_tsdu_size = tso->so_tsdu_size;
441 		so->so_etsdu_size = tso->so_etsdu_size;
442 		so->so_addr_size = tso->so_addr_size;
443 		so->so_opt_size = tso->so_opt_size;
444 		so->so_tidu_size = tso->so_tidu_size;
445 		so->so_serv_type = tso->so_serv_type;
446 		so->so_mode = tso->so_mode & ~SM_ACCEPTOR_ID;
447 		mutex_exit(&so->so_lock);
448 
449 		/* the following do_tcapability may update so->so_mode */
450 		if ((tso->so_serv_type != T_CLTS) &&
451 		    !(tso->so_state & SS_DIRECT)) {
452 			error = do_tcapability(so, TC1_ACCEPTOR_ID);
453 			if (error)
454 				return (error);
455 		}
456 	}
457 	/*
458 	 * If the addr_size is 0 we treat it as already bound
459 	 * and connected. This is used by the routing socket.
460 	 * We set the addr_size to something to allocate a the address
461 	 * structures.
462 	 */
463 	if (so->so_addr_size == 0) {
464 		so->so_state |= SS_ISBOUND | SS_ISCONNECTED;
465 		/* Address size can vary with address families. */
466 		if (so->so_family == AF_INET6)
467 			so->so_addr_size =
468 			    (t_scalar_t)sizeof (struct sockaddr_in6);
469 		else
470 			so->so_addr_size =
471 			    (t_scalar_t)sizeof (struct sockaddr_in);
472 		ASSERT(so->so_unbind_mp);
473 	}
474 	/*
475 	 * Allocate the addresses.
476 	 */
477 	ASSERT(so->so_laddr_sa == NULL && so->so_faddr_sa == NULL);
478 	ASSERT(so->so_laddr_len == 0 && so->so_faddr_len == 0);
479 	so->so_laddr_maxlen = so->so_faddr_maxlen =
480 	    P2ROUNDUP(so->so_addr_size, KMEM_ALIGN);
481 	so->so_laddr_sa = kmem_alloc(so->so_laddr_maxlen * 2, KM_SLEEP);
482 	so->so_faddr_sa = (struct sockaddr *)((caddr_t)so->so_laddr_sa
483 	    + so->so_laddr_maxlen);
484 
485 	if (so->so_family == AF_UNIX) {
486 		/*
487 		 * Initialize AF_UNIX related fields.
488 		 */
489 		bzero(&so->so_ux_laddr, sizeof (so->so_ux_laddr));
490 		bzero(&so->so_ux_faddr, sizeof (so->so_ux_faddr));
491 	}
492 
493 	stp = vp->v_stream;
494 	/*
495 	 * Have to keep minpsz at zero in order to allow write/send of zero
496 	 * bytes.
497 	 */
498 	mutex_enter(&stp->sd_lock);
499 	if (stp->sd_qn_minpsz == 1)
500 		stp->sd_qn_minpsz = 0;
501 	mutex_exit(&stp->sd_lock);
502 
503 	/*
504 	 * If sodirect capable allocate and initialize sodirect_t.
505 	 * Note, SS_SODIRECT is set in socktpi_open().
506 	 */
507 	if (so->so_state & SS_SODIRECT) {
508 		sodirect_t	*sodp;
509 
510 		ASSERT(so->so_direct == NULL);
511 
512 		sodp = kmem_cache_alloc(socktpi_sod_cache, KM_SLEEP);
513 		sodp->sod_state = SOD_ENABLED | SOD_WAKE_NOT;
514 		sodp->sod_want = 0;
515 		sodp->sod_q = RD(stp->sd_wrq);
516 		sodp->sod_enqueue = sodput;
517 		sodp->sod_wakeup = sodwakeup;
518 		sodp->sod_uioafh = NULL;
519 		sodp->sod_uioaft = NULL;
520 		sodp->sod_lock = &stp->sd_lock;
521 		/*
522 		 * Remainder of the sod_uioa members are left uninitialized
523 		 * but will be initialized later by uioainit() before uioa
524 		 * is enabled.
525 		 */
526 		sodp->sod_uioa.uioa_state = UIOA_ALLOC;
527 		so->so_direct = sodp;
528 		stp->sd_sodirect = sodp;
529 	}
530 
531 	return (0);
532 }
533 
534 static void
535 copy_tinfo(struct sonode *so, struct T_info_ack *tia)
536 {
537 	so->so_tsdu_size = tia->TSDU_size;
538 	so->so_etsdu_size = tia->ETSDU_size;
539 	so->so_addr_size = tia->ADDR_size;
540 	so->so_opt_size = tia->OPT_size;
541 	so->so_tidu_size = tia->TIDU_size;
542 	so->so_serv_type = tia->SERV_type;
543 	switch (tia->CURRENT_state) {
544 	case TS_UNBND:
545 		break;
546 	case TS_IDLE:
547 		so->so_state |= SS_ISBOUND;
548 		so->so_laddr_len = 0;
549 		so->so_state &= ~SS_LADDR_VALID;
550 		break;
551 	case TS_DATA_XFER:
552 		so->so_state |= SS_ISBOUND|SS_ISCONNECTED;
553 		so->so_laddr_len = 0;
554 		so->so_faddr_len = 0;
555 		so->so_state &= ~(SS_LADDR_VALID | SS_FADDR_VALID);
556 		break;
557 	}
558 
559 	/*
560 	 * Heuristics for determining the socket mode flags
561 	 * (SM_ATOMIC, SM_CONNREQUIRED, SM_ADDR, SM_FDPASSING,
562 	 * and SM_EXDATA, SM_OPTDATA, and SM_BYTESTREAM)
563 	 * from the info ack.
564 	 */
565 	if (so->so_serv_type == T_CLTS) {
566 		so->so_mode |= SM_ATOMIC | SM_ADDR;
567 	} else {
568 		so->so_mode |= SM_CONNREQUIRED;
569 		if (so->so_etsdu_size != 0 && so->so_etsdu_size != -2)
570 			so->so_mode |= SM_EXDATA;
571 	}
572 	if (so->so_type == SOCK_SEQPACKET || so->so_type == SOCK_RAW) {
573 		/* Semantics are to discard tail end of messages */
574 		so->so_mode |= SM_ATOMIC;
575 	}
576 	if (so->so_family == AF_UNIX) {
577 		so->so_mode |= SM_FDPASSING | SM_OPTDATA;
578 		if (so->so_addr_size == -1) {
579 			/* MAXPATHLEN + soun_family + nul termination */
580 			so->so_addr_size = (t_scalar_t)(MAXPATHLEN +
581 			    sizeof (short) + 1);
582 		}
583 		if (so->so_type == SOCK_STREAM) {
584 			/*
585 			 * Make it into a byte-stream transport.
586 			 * SOCK_SEQPACKET sockets are unchanged.
587 			 */
588 			so->so_tsdu_size = 0;
589 		}
590 	} else if (so->so_addr_size == -1) {
591 		/*
592 		 * Logic extracted from sockmod - have to pick some max address
593 		 * length in order to preallocate the addresses.
594 		 */
595 		so->so_addr_size = SOA_DEFSIZE;
596 	}
597 	if (so->so_tsdu_size == 0)
598 		so->so_mode |= SM_BYTESTREAM;
599 }
600 
601 static int
602 check_tinfo(struct sonode *so)
603 {
604 	/* Consistency checks */
605 	if (so->so_type == SOCK_DGRAM && so->so_serv_type != T_CLTS) {
606 		eprintso(so, ("service type and socket type mismatch\n"));
607 		eprintsoline(so, EPROTO);
608 		return (EPROTO);
609 	}
610 	if (so->so_type == SOCK_STREAM && so->so_serv_type == T_CLTS) {
611 		eprintso(so, ("service type and socket type mismatch\n"));
612 		eprintsoline(so, EPROTO);
613 		return (EPROTO);
614 	}
615 	if (so->so_type == SOCK_SEQPACKET && so->so_serv_type == T_CLTS) {
616 		eprintso(so, ("service type and socket type mismatch\n"));
617 		eprintsoline(so, EPROTO);
618 		return (EPROTO);
619 	}
620 	if (so->so_family == AF_INET &&
621 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in)) {
622 		eprintso(so,
623 		    ("AF_INET must have sockaddr_in address length. Got %d\n",
624 		    so->so_addr_size));
625 		eprintsoline(so, EMSGSIZE);
626 		return (EMSGSIZE);
627 	}
628 	if (so->so_family == AF_INET6 &&
629 	    so->so_addr_size != (t_scalar_t)sizeof (struct sockaddr_in6)) {
630 		eprintso(so,
631 		    ("AF_INET6 must have sockaddr_in6 address length. Got %d\n",
632 		    so->so_addr_size));
633 		eprintsoline(so, EMSGSIZE);
634 		return (EMSGSIZE);
635 	}
636 
637 	dprintso(so, 1, (
638 	    "tinfo: serv %d tsdu %d, etsdu %d, addr %d, opt %d, tidu %d\n",
639 	    so->so_serv_type, so->so_tsdu_size, so->so_etsdu_size,
640 	    so->so_addr_size, so->so_opt_size,
641 	    so->so_tidu_size));
642 	dprintso(so, 1, ("tinfo: so_state %s\n",
643 	    pr_state(so->so_state, so->so_mode)));
644 	return (0);
645 }
646 
647 /*
648  * Send down T_info_req and wait for the ack.
649  * Record interesting T_info_ack values in the sonode.
650  */
651 static int
652 do_tinfo(struct sonode *so)
653 {
654 	struct T_info_req tir;
655 	mblk_t *mp;
656 	int error;
657 
658 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
659 
660 	if (so_no_tinfo) {
661 		so->so_addr_size = 0;
662 		return (0);
663 	}
664 
665 	dprintso(so, 1, ("do_tinfo(%p)\n", so));
666 
667 	/* Send T_INFO_REQ */
668 	tir.PRIM_type = T_INFO_REQ;
669 	mp = soallocproto1(&tir, sizeof (tir),
670 	    sizeof (struct T_info_req) + sizeof (struct T_info_ack),
671 	    _ALLOC_INTR);
672 	if (mp == NULL) {
673 		eprintsoline(so, ENOBUFS);
674 		return (ENOBUFS);
675 	}
676 	/* T_INFO_REQ has to be M_PCPROTO */
677 	DB_TYPE(mp) = M_PCPROTO;
678 
679 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
680 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
681 	if (error) {
682 		eprintsoline(so, error);
683 		return (error);
684 	}
685 	mutex_enter(&so->so_lock);
686 	/* Wait for T_INFO_ACK */
687 	if ((error = sowaitprim(so, T_INFO_REQ, T_INFO_ACK,
688 	    (t_uscalar_t)sizeof (struct T_info_ack), &mp, 0))) {
689 		mutex_exit(&so->so_lock);
690 		eprintsoline(so, error);
691 		return (error);
692 	}
693 
694 	ASSERT(mp);
695 	copy_tinfo(so, (struct T_info_ack *)mp->b_rptr);
696 	mutex_exit(&so->so_lock);
697 	freemsg(mp);
698 	return (check_tinfo(so));
699 }
700 
701 /*
702  * Send down T_capability_req and wait for the ack.
703  * Record interesting T_capability_ack values in the sonode.
704  */
705 static int
706 do_tcapability(struct sonode *so, t_uscalar_t cap_bits1)
707 {
708 	struct T_capability_req tcr;
709 	struct T_capability_ack *tca;
710 	mblk_t *mp;
711 	int error;
712 
713 	ASSERT(cap_bits1 != 0);
714 	ASSERT((cap_bits1 & ~(TC1_ACCEPTOR_ID | TC1_INFO)) == 0);
715 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
716 
717 	if (so->so_provinfo->tpi_capability == PI_NO)
718 		return (do_tinfo(so));
719 
720 	if (so_no_tinfo) {
721 		so->so_addr_size = 0;
722 		if ((cap_bits1 &= ~TC1_INFO) == 0)
723 			return (0);
724 	}
725 
726 	dprintso(so, 1, ("do_tcapability(%p)\n", so));
727 
728 	/* Send T_CAPABILITY_REQ */
729 	tcr.PRIM_type = T_CAPABILITY_REQ;
730 	tcr.CAP_bits1 = cap_bits1;
731 	mp = soallocproto1(&tcr, sizeof (tcr),
732 	    sizeof (struct T_capability_req) + sizeof (struct T_capability_ack),
733 	    _ALLOC_INTR);
734 	if (mp == NULL) {
735 		eprintsoline(so, ENOBUFS);
736 		return (ENOBUFS);
737 	}
738 	/* T_CAPABILITY_REQ should be M_PCPROTO here */
739 	DB_TYPE(mp) = M_PCPROTO;
740 
741 	error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
742 	    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR, 0);
743 	if (error) {
744 		eprintsoline(so, error);
745 		return (error);
746 	}
747 	mutex_enter(&so->so_lock);
748 	/* Wait for T_CAPABILITY_ACK */
749 	if ((error = sowaitprim(so, T_CAPABILITY_REQ, T_CAPABILITY_ACK,
750 	    (t_uscalar_t)sizeof (*tca), &mp, sock_capability_timeout * hz))) {
751 		mutex_exit(&so->so_lock);
752 		PI_PROVLOCK(so->so_provinfo);
753 		if (so->so_provinfo->tpi_capability == PI_DONTKNOW)
754 			so->so_provinfo->tpi_capability = PI_NO;
755 		PI_PROVUNLOCK(so->so_provinfo);
756 		ASSERT((so->so_mode & SM_ACCEPTOR_ID) == 0);
757 		if (cap_bits1 & TC1_INFO) {
758 			/*
759 			 * If the T_CAPABILITY_REQ timed out and then a
760 			 * T_INFO_REQ gets a protocol error, most likely
761 			 * the capability was slow (vs. unsupported). Return
762 			 * ENOSR for this case as a best guess.
763 			 */
764 			if (error == ETIME) {
765 				return ((error = do_tinfo(so)) == EPROTO ?
766 				    ENOSR : error);
767 			}
768 			return (do_tinfo(so));
769 		}
770 		return (0);
771 	}
772 
773 	if (so->so_provinfo->tpi_capability == PI_DONTKNOW) {
774 		PI_PROVLOCK(so->so_provinfo);
775 		so->so_provinfo->tpi_capability = PI_YES;
776 		PI_PROVUNLOCK(so->so_provinfo);
777 	}
778 
779 	ASSERT(mp);
780 	tca = (struct T_capability_ack *)mp->b_rptr;
781 
782 	ASSERT((cap_bits1 & TC1_INFO) == (tca->CAP_bits1 & TC1_INFO));
783 
784 	cap_bits1 = tca->CAP_bits1;
785 
786 	if (cap_bits1 & TC1_ACCEPTOR_ID) {
787 		so->so_acceptor_id = tca->ACCEPTOR_id;
788 		so->so_mode |= SM_ACCEPTOR_ID;
789 	}
790 
791 	if (cap_bits1 & TC1_INFO)
792 		copy_tinfo(so, &tca->INFO_ack);
793 
794 	mutex_exit(&so->so_lock);
795 	freemsg(mp);
796 
797 	if (cap_bits1 & TC1_INFO)
798 		return (check_tinfo(so));
799 
800 	return (0);
801 }
802 
803 /*
804  * Retrieve and clear the socket error.
805  */
806 int
807 sogeterr(struct sonode *so)
808 {
809 	int error;
810 
811 	ASSERT(MUTEX_HELD(&so->so_lock));
812 
813 	error = so->so_error;
814 	so->so_error = 0;
815 
816 	return (error);
817 }
818 
819 /*
820  * This routine is registered with the stream head to retrieve read
821  * side errors.
822  * It does not clear the socket error for a peeking read side operation.
823  * It the error is to be cleared it sets *clearerr.
824  */
825 int
826 sogetrderr(vnode_t *vp, int ispeek, int *clearerr)
827 {
828 	struct sonode *so = VTOSO(vp);
829 	int error;
830 
831 	mutex_enter(&so->so_lock);
832 	if (ispeek) {
833 		error = so->so_error;
834 		*clearerr = 0;
835 	} else {
836 		error = so->so_error;
837 		so->so_error = 0;
838 		*clearerr = 1;
839 	}
840 	mutex_exit(&so->so_lock);
841 	return (error);
842 }
843 
844 /*
845  * This routine is registered with the stream head to retrieve write
846  * side errors.
847  * It does not clear the socket error for a peeking read side operation.
848  * It the error is to be cleared it sets *clearerr.
849  */
850 int
851 sogetwrerr(vnode_t *vp, int ispeek, int *clearerr)
852 {
853 	struct sonode *so = VTOSO(vp);
854 	int error;
855 
856 	mutex_enter(&so->so_lock);
857 	if (so->so_state & SS_CANTSENDMORE) {
858 		error = EPIPE;
859 		*clearerr = 0;
860 	} else {
861 		error = so->so_error;
862 		if (ispeek) {
863 			*clearerr = 0;
864 		} else {
865 			so->so_error = 0;
866 			*clearerr = 1;
867 		}
868 	}
869 	mutex_exit(&so->so_lock);
870 	return (error);
871 }
872 
873 /*
874  * Set a nonpersistent read and write error on the socket.
875  * Used when there is a T_uderror_ind for a connected socket.
876  * The caller also needs to call strsetrerror and strsetwerror
877  * after dropping the lock.
878  */
879 void
880 soseterror(struct sonode *so, int error)
881 {
882 	ASSERT(error != 0);
883 
884 	ASSERT(MUTEX_HELD(&so->so_lock));
885 	so->so_error = (ushort_t)error;
886 }
887 
888 void
889 soisconnecting(struct sonode *so)
890 {
891 	ASSERT(MUTEX_HELD(&so->so_lock));
892 	so->so_state &= ~(SS_ISCONNECTED|SS_ISDISCONNECTING);
893 	so->so_state |= SS_ISCONNECTING;
894 	cv_broadcast(&so->so_state_cv);
895 }
896 
897 void
898 soisconnected(struct sonode *so)
899 {
900 	ASSERT(MUTEX_HELD(&so->so_lock));
901 	so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING);
902 	so->so_state |= SS_ISCONNECTED;
903 	cv_broadcast(&so->so_state_cv);
904 }
905 
906 /*
907  * The caller also needs to call strsetrerror, strsetwerror and strseteof.
908  */
909 void
910 soisdisconnected(struct sonode *so, int error)
911 {
912 	ASSERT(MUTEX_HELD(&so->so_lock));
913 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING|
914 	    SS_LADDR_VALID|SS_FADDR_VALID);
915 	so->so_state |= (SS_CANTRCVMORE|SS_CANTSENDMORE);
916 	so->so_error = (ushort_t)error;
917 	if (so->so_peercred != NULL) {
918 		crfree(so->so_peercred);
919 		so->so_peercred = NULL;
920 	}
921 	cv_broadcast(&so->so_state_cv);
922 }
923 
924 /*
925  * For connected AF_UNIX SOCK_DGRAM sockets when the peer closes.
926  * Does not affect write side.
927  * The caller also has to call strsetrerror.
928  */
929 static void
930 sobreakconn(struct sonode *so, int error)
931 {
932 	ASSERT(MUTEX_HELD(&so->so_lock));
933 	so->so_state &= ~(SS_ISCONNECTING|SS_ISCONNECTED|SS_ISDISCONNECTING);
934 	so->so_error = (ushort_t)error;
935 	cv_broadcast(&so->so_state_cv);
936 }
937 
938 /*
939  * Can no longer send.
940  * Caller must also call strsetwerror.
941  *
942  * We mark the peer address as no longer valid for getpeername, but
943  * leave it around for so_unix_close to notify the peer (that
944  * transport has no addressing held at that layer).
945  */
946 void
947 socantsendmore(struct sonode *so)
948 {
949 	ASSERT(MUTEX_HELD(&so->so_lock));
950 	so->so_state = so->so_state & ~SS_FADDR_VALID | SS_CANTSENDMORE;
951 	cv_broadcast(&so->so_state_cv);
952 }
953 
954 /*
955  * The caller must call strseteof(,1) as well as this routine
956  * to change the socket state.
957  */
958 void
959 socantrcvmore(struct sonode *so)
960 {
961 	ASSERT(MUTEX_HELD(&so->so_lock));
962 	so->so_state |= SS_CANTRCVMORE;
963 	cv_broadcast(&so->so_state_cv);
964 }
965 
966 /*
967  * The caller has sent down a "request_prim" primitive and wants to wait for
968  * an ack ("ack_prim") or an T_ERROR_ACK for it.
969  * The specified "ack_prim" can be a T_OK_ACK.
970  *
971  * Assumes that all the TPI acks are M_PCPROTO messages.
972  *
973  * Note that the socket is single-threaded (using so_lock_single)
974  * for all operations that generate TPI ack messages. Since
975  * only TPI ack messages are M_PCPROTO we should never receive
976  * anything except either the ack we are expecting or a T_ERROR_ACK
977  * for the same primitive.
978  */
979 int
980 sowaitprim(struct sonode *so, t_scalar_t request_prim, t_scalar_t ack_prim,
981 	    t_uscalar_t min_size, mblk_t **mpp, clock_t wait)
982 {
983 	mblk_t *mp;
984 	union T_primitives *tpr;
985 	int error;
986 
987 	dprintso(so, 1, ("sowaitprim(%p, %d, %d, %d, %p, %lu)\n",
988 	    so, request_prim, ack_prim, min_size, mpp, wait));
989 
990 	ASSERT(MUTEX_HELD(&so->so_lock));
991 
992 	error = sowaitack(so, &mp, wait);
993 	if (error)
994 		return (error);
995 
996 	dprintso(so, 1, ("got msg %p\n", mp));
997 	if (DB_TYPE(mp) != M_PCPROTO ||
998 	    MBLKL(mp) < sizeof (tpr->type)) {
999 		freemsg(mp);
1000 		eprintsoline(so, EPROTO);
1001 		return (EPROTO);
1002 	}
1003 	tpr = (union T_primitives *)mp->b_rptr;
1004 	/*
1005 	 * Did we get the primitive that we were asking for?
1006 	 * For T_OK_ACK we also check that it matches the request primitive.
1007 	 */
1008 	if (tpr->type == ack_prim &&
1009 	    (ack_prim != T_OK_ACK ||
1010 	    tpr->ok_ack.CORRECT_prim == request_prim)) {
1011 		if (MBLKL(mp) >= (ssize_t)min_size) {
1012 			/* Found what we are looking for */
1013 			*mpp = mp;
1014 			return (0);
1015 		}
1016 		/* Too short */
1017 		freemsg(mp);
1018 		eprintsoline(so, EPROTO);
1019 		return (EPROTO);
1020 	}
1021 
1022 	if (tpr->type == T_ERROR_ACK &&
1023 	    tpr->error_ack.ERROR_prim == request_prim) {
1024 		/* Error to the primitive we were looking for */
1025 		if (tpr->error_ack.TLI_error == TSYSERR) {
1026 			error = tpr->error_ack.UNIX_error;
1027 		} else {
1028 			error = tlitosyserr(tpr->error_ack.TLI_error);
1029 		}
1030 		dprintso(so, 0, ("error_ack for %d: %d/%d ->%d\n",
1031 		    tpr->error_ack.ERROR_prim,
1032 		    tpr->error_ack.TLI_error,
1033 		    tpr->error_ack.UNIX_error,
1034 		    error));
1035 		freemsg(mp);
1036 		return (error);
1037 	}
1038 	/*
1039 	 * Wrong primitive or T_ERROR_ACK for the wrong primitive
1040 	 */
1041 #ifdef DEBUG
1042 	if (tpr->type == T_ERROR_ACK) {
1043 		dprintso(so, 0, ("error_ack for %d: %d/%d\n",
1044 		    tpr->error_ack.ERROR_prim,
1045 		    tpr->error_ack.TLI_error,
1046 		    tpr->error_ack.UNIX_error));
1047 	} else if (tpr->type == T_OK_ACK) {
1048 		dprintso(so, 0, ("ok_ack for %d, expected %d for %d\n",
1049 		    tpr->ok_ack.CORRECT_prim,
1050 		    ack_prim, request_prim));
1051 	} else {
1052 		dprintso(so, 0,
1053 		    ("unexpected primitive %d, expected %d for %d\n",
1054 		    tpr->type, ack_prim, request_prim));
1055 	}
1056 #endif /* DEBUG */
1057 
1058 	freemsg(mp);
1059 	eprintsoline(so, EPROTO);
1060 	return (EPROTO);
1061 }
1062 
1063 /*
1064  * Wait for a T_OK_ACK for the specified primitive.
1065  */
1066 int
1067 sowaitokack(struct sonode *so, t_scalar_t request_prim)
1068 {
1069 	mblk_t *mp;
1070 	int error;
1071 
1072 	error = sowaitprim(so, request_prim, T_OK_ACK,
1073 	    (t_uscalar_t)sizeof (struct T_ok_ack), &mp, 0);
1074 	if (error)
1075 		return (error);
1076 	freemsg(mp);
1077 	return (0);
1078 }
1079 
1080 /*
1081  * Queue a received TPI ack message on so_ack_mp.
1082  */
1083 void
1084 soqueueack(struct sonode *so, mblk_t *mp)
1085 {
1086 	if (DB_TYPE(mp) != M_PCPROTO) {
1087 		zcmn_err(getzoneid(), CE_WARN,
1088 		    "sockfs: received unexpected M_PROTO TPI ack. Prim %d\n",
1089 		    *(t_scalar_t *)mp->b_rptr);
1090 		freemsg(mp);
1091 		return;
1092 	}
1093 
1094 	mutex_enter(&so->so_lock);
1095 	if (so->so_ack_mp != NULL) {
1096 		dprintso(so, 1, ("so_ack_mp already set\n"));
1097 		freemsg(so->so_ack_mp);
1098 		so->so_ack_mp = NULL;
1099 	}
1100 	so->so_ack_mp = mp;
1101 	cv_broadcast(&so->so_ack_cv);
1102 	mutex_exit(&so->so_lock);
1103 }
1104 
1105 /*
1106  * Wait for a TPI ack ignoring signals and errors.
1107  */
1108 int
1109 sowaitack(struct sonode *so, mblk_t **mpp, clock_t wait)
1110 {
1111 	ASSERT(MUTEX_HELD(&so->so_lock));
1112 
1113 	while (so->so_ack_mp == NULL) {
1114 #ifdef SOCK_TEST
1115 		if (wait == 0 && sock_test_timelimit != 0)
1116 			wait = sock_test_timelimit;
1117 #endif
1118 		if (wait != 0) {
1119 			/*
1120 			 * Only wait for the time limit.
1121 			 */
1122 			clock_t now;
1123 
1124 			time_to_wait(&now, wait);
1125 			if (cv_timedwait(&so->so_ack_cv, &so->so_lock,
1126 			    now) == -1) {
1127 				eprintsoline(so, ETIME);
1128 				return (ETIME);
1129 			}
1130 		}
1131 		else
1132 			cv_wait(&so->so_ack_cv, &so->so_lock);
1133 	}
1134 	*mpp = so->so_ack_mp;
1135 #ifdef DEBUG
1136 	{
1137 		union T_primitives *tpr;
1138 		mblk_t *mp = *mpp;
1139 
1140 		tpr = (union T_primitives *)mp->b_rptr;
1141 		ASSERT(DB_TYPE(mp) == M_PCPROTO);
1142 		ASSERT(tpr->type == T_OK_ACK ||
1143 		    tpr->type == T_ERROR_ACK ||
1144 		    tpr->type == T_BIND_ACK ||
1145 		    tpr->type == T_CAPABILITY_ACK ||
1146 		    tpr->type == T_INFO_ACK ||
1147 		    tpr->type == T_OPTMGMT_ACK);
1148 	}
1149 #endif /* DEBUG */
1150 	so->so_ack_mp = NULL;
1151 	return (0);
1152 }
1153 
1154 /*
1155  * Queue a received T_CONN_IND message on so_conn_ind_head/tail.
1156  */
1157 void
1158 soqueueconnind(struct sonode *so, mblk_t *mp)
1159 {
1160 	if (DB_TYPE(mp) != M_PROTO) {
1161 		zcmn_err(getzoneid(), CE_WARN,
1162 		    "sockfs: received unexpected M_PCPROTO T_CONN_IND\n");
1163 		freemsg(mp);
1164 		return;
1165 	}
1166 
1167 	mutex_enter(&so->so_lock);
1168 	ASSERT(mp->b_next == NULL);
1169 	if (so->so_conn_ind_head == NULL) {
1170 		so->so_conn_ind_head = mp;
1171 		so->so_state |= SS_HASCONNIND;
1172 	} else {
1173 		ASSERT(so->so_state & SS_HASCONNIND);
1174 		ASSERT(so->so_conn_ind_tail->b_next == NULL);
1175 		so->so_conn_ind_tail->b_next = mp;
1176 	}
1177 	so->so_conn_ind_tail = mp;
1178 	/* Wakeup a single consumer of the T_CONN_IND */
1179 	cv_signal(&so->so_connind_cv);
1180 	mutex_exit(&so->so_lock);
1181 }
1182 
1183 /*
1184  * Wait for a T_CONN_IND.
1185  * Don't wait if nonblocking.
1186  * Accept signals and socket errors.
1187  */
1188 int
1189 sowaitconnind(struct sonode *so, int fmode, mblk_t **mpp)
1190 {
1191 	mblk_t *mp;
1192 	int error = 0;
1193 
1194 	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
1195 	mutex_enter(&so->so_lock);
1196 check_error:
1197 	if (so->so_error) {
1198 		error = sogeterr(so);
1199 		if (error) {
1200 			mutex_exit(&so->so_lock);
1201 			return (error);
1202 		}
1203 	}
1204 
1205 	if (so->so_conn_ind_head == NULL) {
1206 		if (fmode & (FNDELAY|FNONBLOCK)) {
1207 			error = EWOULDBLOCK;
1208 			goto done;
1209 		}
1210 		if (!cv_wait_sig_swap(&so->so_connind_cv, &so->so_lock)) {
1211 			error = EINTR;
1212 			goto done;
1213 		}
1214 		goto check_error;
1215 	}
1216 	mp = so->so_conn_ind_head;
1217 	so->so_conn_ind_head = mp->b_next;
1218 	mp->b_next = NULL;
1219 	if (so->so_conn_ind_head == NULL) {
1220 		ASSERT(so->so_conn_ind_tail == mp);
1221 		so->so_conn_ind_tail = NULL;
1222 		so->so_state &= ~SS_HASCONNIND;
1223 	}
1224 	*mpp = mp;
1225 done:
1226 	mutex_exit(&so->so_lock);
1227 	return (error);
1228 }
1229 
1230 /*
1231  * Flush a T_CONN_IND matching the sequence number from the list.
1232  * Return zero if found; non-zero otherwise.
1233  * This is called very infrequently thus it is ok to do a linear search.
1234  */
1235 int
1236 soflushconnind(struct sonode *so, t_scalar_t seqno)
1237 {
1238 	mblk_t *prevmp, *mp;
1239 	struct T_conn_ind *tci;
1240 
1241 	mutex_enter(&so->so_lock);
1242 	for (prevmp = NULL, mp = so->so_conn_ind_head; mp != NULL;
1243 	    prevmp = mp, mp = mp->b_next) {
1244 		tci = (struct T_conn_ind *)mp->b_rptr;
1245 		if (tci->SEQ_number == seqno) {
1246 			dprintso(so, 1,
1247 			    ("t_discon_ind: found T_CONN_IND %d\n", seqno));
1248 			/* Deleting last? */
1249 			if (so->so_conn_ind_tail == mp) {
1250 				so->so_conn_ind_tail = prevmp;
1251 			}
1252 			if (prevmp == NULL) {
1253 				/* Deleting first */
1254 				so->so_conn_ind_head = mp->b_next;
1255 			} else {
1256 				prevmp->b_next = mp->b_next;
1257 			}
1258 			mp->b_next = NULL;
1259 			if (so->so_conn_ind_head == NULL) {
1260 				ASSERT(so->so_conn_ind_tail == NULL);
1261 				so->so_state &= ~SS_HASCONNIND;
1262 			} else {
1263 				ASSERT(so->so_conn_ind_tail != NULL);
1264 			}
1265 			so->so_error = ECONNABORTED;
1266 			mutex_exit(&so->so_lock);
1267 
1268 			/*
1269 			 * T_KSSL_PROXY_CONN_IND may carry a handle for
1270 			 * an SSL context, and needs to be released.
1271 			 */
1272 			if ((tci->PRIM_type == T_SSL_PROXY_CONN_IND) &&
1273 			    (mp->b_cont != NULL)) {
1274 				kssl_ctx_t kssl_ctx;
1275 
1276 				ASSERT(MBLKL(mp->b_cont) ==
1277 				    sizeof (kssl_ctx_t));
1278 				kssl_ctx = *((kssl_ctx_t *)mp->b_cont->b_rptr);
1279 				kssl_release_ctx(kssl_ctx);
1280 			}
1281 			freemsg(mp);
1282 			return (0);
1283 		}
1284 	}
1285 	mutex_exit(&so->so_lock);
1286 	dprintso(so, 1,	("t_discon_ind: NOT found T_CONN_IND %d\n", seqno));
1287 	return (-1);
1288 }
1289 
1290 /*
1291  * Wait until the socket is connected or there is an error.
1292  * fmode should contain any nonblocking flags. nosig should be
1293  * set if the caller does not want the wait to be interrupted by a signal.
1294  */
1295 int
1296 sowaitconnected(struct sonode *so, int fmode, int nosig)
1297 {
1298 	int error;
1299 
1300 	ASSERT(MUTEX_HELD(&so->so_lock));
1301 
1302 	while ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) ==
1303 	    SS_ISCONNECTING && so->so_error == 0) {
1304 
1305 		dprintso(so, 1, ("waiting for SS_ISCONNECTED on %p\n", so));
1306 		if (fmode & (FNDELAY|FNONBLOCK))
1307 			return (EINPROGRESS);
1308 
1309 		if (nosig)
1310 			cv_wait(&so->so_state_cv, &so->so_lock);
1311 		else if (!cv_wait_sig_swap(&so->so_state_cv, &so->so_lock)) {
1312 			/*
1313 			 * Return EINTR and let the application use
1314 			 * nonblocking techniques for detecting when
1315 			 * the connection has been established.
1316 			 */
1317 			return (EINTR);
1318 		}
1319 		dprintso(so, 1, ("awoken on %p\n", so));
1320 	}
1321 
1322 	if (so->so_error != 0) {
1323 		error = sogeterr(so);
1324 		ASSERT(error != 0);
1325 		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
1326 		return (error);
1327 	}
1328 	if (!(so->so_state & SS_ISCONNECTED)) {
1329 		/*
1330 		 * Could have received a T_ORDREL_IND or a T_DISCON_IND with
1331 		 * zero errno. Or another thread could have consumed so_error
1332 		 * e.g. by calling read.
1333 		 */
1334 		error = ECONNREFUSED;
1335 		dprintso(so, 1, ("sowaitconnected: error %d\n", error));
1336 		return (error);
1337 	}
1338 	return (0);
1339 }
1340 
1341 
1342 /*
1343  * Handle the signal generation aspect of urgent data.
1344  */
1345 static void
1346 so_oob_sig(struct sonode *so, int extrasig,
1347     strsigset_t *signals, strpollset_t *pollwakeups)
1348 {
1349 	ASSERT(MUTEX_HELD(&so->so_lock));
1350 
1351 	ASSERT(so_verify_oobstate(so));
1352 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1353 	if (so->so_oobsigcnt > so->so_oobcnt) {
1354 		/*
1355 		 * Signal has already been generated once for this
1356 		 * urgent "event". However, since TCP can receive updated
1357 		 * urgent pointers we still generate a signal.
1358 		 */
1359 		ASSERT(so->so_state & SS_OOBPEND);
1360 		if (extrasig) {
1361 			*signals |= S_RDBAND;
1362 			*pollwakeups |= POLLRDBAND;
1363 		}
1364 		return;
1365 	}
1366 
1367 	so->so_oobsigcnt++;
1368 	ASSERT(so->so_oobsigcnt > 0);	/* Wraparound */
1369 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1370 
1371 	/*
1372 	 * Record (for select/poll) that urgent data is pending.
1373 	 */
1374 	so->so_state |= SS_OOBPEND;
1375 	/*
1376 	 * New urgent data on the way so forget about any old
1377 	 * urgent data.
1378 	 */
1379 	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);
1380 	if (so->so_oobmsg != NULL) {
1381 		dprintso(so, 1, ("sock: discarding old oob\n"));
1382 		freemsg(so->so_oobmsg);
1383 		so->so_oobmsg = NULL;
1384 	}
1385 	*signals |= S_RDBAND;
1386 	*pollwakeups |= POLLRDBAND;
1387 	ASSERT(so_verify_oobstate(so));
1388 }
1389 
1390 /*
1391  * Handle the processing of the T_EXDATA_IND with urgent data.
1392  * Returns the T_EXDATA_IND if it should be queued on the read queue.
1393  */
1394 /* ARGSUSED2 */
1395 static mblk_t *
1396 so_oob_exdata(struct sonode *so, mblk_t *mp,
1397 	strsigset_t *signals, strpollset_t *pollwakeups)
1398 {
1399 	ASSERT(MUTEX_HELD(&so->so_lock));
1400 
1401 	ASSERT(so_verify_oobstate(so));
1402 
1403 	ASSERT(so->so_oobsigcnt > so->so_oobcnt);
1404 
1405 	so->so_oobcnt++;
1406 	ASSERT(so->so_oobcnt > 0);	/* wraparound? */
1407 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1408 
1409 	/*
1410 	 * Set MSGMARK for SIOCATMARK.
1411 	 */
1412 	mp->b_flag |= MSGMARK;
1413 
1414 	ASSERT(so_verify_oobstate(so));
1415 	return (mp);
1416 }
1417 
1418 /*
1419  * Handle the processing of the actual urgent data.
1420  * Returns the data mblk if it should be queued on the read queue.
1421  */
1422 static mblk_t *
1423 so_oob_data(struct sonode *so, mblk_t *mp,
1424 	strsigset_t *signals, strpollset_t *pollwakeups)
1425 {
1426 	ASSERT(MUTEX_HELD(&so->so_lock));
1427 
1428 	ASSERT(so_verify_oobstate(so));
1429 
1430 	ASSERT(so->so_oobsigcnt >= so->so_oobcnt);
1431 	ASSERT(mp != NULL);
1432 	/*
1433 	 * For OOBINLINE we keep the data in the T_EXDATA_IND.
1434 	 * Otherwise we store it in so_oobmsg.
1435 	 */
1436 	ASSERT(so->so_oobmsg == NULL);
1437 	if (so->so_options & SO_OOBINLINE) {
1438 		*pollwakeups |= POLLIN | POLLRDNORM | POLLRDBAND;
1439 		*signals |= S_INPUT | S_RDNORM;
1440 	} else {
1441 		*pollwakeups |= POLLRDBAND;
1442 		so->so_state |= SS_HAVEOOBDATA;
1443 		so->so_oobmsg = mp;
1444 		mp = NULL;
1445 	}
1446 	ASSERT(so_verify_oobstate(so));
1447 	return (mp);
1448 }
1449 
1450 /*
1451  * Caller must hold the mutex.
1452  * For delayed processing, save the T_DISCON_IND received
1453  * from below on so_discon_ind_mp.
1454  * When the message is processed the framework will call:
1455  *      (*func)(so, mp);
1456  */
1457 static void
1458 so_save_discon_ind(struct sonode *so,
1459 	mblk_t *mp,
1460 	void (*func)(struct sonode *so, mblk_t *))
1461 {
1462 	ASSERT(MUTEX_HELD(&so->so_lock));
1463 
1464 	/*
1465 	 * Discard new T_DISCON_IND if we have already received another.
1466 	 * Currently the earlier message can either be on so_discon_ind_mp
1467 	 * or being processed.
1468 	 */
1469 	if (so->so_discon_ind_mp != NULL || (so->so_flag & SOASYNC_UNBIND)) {
1470 		zcmn_err(getzoneid(), CE_WARN,
1471 		    "sockfs: received unexpected additional T_DISCON_IND\n");
1472 		freemsg(mp);
1473 		return;
1474 	}
1475 	mp->b_prev = (mblk_t *)func;
1476 	mp->b_next = NULL;
1477 	so->so_discon_ind_mp = mp;
1478 }
1479 
1480 /*
1481  * Caller must hold the mutex and make sure that either SOLOCKED
1482  * or SOASYNC_UNBIND is set. Called from so_unlock_single().
1483  * Perform delayed processing of T_DISCON_IND message on so_discon_ind_mp.
1484  * Need to ensure that strsock_proto() will not end up sleeping for
1485  * SOASYNC_UNBIND, while executing this function.
1486  */
1487 void
1488 so_drain_discon_ind(struct sonode *so)
1489 {
1490 	mblk_t	*bp;
1491 	void (*func)(struct sonode *so, mblk_t *);
1492 
1493 	ASSERT(MUTEX_HELD(&so->so_lock));
1494 	ASSERT(so->so_flag & (SOLOCKED|SOASYNC_UNBIND));
1495 
1496 	/* Process T_DISCON_IND on so_discon_ind_mp */
1497 	if ((bp = so->so_discon_ind_mp) != NULL) {
1498 		so->so_discon_ind_mp = NULL;
1499 		func = (void (*)())bp->b_prev;
1500 		bp->b_prev = NULL;
1501 
1502 		/*
1503 		 * This (*func) is supposed to generate a message downstream
1504 		 * and we need to have a flag set until the corresponding
1505 		 * upstream message reaches stream head.
1506 		 * When processing T_DISCON_IND in strsock_discon_ind
1507 		 * we hold SOASYN_UNBIND when sending T_UNBIND_REQ down and
1508 		 * drop the flag after we get the ACK in strsock_proto.
1509 		 */
1510 		(void) (*func)(so, bp);
1511 	}
1512 }
1513 
1514 /*
1515  * Caller must hold the mutex.
1516  * Remove the T_DISCON_IND on so_discon_ind_mp.
1517  */
1518 void
1519 so_flush_discon_ind(struct sonode *so)
1520 {
1521 	mblk_t	*bp;
1522 
1523 	ASSERT(MUTEX_HELD(&so->so_lock));
1524 
1525 	/*
1526 	 * Remove T_DISCON_IND mblk at so_discon_ind_mp.
1527 	 */
1528 	if ((bp = so->so_discon_ind_mp) != NULL) {
1529 		so->so_discon_ind_mp = NULL;
1530 		bp->b_prev = NULL;
1531 		freemsg(bp);
1532 	}
1533 }
1534 
1535 /*
1536  * Caller must hold the mutex.
1537  *
1538  * This function is used to process the T_DISCON_IND message. It does
1539  * immediate processing when called from strsock_proto and delayed
1540  * processing of discon_ind saved on so_discon_ind_mp when called from
1541  * so_drain_discon_ind. When a T_DISCON_IND message is saved in
1542  * so_discon_ind_mp for delayed processing, this function is registered
1543  * as the callback function to process the message.
1544  *
1545  * SOASYNC_UNBIND should be held in this function, during the non-blocking
1546  * unbind operation, and should be released only after we receive the ACK
1547  * in strsock_proto, for the T_UNBIND_REQ sent here. Since SOLOCKED is not set,
1548  * no TPI messages would be sent down at this time. This is to prevent M_FLUSH
1549  * sent from either this function or tcp_unbind(), flushing away any TPI
1550  * message that is being sent down and stays in a lower module's queue.
1551  *
1552  * This function drops so_lock and grabs it again.
1553  */
1554 static void
1555 strsock_discon_ind(struct sonode *so, mblk_t *discon_mp)
1556 {
1557 	struct vnode *vp;
1558 	struct stdata *stp;
1559 	union T_primitives *tpr;
1560 	struct T_unbind_req *ubr;
1561 	mblk_t *mp;
1562 	int error;
1563 
1564 	ASSERT(MUTEX_HELD(&so->so_lock));
1565 	ASSERT(discon_mp);
1566 	ASSERT(discon_mp->b_rptr);
1567 
1568 	tpr = (union T_primitives *)discon_mp->b_rptr;
1569 	ASSERT(tpr->type == T_DISCON_IND);
1570 
1571 	vp = SOTOV(so);
1572 	stp = vp->v_stream;
1573 	ASSERT(stp);
1574 
1575 	/*
1576 	 * Not a listener
1577 	 */
1578 	ASSERT((so->so_state & SS_ACCEPTCONN) == 0);
1579 
1580 	/*
1581 	 * This assumes that the name space for DISCON_reason
1582 	 * is the errno name space.
1583 	 */
1584 	soisdisconnected(so, tpr->discon_ind.DISCON_reason);
1585 
1586 	/*
1587 	 * Unbind with the transport without blocking.
1588 	 * If we've already received a T_DISCON_IND do not unbind.
1589 	 *
1590 	 * If there is no preallocated unbind message, we have already
1591 	 * unbound with the transport
1592 	 *
1593 	 * If the socket is not bound, no need to unbind.
1594 	 */
1595 	mp = so->so_unbind_mp;
1596 	if (mp == NULL) {
1597 		ASSERT(!(so->so_state & SS_ISBOUND));
1598 		mutex_exit(&so->so_lock);
1599 	} else if (!(so->so_state & SS_ISBOUND))  {
1600 		mutex_exit(&so->so_lock);
1601 	} else {
1602 		so->so_unbind_mp = NULL;
1603 
1604 		/*
1605 		 * Is another T_DISCON_IND being processed.
1606 		 */
1607 		ASSERT((so->so_flag & SOASYNC_UNBIND) == 0);
1608 
1609 		/*
1610 		 * Make strsock_proto ignore T_OK_ACK and T_ERROR_ACK for
1611 		 * this unbind. Set SOASYNC_UNBIND. This should be cleared
1612 		 * only after we receive the ACK in strsock_proto.
1613 		 */
1614 		so->so_flag |= SOASYNC_UNBIND;
1615 		ASSERT(!(so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)));
1616 		so->so_state &= ~(SS_ISBOUND|SS_ACCEPTCONN|SS_LADDR_VALID);
1617 		mutex_exit(&so->so_lock);
1618 
1619 		/*
1620 		 * Send down T_UNBIND_REQ ignoring flow control.
1621 		 * XXX Assumes that MSG_IGNFLOW implies that this thread
1622 		 * does not run service procedures.
1623 		 */
1624 		ASSERT(DB_TYPE(mp) == M_PROTO);
1625 		ubr = (struct T_unbind_req *)mp->b_rptr;
1626 		mp->b_wptr += sizeof (*ubr);
1627 		ubr->PRIM_type = T_UNBIND_REQ;
1628 
1629 		/*
1630 		 * Flush the read and write side (except stream head read queue)
1631 		 * and send down T_UNBIND_REQ.
1632 		 */
1633 		(void) putnextctl1(strvp2wq(SOTOV(so)), M_FLUSH, FLUSHRW);
1634 		error = kstrputmsg(SOTOV(so), mp, NULL, 0, 0,
1635 		    MSG_BAND|MSG_HOLDSIG|MSG_IGNERROR|MSG_IGNFLOW, 0);
1636 		/* LINTED - warning: statement has no consequent: if */
1637 		if (error) {
1638 			eprintsoline(so, error);
1639 		}
1640 	}
1641 
1642 	if (tpr->discon_ind.DISCON_reason != 0)
1643 		strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1644 	strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
1645 	strseteof(SOTOV(so), 1);
1646 	/*
1647 	 * strseteof takes care of read side wakeups,
1648 	 * pollwakeups, and signals.
1649 	 */
1650 	dprintso(so, 1, ("T_DISCON_IND: error %d\n", so->so_error));
1651 	freemsg(discon_mp);
1652 
1653 
1654 	pollwakeup(&stp->sd_pollist, POLLOUT);
1655 	mutex_enter(&stp->sd_lock);
1656 
1657 	/*
1658 	 * Wake sleeping write
1659 	 */
1660 	if (stp->sd_flag & WSLEEP) {
1661 		stp->sd_flag &= ~WSLEEP;
1662 		cv_broadcast(&stp->sd_wrq->q_wait);
1663 	}
1664 
1665 	/*
1666 	 * strsendsig can handle multiple signals with a
1667 	 * single call.  Send SIGPOLL for S_OUTPUT event.
1668 	 */
1669 	if (stp->sd_sigflags & S_OUTPUT)
1670 		strsendsig(stp->sd_siglist, S_OUTPUT, 0, 0);
1671 
1672 	mutex_exit(&stp->sd_lock);
1673 	mutex_enter(&so->so_lock);
1674 }
1675 
1676 /*
1677  * This routine is registered with the stream head to receive M_PROTO
1678  * and M_PCPROTO messages.
1679  *
1680  * Returns NULL if the message was consumed.
1681  * Returns an mblk to make that mblk be processed (and queued) by the stream
1682  * head.
1683  *
1684  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
1685  * *pollwakeups) for the stream head to take action on. Note that since
1686  * sockets always deliver SIGIO for every new piece of data this routine
1687  * never sets *firstmsgsigs; any signals are returned in *allmsgsigs.
1688  *
1689  * This routine handles all data related TPI messages independent of
1690  * the type of the socket i.e. it doesn't care if T_UNITDATA_IND message
1691  * arrive on a SOCK_STREAM.
1692  */
1693 static mblk_t *
1694 strsock_proto(vnode_t *vp, mblk_t *mp,
1695 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
1696 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
1697 {
1698 	union T_primitives *tpr;
1699 	struct sonode *so;
1700 
1701 	so = VTOSO(vp);
1702 
1703 	dprintso(so, 1, ("strsock_proto(%p, %p)\n", vp, mp));
1704 
1705 	/* Set default return values */
1706 	*firstmsgsigs = *wakeups = *allmsgsigs = *pollwakeups = 0;
1707 
1708 	ASSERT(DB_TYPE(mp) == M_PROTO ||
1709 	    DB_TYPE(mp) == M_PCPROTO);
1710 
1711 	if (MBLKL(mp) < sizeof (tpr->type)) {
1712 		/* The message is too short to even contain the primitive */
1713 		zcmn_err(getzoneid(), CE_WARN,
1714 		    "sockfs: Too short TPI message received. Len = %ld\n",
1715 		    (ptrdiff_t)(MBLKL(mp)));
1716 		freemsg(mp);
1717 		return (NULL);
1718 	}
1719 	if (!__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
1720 		/* The read pointer is not aligned correctly for TPI */
1721 		zcmn_err(getzoneid(), CE_WARN,
1722 		    "sockfs: Unaligned TPI message received. rptr = %p\n",
1723 		    (void *)mp->b_rptr);
1724 		freemsg(mp);
1725 		return (NULL);
1726 	}
1727 	tpr = (union T_primitives *)mp->b_rptr;
1728 	dprintso(so, 1, ("strsock_proto: primitive %d\n", tpr->type));
1729 
1730 	switch (tpr->type) {
1731 
1732 	case T_DATA_IND:
1733 		if (MBLKL(mp) < sizeof (struct T_data_ind)) {
1734 			zcmn_err(getzoneid(), CE_WARN,
1735 			    "sockfs: Too short T_DATA_IND. Len = %ld\n",
1736 			    (ptrdiff_t)(MBLKL(mp)));
1737 			freemsg(mp);
1738 			return (NULL);
1739 		}
1740 		/*
1741 		 * Ignore zero-length T_DATA_IND messages. These might be
1742 		 * generated by some transports.
1743 		 * This is needed to prevent read (which skips the M_PROTO
1744 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
1745 		 * on a non-blocking socket after select/poll has indicated
1746 		 * that data is available).
1747 		 */
1748 		if (msgdsize(mp->b_cont) == 0) {
1749 			dprintso(so, 0,
1750 			    ("strsock_proto: zero length T_DATA_IND\n"));
1751 			freemsg(mp);
1752 			return (NULL);
1753 		}
1754 		*allmsgsigs = S_INPUT | S_RDNORM;
1755 		*pollwakeups = POLLIN | POLLRDNORM;
1756 		*wakeups = RSLEEP;
1757 		return (mp);
1758 
1759 	case T_UNITDATA_IND: {
1760 		struct T_unitdata_ind	*tudi = &tpr->unitdata_ind;
1761 		void			*addr;
1762 		t_uscalar_t		addrlen;
1763 
1764 		if (MBLKL(mp) < sizeof (struct T_unitdata_ind)) {
1765 			zcmn_err(getzoneid(), CE_WARN,
1766 			    "sockfs: Too short T_UNITDATA_IND. Len = %ld\n",
1767 			    (ptrdiff_t)(MBLKL(mp)));
1768 			freemsg(mp);
1769 			return (NULL);
1770 		}
1771 
1772 		/* Is this is not a connected datagram socket? */
1773 		if ((so->so_mode & SM_CONNREQUIRED) ||
1774 		    !(so->so_state & SS_ISCONNECTED)) {
1775 			/*
1776 			 * Not a connected datagram socket. Look for
1777 			 * the SO_UNIX_CLOSE option. If such an option is found
1778 			 * discard the message (since it has no meaning
1779 			 * unless connected).
1780 			 */
1781 			if (so->so_family == AF_UNIX && msgdsize(mp) == 0 &&
1782 			    tudi->OPT_length != 0) {
1783 				void *opt;
1784 				t_uscalar_t optlen = tudi->OPT_length;
1785 
1786 				opt = sogetoff(mp, tudi->OPT_offset,
1787 				    optlen, __TPI_ALIGN_SIZE);
1788 				if (opt == NULL) {
1789 					/* The len/off falls outside mp */
1790 					freemsg(mp);
1791 					mutex_enter(&so->so_lock);
1792 					soseterror(so, EPROTO);
1793 					mutex_exit(&so->so_lock);
1794 					zcmn_err(getzoneid(), CE_WARN,
1795 					    "sockfs: T_unidata_ind with "
1796 					    "invalid optlen/offset %u/%d\n",
1797 					    optlen, tudi->OPT_offset);
1798 					return (NULL);
1799 				}
1800 				if (so_getopt_unix_close(opt, optlen)) {
1801 					freemsg(mp);
1802 					return (NULL);
1803 				}
1804 			}
1805 			*allmsgsigs = S_INPUT | S_RDNORM;
1806 			*pollwakeups = POLLIN | POLLRDNORM;
1807 			*wakeups = RSLEEP;
1808 			if (audit_active)
1809 				audit_sock(T_UNITDATA_IND, strvp2wq(vp),
1810 				    mp, 0);
1811 			return (mp);
1812 		}
1813 
1814 		/*
1815 		 * A connect datagram socket. For AF_INET{,6} we verify that
1816 		 * the source address matches the "connected to" address.
1817 		 * The semantics of AF_UNIX sockets is to not verify
1818 		 * the source address.
1819 		 * Note that this source address verification is transport
1820 		 * specific. Thus the real fix would be to extent TPI
1821 		 * to allow T_CONN_REQ messages to be send to connectionless
1822 		 * transport providers and always let the transport provider
1823 		 * do whatever filtering is needed.
1824 		 *
1825 		 * The verification/filtering semantics for transports
1826 		 * other than AF_INET and AF_UNIX are unknown. The choice
1827 		 * would be to either filter using bcmp or let all messages
1828 		 * get through. This code does not filter other address
1829 		 * families since this at least allows the application to
1830 		 * work around any missing filtering.
1831 		 *
1832 		 * XXX Should we move filtering to UDP/ICMP???
1833 		 * That would require passing e.g. a T_DISCON_REQ to UDP
1834 		 * when the socket becomes unconnected.
1835 		 */
1836 		addrlen = tudi->SRC_length;
1837 		/*
1838 		 * The alignment restriction is really to strict but
1839 		 * we want enough alignment to inspect the fields of
1840 		 * a sockaddr_in.
1841 		 */
1842 		addr = sogetoff(mp, tudi->SRC_offset, addrlen,
1843 		    __TPI_ALIGN_SIZE);
1844 		if (addr == NULL) {
1845 			freemsg(mp);
1846 			mutex_enter(&so->so_lock);
1847 			soseterror(so, EPROTO);
1848 			mutex_exit(&so->so_lock);
1849 			zcmn_err(getzoneid(), CE_WARN,
1850 			    "sockfs: T_unidata_ind with invalid "
1851 			    "addrlen/offset %u/%d\n",
1852 			    addrlen, tudi->SRC_offset);
1853 			return (NULL);
1854 		}
1855 
1856 		if (so->so_family == AF_INET) {
1857 			/*
1858 			 * For AF_INET we allow wildcarding both sin_addr
1859 			 * and sin_port.
1860 			 */
1861 			struct sockaddr_in *faddr, *sin;
1862 
1863 			/* Prevent so_faddr_sa from changing while accessed */
1864 			mutex_enter(&so->so_lock);
1865 			ASSERT(so->so_faddr_len ==
1866 			    (socklen_t)sizeof (struct sockaddr_in));
1867 			faddr = (struct sockaddr_in *)so->so_faddr_sa;
1868 			sin = (struct sockaddr_in *)addr;
1869 			if (addrlen !=
1870 			    (t_uscalar_t)sizeof (struct sockaddr_in) ||
1871 			    (sin->sin_addr.s_addr != faddr->sin_addr.s_addr &&
1872 			    faddr->sin_addr.s_addr != INADDR_ANY) ||
1873 			    (so->so_type != SOCK_RAW &&
1874 			    sin->sin_port != faddr->sin_port &&
1875 			    faddr->sin_port != 0)) {
1876 #ifdef DEBUG
1877 				dprintso(so, 0,
1878 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1879 				    pr_addr(so->so_family,
1880 				    (struct sockaddr *)addr,
1881 				    addrlen)));
1882 				dprintso(so, 0, (" - %s\n",
1883 				    pr_addr(so->so_family, so->so_faddr_sa,
1884 				    (t_uscalar_t)so->so_faddr_len)));
1885 #endif /* DEBUG */
1886 				mutex_exit(&so->so_lock);
1887 				freemsg(mp);
1888 				return (NULL);
1889 			}
1890 			mutex_exit(&so->so_lock);
1891 		} else if (so->so_family == AF_INET6) {
1892 			/*
1893 			 * For AF_INET6 we allow wildcarding both sin6_addr
1894 			 * and sin6_port.
1895 			 */
1896 			struct sockaddr_in6 *faddr6, *sin6;
1897 			static struct in6_addr zeroes; /* inits to all zeros */
1898 
1899 			/* Prevent so_faddr_sa from changing while accessed */
1900 			mutex_enter(&so->so_lock);
1901 			ASSERT(so->so_faddr_len ==
1902 			    (socklen_t)sizeof (struct sockaddr_in6));
1903 			faddr6 = (struct sockaddr_in6 *)so->so_faddr_sa;
1904 			sin6 = (struct sockaddr_in6 *)addr;
1905 			/* XXX could we get a mapped address ::ffff:0.0.0.0 ? */
1906 			if (addrlen !=
1907 			    (t_uscalar_t)sizeof (struct sockaddr_in6) ||
1908 			    (!IN6_ARE_ADDR_EQUAL(&sin6->sin6_addr,
1909 			    &faddr6->sin6_addr) &&
1910 			    !IN6_ARE_ADDR_EQUAL(&faddr6->sin6_addr, &zeroes)) ||
1911 			    (so->so_type != SOCK_RAW &&
1912 			    sin6->sin6_port != faddr6->sin6_port &&
1913 			    faddr6->sin6_port != 0)) {
1914 #ifdef DEBUG
1915 				dprintso(so, 0,
1916 				    ("sockfs: T_UNITDATA_IND mismatch: %s",
1917 				    pr_addr(so->so_family,
1918 				    (struct sockaddr *)addr,
1919 				    addrlen)));
1920 				dprintso(so, 0, (" - %s\n",
1921 				    pr_addr(so->so_family, so->so_faddr_sa,
1922 				    (t_uscalar_t)so->so_faddr_len)));
1923 #endif /* DEBUG */
1924 				mutex_exit(&so->so_lock);
1925 				freemsg(mp);
1926 				return (NULL);
1927 			}
1928 			mutex_exit(&so->so_lock);
1929 		} else if (so->so_family == AF_UNIX &&
1930 		    msgdsize(mp->b_cont) == 0 &&
1931 		    tudi->OPT_length != 0) {
1932 			/*
1933 			 * Attempt to extract AF_UNIX
1934 			 * SO_UNIX_CLOSE indication from options.
1935 			 */
1936 			void *opt;
1937 			t_uscalar_t optlen = tudi->OPT_length;
1938 
1939 			opt = sogetoff(mp, tudi->OPT_offset,
1940 			    optlen, __TPI_ALIGN_SIZE);
1941 			if (opt == NULL) {
1942 				/* The len/off falls outside mp */
1943 				freemsg(mp);
1944 				mutex_enter(&so->so_lock);
1945 				soseterror(so, EPROTO);
1946 				mutex_exit(&so->so_lock);
1947 				zcmn_err(getzoneid(), CE_WARN,
1948 				    "sockfs: T_unidata_ind with invalid "
1949 				    "optlen/offset %u/%d\n",
1950 				    optlen, tudi->OPT_offset);
1951 				return (NULL);
1952 			}
1953 			/*
1954 			 * If we received a unix close indication mark the
1955 			 * socket and discard this message.
1956 			 */
1957 			if (so_getopt_unix_close(opt, optlen)) {
1958 				mutex_enter(&so->so_lock);
1959 				sobreakconn(so, ECONNRESET);
1960 				mutex_exit(&so->so_lock);
1961 				strsetrerror(SOTOV(so), 0, 0, sogetrderr);
1962 				freemsg(mp);
1963 				*pollwakeups = POLLIN | POLLRDNORM;
1964 				*allmsgsigs = S_INPUT | S_RDNORM;
1965 				*wakeups = RSLEEP;
1966 				return (NULL);
1967 			}
1968 		}
1969 		*allmsgsigs = S_INPUT | S_RDNORM;
1970 		*pollwakeups = POLLIN | POLLRDNORM;
1971 		*wakeups = RSLEEP;
1972 		return (mp);
1973 	}
1974 
1975 	case T_OPTDATA_IND: {
1976 		struct T_optdata_ind	*tdi = &tpr->optdata_ind;
1977 
1978 		if (MBLKL(mp) < sizeof (struct T_optdata_ind)) {
1979 			zcmn_err(getzoneid(), CE_WARN,
1980 			    "sockfs: Too short T_OPTDATA_IND. Len = %ld\n",
1981 			    (ptrdiff_t)(MBLKL(mp)));
1982 			freemsg(mp);
1983 			return (NULL);
1984 		}
1985 		/*
1986 		 * Allow zero-length messages carrying options.
1987 		 * This is used when carrying the SO_UNIX_CLOSE option.
1988 		 */
1989 		if (so->so_family == AF_UNIX && msgdsize(mp->b_cont) == 0 &&
1990 		    tdi->OPT_length != 0) {
1991 			/*
1992 			 * Attempt to extract AF_UNIX close indication
1993 			 * from the options. Ignore any other options -
1994 			 * those are handled once the message is removed
1995 			 * from the queue.
1996 			 * The close indication message should not carry data.
1997 			 */
1998 			void *opt;
1999 			t_uscalar_t optlen = tdi->OPT_length;
2000 
2001 			opt = sogetoff(mp, tdi->OPT_offset,
2002 			    optlen, __TPI_ALIGN_SIZE);
2003 			if (opt == NULL) {
2004 				/* The len/off falls outside mp */
2005 				freemsg(mp);
2006 				mutex_enter(&so->so_lock);
2007 				soseterror(so, EPROTO);
2008 				mutex_exit(&so->so_lock);
2009 				zcmn_err(getzoneid(), CE_WARN,
2010 				    "sockfs: T_optdata_ind with invalid "
2011 				    "optlen/offset %u/%d\n",
2012 				    optlen, tdi->OPT_offset);
2013 				return (NULL);
2014 			}
2015 			/*
2016 			 * If we received a close indication mark the
2017 			 * socket and discard this message.
2018 			 */
2019 			if (so_getopt_unix_close(opt, optlen)) {
2020 				mutex_enter(&so->so_lock);
2021 				socantsendmore(so);
2022 				mutex_exit(&so->so_lock);
2023 				strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2024 				freemsg(mp);
2025 				return (NULL);
2026 			}
2027 		}
2028 		*allmsgsigs = S_INPUT | S_RDNORM;
2029 		*pollwakeups = POLLIN | POLLRDNORM;
2030 		*wakeups = RSLEEP;
2031 		return (mp);
2032 	}
2033 
2034 	case T_EXDATA_IND: {
2035 		mblk_t		*mctl, *mdata;
2036 		mblk_t *lbp;
2037 		union T_primitives *tprp;
2038 		struct stdata   *stp;
2039 		queue_t *qp;
2040 
2041 		if (MBLKL(mp) < sizeof (struct T_exdata_ind)) {
2042 			zcmn_err(getzoneid(), CE_WARN,
2043 			    "sockfs: Too short T_EXDATA_IND. Len = %ld\n",
2044 			    (ptrdiff_t)(MBLKL(mp)));
2045 			freemsg(mp);
2046 			return (NULL);
2047 		}
2048 		/*
2049 		 * Ignore zero-length T_EXDATA_IND messages. These might be
2050 		 * generated by some transports.
2051 		 *
2052 		 * This is needed to prevent read (which skips the M_PROTO
2053 		 * part) to unexpectedly return 0 (or return EWOULDBLOCK
2054 		 * on a non-blocking socket after select/poll has indicated
2055 		 * that data is available).
2056 		 */
2057 		dprintso(so, 1,
2058 		    ("T_EXDATA_IND(%p): counts %d/%d state %s\n",
2059 		    vp, so->so_oobsigcnt, so->so_oobcnt,
2060 		    pr_state(so->so_state, so->so_mode)));
2061 
2062 		if (msgdsize(mp->b_cont) == 0) {
2063 			dprintso(so, 0,
2064 			    ("strsock_proto: zero length T_EXDATA_IND\n"));
2065 			freemsg(mp);
2066 			return (NULL);
2067 		}
2068 
2069 		/*
2070 		 * Split into the T_EXDATA_IND and the M_DATA part.
2071 		 * We process these three pieces separately:
2072 		 *	signal generation
2073 		 *	handling T_EXDATA_IND
2074 		 *	handling M_DATA component
2075 		 */
2076 		mctl = mp;
2077 		mdata = mctl->b_cont;
2078 		mctl->b_cont = NULL;
2079 		mutex_enter(&so->so_lock);
2080 		so_oob_sig(so, 0, allmsgsigs, pollwakeups);
2081 		mctl = so_oob_exdata(so, mctl, allmsgsigs, pollwakeups);
2082 		mdata = so_oob_data(so, mdata, allmsgsigs, pollwakeups);
2083 
2084 		stp = vp->v_stream;
2085 		ASSERT(stp != NULL);
2086 		qp = _RD(stp->sd_wrq);
2087 
2088 		mutex_enter(QLOCK(qp));
2089 		lbp = qp->q_last;
2090 
2091 		/*
2092 		 * We want to avoid queueing up a string of T_EXDATA_IND
2093 		 * messages with no intervening data messages at the stream
2094 		 * head. These messages contribute to the total message
2095 		 * count. Eventually this can lead to STREAMS flow contol
2096 		 * and also cause TCP to advertise a zero window condition
2097 		 * to the peer. This can happen in the degenerate case where
2098 		 * the sender and receiver exchange only OOB data. The sender
2099 		 * only sends messages with MSG_OOB flag and the receiver
2100 		 * receives only MSG_OOB messages and does not use SO_OOBINLINE.
2101 		 * An example of this scenario has been reported in applications
2102 		 * that use OOB data to exchange heart beats. Flow control
2103 		 * relief will never happen if the application only reads OOB
2104 		 * data which is done directly by sorecvoob() and the
2105 		 * T_EXDATA_IND messages at the streamhead won't be consumed.
2106 		 * Note that there is no correctness issue in compressing the
2107 		 * string of T_EXDATA_IND messages into a single T_EXDATA_IND
2108 		 * message. A single read that does not specify MSG_OOB will
2109 		 * read across all the marks in a loop in sotpi_recvmsg().
2110 		 * Each mark is individually distinguishable only if the
2111 		 * T_EXDATA_IND messages are separated by data messages.
2112 		 */
2113 		if ((qp->q_first != NULL) && (DB_TYPE(lbp) == M_PROTO)) {
2114 			tprp = (union T_primitives *)lbp->b_rptr;
2115 			if ((tprp->type == T_EXDATA_IND) &&
2116 			    !(so->so_options & SO_OOBINLINE)) {
2117 
2118 				/*
2119 				 * free the new M_PROTO message
2120 				 */
2121 				freemsg(mctl);
2122 
2123 				/*
2124 				 * adjust the OOB count and OOB	signal count
2125 				 * just incremented for the new OOB data.
2126 				 */
2127 				so->so_oobcnt--;
2128 				so->so_oobsigcnt--;
2129 				mutex_exit(QLOCK(qp));
2130 				mutex_exit(&so->so_lock);
2131 				return (NULL);
2132 			}
2133 		}
2134 		mutex_exit(QLOCK(qp));
2135 
2136 		/*
2137 		 * Pass the T_EXDATA_IND and the M_DATA back separately
2138 		 * by using b_next linkage. (The stream head will queue any
2139 		 * b_next linked messages separately.) This is needed
2140 		 * since MSGMARK applies to the last by of the message
2141 		 * hence we can not have any M_DATA component attached
2142 		 * to the marked T_EXDATA_IND. Note that the stream head
2143 		 * will not consolidate M_DATA messages onto an MSGMARK'ed
2144 		 * message in order to preserve the constraint that
2145 		 * the T_EXDATA_IND always is a separate message.
2146 		 */
2147 		ASSERT(mctl != NULL);
2148 		mctl->b_next = mdata;
2149 		mp = mctl;
2150 #ifdef DEBUG
2151 		if (mdata == NULL) {
2152 			dprintso(so, 1,
2153 			    ("after outofline T_EXDATA_IND(%p): "
2154 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2155 			    vp, so->so_oobsigcnt,
2156 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2157 			    pr_state(so->so_state, so->so_mode)));
2158 		} else {
2159 			dprintso(so, 1,
2160 			    ("after inline T_EXDATA_IND(%p): "
2161 			    "counts %d/%d  poll 0x%x sig 0x%x state %s\n",
2162 			    vp, so->so_oobsigcnt,
2163 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2164 			    pr_state(so->so_state, so->so_mode)));
2165 		}
2166 #endif /* DEBUG */
2167 		mutex_exit(&so->so_lock);
2168 		*wakeups = RSLEEP;
2169 		return (mp);
2170 	}
2171 
2172 	case T_CONN_CON: {
2173 		struct T_conn_con	*conn_con;
2174 		void			*addr;
2175 		t_uscalar_t		addrlen;
2176 
2177 		/*
2178 		 * Verify the state, update the state to ISCONNECTED,
2179 		 * record the potentially new address in the message,
2180 		 * and drop the message.
2181 		 */
2182 		if (MBLKL(mp) < sizeof (struct T_conn_con)) {
2183 			zcmn_err(getzoneid(), CE_WARN,
2184 			    "sockfs: Too short T_CONN_CON. Len = %ld\n",
2185 			    (ptrdiff_t)(MBLKL(mp)));
2186 			freemsg(mp);
2187 			return (NULL);
2188 		}
2189 
2190 		mutex_enter(&so->so_lock);
2191 		if ((so->so_state & (SS_ISCONNECTED|SS_ISCONNECTING)) !=
2192 		    SS_ISCONNECTING) {
2193 			mutex_exit(&so->so_lock);
2194 			dprintso(so, 1,
2195 			    ("T_CONN_CON: state %x\n", so->so_state));
2196 			freemsg(mp);
2197 			return (NULL);
2198 		}
2199 
2200 		conn_con = &tpr->conn_con;
2201 		addrlen = conn_con->RES_length;
2202 		/*
2203 		 * Allow the address to be of different size than sent down
2204 		 * in the T_CONN_REQ as long as it doesn't exceed the maxlen.
2205 		 * For AF_UNIX require the identical length.
2206 		 */
2207 		if (so->so_family == AF_UNIX ?
2208 		    addrlen != (t_uscalar_t)sizeof (so->so_ux_laddr) :
2209 		    addrlen > (t_uscalar_t)so->so_faddr_maxlen) {
2210 			zcmn_err(getzoneid(), CE_WARN,
2211 			    "sockfs: T_conn_con with different "
2212 			    "length %u/%d\n",
2213 			    addrlen, conn_con->RES_length);
2214 			soisdisconnected(so, EPROTO);
2215 			mutex_exit(&so->so_lock);
2216 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2217 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2218 			strseteof(SOTOV(so), 1);
2219 			freemsg(mp);
2220 			/*
2221 			 * strseteof takes care of read side wakeups,
2222 			 * pollwakeups, and signals.
2223 			 */
2224 			*wakeups = WSLEEP;
2225 			*allmsgsigs = S_OUTPUT;
2226 			*pollwakeups = POLLOUT;
2227 			return (NULL);
2228 		}
2229 		addr = sogetoff(mp, conn_con->RES_offset, addrlen, 1);
2230 		if (addr == NULL) {
2231 			zcmn_err(getzoneid(), CE_WARN,
2232 			    "sockfs: T_conn_con with invalid "
2233 			    "addrlen/offset %u/%d\n",
2234 			    addrlen, conn_con->RES_offset);
2235 			mutex_exit(&so->so_lock);
2236 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2237 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2238 			strseteof(SOTOV(so), 1);
2239 			freemsg(mp);
2240 			/*
2241 			 * strseteof takes care of read side wakeups,
2242 			 * pollwakeups, and signals.
2243 			 */
2244 			*wakeups = WSLEEP;
2245 			*allmsgsigs = S_OUTPUT;
2246 			*pollwakeups = POLLOUT;
2247 			return (NULL);
2248 		}
2249 
2250 		/*
2251 		 * Save for getpeername.
2252 		 */
2253 		if (so->so_family != AF_UNIX) {
2254 			so->so_faddr_len = (socklen_t)addrlen;
2255 			ASSERT(so->so_faddr_len <= so->so_faddr_maxlen);
2256 			bcopy(addr, so->so_faddr_sa, addrlen);
2257 			so->so_state |= SS_FADDR_VALID;
2258 		}
2259 
2260 		if (so->so_peercred != NULL)
2261 			crfree(so->so_peercred);
2262 		so->so_peercred = DB_CRED(mp);
2263 		so->so_cpid = DB_CPID(mp);
2264 		if (so->so_peercred != NULL)
2265 			crhold(so->so_peercred);
2266 
2267 		/* Wakeup anybody sleeping in sowaitconnected */
2268 		soisconnected(so);
2269 		mutex_exit(&so->so_lock);
2270 
2271 		/*
2272 		 * The socket is now available for sending data.
2273 		 */
2274 		*wakeups = WSLEEP;
2275 		*allmsgsigs = S_OUTPUT;
2276 		*pollwakeups = POLLOUT;
2277 		freemsg(mp);
2278 		return (NULL);
2279 	}
2280 
2281 	/*
2282 	 * Extra processing in case of an SSL proxy, before queuing or
2283 	 * forwarding to the fallback endpoint
2284 	 */
2285 	case T_SSL_PROXY_CONN_IND:
2286 	case T_CONN_IND:
2287 		/*
2288 		 * Verify the min size and queue the message on
2289 		 * the so_conn_ind_head/tail list.
2290 		 */
2291 		if (MBLKL(mp) < sizeof (struct T_conn_ind)) {
2292 			zcmn_err(getzoneid(), CE_WARN,
2293 			    "sockfs: Too short T_CONN_IND. Len = %ld\n",
2294 			    (ptrdiff_t)(MBLKL(mp)));
2295 			freemsg(mp);
2296 			return (NULL);
2297 		}
2298 
2299 		if (audit_active)
2300 			audit_sock(T_CONN_IND, strvp2wq(vp), mp, 0);
2301 		if (!(so->so_state & SS_ACCEPTCONN)) {
2302 			zcmn_err(getzoneid(), CE_WARN,
2303 			    "sockfs: T_conn_ind on non-listening socket\n");
2304 			freemsg(mp);
2305 			return (NULL);
2306 		}
2307 
2308 		if (tpr->type == T_SSL_PROXY_CONN_IND && mp->b_cont == NULL) {
2309 			/* No context: need to fall back */
2310 			struct sonode *fbso;
2311 			stdata_t *fbstp;
2312 
2313 			tpr->type = T_CONN_IND;
2314 
2315 			fbso = kssl_find_fallback(so->so_kssl_ent);
2316 
2317 			/*
2318 			 * No fallback: the remote will timeout and
2319 			 * disconnect.
2320 			 */
2321 			if (fbso == NULL) {
2322 				freemsg(mp);
2323 				return (NULL);
2324 			}
2325 			fbstp = SOTOV(fbso)->v_stream;
2326 			qreply(fbstp->sd_wrq->q_next, mp);
2327 			return (NULL);
2328 		}
2329 		soqueueconnind(so, mp);
2330 		*allmsgsigs = S_INPUT | S_RDNORM;
2331 		*pollwakeups = POLLIN | POLLRDNORM;
2332 		*wakeups = RSLEEP;
2333 		return (NULL);
2334 
2335 	case T_ORDREL_IND:
2336 		if (MBLKL(mp) < sizeof (struct T_ordrel_ind)) {
2337 			zcmn_err(getzoneid(), CE_WARN,
2338 			    "sockfs: Too short T_ORDREL_IND. Len = %ld\n",
2339 			    (ptrdiff_t)(MBLKL(mp)));
2340 			freemsg(mp);
2341 			return (NULL);
2342 		}
2343 
2344 		/*
2345 		 * Some providers send this when not fully connected.
2346 		 * SunLink X.25 needs to retrieve disconnect reason after
2347 		 * disconnect for compatibility. It uses T_ORDREL_IND
2348 		 * instead of T_DISCON_IND so that it may use the
2349 		 * endpoint after a connect failure to retrieve the
2350 		 * reason using an ioctl. Thus we explicitly clear
2351 		 * SS_ISCONNECTING here for SunLink X.25.
2352 		 * This is a needed TPI violation.
2353 		 */
2354 		mutex_enter(&so->so_lock);
2355 		so->so_state &= ~SS_ISCONNECTING;
2356 		socantrcvmore(so);
2357 		mutex_exit(&so->so_lock);
2358 		strseteof(SOTOV(so), 1);
2359 		/*
2360 		 * strseteof takes care of read side wakeups,
2361 		 * pollwakeups, and signals.
2362 		 */
2363 		freemsg(mp);
2364 		return (NULL);
2365 
2366 	case T_DISCON_IND:
2367 		if (MBLKL(mp) < sizeof (struct T_discon_ind)) {
2368 			zcmn_err(getzoneid(), CE_WARN,
2369 			    "sockfs: Too short T_DISCON_IND. Len = %ld\n",
2370 			    (ptrdiff_t)(MBLKL(mp)));
2371 			freemsg(mp);
2372 			return (NULL);
2373 		}
2374 		if (so->so_state & SS_ACCEPTCONN) {
2375 			/*
2376 			 * This is a listener. Look for a queued T_CONN_IND
2377 			 * with a matching sequence number and remove it
2378 			 * from the list.
2379 			 * It is normal to not find the sequence number since
2380 			 * the soaccept might have already dequeued it
2381 			 * (in which case the T_CONN_RES will fail with
2382 			 * TBADSEQ).
2383 			 */
2384 			(void) soflushconnind(so, tpr->discon_ind.SEQ_number);
2385 			freemsg(mp);
2386 			return (0);
2387 		}
2388 
2389 		/*
2390 		 * Not a listener
2391 		 *
2392 		 * If SS_CANTRCVMORE for AF_UNIX ignore the discon_reason.
2393 		 * Such a discon_ind appears when the peer has first done
2394 		 * a shutdown() followed by a close() in which case we just
2395 		 * want to record socantsendmore.
2396 		 * In this case sockfs first receives a T_ORDREL_IND followed
2397 		 * by a T_DISCON_IND.
2398 		 * Note that for other transports (e.g. TCP) we need to handle
2399 		 * the discon_ind in this case since it signals an error.
2400 		 */
2401 		mutex_enter(&so->so_lock);
2402 		if ((so->so_state & SS_CANTRCVMORE) &&
2403 		    (so->so_family == AF_UNIX)) {
2404 			socantsendmore(so);
2405 			mutex_exit(&so->so_lock);
2406 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2407 			dprintso(so, 1,
2408 			    ("T_DISCON_IND: error %d\n", so->so_error));
2409 			freemsg(mp);
2410 			/*
2411 			 * Set these variables for caller to process them.
2412 			 * For the else part where T_DISCON_IND is processed,
2413 			 * this will be done in the function being called
2414 			 * (strsock_discon_ind())
2415 			 */
2416 			*wakeups = WSLEEP;
2417 			*allmsgsigs = S_OUTPUT;
2418 			*pollwakeups = POLLOUT;
2419 		} else if (so->so_flag & (SOASYNC_UNBIND | SOLOCKED)) {
2420 			/*
2421 			 * Deferred processing of T_DISCON_IND
2422 			 */
2423 			so_save_discon_ind(so, mp, strsock_discon_ind);
2424 			mutex_exit(&so->so_lock);
2425 		} else {
2426 			/*
2427 			 * Process T_DISCON_IND now
2428 			 */
2429 			(void) strsock_discon_ind(so, mp);
2430 			mutex_exit(&so->so_lock);
2431 		}
2432 		return (NULL);
2433 
2434 	case T_UDERROR_IND: {
2435 		struct T_uderror_ind	*tudi = &tpr->uderror_ind;
2436 		void			*addr;
2437 		t_uscalar_t		addrlen;
2438 		int			error;
2439 
2440 		dprintso(so, 0,
2441 		    ("T_UDERROR_IND: error %d\n", tudi->ERROR_type));
2442 
2443 		if (MBLKL(mp) < sizeof (struct T_uderror_ind)) {
2444 			zcmn_err(getzoneid(), CE_WARN,
2445 			    "sockfs: Too short T_UDERROR_IND. Len = %ld\n",
2446 			    (ptrdiff_t)(MBLKL(mp)));
2447 			freemsg(mp);
2448 			return (NULL);
2449 		}
2450 		/* Ignore on connection-oriented transports */
2451 		if (so->so_mode & SM_CONNREQUIRED) {
2452 			freemsg(mp);
2453 			eprintsoline(so, 0);
2454 			zcmn_err(getzoneid(), CE_WARN,
2455 			    "sockfs: T_uderror_ind on connection-oriented "
2456 			    "transport\n");
2457 			return (NULL);
2458 		}
2459 		addrlen = tudi->DEST_length;
2460 		addr = sogetoff(mp, tudi->DEST_offset, addrlen, 1);
2461 		if (addr == NULL) {
2462 			zcmn_err(getzoneid(), CE_WARN,
2463 			    "sockfs: T_uderror_ind with invalid "
2464 			    "addrlen/offset %u/%d\n",
2465 			    addrlen, tudi->DEST_offset);
2466 			freemsg(mp);
2467 			return (NULL);
2468 		}
2469 
2470 		/* Verify source address for connected socket. */
2471 		mutex_enter(&so->so_lock);
2472 		if (so->so_state & SS_ISCONNECTED) {
2473 			void *faddr;
2474 			t_uscalar_t faddr_len;
2475 			boolean_t match = B_FALSE;
2476 
2477 			switch (so->so_family) {
2478 			case AF_INET: {
2479 				/* Compare just IP address and port */
2480 				struct sockaddr_in *sin1, *sin2;
2481 
2482 				sin1 = (struct sockaddr_in *)so->so_faddr_sa;
2483 				sin2 = (struct sockaddr_in *)addr;
2484 				if (addrlen == sizeof (struct sockaddr_in) &&
2485 				    sin1->sin_port == sin2->sin_port &&
2486 				    sin1->sin_addr.s_addr ==
2487 				    sin2->sin_addr.s_addr)
2488 					match = B_TRUE;
2489 				break;
2490 			}
2491 			case AF_INET6: {
2492 				/* Compare just IP address and port. Not flow */
2493 				struct sockaddr_in6 *sin1, *sin2;
2494 
2495 				sin1 = (struct sockaddr_in6 *)so->so_faddr_sa;
2496 				sin2 = (struct sockaddr_in6 *)addr;
2497 				if (addrlen == sizeof (struct sockaddr_in6) &&
2498 				    sin1->sin6_port == sin2->sin6_port &&
2499 				    IN6_ARE_ADDR_EQUAL(&sin1->sin6_addr,
2500 				    &sin2->sin6_addr))
2501 					match = B_TRUE;
2502 				break;
2503 			}
2504 			case AF_UNIX:
2505 				faddr = &so->so_ux_faddr;
2506 				faddr_len =
2507 				    (t_uscalar_t)sizeof (so->so_ux_faddr);
2508 				if (faddr_len == addrlen &&
2509 				    bcmp(addr, faddr, addrlen) == 0)
2510 					match = B_TRUE;
2511 				break;
2512 			default:
2513 				faddr = so->so_faddr_sa;
2514 				faddr_len = (t_uscalar_t)so->so_faddr_len;
2515 				if (faddr_len == addrlen &&
2516 				    bcmp(addr, faddr, addrlen) == 0)
2517 					match = B_TRUE;
2518 				break;
2519 			}
2520 
2521 			if (!match) {
2522 #ifdef DEBUG
2523 				dprintso(so, 0,
2524 				    ("sockfs: T_UDERR_IND mismatch: %s - ",
2525 				    pr_addr(so->so_family,
2526 				    (struct sockaddr *)addr,
2527 				    addrlen)));
2528 				dprintso(so, 0, ("%s\n",
2529 				    pr_addr(so->so_family, so->so_faddr_sa,
2530 				    so->so_faddr_len)));
2531 #endif /* DEBUG */
2532 				mutex_exit(&so->so_lock);
2533 				freemsg(mp);
2534 				return (NULL);
2535 			}
2536 			/*
2537 			 * Make the write error nonpersistent. If the error
2538 			 * is zero we use ECONNRESET.
2539 			 * This assumes that the name space for ERROR_type
2540 			 * is the errno name space.
2541 			 */
2542 			if (tudi->ERROR_type != 0)
2543 				error = tudi->ERROR_type;
2544 			else
2545 				error = ECONNRESET;
2546 
2547 			soseterror(so, error);
2548 			mutex_exit(&so->so_lock);
2549 			strsetrerror(SOTOV(so), 0, 0, sogetrderr);
2550 			strsetwerror(SOTOV(so), 0, 0, sogetwrerr);
2551 			*wakeups = RSLEEP | WSLEEP;
2552 			*allmsgsigs = S_INPUT | S_RDNORM | S_OUTPUT;
2553 			*pollwakeups = POLLIN | POLLRDNORM | POLLOUT;
2554 			freemsg(mp);
2555 			return (NULL);
2556 		}
2557 		/*
2558 		 * If the application asked for delayed errors
2559 		 * record the T_UDERROR_IND so_eaddr_mp and the reason in
2560 		 * so_delayed_error for delayed error posting. If the reason
2561 		 * is zero use ECONNRESET.
2562 		 * Note that delayed error indications do not make sense for
2563 		 * AF_UNIX sockets since sendto checks that the destination
2564 		 * address is valid at the time of the sendto.
2565 		 */
2566 		if (!(so->so_options & SO_DGRAM_ERRIND)) {
2567 			mutex_exit(&so->so_lock);
2568 			freemsg(mp);
2569 			return (NULL);
2570 		}
2571 		if (so->so_eaddr_mp != NULL)
2572 			freemsg(so->so_eaddr_mp);
2573 
2574 		so->so_eaddr_mp = mp;
2575 		if (tudi->ERROR_type != 0)
2576 			error = tudi->ERROR_type;
2577 		else
2578 			error = ECONNRESET;
2579 		so->so_delayed_error = (ushort_t)error;
2580 		mutex_exit(&so->so_lock);
2581 		return (NULL);
2582 	}
2583 
2584 	case T_ERROR_ACK:
2585 		dprintso(so, 0,
2586 		    ("strsock_proto: T_ERROR_ACK for %d, error %d/%d\n",
2587 		    tpr->error_ack.ERROR_prim,
2588 		    tpr->error_ack.TLI_error,
2589 		    tpr->error_ack.UNIX_error));
2590 
2591 		if (MBLKL(mp) < sizeof (struct T_error_ack)) {
2592 			zcmn_err(getzoneid(), CE_WARN,
2593 			    "sockfs: Too short T_ERROR_ACK. Len = %ld\n",
2594 			    (ptrdiff_t)(MBLKL(mp)));
2595 			freemsg(mp);
2596 			return (NULL);
2597 		}
2598 		/*
2599 		 * Check if we were waiting for the async message
2600 		 */
2601 		mutex_enter(&so->so_lock);
2602 		if ((so->so_flag & SOASYNC_UNBIND) &&
2603 		    tpr->error_ack.ERROR_prim == T_UNBIND_REQ) {
2604 			so_unlock_single(so, SOASYNC_UNBIND);
2605 			mutex_exit(&so->so_lock);
2606 			freemsg(mp);
2607 			return (NULL);
2608 		}
2609 		mutex_exit(&so->so_lock);
2610 		soqueueack(so, mp);
2611 		return (NULL);
2612 
2613 	case T_OK_ACK:
2614 		if (MBLKL(mp) < sizeof (struct T_ok_ack)) {
2615 			zcmn_err(getzoneid(), CE_WARN,
2616 			    "sockfs: Too short T_OK_ACK. Len = %ld\n",
2617 			    (ptrdiff_t)(MBLKL(mp)));
2618 			freemsg(mp);
2619 			return (NULL);
2620 		}
2621 		/*
2622 		 * Check if we were waiting for the async message
2623 		 */
2624 		mutex_enter(&so->so_lock);
2625 		if ((so->so_flag & SOASYNC_UNBIND) &&
2626 		    tpr->ok_ack.CORRECT_prim == T_UNBIND_REQ) {
2627 			dprintso(so, 1,
2628 			    ("strsock_proto: T_OK_ACK async unbind\n"));
2629 			so_unlock_single(so, SOASYNC_UNBIND);
2630 			mutex_exit(&so->so_lock);
2631 			freemsg(mp);
2632 			return (NULL);
2633 		}
2634 		mutex_exit(&so->so_lock);
2635 		soqueueack(so, mp);
2636 		return (NULL);
2637 
2638 	case T_INFO_ACK:
2639 		if (MBLKL(mp) < sizeof (struct T_info_ack)) {
2640 			zcmn_err(getzoneid(), CE_WARN,
2641 			    "sockfs: Too short T_INFO_ACK. Len = %ld\n",
2642 			    (ptrdiff_t)(MBLKL(mp)));
2643 			freemsg(mp);
2644 			return (NULL);
2645 		}
2646 		soqueueack(so, mp);
2647 		return (NULL);
2648 
2649 	case T_CAPABILITY_ACK:
2650 		/*
2651 		 * A T_capability_ack need only be large enough to hold
2652 		 * the PRIM_type and CAP_bits1 fields; checking for anything
2653 		 * larger might reject a correct response from an older
2654 		 * provider.
2655 		 */
2656 		if (MBLKL(mp) < 2 * sizeof (t_uscalar_t)) {
2657 			zcmn_err(getzoneid(), CE_WARN,
2658 			    "sockfs: Too short T_CAPABILITY_ACK. Len = %ld\n",
2659 			    (ptrdiff_t)(MBLKL(mp)));
2660 			freemsg(mp);
2661 			return (NULL);
2662 		}
2663 		soqueueack(so, mp);
2664 		return (NULL);
2665 
2666 	case T_BIND_ACK:
2667 		if (MBLKL(mp) < sizeof (struct T_bind_ack)) {
2668 			zcmn_err(getzoneid(), CE_WARN,
2669 			    "sockfs: Too short T_BIND_ACK. Len = %ld\n",
2670 			    (ptrdiff_t)(MBLKL(mp)));
2671 			freemsg(mp);
2672 			return (NULL);
2673 		}
2674 		soqueueack(so, mp);
2675 		return (NULL);
2676 
2677 	case T_OPTMGMT_ACK:
2678 		if (MBLKL(mp) < sizeof (struct T_optmgmt_ack)) {
2679 			zcmn_err(getzoneid(), CE_WARN,
2680 			    "sockfs: Too short T_OPTMGMT_ACK. Len = %ld\n",
2681 			    (ptrdiff_t)(MBLKL(mp)));
2682 			freemsg(mp);
2683 			return (NULL);
2684 		}
2685 		soqueueack(so, mp);
2686 		return (NULL);
2687 	default:
2688 #ifdef DEBUG
2689 		zcmn_err(getzoneid(), CE_WARN,
2690 		    "sockfs: unknown TPI primitive %d received\n",
2691 		    tpr->type);
2692 #endif /* DEBUG */
2693 		freemsg(mp);
2694 		return (NULL);
2695 	}
2696 }
2697 
2698 /*
2699  * This routine is registered with the stream head to receive other
2700  * (non-data, and non-proto) messages.
2701  *
2702  * Returns NULL if the message was consumed.
2703  * Returns an mblk to make that mblk be processed by the stream head.
2704  *
2705  * Sets the return parameters (*wakeups, *firstmsgsigs, *allmsgsigs, and
2706  * *pollwakeups) for the stream head to take action on.
2707  */
2708 static mblk_t *
2709 strsock_misc(vnode_t *vp, mblk_t *mp,
2710 		strwakeup_t *wakeups, strsigset_t *firstmsgsigs,
2711 		strsigset_t *allmsgsigs, strpollset_t *pollwakeups)
2712 {
2713 	struct sonode *so;
2714 
2715 	so = VTOSO(vp);
2716 
2717 	dprintso(so, 1, ("strsock_misc(%p, %p, 0x%x)\n",
2718 	    vp, mp, DB_TYPE(mp)));
2719 
2720 	/* Set default return values */
2721 	*wakeups = *allmsgsigs = *firstmsgsigs = *pollwakeups = 0;
2722 
2723 	switch (DB_TYPE(mp)) {
2724 	case M_PCSIG:
2725 		/*
2726 		 * This assumes that an M_PCSIG for the urgent data arrives
2727 		 * before the corresponding T_EXDATA_IND.
2728 		 *
2729 		 * Note: Just like in SunOS 4.X and 4.4BSD a poll will be
2730 		 * awoken before the urgent data shows up.
2731 		 * For OOBINLINE this can result in select returning
2732 		 * only exceptions as opposed to except|read.
2733 		 */
2734 		if (*mp->b_rptr == SIGURG) {
2735 			mutex_enter(&so->so_lock);
2736 			dprintso(so, 1,
2737 			    ("SIGURG(%p): counts %d/%d state %s\n",
2738 			    vp, so->so_oobsigcnt,
2739 			    so->so_oobcnt,
2740 			    pr_state(so->so_state, so->so_mode)));
2741 			so_oob_sig(so, 1, allmsgsigs, pollwakeups);
2742 			dprintso(so, 1,
2743 			    ("after SIGURG(%p): counts %d/%d "
2744 			    " poll 0x%x sig 0x%x state %s\n",
2745 			    vp, so->so_oobsigcnt,
2746 			    so->so_oobcnt, *pollwakeups, *allmsgsigs,
2747 			    pr_state(so->so_state, so->so_mode)));
2748 			mutex_exit(&so->so_lock);
2749 		}
2750 		freemsg(mp);
2751 		return (NULL);
2752 
2753 	case M_SIG:
2754 	case M_HANGUP:
2755 	case M_UNHANGUP:
2756 	case M_ERROR:
2757 		/* M_ERRORs etc are ignored */
2758 		freemsg(mp);
2759 		return (NULL);
2760 
2761 	case M_FLUSH:
2762 		/*
2763 		 * Do not flush read queue. If the M_FLUSH
2764 		 * arrives because of an impending T_discon_ind
2765 		 * we still have to keep any queued data - this is part of
2766 		 * socket semantics.
2767 		 */
2768 		if (*mp->b_rptr & FLUSHW) {
2769 			*mp->b_rptr &= ~FLUSHR;
2770 			return (mp);
2771 		}
2772 		freemsg(mp);
2773 		return (NULL);
2774 
2775 	default:
2776 		return (mp);
2777 	}
2778 }
2779 
2780 
2781 /* Register to receive signals for certain events */
2782 int
2783 so_set_asyncsigs(vnode_t *vp, pid_t pgrp, int events, int mode, cred_t *cr)
2784 {
2785 	struct strsigset ss;
2786 	int32_t rval;
2787 
2788 	/*
2789 	 * Note that SOLOCKED will be set except for the call from soaccept().
2790 	 */
2791 	ASSERT(!mutex_owned(&VTOSO(vp)->so_lock));
2792 	ss.ss_pid = pgrp;
2793 	ss.ss_events = events;
2794 	return (strioctl(vp, I_ESETSIG, (intptr_t)&ss, mode, K_TO_K, cr,
2795 	    &rval));
2796 }
2797 
2798 
2799 /* Register for events matching the SS_ASYNC flag */
2800 int
2801 so_set_events(struct sonode *so, vnode_t *vp, cred_t *cr)
2802 {
2803 	int events = so->so_state & SS_ASYNC ?
2804 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2805 	    S_RDBAND | S_BANDURG;
2806 
2807 	return (so_set_asyncsigs(vp, so->so_pgrp, events, 0, cr));
2808 }
2809 
2810 
2811 /* Change the SS_ASYNC flag, and update signal delivery if needed */
2812 int
2813 so_flip_async(struct sonode *so, vnode_t *vp, int mode, cred_t *cr)
2814 {
2815 	ASSERT(mutex_owned(&so->so_lock));
2816 	if (so->so_pgrp != 0) {
2817 		int error;
2818 		int events = so->so_state & SS_ASYNC ?		/* Old flag */
2819 		    S_RDBAND | S_BANDURG :			/* New sigs */
2820 		    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT;
2821 
2822 		so_lock_single(so);
2823 		mutex_exit(&so->so_lock);
2824 
2825 		error = so_set_asyncsigs(vp, so->so_pgrp, events, mode, cr);
2826 
2827 		mutex_enter(&so->so_lock);
2828 		so_unlock_single(so, SOLOCKED);
2829 		if (error)
2830 			return (error);
2831 	}
2832 	so->so_state ^= SS_ASYNC;
2833 	return (0);
2834 }
2835 
2836 /*
2837  * Set new pid/pgrp for SIGPOLL (or SIGIO for FIOASYNC mode), replacing
2838  * any existing one.  If passed zero, just clear the existing one.
2839  */
2840 int
2841 so_set_siggrp(struct sonode *so, vnode_t *vp, pid_t pgrp, int mode, cred_t *cr)
2842 {
2843 	int events = so->so_state & SS_ASYNC ?
2844 	    S_RDBAND | S_BANDURG | S_RDNORM | S_OUTPUT :
2845 	    S_RDBAND | S_BANDURG;
2846 	int error;
2847 
2848 	ASSERT(mutex_owned(&so->so_lock));
2849 
2850 	/*
2851 	 * Change socket process (group).
2852 	 *
2853 	 * strioctl (via so_set_asyncsigs) will perform permission check and
2854 	 * also keep a PID_HOLD to prevent the pid from being reused.
2855 	 */
2856 	so_lock_single(so);
2857 	mutex_exit(&so->so_lock);
2858 
2859 	if (pgrp != 0) {
2860 		dprintso(so, 1, ("setown: adding pgrp %d ev 0x%x\n",
2861 		    pgrp, events));
2862 		error = so_set_asyncsigs(vp, pgrp, events, mode, cr);
2863 		if (error != 0) {
2864 			eprintsoline(so, error);
2865 			goto bad;
2866 		}
2867 	}
2868 	/* Remove the previously registered process/group */
2869 	if (so->so_pgrp != 0) {
2870 		dprintso(so, 1, ("setown: removing pgrp %d\n", so->so_pgrp));
2871 		error = so_set_asyncsigs(vp, so->so_pgrp, 0, mode, cr);
2872 		if (error != 0) {
2873 			eprintsoline(so, error);
2874 			error = 0;
2875 		}
2876 	}
2877 	mutex_enter(&so->so_lock);
2878 	so_unlock_single(so, SOLOCKED);
2879 	so->so_pgrp = pgrp;
2880 	return (0);
2881 bad:
2882 	mutex_enter(&so->so_lock);
2883 	so_unlock_single(so, SOLOCKED);
2884 	return (error);
2885 }
2886 
2887 
2888 
2889 /*
2890  * Translate a TLI(/XTI) error into a system error as best we can.
2891  */
2892 static const int tli_errs[] = {
2893 		0,		/* no error	*/
2894 		EADDRNOTAVAIL,  /* TBADADDR	*/
2895 		ENOPROTOOPT,	/* TBADOPT	*/
2896 		EACCES,		/* TACCES	*/
2897 		EBADF,		/* TBADF	*/
2898 		EADDRNOTAVAIL,	/* TNOADDR	*/
2899 		EPROTO,		/* TOUTSTATE	*/
2900 		ECONNABORTED,	/* TBADSEQ	*/
2901 		0,		/* TSYSERR - will never get	*/
2902 		EPROTO,		/* TLOOK - should never be sent by transport */
2903 		EMSGSIZE,	/* TBADDATA	*/
2904 		EMSGSIZE,	/* TBUFOVFLW	*/
2905 		EPROTO,		/* TFLOW	*/
2906 		EWOULDBLOCK,	/* TNODATA	*/
2907 		EPROTO,		/* TNODIS	*/
2908 		EPROTO,		/* TNOUDERR	*/
2909 		EINVAL,		/* TBADFLAG	*/
2910 		EPROTO,		/* TNOREL	*/
2911 		EOPNOTSUPP,	/* TNOTSUPPORT	*/
2912 		EPROTO,		/* TSTATECHNG	*/
2913 		/* following represent error namespace expansion with XTI */
2914 		EPROTO,		/* TNOSTRUCTYPE - never sent by transport */
2915 		EPROTO,		/* TBADNAME - never sent by transport */
2916 		EPROTO,		/* TBADQLEN - never sent by transport */
2917 		EADDRINUSE,	/* TADDRBUSY	*/
2918 		EBADF,		/* TINDOUT	*/
2919 		EBADF,		/* TPROVMISMATCH */
2920 		EBADF,		/* TRESQLEN	*/
2921 		EBADF,		/* TRESADDR	*/
2922 		EPROTO,		/* TQFULL - never sent by transport */
2923 		EPROTO,		/* TPROTO	*/
2924 };
2925 
2926 static int
2927 tlitosyserr(int terr)
2928 {
2929 	ASSERT(terr != TSYSERR);
2930 	if (terr >= (sizeof (tli_errs) / sizeof (tli_errs[0])))
2931 		return (EPROTO);
2932 	else
2933 		return (tli_errs[terr]);
2934 }
2935 
2936 /*
2937  * Sockfs sodirect STREAMS read put procedure. Called from sodirect enable
2938  * transport driver/module with an mblk_t chain.
2939  *
2940  * Note, we in-line putq() for the fast-path cases of q is empty, q_last and
2941  * bp are of type M_DATA. All other cases we call putq().
2942  *
2943  * On success a zero will be return, else an errno will be returned.
2944  */
2945 int
2946 sodput(sodirect_t *sodp, mblk_t *bp)
2947 {
2948 	queue_t		*q = sodp->sod_q;
2949 	struct stdata	*stp = (struct stdata *)q->q_ptr;
2950 	mblk_t		*nbp;
2951 	int		ret;
2952 	mblk_t		*last = q->q_last;
2953 	int		bytecnt = 0;
2954 	int		mblkcnt = 0;
2955 
2956 
2957 	ASSERT(MUTEX_HELD(sodp->sod_lock));
2958 
2959 	if (stp->sd_flag == STREOF) {
2960 		ret = 0;
2961 		goto error;
2962 	}
2963 
2964 	if (q->q_first == NULL) {
2965 		/* Q empty, really fast fast-path */
2966 		bp->b_prev = NULL;
2967 		bp->b_next = NULL;
2968 		q->q_first = bp;
2969 		q->q_last = bp;
2970 
2971 	} else if (last->b_datap->db_type == M_DATA &&
2972 	    bp->b_datap->db_type == M_DATA) {
2973 		/*
2974 		 * Last mblk_t chain and bp are both type M_DATA so
2975 		 * in-line putq() here, if the DBLK_UIOA state match
2976 		 * add bp to the end of the current last chain, else
2977 		 * start a new last chain with bp.
2978 		 */
2979 		if ((last->b_datap->db_flags & DBLK_UIOA) ==
2980 		    (bp->b_datap->db_flags & DBLK_UIOA)) {
2981 			/* Added to end */
2982 			while ((nbp = last->b_cont) != NULL)
2983 				last = nbp;
2984 			last->b_cont = bp;
2985 		} else {
2986 			/* New last */
2987 			last->b_next = bp;
2988 			bp->b_next = NULL;
2989 			bp->b_prev = last;
2990 			q->q_last = bp;
2991 		}
2992 	} else {
2993 		/*
2994 		 * Can't use q_last so just call putq().
2995 		 */
2996 		(void) putq(q, bp);
2997 		return (0);
2998 	}
2999 
3000 	/* Count bytes and mblk_t's */
3001 	do {
3002 		bytecnt += MBLKL(bp);
3003 		mblkcnt++;
3004 	} while ((bp = bp->b_cont) != NULL);
3005 	q->q_count += bytecnt;
3006 	q->q_mblkcnt += mblkcnt;
3007 
3008 	/* Check for QFULL */
3009 	if (q->q_count >= q->q_hiwat + sodp->sod_want ||
3010 	    q->q_mblkcnt >= q->q_hiwat) {
3011 		q->q_flag |= QFULL;
3012 	}
3013 
3014 	return (0);
3015 
3016 error:
3017 	do {
3018 		if ((nbp = bp->b_next) != NULL)
3019 			bp->b_next = NULL;
3020 		freemsg(bp);
3021 	} while ((bp = nbp) != NULL);
3022 
3023 	return (ret);
3024 }
3025 
3026 /*
3027  * Sockfs sodirect read wakeup. Called from a sodirect enabled transport
3028  * driver/module to indicate that read-side data is available.
3029  *
3030  * On return the sodirect_t.lock mutex will be exited so this must be the
3031  * last sodirect_t call to guarantee atomic access of *sodp.
3032  */
3033 void
3034 sodwakeup(sodirect_t *sodp)
3035 {
3036 	queue_t		*q = sodp->sod_q;
3037 	struct stdata	*stp = (struct stdata *)q->q_ptr;
3038 
3039 	ASSERT(MUTEX_HELD(sodp->sod_lock));
3040 
3041 	if (stp->sd_flag & RSLEEP) {
3042 		stp->sd_flag &= ~RSLEEP;
3043 		cv_broadcast(&q->q_wait);
3044 	}
3045 
3046 	if (stp->sd_rput_opt & SR_POLLIN) {
3047 		stp->sd_rput_opt &= ~SR_POLLIN;
3048 		mutex_exit(sodp->sod_lock);
3049 		pollwakeup(&stp->sd_pollist, POLLIN | POLLRDNORM);
3050 	} else
3051 		mutex_exit(sodp->sod_lock);
3052 }
3053